copies: clarify which case some conditional are handling...
marmoute - r47127:154ded91 default
# coding: utf8
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import os

from .i18n import _
from .node import (
    nullid,
    nullrev,
)

from . import (
    match as matchmod,
    pathutil,
    policy,
    pycompat,
    util,
)


from .utils import stringutil

from .revlogutils import (
    flagutil,
    sidedata as sidedatamod,
)

rustmod = policy.importrust("copy_tracing")

def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 6
    # is where a file existed in 'src' and remained under that name in 'mid'
    # and then was renamed between 'mid' and 'dst'.
    #
    # case  src  mid  dst  result
    #   1    x    y    -    -
    #   2    x    y    y    x->y
    #   3    x    y    x    -
    #   4    x    y    z    x->z
    #   5    -    x    y    -
    #   6    x    x    y    x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().
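    #
    # A concrete (hypothetical) reading of case 4: 'x' was renamed to 'y'
    # between 'src' and 'mid', then 'y' was renamed to 'z' between 'mid'
    # and 'dst'; chaining gives x->z, which survives the filtering below
    # because 'x' exists in 'src' and 'z' exists in 'dst'.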

    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:  # case 5
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:  # case 1
            del t[k]


def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    result = prefix.copy()
    for key, value in pycompat.iteritems(suffix):
        result[key] = prefix.get(value, value)
    return result


def _tracefile(fctx, am, basemf):
    """return the path of the ancestor of fctx present in the ancestor
    manifest am

    Note: we used to try and stop after a given limit; however, checking if
    that limit was reached turned out to be very expensive, so we are better
    off disabling that feature."""

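    # walk the file's ancestry (following renames) until we reach a file
    # node that matches what the ancestor manifest 'am' (or the base
    # manifest 'basemf') records for that path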
    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path


def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c


def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)

def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
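    # besides the two changeset-centric values tested below, this config
    # is assumed to also accept b'filelog-only', which keeps the
    # filelog-based algorithm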
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource


def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
            if debug:
                dbg(
                    b'debug.copies: time: %f seconds\n'
                    % (util.timer() - start)
                )
    return cm

def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            else:
                raw = None
            return (p1, p2, raw)

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            else:
                changes = None
            return (p1, p2, changes)

    return revinfo

def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
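        # assuming revision numbers grow from ancestor to descendant (as
        # in a revlog), anc can only be an ancestor of desc if anc <= desc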
        if anc > desc:
            return False
        elif anc == desc:
            return True
        key = (anc, desc)
        ret = cache.get(key)
        if ret is None:
            ret = cache[key] = is_ancestor(anc, desc)
        return ret

    return _is_ancestor

def _changesetforwardcopies(a, b, match):
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track rename from "A" to "B", we need to gather all parent → children
    # edges that are contained in `::B` but not in `::A`.
    #
    # To do so, we need to gather all revisions exclusive [1] to "B" (i.e.
    # `::b - ::a`) and also all the "root points", i.e. the parents of the
    # exclusive set that belong to ::a. These are exactly all the revisions
    # needed to express the parent → children edges we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, i.e.
    # excluding paths that lead to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without
    # this special case.
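    #
    # Illustrative example (hypothetical revision numbers): with a == r2
    # and b == r5 in
    #
    #   r0 -- r1 -- r2 (a)
    #          \
    #           r3 -- r4 -- r5 (b)
    #
    # the exclusive set `::b - ::a` is {r3, r4, r5} and r1 is the only
    # "root point".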

    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deals with the special case mentioned in the [1] footnote. We
        # must filter out revisions that lead to non-common graphroots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data
        # for the parents. To know when the parents' data is no longer
        # necessary, we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only
        # contains revisions that will be relevant for an edge of the graph.
        # So if a child has a parent not in `children_count`, that edge
        # should not be processed.
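        #
        # (e.g. with a linear chain of revisions, each one ends up with a
        # count of 1, except b.rev() itself, which keeps 0)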
        children_count = dict((r, 0) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs, children_count, b.rev(), revinfo, match, isancestor
        )
    else:
        # When not using side-data, we will process the edges "from" the
        # parent. So we need a full mapping of the parent -> children
        # relation.
        children = dict((r, []) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )

def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of `revs`

    revs: sorted iterable of revisions to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in `revs`)
    revinfo(rev): a function that returns (p1, p2, changes) for a revision
    match: a matcher
    isancestor: a callable `(anc_rev, desc_rev) -> bool`

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, isancestor
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent → child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    copies = all_copies.get(parent_rev, None)
                else:
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    newcopies = copies = {}
                elif remaining_children:
                    newcopies = copies.copy()
                else:
                    newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting potential
                                    # other branches. When there are no other
                                    # branches, this could be avoided.
                                    newcopies = copies.copy()
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent
                # (for that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                else:
                    # we are the second parent to work on this child, we need
                    # to merge our work with the other parent's.
                    #
                    # In case of conflict, parent 1 takes precedence over
                    # parent 2. This is an arbitrary choice made anew when
                    # implementing changeset-based copies. It was made without
                    # regard to potential filelog-related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies, current_copies, isancestor, changes
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source} mapping
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies

# constant to decide which side to pick with _merge_copies_dict
PICK_MINOR = 0
PICK_MAJOR = 1
PICK_EITHER = 2

def _merge_copies_dict(minor, major, isancestor, changes):
    """merge two copies mappings together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestor(low_rev, high_rev)`: callable returning True if `low_rev`
      is an ancestor of `high_rev`,

    - `changes`: a ChangingFiles object describing the changes of the
      current revision (possibly None),

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        else:
            pick = _compare_values(changes, isancestor, dest, other, value)
            if pick == PICK_MAJOR:
                minor[dest] = value
    return minor

def _compare_values(changes, isancestor, dest, minor, major):
    """compare two values within a _merge_copies_dict loop iteration"""
    major_tt, major_value = major
    minor_tt, minor_value = minor

    # get some simple cases out of the way first:
    if major_tt == minor_tt:
        # if it comes from the same revision it must be the same value
        assert major_value == minor_value
        return PICK_EITHER
    elif major[1] == minor[1]:
        return PICK_EITHER

    # actual merging needed: content from "major" wins, unless it is older
    # than the branch point or there is a merge
    elif changes is not None and major[1] is None and dest in changes.salvaged:
        return PICK_MINOR
    elif changes is not None and minor[1] is None and dest in changes.salvaged:
        return PICK_MAJOR
    elif changes is not None and dest in changes.merged:
        return PICK_MAJOR
    elif not isancestor(major_tt, minor_tt):
        if major[1] is not None:
            return PICK_MAJOR
        elif isancestor(minor_tt, major_tt):
            return PICK_MAJOR
    return PICK_MINOR

def _revinfo_getter_extra(repo):
    """return a function that returns the following data given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            nb_parents = 0
            for n in parents:
                if n != nullid:
                    nb_parents += 1
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo

def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information"""
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. When there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset-based copies. It was made without regard to
                # potential filelog-related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                    all_copies[c] = newcopies

    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
    """version of `_merge_copies_dict` that works with the Google
    specific "extra" based storage for copy information"""
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        else:
            new_tt = value[0]
            other_tt = other[0]
            if value[1] == other[1]:
                continue
            # content from "major" wins, unless it is older
            # than the branch point or there is a merge
            if (
                new_tt == other_tt
                or not isancestor(new_tt, other_tt)
                or ismerged(dest)
            ):
                minor[dest] = value

def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies

def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies

def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between contexts c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc. to
    merge files that were moved/copied in one merge parent and modified in
    another. For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    | /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" is an instance of branch_copies (one is returned for
    each side).

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we keep copytracing enabled in such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)

def _isfullcopytraceable(repo, c1, base):
    """Checks whether base, source and destination are all non-public
    branches; if so, we use the full copytrace algorithm for increased
    capabilities, since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit on
    the number of changesets from c1 to base; if there are more changesets
    than the limit, the full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False

def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        pass
    elif mb[src] != m2[src] and not _related(c2[src], base[src]):
        return
    elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src

class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source, present in one context but not the other,
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        self.copy = {} if copy is None else copy
        self.renamedelete = {} if renamedelete is None else renamedelete
        self.dirmove = {} if dirmove is None else dirmove
        self.movewithdir = {} if movewithdir is None else movewithdir

    def __repr__(self):
        return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )

def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a', the common destination 'c' is not
                # treated as divergent.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    cache = []

    def _get_addedfiles(idx):
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b" unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
            )

        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge

def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared
    to ctx)
    """
    # generate a directory move map
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del invalid

    if not dirmove:
        return {}, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfilesfn():
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                        break

    return dirmove, movewithdir

def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of the following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If a merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very
    large in number and that will make the algorithm slow. The number of
    possible candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates, which can slow down the
            # heuristics; a config value limits the number of candidate
            # moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}

def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        return False

def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    parent = wctx.p1()
    _filter(parent, wctx, new_copies)
    # extra filtering to drop copy information for files that existed before
    # the graft (otherwise we would create a merge filelog for a non-merge
    # commit)
    for dest, __ in list(new_copies.items()):
        if dest in parent:
            del new_copies[dest]
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)