copies: deal with the "same revision" special case earlier...
marmoute
r46799:ee63c117 default
@@ -1,1224 +1,1228
1 1 # coding: utf8
2 2 # copies.py - copy detection for Mercurial
3 3 #
4 4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import collections
12 12 import os
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 nullid,
17 17 nullrev,
18 18 )
19 19
20 20 from . import (
21 21 match as matchmod,
22 22 pathutil,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 )
27 27
28 28
29 29 from .utils import stringutil
30 30
31 31 from .revlogutils import (
32 32 flagutil,
33 33 sidedata as sidedatamod,
34 34 )
35 35
36 36 rustmod = policy.importrust("copy_tracing")
37 37
38 38
39 39 def _filter(src, dst, t):
40 40 """filters out invalid copies after chaining"""
41 41
42 42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 44 # in the following table (not including trivial cases). For example, case 2
45 45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 46 # then was renamed between 'mid' and 'dst'.
47 47 #
48 48 # case src mid dst result
49 49 # 1 x y - -
50 50 # 2 x y y x->y
51 51 # 3 x y x -
52 52 # 4 x y z x->z
53 53 # 5 - x y -
54 54 # 6 x x y x->y
55 55 #
56 56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 58 # between 5 and 6, so it includes all cases in its result.
59 59 # Cases 1, 3, and 5 are then removed by _filter().
60 60
61 61 for k, v in list(t.items()):
62 62 # remove copies from files that didn't exist
63 63 if v not in src:
64 64 del t[k]
65 65 # remove criss-crossed copies
66 66 elif k in src and v in dst:
67 67 del t[k]
68 68 # remove copies to files that were then removed
69 69 elif k not in dst:
70 70 del t[k]
71 71
72 72
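
For illustration, here is a minimal sketch of the filtering above applied to the case table, reusing the function itself; plain strings and sets stand in for Mercurial's bytes paths and contexts:

    src, dst = {"x"}, {"y"}          # files present on each side
    t = {"y": "x", "z": "x"}         # chained copies; "z" is gone from dst
    _filter(src, dst, t)
    assert t == {"y": "x"}           # case 2 survives, the case 1 entry is dropped
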
73 73 def _chain(prefix, suffix):
74 74 """chain two sets of copies 'prefix' and 'suffix'"""
75 75 result = prefix.copy()
76 76 for key, value in pycompat.iteritems(suffix):
77 77 result[key] = prefix.get(value, value)
78 78 return result
79 79
80 80
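
A minimal sketch of how the chaining behaves (hypothetical names; the stale intermediate entry left behind is exactly the kind of thing _filter() later removes when "b" no longer exists in dst):

    prefix = {"b": "a"}    # 'a' was renamed to 'b' between src and mid
    suffix = {"c": "b"}    # 'b' was renamed to 'c' between mid and dst
    # result[key] = prefix.get(value, value): 'c' traces through 'b' to 'a'
    assert _chain(prefix, suffix) == {"b": "a", "c": "a"}
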
81 81 def _tracefile(fctx, am, basemf):
82 82 """return file context that is the ancestor of fctx present in ancestor
83 83 manifest am
84 84
85 85 Note: we used to try to stop after a given limit, however checking if that
86 86 limit is reached turned out to be very expensive. We are better off
87 87 disabling that feature."""
88 88
89 89 for f in fctx.ancestors():
90 90 path = f.path()
91 91 if am.get(path, None) == f.filenode():
92 92 return path
93 93 if basemf and basemf.get(path, None) == f.filenode():
94 94 return path
95 95
96 96
97 97 def _dirstatecopies(repo, match=None):
98 98 ds = repo.dirstate
99 99 c = ds.copies().copy()
100 100 for k in list(c):
101 101 if ds[k] not in b'anm' or (match and not match(k)):
102 102 del c[k]
103 103 return c
104 104
105 105
106 106 def _computeforwardmissing(a, b, match=None):
107 107 """Computes which files are in b but not a.
108 108 This is its own function so extensions can easily wrap this call to see what
109 109 files _forwardcopies is about to process.
110 110 """
111 111 ma = a.manifest()
112 112 mb = b.manifest()
113 113 return mb.filesnotin(ma, match=match)
114 114
115 115
116 116 def usechangesetcentricalgo(repo):
117 117 """Checks if we should use changeset-centric copy algorithms"""
118 118 if repo.filecopiesmode == b'changeset-sidedata':
119 119 return True
120 120 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
121 121 changesetsource = (b'changeset-only', b'compatibility')
122 122 return readfrom in changesetsource
123 123
124 124
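
The second half of this check reads an ordinary hgrc knob (the `changeset-sidedata` mode tested first is a repository format property rather than a config option); for example:

    [experimental]
    copies.read-from = changeset-only
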
125 125 def _committedforwardcopies(a, b, base, match):
126 126 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
127 127 # files might have to be traced back to the fctx parent of the last
128 128 # one-side-only changeset, but not further back than that
129 129 repo = a._repo
130 130
131 131 if usechangesetcentricalgo(repo):
132 132 return _changesetforwardcopies(a, b, match)
133 133
134 134 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
135 135 dbg = repo.ui.debug
136 136 if debug:
137 137 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
138 138 am = a.manifest()
139 139 basemf = None if base is None else base.manifest()
140 140
141 141 # find where new files came from
142 142 # we currently don't try to find where old files went, too expensive
143 143 # this means we can miss a case like 'hg rm b; hg cp a b'
144 144 cm = {}
145 145
146 146 # Computing the forward missing is quite expensive on large manifests, since
147 147 # it compares the entire manifests. We can optimize it in the common use
148 148 # case of computing what copies are in a commit versus its parent (like
149 149 # during a rebase or histedit). Note, we exclude merge commits from this
150 150 # optimization, since the ctx.files() for a merge commit is not correct for
151 151 # this comparison.
152 152 forwardmissingmatch = match
153 153 if b.p1() == a and b.p2().node() == nullid:
154 154 filesmatcher = matchmod.exact(b.files())
155 155 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
156 156 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
157 157
158 158 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
159 159
160 160 if debug:
161 161 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
162 162
163 163 for f in sorted(missing):
164 164 if debug:
165 165 dbg(b'debug.copies: tracing file: %s\n' % f)
166 166 fctx = b[f]
167 167 fctx._ancestrycontext = ancestrycontext
168 168
169 169 if debug:
170 170 start = util.timer()
171 171 opath = _tracefile(fctx, am, basemf)
172 172 if opath:
173 173 if debug:
174 174 dbg(b'debug.copies: rename of: %s\n' % opath)
175 175 cm[f] = opath
176 176 if debug:
177 177 dbg(
178 178 b'debug.copies: time: %f seconds\n'
179 179 % (util.timer() - start)
180 180 )
181 181 return cm
182 182
183 183
184 184 def _revinfo_getter(repo, match):
185 185 """returns a function that returns the following data given a <rev>"
186 186
187 187 * p1: revision number of first parent
188 188 * p2: revision number of second parent
189 189 * changes: a ChangingFiles object
190 190 """
191 191 cl = repo.changelog
192 192 parents = cl.parentrevs
193 193 flags = cl.flags
194 194
195 195 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
196 196
197 197 changelogrevision = cl.changelogrevision
198 198
199 199 if rustmod is not None:
200 200
201 201 def revinfo(rev):
202 202 p1, p2 = parents(rev)
203 203 if flags(rev) & HASCOPIESINFO:
204 204 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
205 205 else:
206 206 raw = None
207 207 return (p1, p2, raw)
208 208
209 209 else:
210 210
211 211 def revinfo(rev):
212 212 p1, p2 = parents(rev)
213 213 if flags(rev) & HASCOPIESINFO:
214 214 changes = changelogrevision(rev).changes
215 215 else:
216 216 changes = None
217 217 return (p1, p2, changes)
218 218
219 219 return revinfo
220 220
221 221
222 222 def cached_is_ancestor(is_ancestor):
223 223 """return a cached version of is_ancestor"""
224 224 cache = {}
225 225
226 226 def _is_ancestor(anc, desc):
227 227 if anc > desc:
228 228 return False
229 229 elif anc == desc:
230 230 return True
231 231 key = (anc, desc)
232 232 ret = cache.get(key)
233 233 if ret is None:
234 234 ret = cache[key] = is_ancestor(anc, desc)
235 235 return ret
236 236
237 237 return _is_ancestor
238 238
239 239
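
A small sketch of the memoization effect, reusing the function above with a toy callback (revision numbers only grow along ancestry, which is why `anc > desc` can be rejected before the cache is even consulted):

    calls = []

    def slow_is_ancestor(anc, desc):
        calls.append((anc, desc))   # stands in for an expensive graph walk
        return True

    cached = cached_is_ancestor(slow_is_ancestor)
    assert cached(2, 5) and cached(2, 5)   # second call answered from cache
    assert len(calls) == 1
    assert cached(7, 3) is False           # rev 7 cannot precede rev 3
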
240 240 def _changesetforwardcopies(a, b, match):
241 241 if a.rev() in (nullrev, b.rev()):
242 242 return {}
243 243
244 244 repo = a.repo().unfiltered()
245 245 children = {}
246 246
247 247 cl = repo.changelog
248 248 isancestor = cl.isancestorrev
249 249
250 250 # To track a rename from "A" to "B", we need to gather all parent → children
251 251 # edges that are contained in `::B` but not in `::A`.
252 252 #
253 253 #
254 254 # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
255 255 # ::a`) and also all the "root points", ie the parents of the exclusive set
256 256 # that belong to ::a. These are exactly all the revisions needed to express
257 257 # the parent → children edges we need to combine.
258 258 #
259 259 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
260 260 # excluding paths that lead to roots that are not ancestors of `a`. We
261 261 # keep this out of the explanation because it is hard enough without this special case.
262 262
263 263 parents = cl._uncheckedparentrevs
264 264 graph_roots = (nullrev, nullrev)
265 265
266 266 ancestors = cl.ancestors([a.rev()], inclusive=True)
267 267 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
268 268 roots = set()
269 269 has_graph_roots = False
270 270
271 271 # iterate over `only(B, A)`
272 272 for r in revs:
273 273 ps = parents(r)
274 274 if ps == graph_roots:
275 275 has_graph_roots = True
276 276 else:
277 277 p1, p2 = ps
278 278
279 279 # find all the "root points" (see larger comment above)
280 280 if p1 != nullrev and p1 in ancestors:
281 281 roots.add(p1)
282 282 if p2 != nullrev and p2 in ancestors:
283 283 roots.add(p2)
284 284 if not roots:
285 285 # no common revision to track copies from
286 286 return {}
287 287 if has_graph_roots:
288 288 # this deals with the special case mentioned in the [1] footnote. We
289 289 # must filter out revisions that lead to non-common graph roots.
290 290 roots = list(roots)
291 291 m = min(roots)
292 292 h = [b.rev()]
293 293 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
294 294 roots_to_head = set(roots_to_head)
295 295 revs = [r for r in revs if r in roots_to_head]
296 296
297 297 if repo.filecopiesmode == b'changeset-sidedata':
298 298 # When using side-data, we will process the edges "from" the children.
299 299 # We iterate over the children, gathering previously collected data for
300 300 # the parents. To know when the parents' data is no longer necessary, we
301 301 # keep a counter of how many children each revision has.
302 302 #
303 303 # An interesting property of `children_count` is that it only contains
304 304 # revisions that will be relevant for an edge of the graph. So if a
305 305 # child has a parent not in `children_count`, that edge should not be
306 306 # processed.
307 307 children_count = dict((r, 0) for r in roots)
308 308 for r in revs:
309 309 for p in cl.parentrevs(r):
310 310 if p == nullrev:
311 311 continue
312 312 children_count[r] = 0
313 313 if p in children_count:
314 314 children_count[p] += 1
315 315 revinfo = _revinfo_getter(repo, match)
316 316 return _combine_changeset_copies(
317 317 revs, children_count, b.rev(), revinfo, match, isancestor
318 318 )
319 319 else:
320 320 # When not using side-data, we will process the edges "from" the parents,
321 321 # so we need a full mapping of the parent -> children relation.
322 322 children = dict((r, []) for r in roots)
323 323 for r in revs:
324 324 for p in cl.parentrevs(r):
325 325 if p == nullrev:
326 326 continue
327 327 children[r] = []
328 328 if p in children:
329 329 children[p].append(r)
330 330 x = revs.pop()
331 331 assert x == b.rev()
332 332 revs.extend(roots)
333 333 revs.sort()
334 334
335 335 revinfo = _revinfo_getter_extra(repo)
336 336 return _combine_changeset_copies_extra(
337 337 revs, children, b.rev(), revinfo, match, isancestor
338 338 )
339 339
340 340
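
In revset terms, the selection assembled above is roughly the following sketch (hypothetical `a_rev`/`b_rev` variables; the real code uses cl.findmissingrevs() and cl.ancestors() instead of revset parsing, for speed):

    # changesets exclusive to B, ie `::b - ::a`
    exclusive = repo.revs(b'only(%d, %d)', b_rev, a_rev)
    # the "root points": parents of the exclusive set that belong to ::a
    root_points = repo.revs(b'parents(%ld) and ancestors(%d)', exclusive, a_rev)
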
341 341 def _combine_changeset_copies(
342 342 revs, children_count, targetrev, revinfo, match, isancestor
343 343 ):
344 344 """combine the copies information for each item of iterrevs
345 345
346 346 revs: sorted iterable of revisions to visit
347 347 children_count: a {parent: <number-of-relevant-children>} mapping.
348 348 targetrev: the final copies destination revision (not in iterrevs)
349 349 revinfo(rev): a function that returns (p1, p2, changes) for a revision
350 350 match: a matcher
351 351
352 352 It returns the aggregated copies information for `targetrev`.
353 353 """
354 354
355 355 alwaysmatch = match.always()
356 356
357 357 if rustmod is not None:
358 358 final_copies = rustmod.combine_changeset_copies(
359 359 list(revs), children_count, targetrev, revinfo, isancestor
360 360 )
361 361 else:
362 362 isancestor = cached_is_ancestor(isancestor)
363 363
364 364 all_copies = {}
365 365 # iterate over the "children" side of all the copy tracing "edges"
366 366 for current_rev in revs:
367 367 p1, p2, changes = revinfo(current_rev)
368 368 current_copies = None
369 369 # iterate over all parents to chain the existing data with the
370 370 # data from the parent → child edge.
371 371 for parent, parent_rev in ((1, p1), (2, p2)):
372 372 if parent_rev == nullrev:
373 373 continue
374 374 remaining_children = children_count.get(parent_rev)
375 375 if remaining_children is None:
376 376 continue
377 377 remaining_children -= 1
378 378 children_count[parent_rev] = remaining_children
379 379 if remaining_children:
380 380 copies = all_copies.get(parent_rev, None)
381 381 else:
382 382 copies = all_copies.pop(parent_rev, None)
383 383
384 384 if copies is None:
385 385 # this is a root
386 386 copies = {}
387 387
388 388 newcopies = copies
389 389 # chain the data in the edge with the existing data
390 390 if changes is not None:
391 391 childcopies = {}
392 392 if parent == 1:
393 393 childcopies = changes.copied_from_p1
394 394 elif parent == 2:
395 395 childcopies = changes.copied_from_p2
396 396
397 397 if childcopies:
398 398 newcopies = copies.copy()
399 399 for dest, source in pycompat.iteritems(childcopies):
400 400 prev = copies.get(source)
401 401 if prev is not None and prev[1] is not None:
402 402 source = prev[1]
403 403 newcopies[dest] = (current_rev, source)
404 404 assert newcopies is not copies
405 405 if changes.removed:
406 406 if newcopies is copies:
407 407 newcopies = copies.copy()
408 408 for f in changes.removed:
409 409 if f in newcopies:
410 410 if newcopies is copies:
411 411 # copy on write to avoid affecting potential other
412 412 # branches. When there are no other branches, this
413 413 # could be avoided.
414 414 newcopies = copies.copy()
415 415 newcopies[f] = (current_rev, None)
416 416 # check potential need to combine the data from another parent (for
417 417 # that child). See comment below for details.
418 418 if current_copies is None:
419 419 current_copies = newcopies
420 420 elif current_copies is newcopies:
421 421 # nothing to merge:
422 422 pass
423 423 else:
424 424 # we are the second parent to work on c, we need to merge our
425 425 # work with the other.
426 426 #
427 427 # In case of conflict, parent 1 takes precedence over parent 2.
428 428 # This is an arbitrary choice made anew when implementing
429 429 # changeset-based copies. It was made without regard to
430 430 # potential filelog-related behavior.
431 431 assert parent == 2
432 432 current_copies = _merge_copies_dict(
433 433 newcopies, current_copies, isancestor, changes
434 434 )
435 435 all_copies[current_rev] = current_copies
436 436
437 437 # filter out internal details and return a {dest: source} mapping
438 438 final_copies = {}
439 439 for dest, (tt, source) in all_copies[targetrev].items():
440 440 if source is not None:
441 441 final_copies[dest] = source
442 442 if not alwaysmatch:
443 443 for filename in list(final_copies.keys()):
444 444 if not match(filename):
445 445 del final_copies[filename]
446 446 return final_copies
447 447
448 448
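
Internally, every entry of `all_copies` maps a destination to a `(rev, source)` pair so that conflicts can later be arbitrated by revision; only the final step flattens it. A toy illustration with hypothetical values:

    copies_at_target = {
        b"new.txt": (42, b"old.txt"),   # copy decided at revision 42
        b"gone.txt": (42, None),        # copy information killed by a removal
    }
    final = {
        dest: source
        for dest, (_rev, source) in copies_at_target.items()
        if source is not None
    }
    assert final == {b"new.txt": b"old.txt"}
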
449 449 # constant to decide which side to pick with _merge_copies_dict
450 450 PICK_MINOR = 0
451 451 PICK_MAJOR = 1
452 452 PICK_EITHER = 2
453 453
454 454
455 455 def _merge_copies_dict(minor, major, isancestor, changes):
456 456 """merge two copies-mapping together, minor and major
457 457
458 458 In case of conflict, value from "major" will be picked.
459 459
460 460 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
461 461 ancestors of `high_rev`,
462 462
463 463 - `ismerged(path)`: callable return True if `path` have been merged in the
464 464 current revision,
465 465
466 466 return the resulting dict (in practice, the "minor" object, updated)
467 467 """
468 468 for dest, value in major.items():
469 469 other = minor.get(dest)
470 470 if other is None:
471 471 minor[dest] = value
472 472 else:
473 473 pick = _compare_values(changes, isancestor, dest, other, value)
474 474 if pick == PICK_MAJOR:
475 475 minor[dest] = value
476 476 return minor
477 477
478 478
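
A standalone sketch of the merge loop above, with the `_compare_values()` arbitration replaced by a hypothetical callback (the real decision logic follows in the next function):

    def merge_copies(minor, major, pick_major):
        for dest, value in major.items():
            other = minor.get(dest)
            if other is None:
                minor[dest] = value     # no conflict: take major's entry
            elif pick_major(dest, other, value):
                minor[dest] = value     # conflict resolved in major's favor
        return minor

    minor = {b"a": (1, b"x")}
    major = {b"a": (2, b"y"), b"b": (2, b"z")}
    assert merge_copies(minor, major, lambda d, o, v: True) == {
        b"a": (2, b"y"),
        b"b": (2, b"z"),
    }
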
479 479 def _compare_values(changes, isancestor, dest, minor, major):
480 480 """compare two value within a _merge_copies_dict loop iteration"""
481 major_tt = major[0]
482 minor_tt = minor[0]
481 major_tt, major_value = major
482 minor_tt, minor_value = minor
483 483
484 if major[1] == minor[1]:
484 # get some simple cases out of the way first:
485 if major_tt == minor_tt:
486 # if it comes from the same revision it must be the same value
487 assert major_value == minor_value
485 488 return PICK_EITHER
486 # content from "major" wins, unless it is older
487 # than the branch point or there is a merge
488 if major_tt == minor_tt:
489 return PICK_MAJOR
489 elif major[1] == minor[1]:
490 return PICK_EITHER
491
492 # actual merging needed: content from "major" wins, unless it is older than
493 # the branch point or there is a merge
490 494 elif changes is not None and major[1] is None and dest in changes.salvaged:
491 495 return PICK_MINOR
492 496 elif changes is not None and minor[1] is None and dest in changes.salvaged:
493 497 return PICK_MAJOR
494 498 elif changes is not None and dest in changes.merged:
495 499 return PICK_MAJOR
496 500 elif not isancestor(major_tt, minor_tt):
497 501 if major[1] is not None:
498 502 return PICK_MAJOR
499 503 elif isancestor(minor_tt, major_tt):
500 504 return PICK_MAJOR
501 505 return PICK_MINOR
502 506
503 507
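
The early exit added by this changeset can be exercised directly: when both sides carry the same deciding revision, the values must be identical and either side may be picked, without ever consulting `isancestor` (hypothetical arguments):

    pick = _compare_values(
        changes=None,
        isancestor=lambda anc, desc: False,   # never reached on this path
        dest=b"b.txt",
        minor=(3, b"a.txt"),
        major=(3, b"a.txt"),
    )
    assert pick == PICK_EITHER
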
504 508 def _revinfo_getter_extra(repo):
505 509 """return a function that return multiple data given a <rev>"i
506 510
507 511 * p1: revision number of first parent
508 512 * p2: revision number of second parent
509 513 * p1copies: mapping of copies from p1
510 514 * p2copies: mapping of copies from p2
511 515 * removed: a list of removed files
512 516 * ismerged: a callback to know if file was merged in that revision
513 517 """
514 518 cl = repo.changelog
515 519 parents = cl.parentrevs
516 520
517 521 def get_ismerged(rev):
518 522 ctx = repo[rev]
519 523
520 524 def ismerged(path):
521 525 if path not in ctx.files():
522 526 return False
523 527 fctx = ctx[path]
524 528 parents = fctx._filelog.parents(fctx._filenode)
525 529 nb_parents = 0
526 530 for n in parents:
527 531 if n != nullid:
528 532 nb_parents += 1
529 533 return nb_parents >= 2
530 534
531 535 return ismerged
532 536
533 537 def revinfo(rev):
534 538 p1, p2 = parents(rev)
535 539 ctx = repo[rev]
536 540 p1copies, p2copies = ctx._copies
537 541 removed = ctx.filesremoved()
538 542 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
539 543
540 544 return revinfo
541 545
542 546
543 547 def _combine_changeset_copies_extra(
544 548 revs, children, targetrev, revinfo, match, isancestor
545 549 ):
546 550 """version of `_combine_changeset_copies` that works with the Google
547 551 specific "extra" based storage for copy information"""
548 552 all_copies = {}
549 553 alwaysmatch = match.always()
550 554 for r in revs:
551 555 copies = all_copies.pop(r, None)
552 556 if copies is None:
553 557 # this is a root
554 558 copies = {}
555 559 for i, c in enumerate(children[r]):
556 560 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
557 561 if r == p1:
558 562 parent = 1
559 563 childcopies = p1copies
560 564 else:
561 565 assert r == p2
562 566 parent = 2
563 567 childcopies = p2copies
564 568 if not alwaysmatch:
565 569 childcopies = {
566 570 dst: src for dst, src in childcopies.items() if match(dst)
567 571 }
568 572 newcopies = copies
569 573 if childcopies:
570 574 newcopies = copies.copy()
571 575 for dest, source in pycompat.iteritems(childcopies):
572 576 prev = copies.get(source)
573 577 if prev is not None and prev[1] is not None:
574 578 source = prev[1]
575 579 newcopies[dest] = (c, source)
576 580 assert newcopies is not copies
577 581 for f in removed:
578 582 if f in newcopies:
579 583 if newcopies is copies:
580 584 # copy on write to avoid affecting potential other
581 585 # branches. When there are no other branches, this
582 586 # could be avoided.
583 587 newcopies = copies.copy()
584 588 newcopies[f] = (c, None)
585 589 othercopies = all_copies.get(c)
586 590 if othercopies is None:
587 591 all_copies[c] = newcopies
588 592 else:
589 593 # we are the second parent to work on c, we need to merge our
590 594 # work with the other.
591 595 #
592 596 # In case of conflict, parent 1 takes precedence over parent 2.
593 597 # This is an arbitrary choice made anew when implementing
594 598 # changeset-based copies. It was made without regard to
595 599 # potential filelog-related behavior.
596 600 if parent == 1:
597 601 _merge_copies_dict_extra(
598 602 othercopies, newcopies, isancestor, ismerged
599 603 )
600 604 else:
601 605 _merge_copies_dict_extra(
602 606 newcopies, othercopies, isancestor, ismerged
603 607 )
604 608 all_copies[c] = newcopies
605 609
606 610 final_copies = {}
607 611 for dest, (tt, source) in all_copies[targetrev].items():
608 612 if source is not None:
609 613 final_copies[dest] = source
610 614 return final_copies
611 615
612 616
613 617 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
614 618 """version of `_merge_copies_dict` that works with the Google
615 619 specific "extra" based storage for copy information"""
616 620 for dest, value in major.items():
617 621 other = minor.get(dest)
618 622 if other is None:
619 623 minor[dest] = value
620 624 else:
621 625 new_tt = value[0]
622 626 other_tt = other[0]
623 627 if value[1] == other[1]:
624 628 continue
625 629 # content from "major" wins, unless it is older
626 630 # than the branch point or there is a merge
627 631 if (
628 632 new_tt == other_tt
629 633 or not isancestor(new_tt, other_tt)
630 634 or ismerged(dest)
631 635 ):
632 636 minor[dest] = value
633 637
634 638
635 639 def _forwardcopies(a, b, base=None, match=None):
636 640 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
637 641
638 642 if base is None:
639 643 base = a
640 644 match = a.repo().narrowmatch(match)
641 645 # check for working copy
642 646 if b.rev() is None:
643 647 cm = _committedforwardcopies(a, b.p1(), base, match)
644 648 # combine copies from dirstate if necessary
645 649 copies = _chain(cm, _dirstatecopies(b._repo, match))
646 650 else:
647 651 copies = _committedforwardcopies(a, b, base, match)
648 652 return copies
649 653
650 654
651 655 def _backwardrenames(a, b, match):
652 656 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
653 657 return {}
654 658
655 659 # Even though we're not taking copies into account, 1:n rename situations
656 660 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
657 661 # arbitrarily pick one of the renames.
658 662 # We don't want to pass in "match" here, since that would filter
659 663 # the destination by it. Since we're reversing the copies, we want
660 664 # to filter the source instead.
661 665 f = _forwardcopies(b, a)
662 666 r = {}
663 667 for k, v in sorted(pycompat.iteritems(f)):
664 668 if match and not match(v):
665 669 continue
666 670 # remove copies
667 671 if v in a:
668 672 continue
669 673 r[v] = k
670 674 return r
671 675
672 676
673 677 def pathcopies(x, y, match=None):
674 678 """find {dst@y: src@x} copy mapping for directed compare"""
675 679 repo = x._repo
676 680 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
677 681 if debug:
678 682 repo.ui.debug(
679 683 b'debug.copies: searching copies from %s to %s\n' % (x, y)
680 684 )
681 685 if x == y or not x or not y:
682 686 return {}
683 687 if y.rev() is None and x == y.p1():
684 688 if debug:
685 689 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
686 690 # short-circuit to avoid issues with merge states
687 691 return _dirstatecopies(repo, match)
688 692 a = y.ancestor(x)
689 693 if a == x:
690 694 if debug:
691 695 repo.ui.debug(b'debug.copies: search mode: forward\n')
692 696 copies = _forwardcopies(x, y, match=match)
693 697 elif a == y:
694 698 if debug:
695 699 repo.ui.debug(b'debug.copies: search mode: backward\n')
696 700 copies = _backwardrenames(x, y, match=match)
697 701 else:
698 702 if debug:
699 703 repo.ui.debug(b'debug.copies: search mode: combined\n')
700 704 base = None
701 705 if a.rev() != nullrev:
702 706 base = x
703 707 copies = _chain(
704 708 _backwardrenames(x, a, match=match),
705 709 _forwardcopies(a, y, base, match=match),
706 710 )
707 711 _filter(x, y, copies)
708 712 return copies
709 713
710 714
711 715 def mergecopies(repo, c1, c2, base):
712 716 """
713 717 Finds moves and copies between context c1 and c2 that are relevant for
714 718 merging. 'base' will be used as the merge base.
715 719
716 720 Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
717 721 files that were moved/copied in one merge parent and modified in another.
718 722 For example:
719 723
720 724 o ---> 4 another commit
721 725 |
722 726 | o ---> 3 commit that modifies a.txt
723 727 | /
724 728 o / ---> 2 commit that moves a.txt to b.txt
725 729 |/
726 730 o ---> 1 merge base
727 731
728 732 If we try to rebase revision 3 on revision 4, since there is no a.txt in
729 733 revision 4, and the user has copytrace disabled, we print the following
730 734 message:
731 735
732 736 ```other changed <file> which local deleted```
733 737
734 738 Returns a tuple where:
735 739
736 740 "branch_copies" an instance of branch_copies.
737 741
738 742 "diverge" is a mapping of source name -> list of destination names
739 743 for divergent renames.
740 744
741 745 This function calls different copytracing algorithms based on config.
742 746 """
743 747 # avoid silly behavior for update from empty dir
744 748 if not c1 or not c2 or c1 == c2:
745 749 return branch_copies(), branch_copies(), {}
746 750
747 751 narrowmatch = c1.repo().narrowmatch()
748 752
749 753 # avoid silly behavior for parent -> working dir
750 754 if c2.node() is None and c1.node() == repo.dirstate.p1():
751 755 return (
752 756 branch_copies(_dirstatecopies(repo, narrowmatch)),
753 757 branch_copies(),
754 758 {},
755 759 )
756 760
757 761 copytracing = repo.ui.config(b'experimental', b'copytrace')
758 762 if stringutil.parsebool(copytracing) is False:
759 763 # stringutil.parsebool() returns None when it is unable to parse the
760 764 # value, so copytracing stays enabled in such cases
761 765 return branch_copies(), branch_copies(), {}
762 766
763 767 if usechangesetcentricalgo(repo):
764 768 # The heuristics don't make sense when we need changeset-centric algos
765 769 return _fullcopytracing(repo, c1, c2, base)
766 770
767 771 # Copy trace disabling is explicitly below the node == p1 logic above
768 772 # because the logic above is required for a simple copy to be kept across a
769 773 # rebase.
770 774 if copytracing == b'heuristics':
771 775 # Do full copytracing if only non-public revisions are involved as
772 776 # that will be fast enough and will also cover the copies which could
773 777 # be missed by heuristics
774 778 if _isfullcopytraceable(repo, c1, base):
775 779 return _fullcopytracing(repo, c1, c2, base)
776 780 return _heuristicscopytracing(repo, c1, c2, base)
777 781 else:
778 782 return _fullcopytracing(repo, c1, c2, base)
779 783
780 784
781 785 def _isfullcopytraceable(repo, c1, base):
782 786 """Checks that if base, source and destination are all no-public branches,
783 787 if yes let's use the full copytrace algorithm for increased capabilities
784 788 since it will be fast enough.
785 789
786 790 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
787 791 number of changesets from c1 to base such that if number of changesets are
788 792 more than the limit, full copytracing algorithm won't be used.
789 793 """
790 794 if c1.rev() is None:
791 795 c1 = c1.p1()
792 796 if c1.mutable() and base.mutable():
793 797 sourcecommitlimit = repo.ui.configint(
794 798 b'experimental', b'copytrace.sourcecommitlimit'
795 799 )
796 800 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
797 801 return commits < sourcecommitlimit
798 802 return False
799 803
800 804
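
Both knobs involved here are ordinary hgrc settings, e.g.:

    [experimental]
    copytrace = heuristics
    copytrace.sourcecommitlimit = 50    # cap on changesets between base and c1
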
801 805 def _checksinglesidecopies(
802 806 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
803 807 ):
804 808 if src not in m2:
805 809 # deleted on side 2
806 810 if src not in m1:
807 811 # renamed on side 1, deleted on side 2
808 812 renamedelete[src] = dsts1
809 813 elif src not in mb:
810 814 # Work around the "short-circuit to avoid issues with merge states"
811 815 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
812 816 # destination doesn't exist in y.
813 817 pass
814 818 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
815 819 return
816 820 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
817 821 # modified on side 2
818 822 for dst in dsts1:
819 823 copy[dst] = src
820 824
821 825
822 826 class branch_copies(object):
823 827 """Information about copies made on one side of a merge/graft.
824 828
825 829 "copy" is a mapping from destination name -> source name,
826 830 where source is in c1 and destination is in c2 or vice-versa.
827 831
828 832 "movewithdir" is a mapping from source name -> destination name,
829 833 where the file at source, present in one context but not the other,
830 834 needs to be moved to destination by the merge process, because the
831 835 other context moved the directory it is in.
832 836
833 837 "renamedelete" is a mapping of source name -> list of destination
834 838 names for files deleted in c1 that were renamed in c2 or vice-versa.
835 839
836 840 "dirmove" is a mapping of detected source dir -> destination dir renames.
837 841 This is needed for handling changes to new files previously grafted into
838 842 renamed directories.
839 843 """
840 844
841 845 def __init__(
842 846 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
843 847 ):
844 848 self.copy = {} if copy is None else copy
845 849 self.renamedelete = {} if renamedelete is None else renamedelete
846 850 self.dirmove = {} if dirmove is None else dirmove
847 851 self.movewithdir = {} if movewithdir is None else movewithdir
848 852
849 853 def __repr__(self):
850 854 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
851 855 self.copy,
852 856 self.renamedelete,
853 857 self.dirmove,
854 858 self.movewithdir,
855 859 )
856 860
857 861
858 862 def _fullcopytracing(repo, c1, c2, base):
859 863 """The full copytracing algorithm which finds all the new files that were
860 864 added from merge base up to the top commit and for each file it checks if
861 865 this file was copied from another file.
862 866
863 867 This is pretty slow when a lot of changesets are involved but will track all
864 868 the copies.
865 869 """
866 870 m1 = c1.manifest()
867 871 m2 = c2.manifest()
868 872 mb = base.manifest()
869 873
870 874 copies1 = pathcopies(base, c1)
871 875 copies2 = pathcopies(base, c2)
872 876
873 877 if not (copies1 or copies2):
874 878 return branch_copies(), branch_copies(), {}
875 879
876 880 inversecopies1 = {}
877 881 inversecopies2 = {}
878 882 for dst, src in copies1.items():
879 883 inversecopies1.setdefault(src, []).append(dst)
880 884 for dst, src in copies2.items():
881 885 inversecopies2.setdefault(src, []).append(dst)
882 886
883 887 copy1 = {}
884 888 copy2 = {}
885 889 diverge = {}
886 890 renamedelete1 = {}
887 891 renamedelete2 = {}
888 892 allsources = set(inversecopies1) | set(inversecopies2)
889 893 for src in allsources:
890 894 dsts1 = inversecopies1.get(src)
891 895 dsts2 = inversecopies2.get(src)
892 896 if dsts1 and dsts2:
893 897 # copied/renamed on both sides
894 898 if src not in m1 and src not in m2:
895 899 # renamed on both sides
896 900 dsts1 = set(dsts1)
897 901 dsts2 = set(dsts2)
898 902 # If there's some overlap in the rename destinations, we
899 903 # consider it not divergent. For example, if side 1 copies 'a'
900 904 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
901 905 # and 'd' and deletes 'a'.
902 906 if dsts1 & dsts2:
903 907 for dst in dsts1 & dsts2:
904 908 copy1[dst] = src
905 909 copy2[dst] = src
906 910 else:
907 911 diverge[src] = sorted(dsts1 | dsts2)
908 912 elif src in m1 and src in m2:
909 913 # copied on both sides
910 914 dsts1 = set(dsts1)
911 915 dsts2 = set(dsts2)
912 916 for dst in dsts1 & dsts2:
913 917 copy1[dst] = src
914 918 copy2[dst] = src
915 919 # TODO: Handle cases where it was renamed on one side and copied
916 920 # on the other side
917 921 elif dsts1:
918 922 # copied/renamed only on side 1
919 923 _checksinglesidecopies(
920 924 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
921 925 )
922 926 elif dsts2:
923 927 # copied/renamed only on side 2
924 928 _checksinglesidecopies(
925 929 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
926 930 )
927 931
928 932 # find interesting file sets from manifests
929 933 cache = []
930 934
931 935 def _get_addedfiles(idx):
932 936 if not cache:
933 937 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
934 938 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
935 939 u1 = sorted(addedinm1 - addedinm2)
936 940 u2 = sorted(addedinm2 - addedinm1)
937 941 cache.extend((u1, u2))
938 942 return cache[idx]
939 943
940 944 u1fn = lambda: _get_addedfiles(0)
941 945 u2fn = lambda: _get_addedfiles(1)
942 946 if repo.ui.debugflag:
943 947 u1 = u1fn()
944 948 u2 = u2fn()
945 949
946 950 header = b" unmatched files in %s"
947 951 if u1:
948 952 repo.ui.debug(
949 953 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
950 954 )
951 955 if u2:
952 956 repo.ui.debug(
953 957 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
954 958 )
955 959
956 960 renamedeleteset = set()
957 961 divergeset = set()
958 962 for dsts in diverge.values():
959 963 divergeset.update(dsts)
960 964 for dsts in renamedelete1.values():
961 965 renamedeleteset.update(dsts)
962 966 for dsts in renamedelete2.values():
963 967 renamedeleteset.update(dsts)
964 968
965 969 repo.ui.debug(
966 970 b" all copies found (* = to merge, ! = divergent, "
967 971 b"% = renamed and deleted):\n"
968 972 )
969 973 for side, copies in ((b"local", copies1), (b"remote", copies2)):
970 974 if not copies:
971 975 continue
972 976 repo.ui.debug(b" on %s side:\n" % side)
973 977 for f in sorted(copies):
974 978 note = b""
975 979 if f in copy1 or f in copy2:
976 980 note += b"*"
977 981 if f in divergeset:
978 982 note += b"!"
979 983 if f in renamedeleteset:
980 984 note += b"%"
981 985 repo.ui.debug(
982 986 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
983 987 )
984 988 del renamedeleteset
985 989 del divergeset
986 990
987 991 repo.ui.debug(b" checking for directory renames\n")
988 992
989 993 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
990 994 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
991 995
992 996 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
993 997 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
994 998
995 999 return branch_copies1, branch_copies2, diverge
996 1000
997 1001
998 1002 def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
999 1003 """Finds moved directories and files that should move with them.
1000 1004
1001 1005 ctx: the context for one of the sides
1002 1006 copy: files copied on the same side (as ctx)
1003 1007 fullcopy: files copied on the same side (as ctx), including those that
1004 1008 merge.manifestmerge() won't care about
1005 1009 addedfilesfn: function returning added files on the other side (compared to
1006 1010 ctx)
1007 1011 """
1008 1012 # generate a directory move map
1009 1013 invalid = set()
1010 1014 dirmove = {}
1011 1015
1012 1016 # examine each file copy for a potential directory move, which is
1013 1017 # when all the files in a directory are moved to a new directory
1014 1018 for dst, src in pycompat.iteritems(fullcopy):
1015 1019 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
1016 1020 if dsrc in invalid:
1017 1021 # already seen to be uninteresting
1018 1022 continue
1019 1023 elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
1020 1024 # directory wasn't entirely moved locally
1021 1025 invalid.add(dsrc)
1022 1026 elif dsrc in dirmove and dirmove[dsrc] != ddst:
1023 1027 # files from the same directory moved to two different places
1024 1028 invalid.add(dsrc)
1025 1029 else:
1026 1030 # looks good so far
1027 1031 dirmove[dsrc] = ddst
1028 1032
1029 1033 for i in invalid:
1030 1034 if i in dirmove:
1031 1035 del dirmove[i]
1032 1036 del invalid
1033 1037
1034 1038 if not dirmove:
1035 1039 return {}, {}
1036 1040
1037 1041 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
1038 1042
1039 1043 for d in dirmove:
1040 1044 repo.ui.debug(
1041 1045 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
1042 1046 )
1043 1047
1044 1048 movewithdir = {}
1045 1049 # check unaccounted nonoverlapping files against directory moves
1046 1050 for f in addedfilesfn():
1047 1051 if f not in fullcopy:
1048 1052 for d in dirmove:
1049 1053 if f.startswith(d):
1050 1054 # new file added in a directory that was moved, move it
1051 1055 df = dirmove[d] + f[len(d) :]
1052 1056 if df not in copy:
1053 1057 movewithdir[f] = df
1054 1058 repo.ui.debug(
1055 1059 b" pending file src: '%s' -> dst: '%s'\n"
1056 1060 % (f, df)
1057 1061 )
1058 1062 break
1059 1063
1060 1064 return dirmove, movewithdir
1061 1065
1062 1066
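
A worked example of the detection above with hypothetical paths: every file under `dir1/` moved to `dir2/`, so a file the other side added under `dir1/` gets pulled along:

    # what _dir_renames() would derive from such a fullcopy mapping
    fullcopy = {b"dir2/a": b"dir1/a", b"dir2/b": b"dir1/b"}
    dirmove = {b"dir1/": b"dir2/"}      # 'dir1' no longer exists in ctx

    added = b"dir1/new"                 # added on the other side
    df = dirmove[b"dir1/"] + added[len(b"dir1/"):]
    assert df == b"dir2/new"            # ends up in movewithdir
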
1063 1067 def _heuristicscopytracing(repo, c1, c2, base):
1064 1068 """Fast copytracing using filename heuristics
1065 1069
1066 1070 Assumes that moves or renames are of the following two types:
1067 1071
1068 1072 1) Inside a directory only (same directory name but different filenames)
1069 1073 2) Move from one directory to another
1070 1074 (same filenames but different directory names)
1071 1075
1072 1076 Works only when there are no merge commits in the "source branch".
1073 1077 The source branch consists of the commits from base up to c2, not including base.
1074 1078
1075 1079 If a merge is involved, it falls back to _fullcopytracing().
1076 1080
1077 1081 Can be used by setting the following config:
1078 1082
1079 1083 [experimental]
1080 1084 copytrace = heuristics
1081 1085
1082 1086 In some cases the copy/move candidates found by the heuristics can be very
1083 1087 numerous, and that will make the algorithm slow. The number of possible
1084 1088 candidates to check can be limited by using the config
1085 1089 `experimental.copytrace.movecandidateslimit`, which defaults to 100.
1086 1090 """
1087 1091
1088 1092 if c1.rev() is None:
1089 1093 c1 = c1.p1()
1090 1094 if c2.rev() is None:
1091 1095 c2 = c2.p1()
1092 1096
1093 1097 changedfiles = set()
1094 1098 m1 = c1.manifest()
1095 1099 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
1096 1100 # If base is not an ancestor of c2, we switch to full copytracing
1097 1101 repo.ui.debug(
1098 1102 b"switching to full copytracing as base is not "
1099 1103 b"an ancestor of c2\n"
1100 1104 )
1101 1105 return _fullcopytracing(repo, c1, c2, base)
1102 1106
1103 1107 ctx = c2
1104 1108 while ctx != base:
1105 1109 if len(ctx.parents()) == 2:
1106 1110 # To keep things simple let's not handle merges
1107 1111 repo.ui.debug(b"switching to full copytracing because of merges\n")
1108 1112 return _fullcopytracing(repo, c1, c2, base)
1109 1113 changedfiles.update(ctx.files())
1110 1114 ctx = ctx.p1()
1111 1115
1112 1116 copies2 = {}
1113 1117 cp = _forwardcopies(base, c2)
1114 1118 for dst, src in pycompat.iteritems(cp):
1115 1119 if src in m1:
1116 1120 copies2[dst] = src
1117 1121
1118 1122 # file is missing if it isn't present in the destination, but is present in
1119 1123 # the base and present in the source.
1120 1124 # Presence in the base is important to exclude added files, presence in the
1121 1125 # source is important to exclude removed files.
1122 1126 filt = lambda f: f not in m1 and f in base and f in c2
1123 1127 missingfiles = [f for f in changedfiles if filt(f)]
1124 1128
1125 1129 copies1 = {}
1126 1130 if missingfiles:
1127 1131 basenametofilename = collections.defaultdict(list)
1128 1132 dirnametofilename = collections.defaultdict(list)
1129 1133
1130 1134 for f in m1.filesnotin(base.manifest()):
1131 1135 basename = os.path.basename(f)
1132 1136 dirname = os.path.dirname(f)
1133 1137 basenametofilename[basename].append(f)
1134 1138 dirnametofilename[dirname].append(f)
1135 1139
1136 1140 for f in missingfiles:
1137 1141 basename = os.path.basename(f)
1138 1142 dirname = os.path.dirname(f)
1139 1143 samebasename = basenametofilename[basename]
1140 1144 samedirname = dirnametofilename[dirname]
1141 1145 movecandidates = samebasename + samedirname
1142 1146 # f is guaranteed to be present in c2, that's why
1143 1147 # c2.filectx(f) won't fail
1144 1148 f2 = c2.filectx(f)
1145 1149 # we can have a lot of candidates, which can slow down the heuristics;
1146 1150 # a config value limits the number of candidate moves to check
1147 1151 maxcandidates = repo.ui.configint(
1148 1152 b'experimental', b'copytrace.movecandidateslimit'
1149 1153 )
1150 1154
1151 1155 if len(movecandidates) > maxcandidates:
1152 1156 repo.ui.status(
1153 1157 _(
1154 1158 b"skipping copytracing for '%s', more "
1155 1159 b"candidates than the limit: %d\n"
1156 1160 )
1157 1161 % (f, len(movecandidates))
1158 1162 )
1159 1163 continue
1160 1164
1161 1165 for candidate in movecandidates:
1162 1166 f1 = c1.filectx(candidate)
1163 1167 if _related(f1, f2):
1164 1168 # if there are a few related copies then we'll merge
1165 1169 # changes into all of them. This matches the behaviour
1166 1170 # of upstream copytracing
1167 1171 copies1[candidate] = f
1168 1172
1169 1173 return branch_copies(copies1), branch_copies(copies2), {}
1170 1174
1171 1175
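
The candidate search keys only on basename and dirname; a minimal sketch of how candidates are gathered for one missing file (hypothetical paths):

    import collections, os

    new_in_c1 = [b"lib/a.txt", b"docs/readme"]   # m1.filesnotin(base.manifest())
    bybase = collections.defaultdict(list)
    bydir = collections.defaultdict(list)
    for f in new_in_c1:
        bybase[os.path.basename(f)].append(f)
        bydir[os.path.dirname(f)].append(f)

    missing = b"src/a.txt"    # in base and c2, gone from c1
    candidates = bybase[os.path.basename(missing)] + bydir[os.path.dirname(missing)]
    assert candidates == [b"lib/a.txt"]    # same basename, moved directory
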
1172 1176 def _related(f1, f2):
1173 1177 """return True if f1 and f2 filectx have a common ancestor
1174 1178
1175 1179 Walk back to common ancestor to see if the two files originate
1176 1180 from the same file. Since workingfilectx's rev() is None it messes
1177 1181 up the integer comparison logic, hence the pre-step check for
1178 1182 None (f1 and f2 can only be workingfilectx's initially).
1179 1183 """
1180 1184
1181 1185 if f1 == f2:
1182 1186 return True # a match
1183 1187
1184 1188 g1, g2 = f1.ancestors(), f2.ancestors()
1185 1189 try:
1186 1190 f1r, f2r = f1.linkrev(), f2.linkrev()
1187 1191
1188 1192 if f1r is None:
1189 1193 f1 = next(g1)
1190 1194 if f2r is None:
1191 1195 f2 = next(g2)
1192 1196
1193 1197 while True:
1194 1198 f1r, f2r = f1.linkrev(), f2.linkrev()
1195 1199 if f1r > f2r:
1196 1200 f1 = next(g1)
1197 1201 elif f2r > f1r:
1198 1202 f2 = next(g2)
1199 1203 else: # f1 and f2 point to files in the same linkrev
1200 1204 return f1 == f2 # true if they point to the same file
1201 1205 except StopIteration:
1202 1206 return False
1203 1207
1204 1208
1205 1209 def graftcopies(wctx, ctx, base):
1206 1210 """reproduce copies between base and ctx in the wctx
1207 1211
1208 1212 Unlike mergecopies(), this function will only consider copies between base
1209 1213 and ctx; it will ignore copies between base and wctx. Also unlike
1210 1214 mergecopies(), this function will apply copies to the working copy (instead
1211 1215 of just returning information about the copies). That makes it cheaper
1212 1216 (especially in the common case of base==ctx.p1()) and useful also when
1213 1217 experimental.copytrace=off.
1214 1218
1215 1219 merge.update() will have already marked most copies, but it will only
1216 1220 mark copies if it thinks the source files are related (see
1217 1221 merge._related()). It will also not mark copies if the file wasn't modified
1218 1222 on the local side. This function adds the copies that were "missed"
1219 1223 by merge.update().
1220 1224 """
1221 1225 new_copies = pathcopies(base, ctx)
1222 1226 _filter(wctx.p1(), wctx, new_copies)
1223 1227 for dst, src in pycompat.iteritems(new_copies):
1224 1228 wctx[dst].markcopied(src)