##// END OF EJS Templates
copies: rename value/other variable to minor/major for clarity...
marmoute -
r46777:6b9d6529 default
parent child Browse files
Show More
@@ -1,1224 +1,1224 b''
1 1 # coding: utf8
2 2 # copies.py - copy detection for Mercurial
3 3 #
4 4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import collections
12 12 import os
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 nullid,
17 17 nullrev,
18 18 )
19 19
20 20 from . import (
21 21 match as matchmod,
22 22 pathutil,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 )
27 27
28 28
29 29 from .utils import stringutil
30 30
31 31 from .revlogutils import (
32 32 flagutil,
33 33 sidedata as sidedatamod,
34 34 )
35 35
# optional Rust implementation of copy tracing; None when the Rust
# extensions are not available
rustmod = policy.importrust("copy_tracing")
37 37
38 38
39 39 def _filter(src, dst, t):
40 40 """filters out invalid copies after chaining"""
41 41
42 42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 44 # in the following table (not including trivial cases). For example, case 2
45 45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 46 # then was renamed between 'mid' and 'dst'.
47 47 #
48 48 # case src mid dst result
49 49 # 1 x y - -
50 50 # 2 x y y x->y
51 51 # 3 x y x -
52 52 # 4 x y z x->z
53 53 # 5 - x y -
54 54 # 6 x x y x->y
55 55 #
56 56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 58 # between 5 and 6, so it includes all cases in its result.
59 59 # Cases 1, 3, and 5 are then removed by _filter().
60 60
61 61 for k, v in list(t.items()):
62 62 # remove copies from files that didn't exist
63 63 if v not in src:
64 64 del t[k]
65 65 # remove criss-crossed copies
66 66 elif k in src and v in dst:
67 67 del t[k]
68 68 # remove copies to files that were then removed
69 69 elif k not in dst:
70 70 del t[k]
71 71
72 72
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    chained = dict(prefix)
    for dst, src in pycompat.iteritems(suffix):
        # if 'src' was itself produced by 'prefix', follow it back further
        chained[dst] = prefix.get(src, src)
    return chained
79 79
80 80
81 81 def _tracefile(fctx, am, basemf):
82 82 """return file context that is the ancestor of fctx present in ancestor
83 83 manifest am
84 84
85 85 Note: we used to try and stop after a given limit, however checking if that
86 86 limit is reached turned out to be very expensive. we are better off
87 87 disabling that feature."""
88 88
89 89 for f in fctx.ancestors():
90 90 path = f.path()
91 91 if am.get(path, None) == f.filenode():
92 92 return path
93 93 if basemf and basemf.get(path, None) == f.filenode():
94 94 return path
95 95
96 96
97 97 def _dirstatecopies(repo, match=None):
98 98 ds = repo.dirstate
99 99 c = ds.copies().copy()
100 100 for k in list(c):
101 101 if ds[k] not in b'anm' or (match and not match(k)):
102 102 del c[k]
103 103 return c
104 104
105 105
106 106 def _computeforwardmissing(a, b, match=None):
107 107 """Computes which files are in b but not a.
108 108 This is its own function so extensions can easily wrap this call to see what
109 109 files _forwardcopies is about to process.
110 110 """
111 111 ma = a.manifest()
112 112 mb = b.manifest()
113 113 return mb.filesnotin(ma, match=match)
114 114
115 115
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage always implies changeset-centric data
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    # otherwise honour the experimental read-from configuration
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
123 123
124 124
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # the changeset-centric algorithm does not need the manifest walk below
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared across all traced file contexts below, presumably to avoid
    # recomputing the ancestry set once per file — TODO confirm
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
182 182
183 183
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object — or, when the Rust extension is in
      use, the raw sidedata block it consumes directly
    """
    # hoist changelog accessors into locals once; revinfo() is called per rev
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                # hand the raw sidedata bytes to the Rust implementation
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            else:
                raw = None
            return (p1, p2, raw)

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            else:
                changes = None
            return (p1, p2, changes)

    return revinfo
220 220
221 221
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    memo = {}

    def cached(anc, desc):
        # a revision never descends from a higher-numbered revision
        if anc > desc:
            return False
        if anc == desc:
            return True
        pair = (anc, desc)
        known = memo.get(pair)
        if known is None:
            known = memo[pair] = is_ancestor(anc, desc)
        return known

    return cached
238 238
239 239
def _changesetforwardcopies(a, b, match):
    """changeset-centric copy search from `a` to `b`

    Returns a {dst: src} mapping; empty when `a` is the null revision or
    equal to `b`.
    """
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track rename from "A" to B, we need to gather all parent → children
    # edges that are contained in `::B` but not in `::A`.
    #
    #
    # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
    # ::a`) and also all the "root points", ie the parents of the exclusive set
    # that belong to ::a. These are exactly all the revisions needed to express
    # the parent → children we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
    # excluding paths that lead to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without this
    # special case..

    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deals with the special case mentioned in the [1] footnote. We
        # must filter out revisions that lead to non-common graphroots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data
        # for the parents. To know when the parents' data is no longer
        # necessary, we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only contains
        # revisions that will be relevant for an edge of the graph. So if a
        # child has a parent not in `children_count`, that edge should not be
        # processed.
        children_count = dict((r, 0) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                # NOTE(review): gives `r` its own entry; assumes `revs` is in
                # ascending order so none of r's children were counted yet
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs, children_count, b.rev(), revinfo, match, isancestor
        )
    else:
        # When not using side-data, we will process the edges "from" the
        # parent. So we need a full mapping of the parent -> children relation.
        children = dict((r, []) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
339 339
340 340
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns (p1, p2, changes)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, isancestor
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        # {rev: {dest: (rev-of-record, source-or-None)}}
        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent → child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    # parent not tracked: this edge is irrelevant
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    copies = all_copies.get(parent_rev, None)
                else:
                    # last child of this parent: reclaim the parent data
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    copies = {}

                newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                # follow a chained rename back to its origin
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        if newcopies is copies:
                            newcopies = copies.copy()
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting potential other
                                    # branches. when there are no other branches, this
                                    # could be avoided.
                                    newcopies = copies.copy()
                                # a None source records the removal
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent (for
                # that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                elif current_copies is newcopies:
                    # nothing to merge:
                    pass
                else:
                    # we are the second parent to work on c, we need to merge our
                    # work with the other.
                    #
                    # In case of conflict, parent 1 take precedence over parent 2.
                    # This is an arbitrary choice made anew when implementing
                    # changeset based copies. It was made without regards with
                    # potential filelog related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies, current_copies, isancestor, changes
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source mapping}
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies
447 447
448 448
449 449 # constant to decide which side to pick with _merge_copies_dict
450 450 PICK_MINOR = 0
451 451 PICK_MAJOR = 1
452 452 PICK_EITHER = 2
453 453
454 454
455 455 def _merge_copies_dict(minor, major, isancestor, changes):
456 456 """merge two copies-mapping together, minor and major
457 457
458 458 In case of conflict, value from "major" will be picked.
459 459
460 460 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
461 461 ancestors of `high_rev`,
462 462
463 463 - `ismerged(path)`: callable return True if `path` have been merged in the
464 464 current revision,
465 465
466 466 return the resulting dict (in practice, the "minor" object, updated)
467 467 """
468 468 for dest, value in major.items():
469 469 other = minor.get(dest)
470 470 if other is None:
471 471 minor[dest] = value
472 472 else:
473 473 pick = _compare_values(changes, isancestor, dest, other, value)
474 474 if pick == PICK_MAJOR:
475 475 minor[dest] = value
476 476 return minor
477 477
478 478
479 def _compare_values(changes, isancestor, dest, other, value):
479 def _compare_values(changes, isancestor, dest, minor, major):
480 480 """compare two value within a _merge_copies_dict loop iteration"""
481 new_tt = value[0]
482 other_tt = other[0]
481 major_tt = major[0]
482 minor_tt = minor[0]
483 483
484 if value[1] == other[1]:
484 if major[1] == minor[1]:
485 485 return PICK_EITHER
486 486 # content from "major" wins, unless it is older
487 487 # than the branch point or there is a merge
488 if new_tt == other_tt:
488 if major_tt == minor_tt:
489 489 return PICK_MAJOR
490 elif changes is not None and value[1] is None and dest in changes.salvaged:
490 elif changes is not None and major[1] is None and dest in changes.salvaged:
491 491 return PICK_MINOR
492 elif changes is not None and other[1] is None and dest in changes.salvaged:
492 elif changes is not None and minor[1] is None and dest in changes.salvaged:
493 493 return PICK_MAJOR
494 494 elif changes is not None and dest in changes.merged:
495 495 return PICK_MAJOR
496 elif not isancestor(new_tt, other_tt):
497 if value[1] is not None:
496 elif not isancestor(major_tt, minor_tt):
497 if major[1] is not None:
498 498 return PICK_MAJOR
499 elif isancestor(other_tt, new_tt):
499 elif isancestor(minor_tt, major_tt):
500 500 return PICK_MAJOR
501 501 return PICK_MINOR
502 502
503 503
def _revinfo_getter_extra(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            nb_parents = 0
            for n in parents:
                if n != nullid:
                    nb_parents += 1
            # two real filelog parents means the file was merged
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
541 541
542 542
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information"""
    # {rev: {dest: (rev-of-record, source-or-None)}}
    all_copies = {}
    alwaysmatch = match.always()
    # here the edges are walked "from" the parent side, using `children`
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        # follow a chained rename back to its origin
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # a None source records the removal
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                    all_copies[c] = newcopies

    # filter out internal details and return a {dest: source} mapping
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
611 611
612 612
613 613 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
614 614 """version of `_merge_copies_dict` that works with the Google
615 615 specific "extra" based storage for copy information"""
616 616 for dest, value in major.items():
617 617 other = minor.get(dest)
618 618 if other is None:
619 619 minor[dest] = value
620 620 else:
621 621 new_tt = value[0]
622 622 other_tt = other[0]
623 623 if value[1] == other[1]:
624 624 continue
625 625 # content from "major" wins, unless it is older
626 626 # than the branch point or there is a merge
627 627 if (
628 628 new_tt == other_tt
629 629 or not isancestor(new_tt, other_tt)
630 630 or ismerged(dest)
631 631 ):
632 632 minor[dest] = value
633 633
634 634
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is None:
        # working copy: trace up to the first parent, then layer the
        # dirstate-recorded copies on top
        committed = _committedforwardcopies(a, b.p1(), base, match)
        return _chain(committed, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, base, match)
649 649
650 650
def _backwardrenames(a, b, match):
    """invert the forward copies from ``b`` to ``a``, keeping only renames

    Entries whose source still exists in ``a`` (plain copies) are dropped.
    """
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    renames = {}
    for dest, source in sorted(pycompat.iteritems(forward)):
        if match and not match(source):
            continue
        # remove copies (source still present in 'a')
        if source in a:
            continue
        renames[source] = dest
    return renames
671 671
672 672
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        # nothing to do when comparing a context with itself (or nothing)
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: trace forward only
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # y is an ancestor of x: trace backward only
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated branches: go back to the common ancestor, then forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = x if a.rev() != nullrev else None
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
709 709
710 710
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
779 779
780 780
781 781 def _isfullcopytraceable(repo, c1, base):
782 782 """Checks that if base, source and destination are all no-public branches,
783 783 if yes let's use the full copytrace algorithm for increased capabilities
784 784 since it will be fast enough.
785 785
786 786 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
787 787 number of changesets from c1 to base such that if number of changesets are
788 788 more than the limit, full copytracing algorithm won't be used.
789 789 """
790 790 if c1.rev() is None:
791 791 c1 = c1.p1()
792 792 if c1.mutable() and base.mutable():
793 793 sourcecommitlimit = repo.ui.configint(
794 794 b'experimental', b'copytrace.sourcecommitlimit'
795 795 )
796 796 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
797 797 return commits < sourcecommitlimit
798 798 return False
799 799
800 800
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    """check how a file copied/renamed only on side 1 fares on side 2

    Updates `copy` and `renamedelete` in place: records a rename+delete when
    `src` is gone from both manifests, and records the copies when `src` was
    modified on side 2.
    """
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        pass
    elif mb[src] != m2[src] and not _related(c2[src], base[src]):
        # side-2 content is unrelated to the base: record nothing
        return
    elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src
820 820
821 821
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # keep caller-provided mappings as-is; only substitute fresh empty
        # dicts for arguments that were left out
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        fmt = (
            '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n'
            ' movewithdir=%r\n>'
        )
        return fmt % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
856 856
857 857
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    repo: the repository being merged
    c1, c2: the two contexts being merged
    base: the common ancestor context of the merge

    Returns a (branch_copies, branch_copies, diverge) tuple: the copy
    information for each side, and a mapping of source name -> sorted list of
    destinations for files renamed divergently on both sides.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # copies from the merge base to each side
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        # no copies on either side: nothing to merge, nothing divergent
        return branch_copies(), branch_copies(), {}

    # invert each mapping: copy source -> list of destinations on that side
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    # cache holds [u1, u2] (files added on each side) once computed, so the
    # work is done at most once and only if actually needed
    cache = []

    def _get_addedfiles(idx):
        # lazily compute the sorted lists of files added on only one side
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b"  unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2))
            )

        # flatten the destination lists so per-file markers below are O(1)
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b"   on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b"   src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # directory moves on one side pull along files added on the other side
    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
996 996
997 997
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Find moved directories and the files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared to
    ctx)
    """
    # candidate source dir -> destination dir moves, plus the source
    # directories already proven uninteresting
    dirmove = {}
    invalid = set()

    # A directory counts as moved only when all of its files went to one new
    # directory; examine each file copy for such a pattern.
    for dst, src in pycompat.iteritems(fullcopy):
        srcdir = pathutil.dirname(src)
        dstdir = pathutil.dirname(dst)
        if srcdir in invalid:
            # already seen to be uninteresting
            continue
        if ctx.hasdir(srcdir) and ctx.hasdir(dstdir):
            # directory wasn't entirely moved locally
            invalid.add(srcdir)
        elif dirmove.get(srcdir, dstdir) != dstdir:
            # files from the same directory moved to two different places
            invalid.add(srcdir)
        else:
            # looks good so far
            dirmove[srcdir] = dstdir

    # drop invalidated candidates and normalize to trailing-slash form so
    # prefix matching below works on whole path components
    dirmove = {
        d + b"/": target + b"/"
        for d, target in pycompat.iteritems(dirmove)
        if d not in invalid
    }
    del invalid

    if not dirmove:
        return {}, {}

    for d, target in pycompat.iteritems(dirmove):
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, target)
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d, target in pycompat.iteritems(dirmove):
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved, move it
            df = target + f[len(d):]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b"   pending file src: '%s' -> dst: '%s'\n" % (f, df)
                )
            break

    return dirmove, movewithdir
1061 1061
1062 1062
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns a (branch_copies, branch_copies, diverge) tuple like
    _fullcopytracing(); the heuristic path itself never reports divergence.
    """

    # working-copy contexts have no revision number; use their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2 (base excluded)
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only copies whose source also exists on the c1 side, since
    # otherwise there is nothing to merge the copy with
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        # index the files added on the c1 side by basename and by directory
        # so move candidates for each missing file can be looked up cheaply
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1170 1170
1171 1171
1172 1172 def _related(f1, f2):
1173 1173 """return True if f1 and f2 filectx have a common ancestor
1174 1174
1175 1175 Walk back to common ancestor to see if the two files originate
1176 1176 from the same file. Since workingfilectx's rev() is None it messes
1177 1177 up the integer comparison logic, hence the pre-step check for
1178 1178 None (f1 and f2 can only be workingfilectx's initially).
1179 1179 """
1180 1180
1181 1181 if f1 == f2:
1182 1182 return True # a match
1183 1183
1184 1184 g1, g2 = f1.ancestors(), f2.ancestors()
1185 1185 try:
1186 1186 f1r, f2r = f1.linkrev(), f2.linkrev()
1187 1187
1188 1188 if f1r is None:
1189 1189 f1 = next(g1)
1190 1190 if f2r is None:
1191 1191 f2 = next(g2)
1192 1192
1193 1193 while True:
1194 1194 f1r, f2r = f1.linkrev(), f2.linkrev()
1195 1195 if f1r > f2r:
1196 1196 f1 = next(g1)
1197 1197 elif f2r > f1r:
1198 1198 f2 = next(g2)
1199 1199 else: # f1 and f2 point to files in the same linkrev
1200 1200 return f1 == f2 # true if they point to the same file
1201 1201 except StopIteration:
1202 1202 return False
1203 1203
1204 1204
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    # copies made between base and ctx, pruned to those still valid when
    # chained onto the working copy's first parent
    found_copies = pathcopies(base, ctx)
    _filter(wctx.p1(), wctx, found_copies)
    # record each surviving copy directly in the working context
    for dest, source in pycompat.iteritems(found_copies):
        wctx[dest].markcopied(source)
General Comments 0
You need to be logged in to leave comments. Login now