##// END OF EJS Templates
copies: rename some function to the new naming scheme...
marmoute -
r46199:fb000408 default
parent child Browse files
Show More
@@ -1,997 +1,997 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 13 from .i18n import _
14 14
15 15
16 16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 17
18 18 from . import (
19 19 match as matchmod,
20 20 node,
21 21 pathutil,
22 22 pycompat,
23 23 util,
24 24 )
25 25
26 26
27 27 from .utils import stringutil
28 28
29 29
30 30 def _filter(src, dst, t):
31 31 """filters out invalid copies after chaining"""
32 32
33 33 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
34 34 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
35 35 # in the following table (not including trivial cases). For example, case 2
36 36 # is where a file existed in 'src' and remained under that name in 'mid' and
37 37 # then was renamed between 'mid' and 'dst'.
38 38 #
39 39 # case src mid dst result
40 40 # 1 x y - -
41 41 # 2 x y y x->y
42 42 # 3 x y x -
43 43 # 4 x y z x->z
44 44 # 5 - x y -
45 45 # 6 x x y x->y
46 46 #
47 47 # _chain() takes care of chaining the copies in 'a' and 'b', but it
48 48 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
49 49 # between 5 and 6, so it includes all cases in its result.
50 50 # Cases 1, 3, and 5 are then removed by _filter().
51 51
52 52 for k, v in list(t.items()):
53 53 # remove copies from files that didn't exist
54 54 if v not in src:
55 55 del t[k]
56 56 # remove criss-crossed copies
57 57 elif k in src and v in dst:
58 58 del t[k]
59 59 # remove copies to files that were then removed
60 60 elif k not in dst:
61 61 del t[k]
62 62
63 63
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'

    Entries of `suffix` are resolved through `prefix`: if a suffix copy's
    source is itself a copy destination in `prefix`, the chained entry points
    at the original `prefix` source.  Entries of `prefix` not overridden by
    `suffix` are kept as-is.
    """
    chained = dict(prefix)
    chained.update(
        (dst, prefix.get(src, src))
        for dst, src in pycompat.iteritems(suffix)
    )
    return chained
70 70
71 71
72 72 def _tracefile(fctx, am, basemf):
73 73 """return file context that is the ancestor of fctx present in ancestor
74 74 manifest am
75 75
76 76 Note: we used to try and stop after a given limit, however checking if that
77 77 limit is reached turned out to be very expensive. we are better off
78 78 disabling that feature."""
79 79
80 80 for f in fctx.ancestors():
81 81 path = f.path()
82 82 if am.get(path, None) == f.filenode():
83 83 return path
84 84 if basemf and basemf.get(path, None) == f.filenode():
85 85 return path
86 86
87 87
88 88 def _dirstatecopies(repo, match=None):
89 89 ds = repo.dirstate
90 90 c = ds.copies().copy()
91 91 for k in list(c):
92 92 if ds[k] not in b'anm' or (match and not match(k)):
93 93 del c[k]
94 94 return c
95 95
96 96
97 97 def _computeforwardmissing(a, b, match=None):
98 98 """Computes which files are in b but not a.
99 99 This is its own function so extensions can easily wrap this call to see what
100 100 files _forwardcopies is about to process.
101 101 """
102 102 ma = a.manifest()
103 103 mb = b.manifest()
104 104 return mb.filesnotin(ma, match=match)
105 105
106 106
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage implies changeset-centric data, no need to check config
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
114 114
115 115
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst@b: src@a} copy mapping.  `base` (possibly None) provides
    an extra manifest that _tracefile() may match an ancestor against.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        # copy information is stored in the changesets; use that path instead
        # of the filelog-based tracing below
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # restrict the ancestor walks done by _tracefile() to ancestors of b
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
173 173
174 174
def _revinfo_getter(repo):
    """return a function that returns multiple data items given a <rev>

    The returned tuple is (p1, p2, p1copies, p2copies, removed, ismerged):

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        # build an `ismerged(path)` callback bound to the context of `rev`
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            # NOTE: shadows the outer `parents`; these are filelog parents
            parents = fctx._filelog.parents(fctx._filenode)
            nb_parents = 0
            for n in parents:
                if n != node.nullid:
                    nb_parents += 1
            return nb_parents >= 2

        return ismerged

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        # A small cache to avoid doing the work twice for merges
        #
        # In the vast majority of cases, if we ask information for a revision
        # about 1 parent, we'll later ask it for the other. So it make sense to
        # keep the information around when reaching the first parent of a merge
        # and dropping it after it was provided for the second parents.
        #
        # It exists cases were only one parent of the merge will be walked. It
        # happens when the "destination" the copy tracing is descendant from a
        # new root, not common with the "source". In that case, we will only walk
        # through merge parents that are descendant of changesets common
        # between "source" and "destination".
        #
        # With the current case implementation if such changesets have a copy
        # information, we'll keep them in memory until the end of
        # _changesetforwardcopies. We don't expect the case to be frequent
        # enough to matters.
        #
        # In addition, it would be possible to reach pathological case, were
        # many first parent are met before any second parent is reached. In
        # that case the cache could grow. If this even become an issue one can
        # safely introduce a maximum cache size. This would trade extra CPU/IO
        # time to save memory.
        merge_caches = {}

        def revinfo(rev):
            p1, p2 = parents(rev)
            value = None
            if flags(rev) & REVIDX_SIDEDATA:
                # second request for a cached merge: hand back the stored tuple
                e = merge_caches.pop(rev, None)
                if e is not None:
                    return e
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
                if p1 != node.nullrev and p2 != node.nullrev:
                    # XXX some case we over cache, IGNORE
                    value = merge_caches[rev] = (
                        p1,
                        p2,
                        p1copies,
                        p2copies,
                        removed,
                        get_ismerged(rev),
                    )
            else:
                # no sidedata recorded for this revision: no copy information
                p1copies = {}
                p2copies = {}
                removed = []

            if value is None:
                value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))
            return value

    else:

        def revinfo(rev):
            # non-sidedata path: read copy data from the changectx itself
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
273 273
274 274
def _changesetforwardcopies(a, b, match):
    """changeset-centric version of forward copy tracing

    Returns a {dst@b: src@a} mapping computed from copy metadata stored on
    the changesets between `a` and `b` (where `a` is an ancestor of `b`).
    """
    if a.rev() in (node.nullrev, b.rev()):
        # nothing to trace from the null revision, or from b to itself
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfo_getter(repo)

    cl = repo.changelog
    isancestor = cl.isancestorrev  # XXX we should add caching to this.
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            if p not in mrset:
                # a parent outside the missing set is a root to start from
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    # revisions on a path from some root up to b
    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    # walk only the missing revisions that can reach b, plus the roots,
    # excluding b itself (b is the combination target, not a visited rev)
    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)
    return _combine_changeset_copies(
        revs, children, b.rev(), revinfo, match, isancestor
    )
315 315
316 316
def _combine_changeset_copies(
    revs, children, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """
    # all_copies maps a visited revision to its accumulated
    # {dst: (rev, src-or-None)} copy information
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            # pick the copy data recorded against the parent we came from
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain through an earlier copy when the source was itself
                    # a (non-deleted) copy destination
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # a None source marks the copy as invalidated by removal
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    # strip invalidated (source is None) entries and the bookkeeping revs
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
393 393
394 394
395 395 def _merge_copies_dict(minor, major, isancestor, ismerged):
396 396 """merge two copies-mapping together, minor and major
397 397
398 398 In case of conflict, value from "major" will be picked.
399 399
400 400 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
401 401 ancestors of `high_rev`,
402 402
403 403 - `ismerged(path)`: callable return True if `path` have been merged in the
404 404 current revision,
405 405 """
406 406 for dest, value in major.items():
407 407 other = minor.get(dest)
408 408 if other is None:
409 409 minor[dest] = value
410 410 else:
411 411 new_tt = value[0]
412 412 other_tt = other[0]
413 413 if value[1] == other[1]:
414 414 continue
415 415 # content from "major" wins, unless it is older
416 416 # than the branch point or there is a merge
417 417 if (
418 418 new_tt == other_tt
419 419 or not isancestor(new_tt, other_tt)
420 420 or ismerged(dest)
421 421 ):
422 422 minor[dest] = value
423 423
424 424
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        # committed revision: everything comes from the stored copy data
        return _committedforwardcopies(a, b, base, match)
    # working copy: combine copies committed up to p1 with dirstate copies
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
439 439
440 440
def _backwardrenames(a, b, match):
    """find renames from b back to its ancestor a, by reversing forward copies"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    reversed_map = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        if match and not match(src):
            continue
        # remove copies (a file still present in a was copied, not renamed)
        if src in a:
            continue
        reversed_map[src] = dst
    return reversed_map
461 461
462 462
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        # identical or empty endpoints: no copies possible
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: trace forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        result = _forwardcopies(x, y, match=match)
    elif a == y:
        # y is an ancestor of x: trace backward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        result = _backwardrenames(x, y, match=match)
    else:
        # unrelated heads: go back to the common ancestor, then forward to y
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = x if a.rev() != node.nullrev else None
        result = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, result)
    return result
499 499
500 500
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        wdir_copies = branch_copies(_dirstatecopies(repo, narrowmatch))
        return wdir_copies, branch_copies(), {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing != b'heuristics':
        return _fullcopytracing(repo, c1, c2, base)

    # Do full copytracing if only non-public revisions are involved as
    # that will be fast enough and will also cover the copies which could
    # be missed by heuristics
    if _isfullcopytraceable(repo, c1, base):
        return _fullcopytracing(repo, c1, c2, base)
    return _heuristicscopytracing(repo, c1, c2, base)
569 569
570 570
571 571 def _isfullcopytraceable(repo, c1, base):
572 572 """ Checks that if base, source and destination are all no-public branches,
573 573 if yes let's use the full copytrace algorithm for increased capabilities
574 574 since it will be fast enough.
575 575
576 576 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
577 577 number of changesets from c1 to base such that if number of changesets are
578 578 more than the limit, full copytracing algorithm won't be used.
579 579 """
580 580 if c1.rev() is None:
581 581 c1 = c1.p1()
582 582 if c1.mutable() and base.mutable():
583 583 sourcecommitlimit = repo.ui.configint(
584 584 b'experimental', b'copytrace.sourcecommitlimit'
585 585 )
586 586 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
587 587 return commits < sourcecommitlimit
588 588 return False
589 589
590 590
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    """Record a copy/rename of `src` that happened on only one side.

    `src` was copied to the `dsts1` destinations on side 1 only.  `m1`, `m2`
    and `mb` are the manifests of side 1, side 2 and the merge base; `c2` and
    `base` are the matching changectxs.  The `copy` and `renamedelete`
    mappings are updated in place; nothing is returned.
    """
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        pass
    elif mb[src] != m2[src] and not _related(c2[src], base[src]):
        # side-2 content diverged from an unrelated revision: not a copy
        # worth propagating
        return
    elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src
610 610
611 611
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # every mapping defaults to a fresh empty dict
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        return (
            '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
            % (self.copy, self.renamedelete, self.dirmove, self.movewithdir,)
        )
644 644
645 645
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a (branch_copies1, branch_copies2, diverge) tuple, matching the
    contract documented on mergecopies().
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # copies recorded on each side, relative to the merge base
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    # invert dst->src into src->[dsts] so all destinations of one source
    # can be examined together
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    if repo.ui.debugflag:
        # flatten the destination sets purely for debug display below
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # note the crossed outputs: files moving with a directory renamed on one
    # side are files added on the *other* side
    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
769 769
770 770
def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)

    Returns a (dirmove, movewithdir) pair: a {source-dir/: dest-dir/} mapping
    of wholesale directory renames, and a {file: moved-file} mapping for the
    `addedfiles` that should follow those directory renames.
    """
    # generate a directory move map
    d = ctx.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d and ddst in d:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    # drop candidates that a later copy disqualified
    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d, invalid

    if not dirmove:
        return {}, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return dirmove, movewithdir
834 834
835 835
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # Working-directory contexts have rev() == None, which repo.revs()
    # below cannot handle; fall back to their first parent.
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # Collect all files touched between base and c2 (exclusive of base),
    # walking first parents only.
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # Copies recorded between base and c2 whose source still exists in c1.
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # Index the files added on the c1 side by basename and by dirname,
        # so each missing file can look up rename candidates cheaply.
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (read once here — it is invariant across the loop below)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
943 943
944 944
945 945 def _related(f1, f2):
946 946 """return True if f1 and f2 filectx have a common ancestor
947 947
948 948 Walk back to common ancestor to see if the two files originate
949 949 from the same file. Since workingfilectx's rev() is None it messes
950 950 up the integer comparison logic, hence the pre-step check for
951 951 None (f1 and f2 can only be workingfilectx's initially).
952 952 """
953 953
954 954 if f1 == f2:
955 955 return True # a match
956 956
957 957 g1, g2 = f1.ancestors(), f2.ancestors()
958 958 try:
959 959 f1r, f2r = f1.linkrev(), f2.linkrev()
960 960
961 961 if f1r is None:
962 962 f1 = next(g1)
963 963 if f2r is None:
964 964 f2 = next(g2)
965 965
966 966 while True:
967 967 f1r, f2r = f1.linkrev(), f2.linkrev()
968 968 if f1r > f2r:
969 969 f1 = next(g1)
970 970 elif f2r > f1r:
971 971 f2 = next(g2)
972 972 else: # f1 and f2 point to files in the same linkrev
973 973 return f1 == f2 # true if they point to the same file
974 974 except StopIteration:
975 975 return False
976 976
977 977
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    found_copies = pathcopies(base, ctx)
    # Drop copies that are invalid relative to the working copy and its
    # first parent (missing sources, criss-crosses, removed destinations).
    _filter(wctx.p1(), wctx, found_copies)
    for dest, source in pycompat.iteritems(found_copies):
        wctx[dest].markcopied(source)
General Comments 0
You need to be logged in to leave comments. Login now