##// END OF EJS Templates
copies: implement __repr__ on branch_copies for debugging...
Martin von Zweigbergk -
r45528:cfd06649 default
parent child Browse files
Show More
@@ -1,991 +1,997 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 13 from .i18n import _
14 14
15 15
16 16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 17
18 18 from . import (
19 19 match as matchmod,
20 20 node,
21 21 pathutil,
22 22 pycompat,
23 23 util,
24 24 )
25 25
26 26
27 27 from .utils import stringutil
28 28
29 29
30 30 def _filter(src, dst, t):
31 31 """filters out invalid copies after chaining"""
32 32
33 33 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
34 34 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
35 35 # in the following table (not including trivial cases). For example, case 2
36 36 # is where a file existed in 'src' and remained under that name in 'mid' and
37 37 # then was renamed between 'mid' and 'dst'.
38 38 #
39 39 # case src mid dst result
40 40 # 1 x y - -
41 41 # 2 x y y x->y
42 42 # 3 x y x -
43 43 # 4 x y z x->z
44 44 # 5 - x y -
45 45 # 6 x x y x->y
46 46 #
47 47 # _chain() takes care of chaining the copies in 'a' and 'b', but it
48 48 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
49 49 # between 5 and 6, so it includes all cases in its result.
50 50 # Cases 1, 3, and 5 are then removed by _filter().
51 51
52 52 for k, v in list(t.items()):
53 53 # remove copies from files that didn't exist
54 54 if v not in src:
55 55 del t[k]
56 56 # remove criss-crossed copies
57 57 elif k in src and v in dst:
58 58 del t[k]
59 59 # remove copies to files that were then removed
60 60 elif k not in dst:
61 61 del t[k]
62 62
63 63
64 64 def _chain(prefix, suffix):
65 65 """chain two sets of copies 'prefix' and 'suffix'"""
66 66 result = prefix.copy()
67 67 for key, value in pycompat.iteritems(suffix):
68 68 result[key] = prefix.get(value, value)
69 69 return result
70 70
71 71
72 72 def _tracefile(fctx, am, basemf):
73 73 """return file context that is the ancestor of fctx present in ancestor
74 74 manifest am
75 75
76 76 Note: we used to try and stop after a given limit, however checking if that
77 77 limit is reached turned out to be very expensive. we are better off
78 78 disabling that feature."""
79 79
80 80 for f in fctx.ancestors():
81 81 path = f.path()
82 82 if am.get(path, None) == f.filenode():
83 83 return path
84 84 if basemf and basemf.get(path, None) == f.filenode():
85 85 return path
86 86
87 87
88 88 def _dirstatecopies(repo, match=None):
89 89 ds = repo.dirstate
90 90 c = ds.copies().copy()
91 91 for k in list(c):
92 92 if ds[k] not in b'anm' or (match and not match(k)):
93 93 del c[k]
94 94 return c
95 95
96 96
97 97 def _computeforwardmissing(a, b, match=None):
98 98 """Computes which files are in b but not a.
99 99 This is its own function so extensions can easily wrap this call to see what
100 100 files _forwardcopies is about to process.
101 101 """
102 102 ma = a.manifest()
103 103 mb = b.manifest()
104 104 return mb.filesnotin(ma, match=match)
105 105
106 106
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage always implies changeset-centric algorithms
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
114 114
115 115
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst@b: src@a} mapping. `base` (a context or None) provides a
    second manifest that _tracefile() may match ancestors against.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # one ancestry context is shared by all traced file contexts below
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
173 173
174 174
def _revinfogetter(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        # returns a per-revision callback; a file counts as merged when its
        # filelog entry for this revision has two non-null parents
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            nb_parents = 0
            for n in parents:
                if n != node.nullid:
                    nb_parents += 1
            return nb_parents >= 2

        return ismerged

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        # A small cache to avoid doing the work twice for merges
        #
        # In the vast majority of cases, if we ask information for a revision
        # about 1 parent, we'll later ask it for the other. So it makes sense
        # to keep the information around when reaching the first parent of a
        # merge and dropping it after it was provided for the second parent.
        #
        # There exist cases where only one parent of the merge will be walked.
        # It happens when the "destination" of the copy tracing is descendant
        # from a new root, not common with the "source". In that case, we will
        # only walk through merge parents that are descendant of changesets
        # common between "source" and "destination".
        #
        # With the current implementation if such changesets have copy
        # information, we'll keep them in memory until the end of
        # _changesetforwardcopies. We don't expect the case to be frequent
        # enough to matter.
        #
        # In addition, it would be possible to reach pathological cases, where
        # many first parents are met before any second parent is reached. In
        # that case the cache could grow. If this ever becomes an issue one can
        # safely introduce a maximum cache size. This would trade extra CPU/IO
        # time to save memory.
        merge_caches = {}

        def revinfo(rev):
            p1, p2 = parents(rev)
            value = None
            if flags(rev) & REVIDX_SIDEDATA:
                e = merge_caches.pop(rev, None)
                if e is not None:
                    return e
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
                if p1 != node.nullrev and p2 != node.nullrev:
                    # XXX some case we over cache, IGNORE
                    value = merge_caches[rev] = (
                        p1,
                        p2,
                        p1copies,
                        p2copies,
                        removed,
                        get_ismerged(rev),
                    )
            else:
                # no sidedata for this revision: no copy information recorded
                p1copies = {}
                p2copies = {}
                removed = []

            if value is None:
                value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))
            return value

    else:

        def revinfo(rev):
            # filelog/compatibility mode: read copies from the context itself
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
273 273
274 274
def _changesetforwardcopies(a, b, match):
    """find {dst@b: src@a} copies using changeset-stored copy information

    Walks the revisions between `a` and `b` and aggregates their per-changeset
    copy records via _combinechangesetcopies(). Returns {} when there is
    nothing to trace.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    # children: {parent rev: [child revs]} restricted to the walked range
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    isancestor = cl.isancestorrev  # XXX we should add caching to this.
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            # a parent outside the missing set is a root of the walk
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    # restrict the walk to revisions actually reachable from b
    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)
    return _combinechangesetcopies(
        revs, children, b.rev(), revinfo, match, isancestor
    )
315 315
316 316
def _combinechangesetcopies(
    revs, children, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns
                  (p1, p2, p1copies, p2copies, removed, ismerged)
    match: a matcher
    isancestor(low_rev, high_rev): True when low_rev is an ancestor of high_rev

    It returns the aggregated copies information for `targetrev`.
    """
    # all_copies: {rev: {dst: (introducing rev, src or None)}}; None marks a
    # copy whose destination was later removed
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        # chain through a copy already known at this parent
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    # drop the entries whose destination was removed (source is None)
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
393 393
394 394
395 395 def _merge_copies_dict(minor, major, isancestor, ismerged):
396 396 """merge two copies-mapping together, minor and major
397 397
398 398 In case of conflict, value from "major" will be picked.
399 399
400 400 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
401 401 ancestors of `high_rev`,
402 402
403 403 - `ismerged(path)`: callable return True if `path` have been merged in the
404 404 current revision,
405 405 """
406 406 for dest, value in major.items():
407 407 other = minor.get(dest)
408 408 if other is None:
409 409 minor[dest] = value
410 410 else:
411 411 new_tt = value[0]
412 412 other_tt = other[0]
413 413 if value[1] == other[1]:
414 414 continue
415 415 # content from "major" wins, unless it is older
416 416 # than the branch point or there is a merge
417 417 if (
418 418 new_tt == other_tt
419 419 or not isancestor(new_tt, other_tt)
420 420 or ismerged(dest)
421 421 ):
422 422 minor[dest] = value
423 423
424 424
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)
    # working copy: trace up to the first parent, then layer any copies
    # recorded in the dirstate on top
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
439 439
440 440
def _backwardrenames(a, b, match):
    """find {dst@b: src@a} rename mapping, by reversing the copies found
    from b back to a; returns {} when copytracing is disabled"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    # sorted() keeps the arbitrary 1:n pick deterministic
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
461 461
462 462
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Dispatches on the relation between x and y: dirstate-only when y is the
    working copy child of x, forward when x is an ancestor of y, backward
    when y is an ancestor of x, and a chained backward+forward walk through
    their common ancestor otherwise.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        # NOTE(review): passing x as "base" presumably lets the forward leg
        # also match ancestors against x's manifest — confirm with callers
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
499 499
500 500
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    |/
    o          ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple of (branch_copies1, branch_copies2, diverge) where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
569 569
570 570
def _isfullcopytraceable(repo, c1, base):
    """ Checks that if base, source and destination are all non-public
    branches, if yes let's use the full copytrace algorithm for increased
    capabilities since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    # working copy has no revision of its own: use its first parent instead
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False
589 589
590 590
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    """handle `src`, which was copied/renamed to `dsts1` on one side only

    Mutates `copy` (dst -> src) and `renamedelete` (src -> [dsts]) in place;
    m1/m2/mb are the manifests of side 1, side 2 and the base.
    """
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        pass
    elif mb[src] != m2[src] and not _related(c2[src], base[src]):
        # content changed on side 2 and is unrelated to the base: bail out
        return
    elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src
610 610
611 611
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" maps destination name -> source name, where the source is in c1
    and the destination in c2 or vice-versa.

    "movewithdir" maps source name -> destination name for a file present in
    one context but not the other that must be moved by the merge process
    because the other context moved the directory containing it.

    "renamedelete" maps source name -> list of destination names for files
    deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" maps a detected source dir -> destination dir rename. This is
    needed for handling changes to new files previously grafted into renamed
    directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # every attribute defaults to a fresh empty mapping
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        fmt = (
            '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n'
            ' movewithdir=%r\n>'
        )
        return fmt % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
644
639 645
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track
    all the copies.

    Returns (branch_copies1, branch_copies2, diverge); see mergecopies().
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # copies1/copies2: {dst: src} for each side relative to the merge base
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    # invert to {src: [dsts]} so 1:n copies of the same source are grouped
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    if repo.ui.debugflag:
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b"   on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
763 769
764 770
def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)

    Returns (dirmove, movewithdir): {src dir/: dst dir/} and {file: moved
    destination} mappings.
    """
    # generate a directory move map
    d = ctx.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d and ddst in d:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d, invalid

    if not dirmove:
        return {}, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return dirmove, movewithdir
828 834
829 835
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very
    large in number and that will make the algorithm slow. The number of
    possible candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working copies have no revision of their own: use the first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies that are still valid on the c1 side (source present in m1)
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        # index c1's new files by basename and by directory, so each missing
        # file can be matched against both heuristics cheaply
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
937 943
938 944
939 945 def _related(f1, f2):
940 946 """return True if f1 and f2 filectx have a common ancestor
941 947
942 948 Walk back to common ancestor to see if the two files originate
943 949 from the same file. Since workingfilectx's rev() is None it messes
944 950 up the integer comparison logic, hence the pre-step check for
945 951 None (f1 and f2 can only be workingfilectx's initially).
946 952 """
947 953
948 954 if f1 == f2:
949 955 return True # a match
950 956
951 957 g1, g2 = f1.ancestors(), f2.ancestors()
952 958 try:
953 959 f1r, f2r = f1.linkrev(), f2.linkrev()
954 960
955 961 if f1r is None:
956 962 f1 = next(g1)
957 963 if f2r is None:
958 964 f2 = next(g2)
959 965
960 966 while True:
961 967 f1r, f2r = f1.linkrev(), f2.linkrev()
962 968 if f1r > f2r:
963 969 f1 = next(g1)
964 970 elif f2r > f1r:
965 971 f2 = next(g2)
966 972 else: # f1 and f2 point to files in the same linkrev
967 973 return f1 == f2 # true if they point to the same file
968 974 except StopIteration:
969 975 return False
970 976
971 977
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between
    base and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy
    (instead of just returning information about the copies). That makes it
    cheaper (especially in the common case of base==ctx.p1()) and useful
    also when experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't
    modified on the local side. This function adds the copies that were
    "missed" by merge.update().
    """
    # collect base->ctx copies, then drop the ones that are invalid once
    # chained onto the working copy's first parent
    pending = pathcopies(base, ctx)
    _filter(wctx.p1(), wctx, pending)
    # record each surviving copy directly in the working context
    for destination, source in pycompat.iteritems(pending):
        wctx[destination].markcopied(source)
General Comments 0
You need to be logged in to leave comments. Login now