copies: directly pass a changes object to the copy tracing code...
marmoute
r46217:2693659c default
@@ -1,1117 +1,1085
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 13 from .i18n import _
14 14
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 pycompat,
21 21 util,
22 22 )
23 23
24 24
25 25 from .utils import stringutil
26 26
27 27
28 28 def _filter(src, dst, t):
29 29 """filters out invalid copies after chaining"""
30 30
31 31 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
32 32 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
33 33 # in the following table (not including trivial cases). For example, case 2
34 34 # is where a file existed in 'src' and remained under that name in 'mid' and
35 35 # then was renamed between 'mid' and 'dst'.
36 36 #
37 37 # case src mid dst result
38 38 # 1 x y - -
39 39 # 2 x y y x->y
40 40 # 3 x y x -
41 41 # 4 x y z x->z
42 42 # 5 - x y -
43 43 # 6 x x y x->y
44 44 #
45 45 # _chain() takes care of chaining the copies in 'a' and 'b', but it
46 46 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
47 47 # between 5 and 6, so it includes all cases in its result.
48 48 # Cases 1, 3, and 5 are then removed by _filter().
49 49
50 50 for k, v in list(t.items()):
51 51 # remove copies from files that didn't exist
52 52 if v not in src:
53 53 del t[k]
54 54 # remove criss-crossed copies
55 55 elif k in src and v in dst:
56 56 del t[k]
57 57 # remove copies to files that were then removed
58 58 elif k not in dst:
59 59 del t[k]
60 60
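As an illustration of the table above, a minimal sketch where the contexts are stood in by plain sets of file names (hypothetical data; the real callers pass changectx objects, which support the same `in` tests):

    src = {b'x'}                      # file 'x' exists in src
    dst = {b'y'}                      # only the rename target exists in dst
    t = {b'y': b'x'}                  # chained copy: y renamed from x
    _filter(src, dst, t)
    assert t == {b'y': b'x'}          # cases 2/4: kept
    t = {b'y': b'x'}
    _filter({b'x', b'y'}, {b'x', b'y'}, t)
    assert t == {}                    # criss-cross: x in src and y in dst, dropped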
61 61
62 62 def _chain(prefix, suffix):
63 63 """chain two sets of copies 'prefix' and 'suffix'"""
64 64 result = prefix.copy()
65 65 for key, value in pycompat.iteritems(suffix):
66 66 result[key] = prefix.get(value, value)
67 67 return result
68 68
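For example, chaining a copy a->b (from 'src' to 'mid') with b->c (from 'mid' to 'dst') collapses to a->c; the stale intermediate entry is left for _filter() to prune later (hypothetical names):

    prefix = {b'b': b'a'}             # dst -> src, from 'src' to 'mid'
    suffix = {b'c': b'b'}             # dst -> src, from 'mid' to 'dst'
    assert _chain(prefix, suffix) == {b'b': b'a', b'c': b'a'}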
69 69
70 70 def _tracefile(fctx, am, basemf):
71 71 """return file context that is the ancestor of fctx present in ancestor
72 72 manifest am
73 73
74 74 Note: we used to try to stop after a given limit; however, checking if that
75 75 limit is reached turned out to be very expensive. We are better off
76 76 disabling that feature."""
77 77
78 78 for f in fctx.ancestors():
79 79 path = f.path()
80 80 if am.get(path, None) == f.filenode():
81 81 return path
82 82 if basemf and basemf.get(path, None) == f.filenode():
83 83 return path
84 84
85 85
86 86 def _dirstatecopies(repo, match=None):
87 87 ds = repo.dirstate
88 88 c = ds.copies().copy()
89 89 for k in list(c):
90 90 if ds[k] not in b'anm' or (match and not match(k)):
91 91 del c[k]
92 92 return c
93 93
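Only copies whose destination is in dirstate state added ('a'), normal ('n'), or merged ('m') survive the filter above; a self-contained sketch of that test with hypothetical data:

    copies = {b'dst': b'src'}
    state = {b'dst': b'r'}            # destination was removed ('r')
    kept = {k: v for k, v in copies.items() if state[k] in b'anm'}
    assert kept == {}                 # the copy record is dropped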
94 94
95 95 def _computeforwardmissing(a, b, match=None):
96 96 """Computes which files are in b but not a.
97 97 This is its own function so extensions can easily wrap this call to see what
98 98 files _forwardcopies is about to process.
99 99 """
100 100 ma = a.manifest()
101 101 mb = b.manifest()
102 102 return mb.filesnotin(ma, match=match)
103 103
104 104
105 105 def usechangesetcentricalgo(repo):
106 106 """Checks if we should use changeset-centric copy algorithms"""
107 107 if repo.filecopiesmode == b'changeset-sidedata':
108 108 return True
109 109 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
110 110 changesetsource = (b'changeset-only', b'compatibility')
111 111 return readfrom in changesetsource
112 112
113 113
114 114 def _committedforwardcopies(a, b, base, match):
115 115 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
116 116 # files might have to be traced back to the fctx parent of the last
117 117 # one-side-only changeset, but not further back than that
118 118 repo = a._repo
119 119
120 120 if usechangesetcentricalgo(repo):
121 121 return _changesetforwardcopies(a, b, match)
122 122
123 123 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
124 124 dbg = repo.ui.debug
125 125 if debug:
126 126 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
127 127 am = a.manifest()
128 128 basemf = None if base is None else base.manifest()
129 129
130 130 # find where new files came from
131 131 # we currently don't try to find where old files went, too expensive
132 132 # this means we can miss a case like 'hg rm b; hg cp a b'
133 133 cm = {}
134 134
135 135 # Computing the forward missing is quite expensive on large manifests, since
136 136 # it compares the entire manifests. We can optimize it in the common use
137 137 # case of computing what copies are in a commit versus its parent (like
138 138 # during a rebase or histedit). Note, we exclude merge commits from this
139 139 # optimization, since the ctx.files() for a merge commit is not correct for
140 140 # this comparison.
141 141 forwardmissingmatch = match
142 142 if b.p1() == a and b.p2().node() == node.nullid:
143 143 filesmatcher = matchmod.exact(b.files())
144 144 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
145 145 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
146 146
147 147 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
148 148
149 149 if debug:
150 150 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
151 151
152 152 for f in sorted(missing):
153 153 if debug:
154 154 dbg(b'debug.copies: tracing file: %s\n' % f)
155 155 fctx = b[f]
156 156 fctx._ancestrycontext = ancestrycontext
157 157
158 158 if debug:
159 159 start = util.timer()
160 160 opath = _tracefile(fctx, am, basemf)
161 161 if opath:
162 162 if debug:
163 163 dbg(b'debug.copies: rename of: %s\n' % opath)
164 164 cm[f] = opath
165 165 if debug:
166 166 dbg(
167 167 b'debug.copies: time: %f seconds\n'
168 168 % (util.timer() - start)
169 169 )
170 170 return cm
171 171
172 172
173 173 def _revinfo_getter(repo):
174 """return a function that return multiple data given a <rev>"i
174 """returns a function that returns the following data given a <rev>"
175 175
176 176 * p1: revision number of first parent
177 177 * p2: revision number of second parent
178 * p1copies: mapping of copies from p1
179 * p2copies: mapping of copies from p2
180 * removed: a list of removed files
181 * ismerged: a callback to know if file was merged in that revision
178 * changes: a ChangingFiles object
182 179 """
183 180 cl = repo.changelog
184 181 parents = cl.parentrevs
185 182
186 def get_ismerged(rev):
187 ctx = repo[rev]
188
189 def ismerged(path):
190 if path not in ctx.files():
191 return False
192 fctx = ctx[path]
193 parents = fctx._filelog.parents(fctx._filenode)
194 nb_parents = 0
195 for n in parents:
196 if n != node.nullid:
197 nb_parents += 1
198 return nb_parents >= 2
199
200 return ismerged
201
202 183 changelogrevision = cl.changelogrevision
203 184
204 185 # A small cache to avoid doing the work twice for merges
205 186 #
206 187 # In the vast majority of cases, if we ask for information about a revision
207 188 # for one parent, we'll later ask for it for the other. So it makes sense to
208 189 # keep the information around when reaching the first parent of a merge
209 190 # and to drop it once it has been provided for the second parent.
210 191 #
211 192 # There exist cases where only one parent of the merge will be walked. This
212 193 # happens when the "destination" of the copy tracing is a descendant of a
213 194 # new root not common with the "source". In that case, we will only walk
214 195 # through merge parents that are descendants of changesets common
215 196 # between "source" and "destination".
216 197 #
217 198 # With the current implementation, if such changesets have copy
218 199 # information, we'll keep it in memory until the end of
219 200 # _changesetforwardcopies. We don't expect this case to be frequent
220 201 # enough to matter.
221 202 #
222 203 # In addition, it is possible to reach a pathological case where
223 204 # many first parents are met before any second parent is reached. In
224 205 # that case the cache could grow. If this ever becomes an issue, one can
225 206 # safely introduce a maximum cache size. This would trade extra CPU/IO
226 207 # time for memory savings.
227 208 merge_caches = {}
228 209
229 210 def revinfo(rev):
230 211 p1, p2 = parents(rev)
231 212 value = None
232 213 e = merge_caches.pop(rev, None)
233 214 if e is not None:
234 215 return e
235 c = changelogrevision(rev)
236 p1copies = c.p1copies
237 p2copies = c.p2copies
238 removed = c.filesremoved
216 value = (p1, p2, changelogrevision(rev).changes)
239 217 if p1 != node.nullrev and p2 != node.nullrev:
240 218 # XXX in some cases we over-cache; ignored for now
241 value = merge_caches[rev] = (
242 p1,
243 p2,
244 p1copies,
245 p2copies,
246 removed,
247 get_ismerged(rev),
248 )
249
250 if value is None:
251 value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))
219 merge_caches[rev] = value
252 220 return value
253 221
254 222 return revinfo
255 223
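The cache behaviour in practice: for a merge child, the first lookup computes and stores the tuple and the second lookup pops it, so each merge is read from the changelog once. A sketch, assuming `repo` is a hypothetical repository with sidedata copy storage whose revision 5 is a merge:

    revinfo = _revinfo_getter(repo)
    first = revinfo(5)                # both parents real: result cached
    assert revinfo(5) is first        # second parent's walk pops the cache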
256 224
257 225 def _changesetforwardcopies(a, b, match):
258 226 if a.rev() in (node.nullrev, b.rev()):
259 227 return {}
260 228
261 229 repo = a.repo().unfiltered()
262 230 children = {}
263 231
264 232 cl = repo.changelog
265 233 isancestor = cl.isancestorrev # XXX we should add caching to this.
266 234 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
267 235 mrset = set(missingrevs)
268 236 roots = set()
269 237 for r in missingrevs:
270 238 for p in cl.parentrevs(r):
271 239 if p == node.nullrev:
272 240 continue
273 241 if p not in children:
274 242 children[p] = [r]
275 243 else:
276 244 children[p].append(r)
277 245 if p not in mrset:
278 246 roots.add(p)
279 247 if not roots:
280 248 # no common revision to track copies from
281 249 return {}
282 250 min_root = min(roots)
283 251
284 252 from_head = set(
285 253 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
286 254 )
287 255
288 256 iterrevs = set(from_head)
289 257 iterrevs &= mrset
290 258 iterrevs.update(roots)
291 259 iterrevs.remove(b.rev())
292 260 revs = sorted(iterrevs)
293 261
294 262 if repo.filecopiesmode == b'changeset-sidedata':
295 263 revinfo = _revinfo_getter(repo)
296 264 return _combine_changeset_copies(
297 265 revs, children, b.rev(), revinfo, match, isancestor
298 266 )
299 267 else:
300 268 revinfo = _revinfo_getter_extra(repo)
301 269 return _combine_changeset_copies_extra(
302 270 revs, children, b.rev(), revinfo, match, isancestor
303 271 )
304 272
305 273
306 274 def _combine_changeset_copies(
307 275 revs, children, targetrev, revinfo, match, isancestor
308 276 ):
309 277 """combine the copies information for each item of iterrevs
310 278
311 279 revs: sorted iterable of revision to visit
312 280 children: a {parent: [children]} mapping.
313 281 targetrev: the final copies destination revision (not in iterrevs)
314 282 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
315 283 match: a matcher
316 284
317 285 It returns the aggregated copies information for `targetrev`.
318 286 """
319 287 all_copies = {}
320 288 alwaysmatch = match.always()
321 289 for r in revs:
322 290 copies = all_copies.pop(r, None)
323 291 if copies is None:
324 292 # this is a root
325 293 copies = {}
326 294 for i, c in enumerate(children[r]):
327 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
295 p1, p2, changes = revinfo(c)
328 296 if r == p1:
329 297 parent = 1
330 childcopies = p1copies
298 childcopies = changes.copied_from_p1
331 299 else:
332 300 assert r == p2
333 301 parent = 2
334 childcopies = p2copies
302 childcopies = changes.copied_from_p2
335 303 if not alwaysmatch:
336 304 childcopies = {
337 305 dst: src for dst, src in childcopies.items() if match(dst)
338 306 }
339 307 newcopies = copies
340 308 if childcopies:
341 309 newcopies = copies.copy()
342 310 for dest, source in pycompat.iteritems(childcopies):
343 311 prev = copies.get(source)
344 312 if prev is not None and prev[1] is not None:
345 313 source = prev[1]
346 314 newcopies[dest] = (c, source)
347 315 assert newcopies is not copies
348 for f in removed:
316 for f in changes.removed:
349 317 if f in newcopies:
350 318 if newcopies is copies:
351 319 # copy on write to avoid affecting potential other
352 320 # branches. when there are no other branches, this
353 321 # could be avoided.
354 322 newcopies = copies.copy()
355 323 newcopies[f] = (c, None)
356 324 othercopies = all_copies.get(c)
357 325 if othercopies is None:
358 326 all_copies[c] = newcopies
359 327 else:
360 328 # we are the second parent to work on c, we need to merge our
361 329 # work with the other.
362 330 #
363 331 # In case of conflict, parent 1 takes precedence over parent 2.
364 332 # This is an arbitrary choice made anew when implementing
365 333 # changeset-based copies. It was made without regard to
366 334 # potential filelog-related behavior.
367 335 if parent == 1:
368 336 _merge_copies_dict(
369 othercopies, newcopies, isancestor, ismerged
337 othercopies, newcopies, isancestor, changes
370 338 )
371 339 else:
372 340 _merge_copies_dict(
373 newcopies, othercopies, isancestor, ismerged
341 newcopies, othercopies, isancestor, changes
374 342 )
375 343 all_copies[c] = newcopies
376 344
377 345 final_copies = {}
378 346 for dest, (tt, source) in all_copies[targetrev].items():
379 347 if source is not None:
380 348 final_copies[dest] = source
381 349 return final_copies
382 350
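The values propagated above are `(rev, source)` pairs, where a None source records a deletion that cancels the copy; only non-None sources reach the final mapping. A self-contained sketch of that last step (`aggregated` is hypothetical data):

    aggregated = {b'b': (40, b'a'), b'c': (41, None)}   # dest -> (rev, source)
    final = {d: s for d, (tt, s) in aggregated.items() if s is not None}
    assert final == {b'b': b'a'}                        # b'c' was deleted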
383 351
384 def _merge_copies_dict(minor, major, isancestor, ismerged):
352 def _merge_copies_dict(minor, major, isancestor, changes):
385 353 """merge two copies-mapping together, minor and major
386 354
387 355 In case of conflict, value from "major" will be picked.
388 356
389 357 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
390 358 ancestors of `high_rev`,
391 359
392 360 - `ismerged(path)`: callable return True if `path` have been merged in the
393 361 current revision,
394 362 """
395 363 for dest, value in major.items():
396 364 other = minor.get(dest)
397 365 if other is None:
398 366 minor[dest] = value
399 367 else:
400 368 new_tt = value[0]
401 369 other_tt = other[0]
402 370 if value[1] == other[1]:
403 371 continue
404 372 # content from "major" wins, unless it is older
405 373 # than the branch point or there is a merge
406 374 if (
407 375 new_tt == other_tt
408 376 or not isancestor(new_tt, other_tt)
409 or ismerged(dest)
377 or dest in changes.merged
410 378 ):
411 379 minor[dest] = value
412 380
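"major" wins whenever its entry is not older than the branch point; a runnable sketch with hypothetical revision numbers and a minimal stand-in for the ChangingFiles object:

    class FakeChanges(object):        # stand-in: only .merged is consulted here
        merged = frozenset()

    minor = {b'f': (10, b'a')}
    major = {b'f': (12, b'b')}
    isancestor = lambda low, high: False    # 12 is not an ancestor of 10
    _merge_copies_dict(minor, major, isancestor, FakeChanges())
    assert minor == {b'f': (12, b'b')}      # content from "major" won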
413 381
414 382 def _revinfo_getter_extra(repo):
415 383 """return a function that return multiple data given a <rev>"i
416 384
417 385 * p1: revision number of first parent
418 386 * p2: revision number of first parent
419 387 * p1copies: mapping of copies from p1
420 388 * p2copies: mapping of copies from p2
421 389 * removed: a list of removed files
422 390 * ismerged: a callback to know if file was merged in that revision
423 391 """
424 392 cl = repo.changelog
425 393 parents = cl.parentrevs
426 394
427 395 def get_ismerged(rev):
428 396 ctx = repo[rev]
429 397
430 398 def ismerged(path):
431 399 if path not in ctx.files():
432 400 return False
433 401 fctx = ctx[path]
434 402 parents = fctx._filelog.parents(fctx._filenode)
435 403 nb_parents = 0
436 404 for n in parents:
437 405 if n != node.nullid:
438 406 nb_parents += 1
439 407 return nb_parents >= 2
440 408
441 409 return ismerged
442 410
443 411 def revinfo(rev):
444 412 p1, p2 = parents(rev)
445 413 ctx = repo[rev]
446 414 p1copies, p2copies = ctx._copies
447 415 removed = ctx.filesremoved()
448 416 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
449 417
450 418 return revinfo
451 419
452 420
453 421 def _combine_changeset_copies_extra(
454 422 revs, children, targetrev, revinfo, match, isancestor
455 423 ):
456 424 """version of `_combine_changeset_copies` that works with the Google
457 425 specific "extra" based storage for copy information"""
458 426 all_copies = {}
459 427 alwaysmatch = match.always()
460 428 for r in revs:
461 429 copies = all_copies.pop(r, None)
462 430 if copies is None:
463 431 # this is a root
464 432 copies = {}
465 433 for i, c in enumerate(children[r]):
466 434 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
467 435 if r == p1:
468 436 parent = 1
469 437 childcopies = p1copies
470 438 else:
471 439 assert r == p2
472 440 parent = 2
473 441 childcopies = p2copies
474 442 if not alwaysmatch:
475 443 childcopies = {
476 444 dst: src for dst, src in childcopies.items() if match(dst)
477 445 }
478 446 newcopies = copies
479 447 if childcopies:
480 448 newcopies = copies.copy()
481 449 for dest, source in pycompat.iteritems(childcopies):
482 450 prev = copies.get(source)
483 451 if prev is not None and prev[1] is not None:
484 452 source = prev[1]
485 453 newcopies[dest] = (c, source)
486 454 assert newcopies is not copies
487 455 for f in removed:
488 456 if f in newcopies:
489 457 if newcopies is copies:
490 458 # copy on write to avoid affecting potential other
491 459 # branches. when there are no other branches, this
492 460 # could be avoided.
493 461 newcopies = copies.copy()
494 462 newcopies[f] = (c, None)
495 463 othercopies = all_copies.get(c)
496 464 if othercopies is None:
497 465 all_copies[c] = newcopies
498 466 else:
499 467 # we are the second parent to work on c, we need to merge our
500 468 # work with the other.
501 469 #
502 470 # In case of conflict, parent 1 takes precedence over parent 2.
503 471 # This is an arbitrary choice made anew when implementing
504 472 # changeset-based copies. It was made without regard to
505 473 # potential filelog-related behavior.
506 474 if parent == 1:
507 475 _merge_copies_dict_extra(
508 476 othercopies, newcopies, isancestor, ismerged
509 477 )
510 478 else:
511 479 _merge_copies_dict_extra(
512 480 newcopies, othercopies, isancestor, ismerged
513 481 )
514 482 all_copies[c] = newcopies
515 483
516 484 final_copies = {}
517 485 for dest, (tt, source) in all_copies[targetrev].items():
518 486 if source is not None:
519 487 final_copies[dest] = source
520 488 return final_copies
521 489
522 490
523 491 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
524 492 """version of `_merge_copies_dict` that works with the Google
525 493 specific "extra" based storage for copy information"""
526 494 for dest, value in major.items():
527 495 other = minor.get(dest)
528 496 if other is None:
529 497 minor[dest] = value
530 498 else:
531 499 new_tt = value[0]
532 500 other_tt = other[0]
533 501 if value[1] == other[1]:
534 502 continue
535 503 # content from "major" wins, unless it is older
536 504 # than the branch point or there is a merge
537 505 if (
538 506 new_tt == other_tt
539 507 or not isancestor(new_tt, other_tt)
540 508 or ismerged(dest)
541 509 ):
542 510 minor[dest] = value
543 511
544 512
545 513 def _forwardcopies(a, b, base=None, match=None):
546 514 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
547 515
548 516 if base is None:
549 517 base = a
550 518 match = a.repo().narrowmatch(match)
551 519 # check for working copy
552 520 if b.rev() is None:
553 521 cm = _committedforwardcopies(a, b.p1(), base, match)
554 522 # combine copies from dirstate if necessary
555 523 copies = _chain(cm, _dirstatecopies(b._repo, match))
556 524 else:
557 525 copies = _committedforwardcopies(a, b, base, match)
558 526 return copies
559 527
560 528
561 529 def _backwardrenames(a, b, match):
562 530 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
563 531 return {}
564 532
565 533 # Even though we're not taking copies into account, 1:n rename situations
566 534 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
567 535 # arbitrarily pick one of the renames.
568 536 # We don't want to pass in "match" here, since that would filter
569 537 # the destination by it. Since we're reversing the copies, we want
570 538 # to filter the source instead.
571 539 f = _forwardcopies(b, a)
572 540 r = {}
573 541 for k, v in sorted(pycompat.iteritems(f)):
574 542 if match and not match(v):
575 543 continue
576 544 # remove copies
577 545 if v in a:
578 546 continue
579 547 r[v] = k
580 548 return r
581 549
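The arbitrary pick mentioned above falls out of dict assignment order; a sketch of the reversal loop with hypothetical names, where 'a' was copied to both 'b' and 'c':

    f = {b'b': b'a', b'c': b'a'}      # forward copies, dst -> src
    r = {}
    for k, v in sorted(f.items()):
        r[v] = k                      # later pairs overwrite earlier ones
    assert r == {b'a': b'c'}          # one of the two renames is kept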
582 550
583 551 def pathcopies(x, y, match=None):
584 552 """find {dst@y: src@x} copy mapping for directed compare"""
585 553 repo = x._repo
586 554 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
587 555 if debug:
588 556 repo.ui.debug(
589 557 b'debug.copies: searching copies from %s to %s\n' % (x, y)
590 558 )
591 559 if x == y or not x or not y:
592 560 return {}
593 561 if y.rev() is None and x == y.p1():
594 562 if debug:
595 563 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
596 564 # short-circuit to avoid issues with merge states
597 565 return _dirstatecopies(repo, match)
598 566 a = y.ancestor(x)
599 567 if a == x:
600 568 if debug:
601 569 repo.ui.debug(b'debug.copies: search mode: forward\n')
602 570 copies = _forwardcopies(x, y, match=match)
603 571 elif a == y:
604 572 if debug:
605 573 repo.ui.debug(b'debug.copies: search mode: backward\n')
606 574 copies = _backwardrenames(x, y, match=match)
607 575 else:
608 576 if debug:
609 577 repo.ui.debug(b'debug.copies: search mode: combined\n')
610 578 base = None
611 579 if a.rev() != node.nullrev:
612 580 base = x
613 581 copies = _chain(
614 582 _backwardrenames(x, a, match=match),
615 583 _forwardcopies(a, y, base, match=match),
616 584 )
617 585 _filter(x, y, copies)
618 586 return copies
619 587
620 588
621 589 def mergecopies(repo, c1, c2, base):
622 590 """
623 591 Finds moves and copies between context c1 and c2 that are relevant for
624 592 merging. 'base' will be used as the merge base.
625 593
626 594 Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
627 595 files that were moved/copied in one merge parent and modified in another.
628 596 For example:
629 597
630 598 o ---> 4 another commit
631 599 |
632 600 | o ---> 3 commit that modifies a.txt
633 601 | /
634 602 o / ---> 2 commit that moves a.txt to b.txt
635 603 |/
636 604 o ---> 1 merge base
637 605
638 606 If we try to rebase revision 3 onto revision 4, since there is no a.txt in
639 607 revision 4, and the user has copytrace disabled, we print the following
640 608 message:
641 609
642 610 ```other changed <file> which local deleted```
643 611
644 612 Returns a tuple where:
645 613
646 614 "branch_copies" an instance of branch_copies.
647 615
648 616 "diverge" is a mapping of source name -> list of destination names
649 617 for divergent renames.
650 618
651 619 This function calls different copytracing algorithms based on config.
652 620 """
653 621 # avoid silly behavior for update from empty dir
654 622 if not c1 or not c2 or c1 == c2:
655 623 return branch_copies(), branch_copies(), {}
656 624
657 625 narrowmatch = c1.repo().narrowmatch()
658 626
659 627 # avoid silly behavior for parent -> working dir
660 628 if c2.node() is None and c1.node() == repo.dirstate.p1():
661 629 return (
662 630 branch_copies(_dirstatecopies(repo, narrowmatch)),
663 631 branch_copies(),
664 632 {},
665 633 )
666 634
667 635 copytracing = repo.ui.config(b'experimental', b'copytrace')
668 636 if stringutil.parsebool(copytracing) is False:
669 637 # stringutil.parsebool() returns None when it is unable to parse the
670 638 # value, so we keep copytracing enabled in such cases
671 639 return branch_copies(), branch_copies(), {}
672 640
673 641 if usechangesetcentricalgo(repo):
674 642 # The heuristics don't make sense when we need changeset-centric algos
675 643 return _fullcopytracing(repo, c1, c2, base)
676 644
677 645 # Copy trace disabling is explicitly below the node == p1 logic above
678 646 # because the logic above is required for a simple copy to be kept across a
679 647 # rebase.
680 648 if copytracing == b'heuristics':
681 649 # Do full copytracing if only non-public revisions are involved as
682 650 # that will be fast enough and will also cover the copies which could
683 651 # be missed by heuristics
684 652 if _isfullcopytraceable(repo, c1, base):
685 653 return _fullcopytracing(repo, c1, c2, base)
686 654 return _heuristicscopytracing(repo, c1, c2, base)
687 655 else:
688 656 return _fullcopytracing(repo, c1, c2, base)
689 657
690 658
691 659 def _isfullcopytraceable(repo, c1, base):
692 660 """ Checks that if base, source and destination are all no-public branches,
693 661 if yes let's use the full copytrace algorithm for increased capabilities
694 662 since it will be fast enough.
695 663
696 664 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
697 665 number of changesets from c1 to base such that if number of changesets are
698 666 more than the limit, full copytracing algorithm won't be used.
699 667 """
700 668 if c1.rev() is None:
701 669 c1 = c1.p1()
702 670 if c1.mutable() and base.mutable():
703 671 sourcecommitlimit = repo.ui.configint(
704 672 b'experimental', b'copytrace.sourcecommitlimit'
705 673 )
706 674 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
707 675 return commits < sourcecommitlimit
708 676 return False
709 677
710 678
711 679 def _checksinglesidecopies(
712 680 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
713 681 ):
714 682 if src not in m2:
715 683 # deleted on side 2
716 684 if src not in m1:
717 685 # renamed on side 1, deleted on side 2
718 686 renamedelete[src] = dsts1
719 687 elif src not in mb:
720 688 # Work around the "short-circuit to avoid issues with merge states"
721 689 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
722 690 # destination doesn't exist in y.
723 691 pass
724 692 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
725 693 return
726 694 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
727 695 # modified on side 2
728 696 for dst in dsts1:
729 697 copy[dst] = src
730 698
731 699
732 700 class branch_copies(object):
733 701 """Information about copies made on one side of a merge/graft.
734 702
735 703 "copy" is a mapping from destination name -> source name,
736 704 where source is in c1 and destination is in c2 or vice-versa.
737 705
738 706 "movewithdir" is a mapping from source name -> destination name,
739 707 where the file at source, present in one context but not the other,
740 708 needs to be moved to destination by the merge process, because the
741 709 other context moved the directory it is in.
742 710
743 711 "renamedelete" is a mapping of source name -> list of destination
744 712 names for files deleted in c1 that were renamed in c2 or vice-versa.
745 713
746 714 "dirmove" is a mapping of detected source dir -> destination dir renames.
747 715 This is needed for handling changes to new files previously grafted into
748 716 renamed directories.
749 717 """
750 718
751 719 def __init__(
752 720 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
753 721 ):
754 722 self.copy = {} if copy is None else copy
755 723 self.renamedelete = {} if renamedelete is None else renamedelete
756 724 self.dirmove = {} if dirmove is None else dirmove
757 725 self.movewithdir = {} if movewithdir is None else movewithdir
758 726
759 727 def __repr__(self):
760 728 return (
761 729 '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
762 730 % (self.copy, self.renamedelete, self.dirmove, self.movewithdir,)
763 731 )
764 732
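A minimal usage sketch (hypothetical mapping); any field left unset defaults to an empty dict:

    bc = branch_copies(copy={b'new.txt': b'old.txt'})
    assert bc.copy == {b'new.txt': b'old.txt'}
    assert bc.renamedelete == {} and bc.dirmove == {} and bc.movewithdir == {}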
765 733
766 734 def _fullcopytracing(repo, c1, c2, base):
767 735 """ The full copytracing algorithm which finds all the new files that were
768 736 added from merge base up to the top commit and for each file it checks if
769 737 this file was copied from another file.
770 738
771 739 This is pretty slow when a lot of changesets are involved but will track all
772 740 the copies.
773 741 """
774 742 m1 = c1.manifest()
775 743 m2 = c2.manifest()
776 744 mb = base.manifest()
777 745
778 746 copies1 = pathcopies(base, c1)
779 747 copies2 = pathcopies(base, c2)
780 748
781 749 if not (copies1 or copies2):
782 750 return branch_copies(), branch_copies(), {}
783 751
784 752 inversecopies1 = {}
785 753 inversecopies2 = {}
786 754 for dst, src in copies1.items():
787 755 inversecopies1.setdefault(src, []).append(dst)
788 756 for dst, src in copies2.items():
789 757 inversecopies2.setdefault(src, []).append(dst)
790 758
791 759 copy1 = {}
792 760 copy2 = {}
793 761 diverge = {}
794 762 renamedelete1 = {}
795 763 renamedelete2 = {}
796 764 allsources = set(inversecopies1) | set(inversecopies2)
797 765 for src in allsources:
798 766 dsts1 = inversecopies1.get(src)
799 767 dsts2 = inversecopies2.get(src)
800 768 if dsts1 and dsts2:
801 769 # copied/renamed on both sides
802 770 if src not in m1 and src not in m2:
803 771 # renamed on both sides
804 772 dsts1 = set(dsts1)
805 773 dsts2 = set(dsts2)
806 774 # If there's some overlap in the rename destinations, we
807 775 # consider it not divergent. For example, if side 1 copies 'a'
808 776 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
809 777 # and 'd' and deletes 'a'.
810 778 if dsts1 & dsts2:
811 779 for dst in dsts1 & dsts2:
812 780 copy1[dst] = src
813 781 copy2[dst] = src
814 782 else:
815 783 diverge[src] = sorted(dsts1 | dsts2)
816 784 elif src in m1 and src in m2:
817 785 # copied on both sides
818 786 dsts1 = set(dsts1)
819 787 dsts2 = set(dsts2)
820 788 for dst in dsts1 & dsts2:
821 789 copy1[dst] = src
822 790 copy2[dst] = src
823 791 # TODO: Handle cases where it was renamed on one side and copied
824 792 # on the other side
825 793 elif dsts1:
826 794 # copied/renamed only on side 1
827 795 _checksinglesidecopies(
828 796 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
829 797 )
830 798 elif dsts2:
831 799 # copied/renamed only on side 2
832 800 _checksinglesidecopies(
833 801 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
834 802 )
835 803
836 804 # find interesting file sets from manifests
837 805 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
838 806 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
839 807 u1 = sorted(addedinm1 - addedinm2)
840 808 u2 = sorted(addedinm2 - addedinm1)
841 809
842 810 header = b" unmatched files in %s"
843 811 if u1:
844 812 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
845 813 if u2:
846 814 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
847 815
848 816 if repo.ui.debugflag:
849 817 renamedeleteset = set()
850 818 divergeset = set()
851 819 for dsts in diverge.values():
852 820 divergeset.update(dsts)
853 821 for dsts in renamedelete1.values():
854 822 renamedeleteset.update(dsts)
855 823 for dsts in renamedelete2.values():
856 824 renamedeleteset.update(dsts)
857 825
858 826 repo.ui.debug(
859 827 b" all copies found (* = to merge, ! = divergent, "
860 828 b"% = renamed and deleted):\n"
861 829 )
862 830 for side, copies in ((b"local", copies1), (b"remote", copies2)):
863 831 if not copies:
864 832 continue
865 833 repo.ui.debug(b" on %s side:\n" % side)
866 834 for f in sorted(copies):
867 835 note = b""
868 836 if f in copy1 or f in copy2:
869 837 note += b"*"
870 838 if f in divergeset:
871 839 note += b"!"
872 840 if f in renamedeleteset:
873 841 note += b"%"
874 842 repo.ui.debug(
875 843 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
876 844 )
877 845 del renamedeleteset
878 846 del divergeset
879 847
880 848 repo.ui.debug(b" checking for directory renames\n")
881 849
882 850 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
883 851 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
884 852
885 853 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
886 854 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
887 855
888 856 return branch_copies1, branch_copies2, diverge
889 857
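The divergence rule above in isolation: renames of one source to disjoint destination sets diverge, while any overlap is treated as a plain copy to the shared destinations. A sketch with hypothetical paths:

    dsts1, dsts2 = {b'b'}, {b'c'}           # 'a' renamed differently on each side
    assert not (dsts1 & dsts2)              # no overlap -> divergent
    diverge = {b'a': sorted(dsts1 | dsts2)}
    assert diverge == {b'a': [b'b', b'c']}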
890 858
891 859 def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
892 860 """Finds moved directories and files that should move with them.
893 861
894 862 ctx: the context for one of the sides
895 863 copy: files copied on the same side (as ctx)
896 864 fullcopy: files copied on the same side (as ctx), including those that
897 865 merge.manifestmerge() won't care about
898 866 addedfiles: added files on the other side (compared to ctx)
899 867 """
900 868 # generate a directory move map
901 869 d = ctx.dirs()
902 870 invalid = set()
903 871 dirmove = {}
904 872
905 873 # examine each file copy for a potential directory move, which is
906 874 # when all the files in a directory are moved to a new directory
907 875 for dst, src in pycompat.iteritems(fullcopy):
908 876 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
909 877 if dsrc in invalid:
910 878 # already seen to be uninteresting
911 879 continue
912 880 elif dsrc in d and ddst in d:
913 881 # directory wasn't entirely moved locally
914 882 invalid.add(dsrc)
915 883 elif dsrc in dirmove and dirmove[dsrc] != ddst:
916 884 # files from the same directory moved to two different places
917 885 invalid.add(dsrc)
918 886 else:
919 887 # looks good so far
920 888 dirmove[dsrc] = ddst
921 889
922 890 for i in invalid:
923 891 if i in dirmove:
924 892 del dirmove[i]
925 893 del d, invalid
926 894
927 895 if not dirmove:
928 896 return {}, {}
929 897
930 898 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
931 899
932 900 for d in dirmove:
933 901 repo.ui.debug(
934 902 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
935 903 )
936 904
937 905 movewithdir = {}
938 906 # check unaccounted nonoverlapping files against directory moves
939 907 for f in addedfiles:
940 908 if f not in fullcopy:
941 909 for d in dirmove:
942 910 if f.startswith(d):
943 911 # new file added in a directory that was moved, move it
944 912 df = dirmove[d] + f[len(d) :]
945 913 if df not in copy:
946 914 movewithdir[f] = df
947 915 repo.ui.debug(
948 916 b" pending file src: '%s' -> dst: '%s'\n"
949 917 % (f, df)
950 918 )
951 919 break
952 920
953 921 return dirmove, movewithdir
954 922
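The path arithmetic for files that follow a moved directory, extracted as a sketch (hypothetical paths):

    dirmove = {b'dir1/': b'dir2/'}          # every file left dir1/ for dir2/
    f = b'dir1/new.txt'                     # added on the other side
    d = b'dir1/'
    df = dirmove[d] + f[len(d):]            # remap into the moved directory
    assert df == b'dir2/new.txt'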
955 923
956 924 def _heuristicscopytracing(repo, c1, c2, base):
957 925 """ Fast copytracing using filename heuristics
958 926
959 927 Assumes that moves or renames are of the following two types:
960 928
961 929 1) Inside a directory only (same directory name but different filenames)
962 930 2) Move from one directory to another
963 931 (same filenames but different directory names)
964 932
965 933 Works only when there are no merge commits in the "source branch".
966 934 The source branch is the commits from base up to c2, not including base.
967 935
968 936 If a merge is involved it falls back to _fullcopytracing().
969 937
970 938 Can be used by setting the following config:
971 939
972 940 [experimental]
973 941 copytrace = heuristics
974 942
975 943 In some cases the copy/move candidates found by heuristics can be very
976 944 numerous, and that will make the algorithm slow. The number of possible
977 945 candidates to check can be limited by using the config
978 946 `experimental.copytrace.movecandidateslimit` which defaults to 100.
979 947 """
980 948
981 949 if c1.rev() is None:
982 950 c1 = c1.p1()
983 951 if c2.rev() is None:
984 952 c2 = c2.p1()
985 953
986 954 changedfiles = set()
987 955 m1 = c1.manifest()
988 956 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
989 957 # If base is not in c2 branch, we switch to fullcopytracing
990 958 repo.ui.debug(
991 959 b"switching to full copytracing as base is not "
992 960 b"an ancestor of c2\n"
993 961 )
994 962 return _fullcopytracing(repo, c1, c2, base)
995 963
996 964 ctx = c2
997 965 while ctx != base:
998 966 if len(ctx.parents()) == 2:
999 967 # To keep things simple let's not handle merges
1000 968 repo.ui.debug(b"switching to full copytracing because of merges\n")
1001 969 return _fullcopytracing(repo, c1, c2, base)
1002 970 changedfiles.update(ctx.files())
1003 971 ctx = ctx.p1()
1004 972
1005 973 copies2 = {}
1006 974 cp = _forwardcopies(base, c2)
1007 975 for dst, src in pycompat.iteritems(cp):
1008 976 if src in m1:
1009 977 copies2[dst] = src
1010 978
1011 979 # file is missing if it isn't present in the destination, but is present in
1012 980 # the base and present in the source.
1013 981 # Presence in the base is important to exclude added files, presence in the
1014 982 # source is important to exclude removed files.
1015 983 filt = lambda f: f not in m1 and f in base and f in c2
1016 984 missingfiles = [f for f in changedfiles if filt(f)]
1017 985
1018 986 copies1 = {}
1019 987 if missingfiles:
1020 988 basenametofilename = collections.defaultdict(list)
1021 989 dirnametofilename = collections.defaultdict(list)
1022 990
1023 991 for f in m1.filesnotin(base.manifest()):
1024 992 basename = os.path.basename(f)
1025 993 dirname = os.path.dirname(f)
1026 994 basenametofilename[basename].append(f)
1027 995 dirnametofilename[dirname].append(f)
1028 996
1029 997 for f in missingfiles:
1030 998 basename = os.path.basename(f)
1031 999 dirname = os.path.dirname(f)
1032 1000 samebasename = basenametofilename[basename]
1033 1001 samedirname = dirnametofilename[dirname]
1034 1002 movecandidates = samebasename + samedirname
1035 1003 # f is guaranteed to be present in c2, that's why
1036 1004 # c2.filectx(f) won't fail
1037 1005 f2 = c2.filectx(f)
1038 1006 # we can have a lot of candidates, which can slow down the heuristics;
1039 1007 # a config value limits the number of candidate moves to check
1040 1008 maxcandidates = repo.ui.configint(
1041 1009 b'experimental', b'copytrace.movecandidateslimit'
1042 1010 )
1043 1011
1044 1012 if len(movecandidates) > maxcandidates:
1045 1013 repo.ui.status(
1046 1014 _(
1047 1015 b"skipping copytracing for '%s', more "
1048 1016 b"candidates than the limit: %d\n"
1049 1017 )
1050 1018 % (f, len(movecandidates))
1051 1019 )
1052 1020 continue
1053 1021
1054 1022 for candidate in movecandidates:
1055 1023 f1 = c1.filectx(candidate)
1056 1024 if _related(f1, f2):
1057 1025 # if there are a few related copies then we'll merge
1058 1026 # changes into all of them. This matches the behaviour
1059 1027 # of upstream copytracing
1060 1028 copies1[candidate] = f
1061 1029
1062 1030 return branch_copies(copies1), branch_copies(copies2), {}
1063 1031
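The candidate collection in isolation: a missing file matches added files sharing its basename or its dirname. A runnable sketch with hypothetical paths:

    import collections
    import os

    basenametofilename = collections.defaultdict(list)
    dirnametofilename = collections.defaultdict(list)
    for f in [b'other/a.txt', b'dir/b.txt']:      # files added in c1
        basenametofilename[os.path.basename(f)].append(f)
        dirnametofilename[os.path.dirname(f)].append(f)

    missing = b'dir/a.txt'                        # present in base and c2 only
    candidates = (
        basenametofilename[os.path.basename(missing)]
        + dirnametofilename[os.path.dirname(missing)]
    )
    assert candidates == [b'other/a.txt', b'dir/b.txt']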
1064 1032
1065 1033 def _related(f1, f2):
1066 1034 """return True if f1 and f2 filectx have a common ancestor
1067 1035
1068 1036 Walk back to common ancestor to see if the two files originate
1069 1037 from the same file. Since workingfilectx's rev() is None it messes
1070 1038 up the integer comparison logic, hence the pre-step check for
1071 1039 None (f1 and f2 can only be workingfilectx's initially).
1072 1040 """
1073 1041
1074 1042 if f1 == f2:
1075 1043 return True # a match
1076 1044
1077 1045 g1, g2 = f1.ancestors(), f2.ancestors()
1078 1046 try:
1079 1047 f1r, f2r = f1.linkrev(), f2.linkrev()
1080 1048
1081 1049 if f1r is None:
1082 1050 f1 = next(g1)
1083 1051 if f2r is None:
1084 1052 f2 = next(g2)
1085 1053
1086 1054 while True:
1087 1055 f1r, f2r = f1.linkrev(), f2.linkrev()
1088 1056 if f1r > f2r:
1089 1057 f1 = next(g1)
1090 1058 elif f2r > f1r:
1091 1059 f2 = next(g2)
1092 1060 else: # f1 and f2 point to files in the same linkrev
1093 1061 return f1 == f2 # true if they point to the same file
1094 1062 except StopIteration:
1095 1063 return False
1096 1064
1097 1065
1098 1066 def graftcopies(wctx, ctx, base):
1099 1067 """reproduce copies between base and ctx in the wctx
1100 1068
1101 1069 Unlike mergecopies(), this function will only consider copies between base
1102 1070 and ctx; it will ignore copies between base and wctx. Also unlike
1103 1071 mergecopies(), this function will apply copies to the working copy (instead
1104 1072 of just returning information about the copies). That makes it cheaper
1105 1073 (especially in the common case of base==ctx.p1()) and useful also when
1106 1074 experimental.copytrace=off.
1107 1075
1108 1076 merge.update() will have already marked most copies, but it will only
1109 1077 mark copies if it thinks the source files are related (see
1110 1078 merge._related()). It will also not mark copies if the file wasn't modified
1111 1079 on the local side. This function adds the copies that were "missed"
1112 1080 by merge.update().
1113 1081 """
1114 1082 new_copies = pathcopies(base, ctx)
1115 1083 _filter(wctx.p1(), wctx, new_copies)
1116 1084 for dst, src in pycompat.iteritems(new_copies):
1117 1085 wctx[dst].markcopied(src)