##// END OF EJS Templates
copies: make two versions of the changeset centric algorithm...
marmoute -
r46214:ad6ebb6f default
parent child Browse files
Show More
@@ -1,997 +1,1095 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 13 from .i18n import _
14 14
15 15
16 16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 17
18 18 from . import (
19 19 match as matchmod,
20 20 node,
21 21 pathutil,
22 22 pycompat,
23 23 util,
24 24 )
25 25
26 26
27 27 from .utils import stringutil
28 28
29 29
30 30 def _filter(src, dst, t):
31 31 """filters out invalid copies after chaining"""
32 32
33 33 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
34 34 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
35 35 # in the following table (not including trivial cases). For example, case 2
36 36 # is where a file existed in 'src' and remained under that name in 'mid' and
37 37 # then was renamed between 'mid' and 'dst'.
38 38 #
39 39 # case src mid dst result
40 40 # 1 x y - -
41 41 # 2 x y y x->y
42 42 # 3 x y x -
43 43 # 4 x y z x->z
44 44 # 5 - x y -
45 45 # 6 x x y x->y
46 46 #
47 47 # _chain() takes care of chaining the copies in 'a' and 'b', but it
48 48 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
49 49 # between 5 and 6, so it includes all cases in its result.
50 50 # Cases 1, 3, and 5 are then removed by _filter().
51 51
52 52 for k, v in list(t.items()):
53 53 # remove copies from files that didn't exist
54 54 if v not in src:
55 55 del t[k]
56 56 # remove criss-crossed copies
57 57 elif k in src and v in dst:
58 58 del t[k]
59 59 # remove copies to files that were then removed
60 60 elif k not in dst:
61 61 del t[k]
62 62
63 63
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    # Start from the earlier copies; every later copy either introduces a
    # new destination or, when its source is itself a copy destination in
    # `prefix`, is collapsed to point at the original source.
    chained = prefix.copy()
    for dst, src in pycompat.iteritems(suffix):
        chained[dst] = prefix.get(src, src)
    return chained
70 70
71 71
72 72 def _tracefile(fctx, am, basemf):
73 73 """return file context that is the ancestor of fctx present in ancestor
74 74 manifest am
75 75
76 76 Note: we used to try and stop after a given limit, however checking if that
77 77 limit is reached turned out to be very expensive. we are better off
78 78 disabling that feature."""
79 79
80 80 for f in fctx.ancestors():
81 81 path = f.path()
82 82 if am.get(path, None) == f.filenode():
83 83 return path
84 84 if basemf and basemf.get(path, None) == f.filenode():
85 85 return path
86 86
87 87
88 88 def _dirstatecopies(repo, match=None):
89 89 ds = repo.dirstate
90 90 c = ds.copies().copy()
91 91 for k in list(c):
92 92 if ds[k] not in b'anm' or (match and not match(k)):
93 93 del c[k]
94 94 return c
95 95
96 96
97 97 def _computeforwardmissing(a, b, match=None):
98 98 """Computes which files are in b but not a.
99 99 This is its own function so extensions can easily wrap this call to see what
100 100 files _forwardcopies is about to process.
101 101 """
102 102 ma = a.manifest()
103 103 mb = b.manifest()
104 104 return mb.filesnotin(ma, match=match)
105 105
106 106
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage implies changeset-centric data, no config needed
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    # both of these settings read copy data from the changeset
    return readfrom in (b'changeset-only', b'compatibility')
114 114
115 115
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # changeset-centric algorithms read copy metadata from the changeset
    # instead of walking filelogs
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared across every traced file so the ancestor set is computed once
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        # walk the file's ancestors looking for the name it had in `am`
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
173 173
174 174
def _revinfo_getter(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        # Build a per-revision callback reporting whether a path's filelog
        # entry at <rev> has two non-null parents (i.e. was merged there).
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            nb_parents = 0
            for n in parents:
                if n != node.nullid:
                    nb_parents += 1
            return nb_parents >= 2

        return ismerged

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        # A small cache to avoid doing the work twice for merges
        #
        # In the vast majority of cases, if we ask information for a revision
        # about 1 parent, we'll later ask it for the other. So it makes sense
        # to keep the information around when reaching the first parent of a
        # merge and dropping it after it was provided for the second parent.
        #
        # There are cases where only one parent of the merge will be walked.
        # It happens when the "destination" of the copy tracing is a
        # descendant of a new root, not common with the "source". In that
        # case, we will only walk through merge parents that are descendants
        # of changesets common between "source" and "destination".
        #
        # With the current implementation, if such changesets have copy
        # information, we'll keep them in memory until the end of
        # _changesetforwardcopies. We don't expect the case to be frequent
        # enough to matter.
        #
        # In addition, it would be possible to reach a pathological case,
        # where many first parents are met before any second parent is
        # reached. In that case the cache could grow. If this ever becomes an
        # issue one can safely introduce a maximum cache size. This would
        # trade extra CPU/IO time to save memory.
        merge_caches = {}

        def revinfo(rev):
            p1, p2 = parents(rev)
            value = None
            if flags(rev) & REVIDX_SIDEDATA:
                # second lookup for a merge: serve (and drop) the cached entry
                e = merge_caches.pop(rev, None)
                if e is not None:
                    return e
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
                if p1 != node.nullrev and p2 != node.nullrev:
                    # XXX some case we over cache, IGNORE
                    value = merge_caches[rev] = (
                        p1,
                        p2,
                        p1copies,
                        p2copies,
                        removed,
                        get_ismerged(rev),
                    )
            else:
                # no sidedata: the revision carries no copy information
                p1copies = {}
                p2copies = {}
                removed = []

            if value is None:
                value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))
            return value

    else:

        def revinfo(rev):
            # non-sidedata path: read copies from the context (filelog/extra)
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
273 273
274 274
def _changesetforwardcopies(a, b, match):
    """Find {dst@b: src@a} copies using changeset-stored copy metadata.

    Walks the revisions between `a` and `b`, aggregating per-changeset copy
    information, then dispatches to the storage-appropriate combiner.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfo_getter(repo)

    cl = repo.changelog
    isancestor = cl.isancestorrev  # XXX we should add caching to this.
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            # a parent outside the missing set is a root to start copy
            # tracing from
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    # only visit revisions that are both reachable from the roots and
    # missing on the "a" side, plus the roots themselves, excluding b
    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)

    if repo.filecopiesmode == b'changeset-sidedata':
        return _combine_changeset_copies(
            revs, children, b.rev(), revinfo, match, isancestor
        )
    else:
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
315 321
316 322
def _combine_changeset_copies(
    revs, children, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, p1copies, p2copies,
                  removed, ismerged)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """
    # {rev: {dst: (introducing_rev, src_or_None)}} accumulated top-down;
    # entries are popped once a revision has been processed
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            # pick the copy information relevant to the parent we came from
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # `newcopies` aliases `copies` until a write forces a copy; this
            # copy-on-write keeps sibling branches unaffected
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        # collapse chained copies to the original source
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # a None source marks the copy as invalidated by removal
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    # strip the bookkeeping: drop removal markers and the introducing rev
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
393 399
394 400
395 401 def _merge_copies_dict(minor, major, isancestor, ismerged):
396 402 """merge two copies-mapping together, minor and major
397 403
398 404 In case of conflict, value from "major" will be picked.
399 405
400 406 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
401 407 ancestors of `high_rev`,
402 408
403 409 - `ismerged(path)`: callable return True if `path` have been merged in the
404 410 current revision,
405 411 """
406 412 for dest, value in major.items():
407 413 other = minor.get(dest)
408 414 if other is None:
409 415 minor[dest] = value
410 416 else:
411 417 new_tt = value[0]
412 418 other_tt = other[0]
413 419 if value[1] == other[1]:
414 420 continue
415 421 # content from "major" wins, unless it is older
416 422 # than the branch point or there is a merge
417 423 if (
418 424 new_tt == other_tt
419 425 or not isancestor(new_tt, other_tt)
420 426 or ismerged(dest)
421 427 ):
422 428 minor[dest] = value
423 429
424 430
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information"""
    # {rev: {dst: (introducing_rev, src_or_None)}}, popped once processed
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            # pick the copy information relevant to the parent we came from
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # copy-on-write: `newcopies` aliases `copies` until mutated
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        # collapse chained copies to the original source
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # a None source marks the copy as invalidated by removal
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    # strip removal markers and the introducing-revision bookkeeping
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
499
500
501 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
502 """version of `_merge_copies_dict` that works with the Google
503 specific "extra" based storage for copy information"""
504 for dest, value in major.items():
505 other = minor.get(dest)
506 if other is None:
507 minor[dest] = value
508 else:
509 new_tt = value[0]
510 other_tt = other[0]
511 if value[1] == other[1]:
512 continue
513 # content from "major" wins, unless it is older
514 # than the branch point or there is a merge
515 if (
516 new_tt == other_tt
517 or not isancestor(new_tt, other_tt)
518 or ismerged(dest)
519 ):
520 minor[dest] = value
521
522
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is None:
        # working copy: trace committed copies up to b's first parent, then
        # layer the dirstate-recorded copies on top
        committed = _committedforwardcopies(a, b.p1(), base, match)
        return _chain(committed, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, base, match)
439 537
440 538
def _backwardrenames(a, b, match):
    """find renames from b back to its ancestor a, as a {src@b: dst@a} map"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    backward = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        if match and not match(src):
            continue
        # skip copies (as opposed to renames): the source still exists in a
        if src in a:
            continue
        backward[src] = dst
    return backward
461 559
462 560
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    # trivial cases: identical or empty endpoints
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        # short-circuit to avoid issues with merge states
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        return _dirstatecopies(repo, match)
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: trace forward only
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif anc == y:
        # y is an ancestor of x: trace backward only
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated branches: go back to the common ancestor, then forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if anc.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, anc, match=match),
            _forwardcopies(anc, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
499 597
500 598
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    |/
    o          ---> 2 commit that moves a.txt to b.txt
    |
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        # only the dirstate copies are relevant in that situation
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
569 667
570 668
571 669 def _isfullcopytraceable(repo, c1, base):
572 670 """ Checks that if base, source and destination are all no-public branches,
573 671 if yes let's use the full copytrace algorithm for increased capabilities
574 672 since it will be fast enough.
575 673
576 674 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
577 675 number of changesets from c1 to base such that if number of changesets are
578 676 more than the limit, full copytracing algorithm won't be used.
579 677 """
580 678 if c1.rev() is None:
581 679 c1 = c1.p1()
582 680 if c1.mutable() and base.mutable():
583 681 sourcecommitlimit = repo.ui.configint(
584 682 b'experimental', b'copytrace.sourcecommitlimit'
585 683 )
586 684 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
587 685 return commits < sourcecommitlimit
588 686 return False
589 687
590 688
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    """Classify copies of `src` that only happened on side 1.

    Records results into `copy` (dst -> src) or `renamedelete`
    (src -> [dsts]), both mutated in place.
    """
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
        return
    if src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        return
    content_changed = mb[src] != m2[src]
    if content_changed and not _related(c2[src], base[src]):
        # unrelated content on side 2: do not record the copy
        return
    if content_changed or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src
610 708
611 709
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # each attribute defaults to a fresh empty dict (never a shared one)
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        return (
            '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
            % (self.copy, self.renamedelete, self.dirmove, self.movewithdir,)
        )
644 742
645 743
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    # invert {dst: src} into {src: [dsts]} so all destinations of a given
    # source can be examined together
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    if repo.ui.debugflag:
        # aggregate destinations once so each copy can be annotated below
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b"  on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b"   src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # note the crossed wiring: files added on one side move with directory
    # renames detected on the other side
    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
769 867
770 868
def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)
    """
    # generate a directory move map
    dirs = ctx.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        if dsrc in dirs and ddst in dirs:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dirmove.get(dsrc, ddst) != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for d in invalid:
        dirmove.pop(d, None)
    del dirs, invalid

    if not dirmove:
        return {}, {}

    # normalize to trailing-slash form for prefix matching below
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f in fullcopy:
            continue
        for d in dirmove:
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved, move it
            df = dirmove[d] + f[len(d) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b"   pending file src: '%s' -> dst: '%s'\n" % (f, df)
                )
            break

    return dirmove, movewithdir
834 932
835 933
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        # candidates for a rename source share either a basename (moved to a
        # different directory) or a dirname (renamed in place) with the
        # missing file
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics;
        # this config value limits the number of candidate moves to check.
        # It is loop-invariant, so read it once instead of once per file.
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
943 1041
944 1042
945 1043 def _related(f1, f2):
946 1044 """return True if f1 and f2 filectx have a common ancestor
947 1045
948 1046 Walk back to common ancestor to see if the two files originate
949 1047 from the same file. Since workingfilectx's rev() is None it messes
950 1048 up the integer comparison logic, hence the pre-step check for
951 1049 None (f1 and f2 can only be workingfilectx's initially).
952 1050 """
953 1051
954 1052 if f1 == f2:
955 1053 return True # a match
956 1054
957 1055 g1, g2 = f1.ancestors(), f2.ancestors()
958 1056 try:
959 1057 f1r, f2r = f1.linkrev(), f2.linkrev()
960 1058
961 1059 if f1r is None:
962 1060 f1 = next(g1)
963 1061 if f2r is None:
964 1062 f2 = next(g2)
965 1063
966 1064 while True:
967 1065 f1r, f2r = f1.linkrev(), f2.linkrev()
968 1066 if f1r > f2r:
969 1067 f1 = next(g1)
970 1068 elif f2r > f1r:
971 1069 f2 = next(g2)
972 1070 else: # f1 and f2 point to files in the same linkrev
973 1071 return f1 == f2 # true if they point to the same file
974 1072 except StopIteration:
975 1073 return False
976 1074
977 1075
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    # copies from base to ctx, pruned to those still valid in the working copy
    relevant = pathcopies(base, ctx)
    _filter(wctx.p1(), wctx, relevant)
    for dst, src in pycompat.iteritems(relevant):
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now