##// END OF EJS Templates
copies: split the combination of the copies mapping into its own function...
marmoute -
r44225:421ea577 default
parent child Browse files
Show More
@@ -1,1017 +1,1032 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 13 from .i18n import _
14 14
15 15
16 16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 17
18 18 from . import (
19 19 error,
20 20 match as matchmod,
21 21 node,
22 22 pathutil,
23 23 pycompat,
24 24 util,
25 25 )
26 26
27 27 from .revlogutils import sidedata as sidedatamod
28 28
29 29 from .utils import stringutil
30 30
31 31
32 32 def _filter(src, dst, t):
33 33 """filters out invalid copies after chaining"""
34 34
35 35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 37 # in the following table (not including trivial cases). For example, case 2
38 38 # is where a file existed in 'src' and remained under that name in 'mid' and
39 39 # then was renamed between 'mid' and 'dst'.
40 40 #
41 41 # case src mid dst result
42 42 # 1 x y - -
43 43 # 2 x y y x->y
44 44 # 3 x y x -
45 45 # 4 x y z x->z
46 46 # 5 - x y -
47 47 # 6 x x y x->y
48 48 #
49 49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 51 # between 5 and 6, so it includes all cases in its result.
52 52 # Cases 1, 3, and 5 are then removed by _filter().
53 53
54 54 for k, v in list(t.items()):
55 55 # remove copies from files that didn't exist
56 56 if v not in src:
57 57 del t[k]
58 58 # remove criss-crossed copies
59 59 elif k in src and v in dst:
60 60 del t[k]
61 61 # remove copies to files that were then removed
62 62 elif k not in dst:
63 63 del t[k]
64 64
65 65
66 66 def _chain(prefix, suffix):
67 67 """chain two sets of copies 'prefix' and 'suffix'"""
68 68 result = prefix.copy()
69 69 for key, value in pycompat.iteritems(suffix):
70 70 result[key] = prefix.get(value, value)
71 71 return result
72 72
73 73
def _tracefile(fctx, am, basemf):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am

    Note: we used to try and stop after a given limit, however checking if that
    limit is reached turned out to be very expensive. we are better off
    disabling that feature."""

    # walk the file's ancestry until we find a revision whose (path, filenode)
    # pair is recorded in the ancestor manifest, or in the optional base
    # manifest; returns None implicitly when no such ancestor exists
    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path
88 88
89 89
def _dirstatecopies(repo, match=None):
    """return the copies recorded in the dirstate, optionally filtered

    Entries are dropped when the file's dirstate status is not added ('a'),
    normal ('n') or merged ('m'), or when it does not match `match`.
    """
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c
97 97
98 98
99 99 def _computeforwardmissing(a, b, match=None):
100 100 """Computes which files are in b but not a.
101 101 This is its own function so extensions can easily wrap this call to see what
102 102 files _forwardcopies is about to process.
103 103 """
104 104 ma = a.manifest()
105 105 mb = b.manifest()
106 106 return mb.filesnotin(ma, match=match)
107 107
108 108
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage always implies the changeset-centric algorithm
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    # otherwise honor the experimental read-from configuration
    source = repo.ui.config(b'experimental', b'copies.read-from')
    return source in (b'changeset-only', b'compatibility')
116 116
117 117
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # delegate to the changeset-centric algorithm when configured
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files to reuse its state
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
175 175
176 176
def _revinfogetter(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    """
    cl = repo.changelog
    parents = cl.parentrevs

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        # A small cache to avoid doing the work twice for merges
        #
        # In the vast majority of cases, if we ask information for a revision
        # about 1 parent, we'll later ask it for the other. So it makes sense
        # to keep the information around when reaching the first parent of a
        # merge and to drop it after it was provided for the second parent.
        #
        # There exist cases where only one parent of the merge will be walked.
        # It happens when the "destination" of the copy tracing is descended
        # from a new root, not common with the "source". In that case, we will
        # only walk through merge parents that are descendants of changesets
        # common between "source" and "destination".
        #
        # With the current implementation, if such changesets have copy
        # information, we'll keep them in memory until the end of
        # _changesetforwardcopies. We don't expect the case to be frequent
        # enough to matter.
        #
        # In addition, it would be possible to reach a pathological case where
        # many first parents are met before any second parent is reached. In
        # that case the cache could grow. If this ever becomes an issue one can
        # safely introduce a maximum cache size. This would trade extra CPU/IO
        # time to save memory.
        merge_caches = {}

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & REVIDX_SIDEDATA:
                # reuse the cached answer when asked about the second parent
                # of a merge we already processed
                e = merge_caches.pop(rev, None)
                if e is not None:
                    return e
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
                if p1 != node.nullrev and p2 != node.nullrev:
                    # XXX some case we over cache, IGNORE
                    merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
            else:
                # revision without sidedata: no copy information stored
                p1copies = {}
                p2copies = {}
                removed = []
            return p1, p2, p1copies, p2copies, removed

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed

    return revinfo
247 247
248 248
def _changesetforwardcopies(a, b, match):
    """find {dst@b: src@a} copies using changeset-stored copy information

    Walks the revisions between ``a`` and ``b`` and aggregates their copy
    data with _combinechangesetcopies().
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    # revisions that are ancestors of b but not of a
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            # a parent outside the missing set is a starting point
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    # restrict the walk to revisions on a path from a root to b; b itself is
    # the target and is handled by _combinechangesetcopies
    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)
    return _combinechangesetcopies(revs, children, b.rev(), revinfo, match)
286
287
def _combinechangesetcopies(revs, children, targetrev, revinfo, match):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            # pick the copy information relative to the parent we came from
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
                # _chain makes a copy, we can avoid doing so in some
                # simple/linear cases.
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    return all_copies[targetrev]
340 355
341 356
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    # restrict to the repository's narrow spec
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies
356 371
357 372
def _backwardrenames(a, b, match):
    """compute the renames seen when walking backward from ``b`` to ``a``"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    backward = {}
    for dest, source in sorted(forward.items()):
        if match and not match(source):
            continue
        # remove copies
        if source in a:
            continue
        backward[source] = dest
    return backward
378 393
379 394
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: trace copies forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # y is an ancestor of x: trace renames backward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated direction: go backward to the common ancestor, then forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
414 429
415 430
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
494 509
495 510
def _isfullcopytraceable(repo, c1, base):
    """ Checks that if base, source and destination are all non-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    # working copy: use its parent instead
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False
514 529
515 530
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    # classify a copy source seen on only one side of a merge: record it in
    # `copy` (destinations to merge) or `renamedelete` (renamed here, deleted
    # on the other side); `copy` and `renamedelete` are mutated in place
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif m2[src] != mb[src]:
        if not _related(c2[src], base[src]):
            return
        # modified on side 2
        for dst in dsts1:
            if dst not in m2:
                # dst not added on side 2 (handle as regular
                # "both created" case in manifestmerge otherwise)
                copy[dst] = src
533 548
534 549
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert {dst: src} into {src: [dsts]} for each side
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
699 714
700 715
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working copy: use the parent commits instead
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep the copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index the files added on c1's side by basename and by directory so
        # we can look up rename candidates cheaply
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
808 823
809 824
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # step past a workingfilectx (linkrev None) before comparing linkrevs
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        # walk whichever side has the higher linkrev backward until both
        # sides land on the same linkrev; exhausting either ancestry
        # (StopIteration) means the files are unrelated
        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        return False
841 856
842 857
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        # only mark files that actually exist in the working context
        if dst in wctx:
            wctx[dst].markcopied(src)
867 882
868 883
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset

    A file is "added" when it is touched by the changeset but present in
    neither parent.  ``ctx.parents()`` is hoisted out of the loop: the
    original re-evaluated it once per touched file.
    """
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
877 892
878 893
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset

    A touched file that is absent from the changeset's manifest was
    removed by it.
    """
    return [f for f in ctx.files() if f not in ctx]
887 902
888 903
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # Skip files outside the narrowspec or absent from the changeset.
        if not narrowmatch(dst) or dst not in ctx:
            continue
        rename = ctx[dst].renamed()
        if not rename:
            continue
        src, srcnode = rename
        # Attribute the copy to whichever parent holds the matching source
        # filenode; p1 wins when both would match.
        if src in parent1 and parent1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in parent2 and parent2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
913 928
914 929
def encodecopies(files, copies):
    """encode ``copies`` as newline-separated ``index\\0source`` entries

    ``index`` is the position of the copy destination within ``files``.
    Raises ProgrammingError when a copy target is not listed in ``files``.
    """
    items = [
        b'%d\0%s' % (index, copies[dst])
        for index, dst in enumerate(files)
        if dst in copies
    ]
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)
925 940
926 941
def decodecopies(files, data):
    """decode copy metadata produced by ``encodecopies``

    Returns ``{destination: source}`` or None when ``data`` does not parse
    as index-based copy entries.
    """
    try:
        result = {}
        if not data:
            return result
        for entry in data.split(b'\n'):
            strindex, src = entry.split(b'\0')
            result[files[int(strindex)]] = src
        return result
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
942 957
943 958
def encodefileindices(files, subset):
    """encode the positions of ``subset`` members within ``files``

    Returns the indices as newline-separated decimal byte strings, in
    ``files`` order.
    """
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % index for index, f in enumerate(files) if f in wanted
    )
951 966
952 967
def decodefileindices(files, data):
    """decode indices produced by ``encodefileindices`` back into file names

    Returns the selected members of ``files``, or None when ``data`` holds
    anything that is not a valid index into ``files``.
    """
    try:
        subset = []
        if not data:
            return subset
        upper = len(files)
        for strindex in data.split(b'\n'):
            index = int(strindex)
            if not 0 <= index < upper:
                return None
            subset.append(files[index])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
968 983
969 984
def _getsidedata(srcrepo, rev):
    """compute the copies-related sidedata for changeset ``rev`` of ``srcrepo``

    Returns a dict mapping sidedata keys (SD_P1COPIES, SD_P2COPIES,
    SD_FILESADDED, SD_FILESREMOVED) to their encoded values; keys with
    nothing to record are omitted.
    """
    ctx = srcrepo[rev]
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # Only pay for sorting and encoding when there is something to store.
    # The previous guard tested the (p1copies, p2copies) tuple itself,
    # which is always truthy, so it never actually skipped the work.
    if p1copies or p2copies or filesadded or filesremoved:
        sortedfiles = sorted(ctx.files())
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
992 1007
993 1008
def getsidedataadder(srcrepo, destrepo):
    """build a sidedata companion that adds copy metadata during an upgrade"""

    def sidedatacompanion(revlog, rev):
        data = {}
        # Only the changelog has a 'filteredrevs' attribute; other revlogs
        # (filelogs, manifests) receive no sidedata.
        if util.safehasattr(revlog, 'filteredrevs'):
            data = _getsidedata(srcrepo, rev)
        return False, (), data

    return sidedatacompanion
1002 1017
1003 1018
def getsidedataremover(srcrepo, destrepo):
    """build a sidedata companion that strips copy metadata during an upgrade"""

    def sidedatacompanion(revlog, rev):
        toremove = ()
        # Only changelog revisions flagged as carrying sidedata have copy
        # keys to remove; 'filteredrevs' identifies the changelog.
        if util.safehasattr(revlog, 'filteredrevs') and (
            revlog.flags(rev) & REVIDX_SIDEDATA
        ):
            toremove = (
                sidedatamod.SD_P1COPIES,
                sidedatamod.SD_P2COPIES,
                sidedatamod.SD_FILESADDED,
                sidedatamod.SD_FILESREMOVED,
            )
        return False, toremove, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now