sidedatacopies: only fetch information once for merge...
marmoute
r43595:90213d02 default
@@ -1,986 +1,1017 @@
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 13 from .i18n import _
14 14
15 15
16 16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 17
18 18 from . import (
19 19 error,
20 20 match as matchmod,
21 21 node,
22 22 pathutil,
23 23 pycompat,
24 24 util,
25 25 )
26 26
27 27 from .revlogutils import sidedata as sidedatamod
28 28
29 29 from .utils import stringutil
30 30
31 31
32 32 def _filter(src, dst, t):
33 33 """filters out invalid copies after chaining"""
34 34
35 35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 37 # in the following table (not including trivial cases). For example, case 2
38 38 # is where a file existed in 'src' and remained under that name in 'mid' and
39 39 # then was renamed between 'mid' and 'dst'.
40 40 #
41 41 # case src mid dst result
42 42 # 1 x y - -
43 43 # 2 x y y x->y
44 44 # 3 x y x -
45 45 # 4 x y z x->z
46 46 # 5 - x y -
47 47 # 6 x x y x->y
48 48 #
49 49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 51 # between 5 and 6, so it includes all cases in its result.
52 52 # Cases 1, 3, and 5 are then removed by _filter().
53 53
54 54 for k, v in list(t.items()):
55 55 # remove copies from files that didn't exist
56 56 if v not in src:
57 57 del t[k]
58 58 # remove criss-crossed copies
59 59 elif k in src and v in dst:
60 60 del t[k]
61 61 # remove copies to files that were then removed
62 62 elif k not in dst:
63 63 del t[k]
64 64
65 65
66 66 def _chain(a, b):
67 67 """chain two sets of copies 'a' and 'b'"""
68 68 t = a.copy()
69 69 for k, v in pycompat.iteritems(b):
70 70 if v in t:
71 71 t[k] = t[v]
72 72 else:
73 73 t[k] = v
74 74 return t
75 75
76 76
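To make the case table in `_filter()` concrete, here is a small, self-contained sketch (an editorial illustration, not part of the module): plain sets stand in for the 'src' and 'dst' manifests, since membership tests are all `_filter()` needs.

```
# Illustrative only: sets stand in for the 'src'/'dst' manifests.
a = {b'mid': b'x'}   # between src and mid, 'x' was renamed to 'mid'
b = {b'y': b'mid'}   # between mid and dst, 'mid' was renamed to 'y'

t = _chain(a, b)
# t == {b'mid': b'x', b'y': b'x'}

_filter({b'x'}, {b'y'}, t)
# b'mid' no longer exists in dst, so its entry is dropped (case 1),
# while b'y' survives as a chained rename (case 4):
# t == {b'y': b'x'}
```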
77 77 def _tracefile(fctx, am, basemf):
78 78 """return file context that is the ancestor of fctx present in ancestor
79 79 manifest am
80 80
81 81 Note: we used to try and stop after a given limit; however, checking if that
82 82 limit is reached turned out to be very expensive. We are better off
83 83 disabling that feature."""
84 84
85 85 for f in fctx.ancestors():
86 86 path = f.path()
87 87 if am.get(path, None) == f.filenode():
88 88 return path
89 89 if basemf and basemf.get(path, None) == f.filenode():
90 90 return path
91 91
92 92
93 93 def _dirstatecopies(repo, match=None):
94 94 ds = repo.dirstate
95 95 c = ds.copies().copy()
96 96 for k in list(c):
97 97 if ds[k] not in b'anm' or (match and not match(k)):
98 98 del c[k]
99 99 return c
100 100
101 101
102 102 def _computeforwardmissing(a, b, match=None):
103 103 """Computes which files are in b but not a.
104 104 This is its own function so extensions can easily wrap this call to see what
105 105 files _forwardcopies is about to process.
106 106 """
107 107 ma = a.manifest()
108 108 mb = b.manifest()
109 109 return mb.filesnotin(ma, match=match)
110 110
111 111
112 112 def usechangesetcentricalgo(repo):
113 113 """Checks if we should use changeset-centric copy algorithms"""
114 114 if repo.filecopiesmode == b'changeset-sidedata':
115 115 return True
116 116 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
117 117 changesetsource = (b'changeset-only', b'compatibility')
118 118 return readfrom in changesetsource
119 119
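For reference, the changeset-centric sources checked above correspond to the existing configuration knob, e.g. in hgrc (`changeset-only` and `compatibility` are the two values the code treats as changeset-centric):

```
[experimental]
copies.read-from = compatibility
```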
120 120
121 121 def _committedforwardcopies(a, b, base, match):
122 122 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
123 123 # files might have to be traced back to the fctx parent of the last
124 124 # one-side-only changeset, but not further back than that
125 125 repo = a._repo
126 126
127 127 if usechangesetcentricalgo(repo):
128 128 return _changesetforwardcopies(a, b, match)
129 129
130 130 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
131 131 dbg = repo.ui.debug
132 132 if debug:
133 133 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
134 134 am = a.manifest()
135 135 basemf = None if base is None else base.manifest()
136 136
137 137 # find where new files came from
138 138 # we currently don't try to find where old files went, too expensive
139 139 # this means we can miss a case like 'hg rm b; hg cp a b'
140 140 cm = {}
141 141
142 142 # Computing the forward missing is quite expensive on large manifests, since
143 143 # it compares the entire manifests. We can optimize it in the common use
144 144 # case of computing what copies are in a commit versus its parent (like
145 145 # during a rebase or histedit). Note, we exclude merge commits from this
146 146 # optimization, since the ctx.files() for a merge commit is not correct for
147 147 # this comparison.
148 148 forwardmissingmatch = match
149 149 if b.p1() == a and b.p2().node() == node.nullid:
150 150 filesmatcher = matchmod.exact(b.files())
151 151 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
152 152 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
153 153
154 154 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
155 155
156 156 if debug:
157 157 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
158 158
159 159 for f in sorted(missing):
160 160 if debug:
161 161 dbg(b'debug.copies: tracing file: %s\n' % f)
162 162 fctx = b[f]
163 163 fctx._ancestrycontext = ancestrycontext
164 164
165 165 if debug:
166 166 start = util.timer()
167 167 opath = _tracefile(fctx, am, basemf)
168 168 if opath:
169 169 if debug:
170 170 dbg(b'debug.copies: rename of: %s\n' % opath)
171 171 cm[f] = opath
172 172 if debug:
173 173 dbg(
174 174 b'debug.copies: time: %f seconds\n'
175 175 % (util.timer() - start)
176 176 )
177 177 return cm
178 178
179 179
180 180 def _revinfogetter(repo):
181 181 """return a function that return multiple data given a <rev>"i
182 182
183 183 * p1: revision number of first parent
184 184 * p2: revision number of second parent
185 185 * p1copies: mapping of copies from p1
186 186 * p2copies: mapping of copies from p2
187 187 * removed: a list of removed files
188 188 """
189 189 cl = repo.changelog
190 190 parents = cl.parentrevs
191 191
192 192 if repo.filecopiesmode == b'changeset-sidedata':
193 193 changelogrevision = cl.changelogrevision
194 194 flags = cl.flags
195 195
196 # A small cache to avoid doing the work twice for merges
197 #
198 # In the vast majority of cases, if we ask for information about one
199 # parent of a merge, we'll later ask for it about the other. So it makes
200 # sense to keep the information around after reaching the first parent
201 # of a merge and drop it once it has been provided for the second parent.
202 #
203 # There exist cases where only one parent of the merge will be walked.
204 # This happens when the "destination" of the copy tracing is a descendant
205 # of a new root, not common with the "source". In that case, we will only
206 # walk through merge parents that are descendants of changesets common
207 # to "source" and "destination".
208 #
209 # With the current implementation, if such changesets carry copy
210 # information, we'll keep it in memory until the end of
211 # _changesetforwardcopies. We don't expect this case to be frequent
212 # enough to matter.
213 #
214 # In addition, it would be possible to reach a pathological case where
215 # many first parents are met before any second parent is reached. In
216 # that case the cache could grow. If this ever becomes an issue, one can
217 # safely introduce a maximum cache size. This would trade extra CPU/IO
218 # time to save memory.
219 merge_caches = {}
220
196 221 def revinfo(rev):
197 222 p1, p2 = parents(rev)
198 223 if flags(rev) & REVIDX_SIDEDATA:
224 e = merge_caches.pop(rev, None)
225 if e is not None:
226 return e
199 227 c = changelogrevision(rev)
200 228 p1copies = c.p1copies
201 229 p2copies = c.p2copies
202 230 removed = c.filesremoved
231 if p1 != node.nullrev and p2 != node.nullrev:
232 # XXX in some cases we cache more than needed; deliberately ignored for now
233 merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
203 234 else:
204 235 p1copies = {}
205 236 p2copies = {}
206 237 removed = ()
207 238 return p1, p2, p1copies, p2copies, removed
208 239
209 240 else:
210 241
211 242 def revinfo(rev):
212 243 p1, p2 = parents(rev)
213 244 ctx = repo[rev]
214 245 p1copies, p2copies = ctx._copies
215 246 removed = ctx.filesremoved()
216 247 return p1, p2, p1copies, p2copies, removed
217 248
218 249 return revinfo
219 250
220 251
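The cache-and-pop pattern used by the sidedata branch above can be sketched standalone as follows (hypothetical `parents`/`decode` helpers; Mercurial's nullrev is -1). Each merge revision is decoded at most once: the first request caches the tuple, the second request pops it.

```
def make_revinfo(parents, decode):
    merge_caches = {}

    def revinfo(rev):
        cached = merge_caches.pop(rev, None)  # second request: cache hit
        if cached is not None:
            return cached
        p1, p2 = parents(rev)
        value = (p1, p2) + decode(rev)  # the expensive sidedata read
        if p1 != -1 and p2 != -1:  # a merge: expect a second request
            merge_caches[rev] = value
        return value

    return revinfo
```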
221 252 def _changesetforwardcopies(a, b, match):
222 253 if a.rev() in (node.nullrev, b.rev()):
223 254 return {}
224 255
225 256 repo = a.repo().unfiltered()
226 257 children = {}
227 258 revinfo = _revinfogetter(repo)
228 259
229 260 cl = repo.changelog
230 261 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
231 262 mrset = set(missingrevs)
232 263 roots = set()
233 264 for r in missingrevs:
234 265 for p in cl.parentrevs(r):
235 266 if p == node.nullrev:
236 267 continue
237 268 if p not in children:
238 269 children[p] = [r]
239 270 else:
240 271 children[p].append(r)
241 272 if p not in mrset:
242 273 roots.add(p)
243 274 if not roots:
244 275 # no common revision to track copies from
245 276 return {}
246 277 min_root = min(roots)
247 278
248 279 from_head = set(
249 280 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
250 281 )
251 282
252 283 iterrevs = set(from_head)
253 284 iterrevs &= mrset
254 285 iterrevs.update(roots)
255 286 iterrevs.remove(b.rev())
256 287 all_copies = {r: {} for r in roots}
257 288 alwaysmatch = match.always()
258 289 for r in sorted(iterrevs):
259 290 copies = all_copies.pop(r)
260 291 for i, c in enumerate(children[r]):
261 292 p1, p2, p1copies, p2copies, removed = revinfo(c)
262 293 if r == p1:
263 294 parent = 1
264 295 childcopies = p1copies
265 296 else:
266 297 assert r == p2
267 298 parent = 2
268 299 childcopies = p2copies
269 300 if not alwaysmatch:
270 301 childcopies = {
271 302 dst: src for dst, src in childcopies.items() if match(dst)
272 303 }
273 304 newcopies = copies
274 305 if childcopies:
275 306 newcopies = _chain(newcopies, childcopies)
276 307 # _chain makes a copy; we could avoid doing so in some
277 308 # simple/linear cases.
278 309 assert newcopies is not copies
279 310 for f in removed:
280 311 if f in newcopies:
281 312 if newcopies is copies:
282 313 # copy on write to avoid affecting potential other
283 314 # branches. when there are no other branches, this
284 315 # could be avoided.
285 316 newcopies = copies.copy()
286 317 del newcopies[f]
287 318 othercopies = all_copies.get(c)
288 319 if othercopies is None:
289 320 all_copies[c] = newcopies
290 321 else:
291 322 # we are the second parent to work on c, we need to merge our
292 323 # work with the other.
293 324 #
294 325 # Unlike when copies are stored in the filelog, we consider
295 326 # it a copy even if the destination already existed on the
296 327 # other branch. It's simply too expensive to check if the
297 328 # file existed in the manifest.
298 329 #
299 330 # In case of conflict, parent 1 takes precedence over parent 2.
300 331 # This is an arbitrary choice made anew when implementing
301 332 # changeset-based copies. It was made without regard to
302 333 # potential filelog-related behavior.
303 334 if parent == 1:
304 335 othercopies.update(newcopies)
305 336 else:
306 337 newcopies.update(othercopies)
307 338 all_copies[c] = newcopies
308 339 return all_copies[b.rev()]
309 340
310 341
311 342 def _forwardcopies(a, b, base=None, match=None):
312 343 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
313 344
314 345 if base is None:
315 346 base = a
316 347 match = a.repo().narrowmatch(match)
317 348 # check for working copy
318 349 if b.rev() is None:
319 350 cm = _committedforwardcopies(a, b.p1(), base, match)
320 351 # combine copies from dirstate if necessary
321 352 copies = _chain(cm, _dirstatecopies(b._repo, match))
322 353 else:
323 354 copies = _committedforwardcopies(a, b, base, match)
324 355 return copies
325 356
326 357
327 358 def _backwardrenames(a, b, match):
328 359 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
329 360 return {}
330 361
331 362 # Even though we're not taking copies into account, 1:n rename situations
332 363 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
333 364 # arbitrarily pick one of the renames.
334 365 # We don't want to pass in "match" here, since that would filter
335 366 # the destination by it. Since we're reversing the copies, we want
336 367 # to filter the source instead.
337 368 f = _forwardcopies(b, a)
338 369 r = {}
339 370 for k, v in sorted(pycompat.iteritems(f)):
340 371 if match and not match(v):
341 372 continue
342 373 # remove copies
343 374 if v in a:
344 375 continue
345 376 r[v] = k
346 377 return r
347 378
348 379
349 380 def pathcopies(x, y, match=None):
350 381 """find {dst@y: src@x} copy mapping for directed compare"""
351 382 repo = x._repo
352 383 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
353 384 if debug:
354 385 repo.ui.debug(
355 386 b'debug.copies: searching copies from %s to %s\n' % (x, y)
356 387 )
357 388 if x == y or not x or not y:
358 389 return {}
359 390 a = y.ancestor(x)
360 391 if a == x:
361 392 if debug:
362 393 repo.ui.debug(b'debug.copies: search mode: forward\n')
363 394 if y.rev() is None and x == y.p1():
364 395 # short-circuit to avoid issues with merge states
365 396 return _dirstatecopies(repo, match)
366 397 copies = _forwardcopies(x, y, match=match)
367 398 elif a == y:
368 399 if debug:
369 400 repo.ui.debug(b'debug.copies: search mode: backward\n')
370 401 copies = _backwardrenames(x, y, match=match)
371 402 else:
372 403 if debug:
373 404 repo.ui.debug(b'debug.copies: search mode: combined\n')
374 405 base = None
375 406 if a.rev() != node.nullrev:
376 407 base = x
377 408 copies = _chain(
378 409 _backwardrenames(x, a, match=match),
379 410 _forwardcopies(a, y, base, match=match),
380 411 )
381 412 _filter(x, y, copies)
382 413 return copies
383 414
384 415
385 416 def mergecopies(repo, c1, c2, base):
386 417 """
387 418 Finds moves and copies between context c1 and c2 that are relevant for
388 419 merging. 'base' will be used as the merge base.
389 420
390 421 Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
391 422 files that were moved/copied in one merge parent and modified in another.
392 423 For example:
393 424
394 425 o ---> 4 another commit
395 426 |
396 427 | o ---> 3 commit that modifies a.txt
397 428 | /
398 429 o / ---> 2 commit that moves a.txt to b.txt
399 430 |/
400 431 o ---> 1 merge base
401 432
402 433 If we try to rebase revision 3 on revision 4, since there is no a.txt in
403 434 revision 4, and the user has copytrace disabled, we print the following
404 435 message:
405 436
406 437 ```other changed <file> which local deleted```
407 438
408 439 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
409 440 "dirmove".
410 441
411 442 "copy" is a mapping from destination name -> source name,
412 443 where source is in c1 and destination is in c2 or vice-versa.
413 444
414 445 "movewithdir" is a mapping from source name -> destination name,
415 446 where the file at source present in one context but not the other
416 447 needs to be moved to destination by the merge process, because the
417 448 other context moved the directory it is in.
418 449
419 450 "diverge" is a mapping of source name -> list of destination names
420 451 for divergent renames.
421 452
422 453 "renamedelete" is a mapping of source name -> list of destination
423 454 names for files deleted in c1 that were renamed in c2 or vice-versa.
424 455
425 456 "dirmove" is a mapping of detected source dir -> destination dir renames.
426 457 This is needed for handling changes to new files previously grafted into
427 458 renamed directories.
428 459
429 460 This function calls different copytracing algorithms based on config.
430 461 """
431 462 # avoid silly behavior for update from empty dir
432 463 if not c1 or not c2 or c1 == c2:
433 464 return {}, {}, {}, {}, {}
434 465
435 466 narrowmatch = c1.repo().narrowmatch()
436 467
437 468 # avoid silly behavior for parent -> working dir
438 469 if c2.node() is None and c1.node() == repo.dirstate.p1():
439 470 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
440 471
441 472 copytracing = repo.ui.config(b'experimental', b'copytrace')
442 473 if stringutil.parsebool(copytracing) is False:
443 474 # stringutil.parsebool() returns None when it is unable to parse the
444 475 # value; in that case we keep copytracing enabled rather than off
445 476 return {}, {}, {}, {}, {}
446 477
447 478 if usechangesetcentricalgo(repo):
448 479 # The heuristics don't make sense when we need changeset-centric algos
449 480 return _fullcopytracing(repo, c1, c2, base)
450 481
451 482 # Copy trace disabling is explicitly below the node == p1 logic above
452 483 # because the logic above is required for a simple copy to be kept across a
453 484 # rebase.
454 485 if copytracing == b'heuristics':
455 486 # Do full copytracing if only non-public revisions are involved as
456 487 # that will be fast enough and will also cover the copies which could
457 488 # be missed by heuristics
458 489 if _isfullcopytraceable(repo, c1, base):
459 490 return _fullcopytracing(repo, c1, c2, base)
460 491 return _heuristicscopytracing(repo, c1, c2, base)
461 492 else:
462 493 return _fullcopytracing(repo, c1, c2, base)
463 494
464 495
465 496 def _isfullcopytraceable(repo, c1, base):
466 497 """ Checks that if base, source and destination are all no-public branches,
467 498 if yes let's use the full copytrace algorithm for increased capabilities
468 499 since it will be fast enough.
469 500
470 501 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
471 502 number of changesets from c1 to base such that if number of changesets are
472 503 more than the limit, full copytracing algorithm won't be used.
473 504 """
474 505 if c1.rev() is None:
475 506 c1 = c1.p1()
476 507 if c1.mutable() and base.mutable():
477 508 sourcecommitlimit = repo.ui.configint(
478 509 b'experimental', b'copytrace.sourcecommitlimit'
479 510 )
480 511 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
481 512 return commits < sourcecommitlimit
482 513 return False
483 514
484 515
485 516 def _checksinglesidecopies(
486 517 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
487 518 ):
488 519 if src not in m2:
489 520 # deleted on side 2
490 521 if src not in m1:
491 522 # renamed on side 1, deleted on side 2
492 523 renamedelete[src] = dsts1
493 524 elif m2[src] != mb[src]:
494 525 if not _related(c2[src], base[src]):
495 526 return
496 527 # modified on side 2
497 528 for dst in dsts1:
498 529 if dst not in m2:
499 530 # dst not added on side 2 (handle as regular
500 531 # "both created" case in manifestmerge otherwise)
501 532 copy[dst] = src
502 533
503 534
504 535 def _fullcopytracing(repo, c1, c2, base):
505 536 """ The full copytracing algorithm which finds all the new files that were
506 537 added from merge base up to the top commit and for each file it checks if
507 538 this file was copied from another file.
508 539
509 540 This is pretty slow when a lot of changesets are involved but will track all
510 541 the copies.
511 542 """
512 543 m1 = c1.manifest()
513 544 m2 = c2.manifest()
514 545 mb = base.manifest()
515 546
516 547 copies1 = pathcopies(base, c1)
517 548 copies2 = pathcopies(base, c2)
518 549
519 550 inversecopies1 = {}
520 551 inversecopies2 = {}
521 552 for dst, src in copies1.items():
522 553 inversecopies1.setdefault(src, []).append(dst)
523 554 for dst, src in copies2.items():
524 555 inversecopies2.setdefault(src, []).append(dst)
525 556
526 557 copy = {}
527 558 diverge = {}
528 559 renamedelete = {}
529 560 allsources = set(inversecopies1) | set(inversecopies2)
530 561 for src in allsources:
531 562 dsts1 = inversecopies1.get(src)
532 563 dsts2 = inversecopies2.get(src)
533 564 if dsts1 and dsts2:
534 565 # copied/renamed on both sides
535 566 if src not in m1 and src not in m2:
536 567 # renamed on both sides
537 568 dsts1 = set(dsts1)
538 569 dsts2 = set(dsts2)
539 570 # If there's some overlap in the rename destinations, we
540 571 # consider it not divergent. For example, if side 1 copies 'a'
541 572 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
542 573 # and 'd' and deletes 'a'.
543 574 if dsts1 & dsts2:
544 575 for dst in dsts1 & dsts2:
545 576 copy[dst] = src
546 577 else:
547 578 diverge[src] = sorted(dsts1 | dsts2)
548 579 elif src in m1 and src in m2:
549 580 # copied on both sides
550 581 dsts1 = set(dsts1)
551 582 dsts2 = set(dsts2)
552 583 for dst in dsts1 & dsts2:
553 584 copy[dst] = src
554 585 # TODO: Handle cases where it was renamed on one side and copied
555 586 # on the other side
556 587 elif dsts1:
557 588 # copied/renamed only on side 1
558 589 _checksinglesidecopies(
559 590 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
560 591 )
561 592 elif dsts2:
562 593 # copied/renamed only on side 2
563 594 _checksinglesidecopies(
564 595 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
565 596 )
566 597
567 598 renamedeleteset = set()
568 599 divergeset = set()
569 600 for dsts in diverge.values():
570 601 divergeset.update(dsts)
571 602 for dsts in renamedelete.values():
572 603 renamedeleteset.update(dsts)
573 604
574 605 # find interesting file sets from manifests
575 606 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
576 607 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
577 608 u1 = sorted(addedinm1 - addedinm2)
578 609 u2 = sorted(addedinm2 - addedinm1)
579 610
580 611 header = b" unmatched files in %s"
581 612 if u1:
582 613 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
583 614 if u2:
584 615 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
585 616
586 617 fullcopy = copies1.copy()
587 618 fullcopy.update(copies2)
588 619 if not fullcopy:
589 620 return copy, {}, diverge, renamedelete, {}
590 621
591 622 if repo.ui.debugflag:
592 623 repo.ui.debug(
593 624 b" all copies found (* = to merge, ! = divergent, "
594 625 b"% = renamed and deleted):\n"
595 626 )
596 627 for f in sorted(fullcopy):
597 628 note = b""
598 629 if f in copy:
599 630 note += b"*"
600 631 if f in divergeset:
601 632 note += b"!"
602 633 if f in renamedeleteset:
603 634 note += b"%"
604 635 repo.ui.debug(
605 636 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
606 637 )
607 638 del divergeset
608 639
609 640 repo.ui.debug(b" checking for directory renames\n")
610 641
611 642 # generate a directory move map
612 643 d1, d2 = c1.dirs(), c2.dirs()
613 644 invalid = set()
614 645 dirmove = {}
615 646
616 647 # examine each file copy for a potential directory move, which is
617 648 # when all the files in a directory are moved to a new directory
618 649 for dst, src in pycompat.iteritems(fullcopy):
619 650 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
620 651 if dsrc in invalid:
621 652 # already seen to be uninteresting
622 653 continue
623 654 elif dsrc in d1 and ddst in d1:
624 655 # directory wasn't entirely moved locally
625 656 invalid.add(dsrc)
626 657 elif dsrc in d2 and ddst in d2:
627 658 # directory wasn't entirely moved remotely
628 659 invalid.add(dsrc)
629 660 elif dsrc in dirmove and dirmove[dsrc] != ddst:
630 661 # files from the same directory moved to two different places
631 662 invalid.add(dsrc)
632 663 else:
633 664 # looks good so far
634 665 dirmove[dsrc] = ddst
635 666
636 667 for i in invalid:
637 668 if i in dirmove:
638 669 del dirmove[i]
639 670 del d1, d2, invalid
640 671
641 672 if not dirmove:
642 673 return copy, {}, diverge, renamedelete, {}
643 674
644 675 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
645 676
646 677 for d in dirmove:
647 678 repo.ui.debug(
648 679 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
649 680 )
650 681
651 682 movewithdir = {}
652 683 # check unaccounted nonoverlapping files against directory moves
653 684 for f in u1 + u2:
654 685 if f not in fullcopy:
655 686 for d in dirmove:
656 687 if f.startswith(d):
657 688 # new file added in a directory that was moved, move it
658 689 df = dirmove[d] + f[len(d) :]
659 690 if df not in copy:
660 691 movewithdir[f] = df
661 692 repo.ui.debug(
662 693 b" pending file src: '%s' -> dst: '%s'\n"
663 694 % (f, df)
664 695 )
665 696 break
666 697
667 698 return copy, movewithdir, diverge, renamedelete, dirmove
668 699
669 700
670 701 def _heuristicscopytracing(repo, c1, c2, base):
671 702 """ Fast copytracing using filename heuristics
672 703
673 704 Assumes that moves or renames are of the following two types:
674 705
675 706 1) Inside a directory only (same directory name but different filenames)
676 707 2) Move from one directory to another
677 708 (same filenames but different directory names)
678 709
679 710 Works only when there are no merge commits in the "source branch".
680 711 The source branch is the commits from base up to c2, not including base.
681 712
682 713 If a merge is involved, it falls back to _fullcopytracing().
683 714
684 715 Can be used by setting the following config:
685 716
686 717 [experimental]
687 718 copytrace = heuristics
688 719
689 720 In some cases the copy/move candidates found by the heuristics can be very
690 721 numerous, which will make the algorithm slow. The number of possible
691 722 candidates to check can be limited by using the config option
692 723 `experimental.copytrace.movecandidateslimit`, which defaults to 100.
693 724 """
694 725
695 726 if c1.rev() is None:
696 727 c1 = c1.p1()
697 728 if c2.rev() is None:
698 729 c2 = c2.p1()
699 730
700 731 copies = {}
701 732
702 733 changedfiles = set()
703 734 m1 = c1.manifest()
704 735 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
705 736 # If base is not in c2 branch, we switch to fullcopytracing
706 737 repo.ui.debug(
707 738 b"switching to full copytracing as base is not "
708 739 b"an ancestor of c2\n"
709 740 )
710 741 return _fullcopytracing(repo, c1, c2, base)
711 742
712 743 ctx = c2
713 744 while ctx != base:
714 745 if len(ctx.parents()) == 2:
715 746 # To keep things simple let's not handle merges
716 747 repo.ui.debug(b"switching to full copytracing because of merges\n")
717 748 return _fullcopytracing(repo, c1, c2, base)
718 749 changedfiles.update(ctx.files())
719 750 ctx = ctx.p1()
720 751
721 752 cp = _forwardcopies(base, c2)
722 753 for dst, src in pycompat.iteritems(cp):
723 754 if src in m1:
724 755 copies[dst] = src
725 756
726 757 # file is missing if it isn't present in the destination, but is present in
727 758 # the base and present in the source.
728 759 # Presence in the base is important to exclude added files, presence in the
729 760 # source is important to exclude removed files.
730 761 filt = lambda f: f not in m1 and f in base and f in c2
731 762 missingfiles = [f for f in changedfiles if filt(f)]
732 763
733 764 if missingfiles:
734 765 basenametofilename = collections.defaultdict(list)
735 766 dirnametofilename = collections.defaultdict(list)
736 767
737 768 for f in m1.filesnotin(base.manifest()):
738 769 basename = os.path.basename(f)
739 770 dirname = os.path.dirname(f)
740 771 basenametofilename[basename].append(f)
741 772 dirnametofilename[dirname].append(f)
742 773
743 774 for f in missingfiles:
744 775 basename = os.path.basename(f)
745 776 dirname = os.path.dirname(f)
746 777 samebasename = basenametofilename[basename]
747 778 samedirname = dirnametofilename[dirname]
748 779 movecandidates = samebasename + samedirname
749 780 # f is guaranteed to be present in c2, which is why
750 781 # c2.filectx(f) won't fail
751 782 f2 = c2.filectx(f)
752 783 # we can have a lot of candidates, which can slow down the heuristics;
753 784 # a config value limits the number of candidate moves to check
754 785 maxcandidates = repo.ui.configint(
755 786 b'experimental', b'copytrace.movecandidateslimit'
756 787 )
757 788
758 789 if len(movecandidates) > maxcandidates:
759 790 repo.ui.status(
760 791 _(
761 792 b"skipping copytracing for '%s', more "
762 793 b"candidates than the limit: %d\n"
763 794 )
764 795 % (f, len(movecandidates))
765 796 )
766 797 continue
767 798
768 799 for candidate in movecandidates:
769 800 f1 = c1.filectx(candidate)
770 801 if _related(f1, f2):
771 802 # if there are a few related copies then we'll merge
772 803 # changes into all of them. This matches the behaviour
773 804 # of upstream copytracing
774 805 copies[candidate] = f
775 806
776 807 return copies, {}, {}, {}, {}
777 808
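The two candidate sources named in the docstring (same basename for cross-directory moves, same dirname for in-place renames) can be seen in isolation in this hypothetical sketch of the lookup tables built above:

```
import collections
import os

newfiles = [b'src/new/a.txt', b'src/old/b.txt']  # in m1 but not in base
missing = b'src/old/a.txt'  # changed in c2, absent from m1

basenametofilename = collections.defaultdict(list)
dirnametofilename = collections.defaultdict(list)
for f in newfiles:
    basenametofilename[os.path.basename(f)].append(f)
    dirnametofilename[os.path.dirname(f)].append(f)

candidates = (
    basenametofilename[os.path.basename(missing)]  # [b'src/new/a.txt']
    + dirnametofilename[os.path.dirname(missing)]  # [b'src/old/b.txt']
)
```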
778 809
779 810 def _related(f1, f2):
780 811 """return True if f1 and f2 filectx have a common ancestor
781 812
782 813 Walk back to common ancestor to see if the two files originate
783 814 from the same file. Since workingfilectx's rev() is None it messes
784 815 up the integer comparison logic, hence the pre-step check for
785 816 None (f1 and f2 can only be workingfilectx's initially).
786 817 """
787 818
788 819 if f1 == f2:
789 820 return True # a match
790 821
791 822 g1, g2 = f1.ancestors(), f2.ancestors()
792 823 try:
793 824 f1r, f2r = f1.linkrev(), f2.linkrev()
794 825
795 826 if f1r is None:
796 827 f1 = next(g1)
797 828 if f2r is None:
798 829 f2 = next(g2)
799 830
800 831 while True:
801 832 f1r, f2r = f1.linkrev(), f2.linkrev()
802 833 if f1r > f2r:
803 834 f1 = next(g1)
804 835 elif f2r > f1r:
805 836 f2 = next(g2)
806 837 else: # f1 and f2 point to files in the same linkrev
807 838 return f1 == f2 # true if they point to the same file
808 839 except StopIteration:
809 840 return False
810 841
811 842
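The loop above is a classic two-pointer walk; here it is as a standalone sketch over plain integers (illustrative; the real code walks filectx ancestors and compares linkrevs):

```
def related(revs1, revs2):
    # ancestors listed newest-first, as fctx.ancestors() yields them
    g1, g2 = iter(revs1), iter(revs2)
    try:
        r1, r2 = next(g1), next(g2)
        while True:
            if r1 > r2:
                r1 = next(g1)  # advance the younger side
            elif r2 > r1:
                r2 = next(g2)
            else:
                return True  # met at a common revision
    except StopIteration:
        return False  # one side ran out: no common ancestor

assert related([5, 3, 1], [4, 3, 2]) is True
assert related([5, 3], [4, 2]) is False
```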
812 843 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
813 844 """reproduce copies from fromrev to rev in the dirstate
814 845
815 846 If skiprev is specified, it's a revision that should be used to
816 847 filter copy records. Any copies that occur between fromrev and
817 848 skiprev will not be duplicated, even if they appear in the set of
818 849 copies between fromrev and rev.
819 850 """
820 851 exclude = {}
821 852 ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
822 853 bctrace = stringutil.parsebool(ctraceconfig)
823 854 if skiprev is not None and (
824 855 ctraceconfig == b'heuristics' or bctrace or bctrace is None
825 856 ):
826 857 # copytrace='off' skips this line, but not the entire function because
827 858 # the line below is O(size of the repo) during a rebase, while the rest
828 859 # of the function is much faster (and is required for carrying copy
829 860 # metadata across the rebase anyway).
830 861 exclude = pathcopies(repo[fromrev], repo[skiprev])
831 862 for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
832 863 if dst in exclude:
833 864 continue
834 865 if dst in wctx:
835 866 wctx[dst].markcopied(src)
836 867
837 868
838 869 def computechangesetfilesadded(ctx):
839 870 """return the list of files added in a changeset
840 871 """
841 872 added = []
842 873 for f in ctx.files():
843 874 if not any(f in p for p in ctx.parents()):
844 875 added.append(f)
845 876 return added
846 877
847 878
848 879 def computechangesetfilesremoved(ctx):
849 880 """return the list of files removed in a changeset
850 881 """
851 882 removed = []
852 883 for f in ctx.files():
853 884 if f not in ctx:
854 885 removed.append(f)
855 886 return removed
856 887
857 888
858 889 def computechangesetcopies(ctx):
859 890 """return the copies data for a changeset
860 891
861 892 The copies data are returned as a pair of dictionaries (p1copies, p2copies).
862 893
863 894 Each dictionary is of the form: `{newname: oldname}`
864 895 """
865 896 p1copies = {}
866 897 p2copies = {}
867 898 p1 = ctx.p1()
868 899 p2 = ctx.p2()
869 900 narrowmatch = ctx._repo.narrowmatch()
870 901 for dst in ctx.files():
871 902 if not narrowmatch(dst) or dst not in ctx:
872 903 continue
873 904 copied = ctx[dst].renamed()
874 905 if not copied:
875 906 continue
876 907 src, srcnode = copied
877 908 if src in p1 and p1[src].filenode() == srcnode:
878 909 p1copies[dst] = src
879 910 elif src in p2 and p2[src].filenode() == srcnode:
880 911 p2copies[dst] = src
881 912 return p1copies, p2copies
882 913
883 914
884 915 def encodecopies(files, copies):
885 916 items = []
886 917 for i, dst in enumerate(files):
887 918 if dst in copies:
888 919 items.append(b'%d\0%s' % (i, copies[dst]))
889 920 if len(items) != len(copies):
890 921 raise error.ProgrammingError(
891 922 b'some copy targets missing from file list'
892 923 )
893 924 return b"\n".join(items)
894 925
895 926
896 927 def decodecopies(files, data):
897 928 try:
898 929 copies = {}
899 930 if not data:
900 931 return copies
901 932 for l in data.split(b'\n'):
902 933 strindex, src = l.split(b'\0')
903 934 i = int(strindex)
904 935 dst = files[i]
905 936 copies[dst] = src
906 937 return copies
907 938 except (ValueError, IndexError):
908 939 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
909 940 # used different syntax for the value.
910 941 return None
911 942
912 943
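A quick round trip through the encoding implemented above: each copy is stored as the destination's index into the sorted file list, a NUL byte, and the source path (values illustrative).

```
files = [b'a.txt', b'b.txt', b'c.txt']  # sorted(ctx.files())
copies = {b'b.txt': b'old.txt'}  # {dst: src}

data = encodecopies(files, copies)
# data == b'1\x00old.txt'
assert decodecopies(files, data) == copies
```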
913 944 def encodefileindices(files, subset):
914 945 subset = set(subset)
915 946 indices = []
916 947 for i, f in enumerate(files):
917 948 if f in subset:
918 949 indices.append(b'%d' % i)
919 950 return b'\n'.join(indices)
920 951
921 952
922 953 def decodefileindices(files, data):
923 954 try:
924 955 subset = []
925 956 if not data:
926 957 return subset
927 958 for strindex in data.split(b'\n'):
928 959 i = int(strindex)
929 960 if i < 0 or i >= len(files):
930 961 return None
931 962 subset.append(files[i])
932 963 return subset
933 964 except (ValueError, IndexError):
934 965 # Perhaps someone had chosen the same key name (e.g. "added") and
935 966 # used different syntax for the value.
936 967 return None
937 968
938 969
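Similarly, added/removed files are stored as newline-separated indices into the same sorted file list; a matching sketch:

```
files = [b'a.txt', b'b.txt', b'c.txt']
added = [b'a.txt', b'c.txt']

data = encodefileindices(files, added)
# data == b'0\n2'
assert decodefileindices(files, data) == added
```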
939 970 def _getsidedata(srcrepo, rev):
940 971 ctx = srcrepo[rev]
941 972 filescopies = computechangesetcopies(ctx)
942 973 filesadded = computechangesetfilesadded(ctx)
943 974 filesremoved = computechangesetfilesremoved(ctx)
944 975 sidedata = {}
945 976 if any([filescopies, filesadded, filesremoved]):
946 977 sortedfiles = sorted(ctx.files())
947 978 p1copies, p2copies = filescopies
948 979 p1copies = encodecopies(sortedfiles, p1copies)
949 980 p2copies = encodecopies(sortedfiles, p2copies)
950 981 filesadded = encodefileindices(sortedfiles, filesadded)
951 982 filesremoved = encodefileindices(sortedfiles, filesremoved)
952 983 if p1copies:
953 984 sidedata[sidedatamod.SD_P1COPIES] = p1copies
954 985 if p2copies:
955 986 sidedata[sidedatamod.SD_P2COPIES] = p2copies
956 987 if filesadded:
957 988 sidedata[sidedatamod.SD_FILESADDED] = filesadded
958 989 if filesremoved:
959 990 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
960 991 return sidedata
961 992
962 993
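Putting the encoders together: a changeset that renames b'old.txt' to b'b.txt' would, under this scheme, yield sidedata along these lines (illustrative values; the keys are the sidedatamod constants used above):

```
sortedfiles = [b'b.txt', b'old.txt']  # sorted(ctx.files())
sidedata = {
    sidedatamod.SD_P1COPIES: b'0\x00old.txt',  # b'b.txt' copied from b'old.txt'
    sidedatamod.SD_FILESADDED: b'0',  # index of b'b.txt'
    sidedatamod.SD_FILESREMOVED: b'1',  # index of b'old.txt'
}
```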
963 994 def getsidedataadder(srcrepo, destrepo):
964 995 def sidedatacompanion(revlog, rev):
965 996 sidedata = {}
966 997 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
967 998 sidedata = _getsidedata(srcrepo, rev)
968 999 return False, (), sidedata
969 1000
970 1001 return sidedatacompanion
971 1002
972 1003
973 1004 def getsidedataremover(srcrepo, destrepo):
974 1005 def sidedatacompanion(revlog, rev):
975 1006 f = ()
976 1007 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
977 1008 if revlog.flags(rev) & REVIDX_SIDEDATA:
978 1009 f = (
979 1010 sidedatamod.SD_P1COPIES,
980 1011 sidedatamod.SD_P2COPIES,
981 1012 sidedatamod.SD_FILESADDED,
982 1013 sidedatamod.SD_FILESREMOVED,
983 1014 )
984 1015 return False, f, {}
985 1016
986 1017 return sidedatacompanion