##// END OF EJS Templates
copies: simplify chain loop...
marmoute -
r43596:f0a2c197 default
parent child Browse files
Show More
@@ -1,1017 +1,1014 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 13 from .i18n import _
14 14
15 15
16 16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 17
18 18 from . import (
19 19 error,
20 20 match as matchmod,
21 21 node,
22 22 pathutil,
23 23 pycompat,
24 24 util,
25 25 )
26 26
27 27 from .revlogutils import sidedata as sidedatamod
28 28
29 29 from .utils import stringutil
30 30
31 31
32 32 def _filter(src, dst, t):
33 33 """filters out invalid copies after chaining"""
34 34
35 35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 37 # in the following table (not including trivial cases). For example, case 2
38 38 # is where a file existed in 'src' and remained under that name in 'mid' and
39 39 # then was renamed between 'mid' and 'dst'.
40 40 #
41 41 # case src mid dst result
42 42 # 1 x y - -
43 43 # 2 x y y x->y
44 44 # 3 x y x -
45 45 # 4 x y z x->z
46 46 # 5 - x y -
47 47 # 6 x x y x->y
48 48 #
49 49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 51 # between 5 and 6, so it includes all cases in its result.
52 52 # Cases 1, 3, and 5 are then removed by _filter().
53 53
54 54 for k, v in list(t.items()):
55 55 # remove copies from files that didn't exist
56 56 if v not in src:
57 57 del t[k]
58 58 # remove criss-crossed copies
59 59 elif k in src and v in dst:
60 60 del t[k]
61 61 # remove copies to files that were then removed
62 62 elif k not in dst:
63 63 del t[k]
64 64
65 65
def _chain(a, b):
    """chain two sets of copies 'a' and 'b'"""
    # Work on a fresh copy of 'a' so neither input mapping is mutated.
    chained = a.copy()
    for dest, source in pycompat.iteritems(b):
        # If 'source' is itself a copy destination already recorded, collapse
        # the two hops into one entry; otherwise record the copy as-is.
        chained[dest] = chained.get(source, source)
    return chained
75 72
76 73
77 74 def _tracefile(fctx, am, basemf):
78 75 """return file context that is the ancestor of fctx present in ancestor
79 76 manifest am
80 77
81 78 Note: we used to try and stop after a given limit, however checking if that
82 79 limit is reached turned out to be very expensive. we are better off
83 80 disabling that feature."""
84 81
85 82 for f in fctx.ancestors():
86 83 path = f.path()
87 84 if am.get(path, None) == f.filenode():
88 85 return path
89 86 if basemf and basemf.get(path, None) == f.filenode():
90 87 return path
91 88
92 89
93 90 def _dirstatecopies(repo, match=None):
94 91 ds = repo.dirstate
95 92 c = ds.copies().copy()
96 93 for k in list(c):
97 94 if ds[k] not in b'anm' or (match and not match(k)):
98 95 del c[k]
99 96 return c
100 97
101 98
102 99 def _computeforwardmissing(a, b, match=None):
103 100 """Computes which files are in b but not a.
104 101 This is its own function so extensions can easily wrap this call to see what
105 102 files _forwardcopies is about to process.
106 103 """
107 104 ma = a.manifest()
108 105 mb = b.manifest()
109 106 return mb.filesnotin(ma, match=match)
110 107
111 108
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage always implies the changeset-centric algorithms
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    # otherwise honor the experimental read-from configuration
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
119 116
120 117
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst@b: src@a} mapping built either by the changeset-centric
    algorithm or by tracing each missing file's ancestry in the filelogs.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        # 'b' is a non-merge child of 'a': only its touched files can be new
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared across all traced files below; presumably avoids recomputing
    # the ancestry set per file -- TODO confirm
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
178 175
179 176
def _revinfogetter(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    """
    cl = repo.changelog
    parents = cl.parentrevs

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        # A small cache to avoid doing the work twice for merges
        #
        # In the vast majority of cases, if we ask information for a revision
        # about 1 parent, we'll later ask it for the other. So it makes sense
        # to keep the information around when reaching the first parent of a
        # merge and dropping it after it was provided for the second parent.
        #
        # There exist cases where only one parent of the merge will be walked.
        # It happens when the "destination" of the copy tracing is descendant
        # from a new root, not common with the "source". In that case, we will
        # only walk through merge parents that are descendant of changesets
        # common between "source" and "destination".
        #
        # With the current implementation if such changesets have copy
        # information, we'll keep them in memory until the end of
        # _changesetforwardcopies. We don't expect the case to be frequent
        # enough to matter.
        #
        # In addition, it would be possible to reach a pathological case, where
        # many first parents are met before any second parent is reached. In
        # that case the cache could grow. If this ever becomes an issue one can
        # safely introduce a maximum cache size. This would trade extra CPU/IO
        # time to save memory.
        merge_caches = {}

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & REVIDX_SIDEDATA:
                # merge case: the data may have been cached by the walk of
                # the other parent
                e = merge_caches.pop(rev, None)
                if e is not None:
                    return e
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
                if p1 != node.nullrev and p2 != node.nullrev:
                    # XXX some case we over cache, IGNORE
                    merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
            else:
                # no sidedata flag: no copy information stored for this rev
                p1copies = {}
                p2copies = {}
                removed = ()
            return p1, p2, p1copies, p2copies, removed

    else:

        def revinfo(rev):
            # filelog/compatibility mode: read copies through the context
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed

    return revinfo
250 247
251 248
def _changesetforwardcopies(a, b, match):
    """changeset-centric version of _forwardcopies()

    Walks the changesets between 'a' and 'b', propagating and merging the
    per-changeset copy information, and returns the {dst@b: src@a} mapping.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    # rev -> list of child revs within the walked range
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    # parents of missing revs that are not themselves missing
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    # revisions on a path from a root to b
    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    # rev -> accumulated {dst: src} copies reaching that rev
    all_copies = {r: {} for r in roots}
    alwaysmatch = match.always()
    # walk in topological (revnum) order, pushing copies down to children
    for r in sorted(iterrevs):
        copies = all_copies.pop(r)
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
                # _chain makes a copy, we can avoid doing so in some
                # simple/linear cases.
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    return all_copies[b.rev()]
340 337
341 338
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        # committed endpoint: a single committed-copies computation
        return _committedforwardcopies(a, b, base, match)
    # 'b' is the working copy: trace up to its first parent, then layer the
    # uncommitted dirstate copies on top
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
356 353
357 354
def _backwardrenames(a, b, match):
    """find renames seen going from b back to a, by inverting forward copies"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    renames = {}
    # sorted() keeps the arbitrary 1:n pick deterministic
    for dest, source in sorted(pycompat.iteritems(forward)):
        if match and not match(source):
            continue
        if source in a:
            # still present in 'a': this was a copy, not a rename
            continue
        renames[source] = dest
    return renames
378 375
379 376
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Dispatches to forward, backward or combined copy tracing depending on
    how 'x' and 'y' relate in the graph, then filters invalid entries.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: straight forward tracing
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # y is an ancestor of x: trace backward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated direction: go back to the common ancestor, then forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    # drop entries invalidated by the chaining (see _filter)
    _filter(x, y, copies)
    return copies
414 411
415 412
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
494 491
495 492
496 493 def _isfullcopytraceable(repo, c1, base):
497 494 """ Checks that if base, source and destination are all no-public branches,
498 495 if yes let's use the full copytrace algorithm for increased capabilities
499 496 since it will be fast enough.
500 497
501 498 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
502 499 number of changesets from c1 to base such that if number of changesets are
503 500 more than the limit, full copytracing algorithm won't be used.
504 501 """
505 502 if c1.rev() is None:
506 503 c1 = c1.p1()
507 504 if c1.mutable() and base.mutable():
508 505 sourcecommitlimit = repo.ui.configint(
509 506 b'experimental', b'copytrace.sourcecommitlimit'
510 507 )
511 508 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
512 509 return commits < sourcecommitlimit
513 510 return False
514 511
515 512
516 513 def _checksinglesidecopies(
517 514 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
518 515 ):
519 516 if src not in m2:
520 517 # deleted on side 2
521 518 if src not in m1:
522 519 # renamed on side 1, deleted on side 2
523 520 renamedelete[src] = dsts1
524 521 elif m2[src] != mb[src]:
525 522 if not _related(c2[src], base[src]):
526 523 return
527 524 # modified on side 2
528 525 for dst in dsts1:
529 526 if dst not in m2:
530 527 # dst not added on side 2 (handle as regular
531 528 # "both created" case in manifestmerge otherwise)
532 529 copy[dst] = src
533 530
534 531
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert {dst: src} into {src: [dsts]} so divergence is visible
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flattened destination sets, used only for the debug output below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
699 696
700 697
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only copies whose source still exists on side 1
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index the files added on side 1 by basename and by dirname, so a
        # missing file can be matched against both heuristic move types
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
808 805
809 806
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # step past a working file context (linkrev None) before comparing
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        # walk the file with the larger linkrev backwards until both sides
        # land on the same linkrev; exhausting either generator means the
        # files are unrelated
        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        return False
841 838
842 839
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        # only mark files actually present in the working context
        if dst in wctx:
            wctx[dst].markcopied(src)
867 864
868 865
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    # a file counts as added when it is touched by the changeset and is
    # absent from every parent
    return [
        f
        for f in ctx.files()
        if not any(f in p for p in ctx.parents())
    ]
877 874
878 875
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    # a file counts as removed when the changeset touches it but it is no
    # longer present in the changeset itself
    return [f for f in ctx.files() if f not in ctx]
887 884
888 885
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    copiesp1 = {}
    copiesp2 = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrowspec and files not present in ctx
        if not narrowmatch(dst) or dst not in ctx:
            continue
        rename = ctx[dst].renamed()
        if not rename:
            continue
        src, srcnode = rename
        # attribute the copy to whichever parent actually holds the
        # source file at the recorded node
        if src in parent1 and parent1[src].filenode() == srcnode:
            copiesp1[dst] = src
        elif src in parent2 and parent2[src].filenode() == srcnode:
            copiesp2[dst] = src
    return copiesp1, copiesp2
913 910
914 911
def encodecopies(files, copies):
    """Serialize *copies* ({dst: src}) as newline-separated
    "<index>\\0<source>" entries, where <index> points into *files*."""
    entries = []
    for idx, dst in enumerate(files):
        if dst in copies:
            entries.append(b'%d\0%s' % (idx, copies[dst]))
    # every copy destination must appear in the (sorted) file list
    if len(entries) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
925 922
926 923
def decodecopies(files, data):
    """Parse the output of encodecopies() back into a {dst: src} dict.

    Returns None when *data* does not match the expected syntax —
    perhaps someone had chosen the same key name (e.g. "p1copies") and
    used different syntax for the value.
    """
    if not data:
        return {}
    try:
        copies = {}
        for entry in data.split(b'\n'):
            strindex, src = entry.split(b'\0')
            copies[files[int(strindex)]] = src
        return copies
    except (ValueError, IndexError):
        return None
942 939
943 940
def encodefileindices(files, subset):
    """Encode the members of *subset* as newline-separated decimal
    indices into *files* (in *files* order)."""
    members = set(subset)
    return b'\n'.join(
        b'%d' % i for i, f in enumerate(files) if f in members
    )
951 948
952 949
def decodefileindices(files, data):
    """Parse the output of encodefileindices() back into a file list.

    Returns None when *data* does not match the expected syntax —
    perhaps someone had chosen the same key name (e.g. "added") and
    used different syntax for the value.
    """
    if not data:
        return []
    try:
        subset = []
        for token in data.split(b'\n'):
            idx = int(token)
            # reject out-of-range indices instead of raising
            if not 0 <= idx < len(files):
                return None
            subset.append(files[idx])
        return subset
    except (ValueError, IndexError):
        return None
968 965
969 966
def _getsidedata(srcrepo, rev):
    """Build the sidedata mapping (copies / added / removed files) for
    revision *rev* of *srcrepo*; keys whose encoded value is empty are
    omitted."""
    ctx = srcrepo[rev]
    copies = computechangesetcopies(ctx)
    added = computechangesetfilesadded(ctx)
    removed = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([copies, added, removed]):
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = copies
        # encode each piece of data against the sorted file list, and
        # only record the non-empty payloads
        encoded = [
            (sidedatamod.SD_P1COPIES, encodecopies(sortedfiles, p1copies)),
            (sidedatamod.SD_P2COPIES, encodecopies(sortedfiles, p2copies)),
            (sidedatamod.SD_FILESADDED, encodefileindices(sortedfiles, added)),
            (
                sidedatamod.SD_FILESREMOVED,
                encodefileindices(sortedfiles, removed),
            ),
        ]
        for key, value in encoded:
            if value:
                sidedata[key] = value
    return sidedata
992 989
993 990
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion callback that computes and attaches
    copy-related sidedata while upgrading a repository."""

    def sidedatacompanion(revlog, rev):
        # only the changelog (identified by its 'filteredrevs' attribute)
        # carries copy-related sidedata; other revlogs get an empty payload
        if util.safehasattr(revlog, 'filteredrevs'):
            return False, (), _getsidedata(srcrepo, rev)
        return False, (), {}

    return sidedatacompanion
1002 999
1003 1000
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion callback that strips copy-related
    sidedata while downgrading a repository."""

    copykeys = (
        sidedatamod.SD_P1COPIES,
        sidedatamod.SD_P2COPIES,
        sidedatamod.SD_FILESADDED,
        sidedatamod.SD_FILESREMOVED,
    )

    def sidedatacompanion(revlog, rev):
        toremove = ()
        # only the changelog (identified by its 'filteredrevs' attribute)
        # may carry copy-related sidedata worth removing
        ischangelog = util.safehasattr(revlog, 'filteredrevs')
        if ischangelog and revlog.flags(rev) & REVIDX_SIDEDATA:
            toremove = copykeys
        return False, toremove, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now