copies: return consistent type from revinfo...
marmoute
r44221:79750d78 default
@@ -1,1014 +1,1014 @@
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 13 from .i18n import _
14 14
15 15
16 16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 17
18 18 from . import (
19 19 error,
20 20 match as matchmod,
21 21 node,
22 22 pathutil,
23 23 pycompat,
24 24 util,
25 25 )
26 26
27 27 from .revlogutils import sidedata as sidedatamod
28 28
29 29 from .utils import stringutil
30 30
31 31
32 32 def _filter(src, dst, t):
33 33 """filters out invalid copies after chaining"""
34 34
35 35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 37 # in the following table (not including trivial cases). For example, case 2
38 38 # is where a file existed in 'src' and remained under that name in 'mid' and
39 39 # then was renamed between 'mid' and 'dst'.
40 40 #
41 41 # case src mid dst result
42 42 # 1 x y - -
43 43 # 2 x y y x->y
44 44 # 3 x y x -
45 45 # 4 x y z x->z
46 46 # 5 - x y -
47 47 # 6 x x y x->y
48 48 #
49 49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 51 # between 5 and 6, so it includes all cases in its result.
52 52 # Cases 1, 3, and 5 are then removed by _filter().
53 53
54 54 for k, v in list(t.items()):
55 55 # remove copies from files that didn't exist
56 56 if v not in src:
57 57 del t[k]
58 58 # remove criss-crossed copies
59 59 elif k in src and v in dst:
60 60 del t[k]
61 61 # remove copies to files that were then removed
62 62 elif k not in dst:
63 63 del t[k]
64 64
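# A worked example of table case 3 above (x -> y -> x), as an illustration
# only: plain sets stand in for the real context objects here, since
# _filter() only needs membership tests on 'src' and 'dst'.
#
#     >>> t = _chain({b'y': b'x'}, {b'x': b'y'})   # src->mid, then mid->dst
#     >>> t == {b'x': b'x', b'y': b'x'}
#     True
#     >>> _filter({b'x'}, {b'x'}, t)               # 'y' no longer exists in dst
#     >>> t
#     {}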
65 65
66 66 def _chain(a, b):
67 67 """chain two sets of copies 'a' and 'b'"""
68 68 t = a.copy()
69 69 for k, v in pycompat.iteritems(b):
70 70 t[k] = t.get(v, v)
71 71 return t
72 72
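# A minimal sketch of the chaining rule: a copy a -> b composed with a copy
# b -> c means c ultimately traces back to a; entries that chain through
# nothing are kept as-is.
#
#     >>> _chain({b'b': b'a'}, {b'c': b'b'}) == {b'b': b'a', b'c': b'a'}
#     True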
73 73
74 74 def _tracefile(fctx, am, basemf):
75 75 """return file context that is the ancestor of fctx present in ancestor
76 76 manifest am
77 77
78 78 Note: we used to try to stop after a given limit; however, checking if that
79 79 limit was reached turned out to be very expensive. We are better off
80 80 disabling that feature."""
81 81
82 82 for f in fctx.ancestors():
83 83 path = f.path()
84 84 if am.get(path, None) == f.filenode():
85 85 return path
86 86 if basemf and basemf.get(path, None) == f.filenode():
87 87 return path
88 88
89 89
90 90 def _dirstatecopies(repo, match=None):
91 91 ds = repo.dirstate
92 92 c = ds.copies().copy()
93 93 for k in list(c):
94 94 if ds[k] not in b'anm' or (match and not match(k)):
95 95 del c[k]
96 96 return c
97 97
98 98
99 99 def _computeforwardmissing(a, b, match=None):
100 100 """Computes which files are in b but not a.
101 101 This is its own function so extensions can easily wrap this call to see what
102 102 files _forwardcopies is about to process.
103 103 """
104 104 ma = a.manifest()
105 105 mb = b.manifest()
106 106 return mb.filesnotin(ma, match=match)
107 107
108 108
109 109 def usechangesetcentricalgo(repo):
110 110 """Checks if we should use changeset-centric copy algorithms"""
111 111 if repo.filecopiesmode == b'changeset-sidedata':
112 112 return True
113 113 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
114 114 changesetsource = (b'changeset-only', b'compatibility')
115 115 return readfrom in changesetsource
116 116
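# For reference, the configuration read above, in hgrc syntax; any other
# value of copies.read-from leaves the algorithm filelog-based:
#
#     [experimental]
#     copies.read-from = changeset-only
#     # or: copies.read-from = compatibility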
117 117
118 118 def _committedforwardcopies(a, b, base, match):
119 119 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
120 120 # files might have to be traced back to the fctx parent of the last
121 121 # one-side-only changeset, but not further back than that
122 122 repo = a._repo
123 123
124 124 if usechangesetcentricalgo(repo):
125 125 return _changesetforwardcopies(a, b, match)
126 126
127 127 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
128 128 dbg = repo.ui.debug
129 129 if debug:
130 130 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
131 131 am = a.manifest()
132 132 basemf = None if base is None else base.manifest()
133 133
134 134 # find where new files came from
135 135 # we currently don't try to find where old files went, too expensive
136 136 # this means we can miss a case like 'hg rm b; hg cp a b'
137 137 cm = {}
138 138
139 139 # Computing the forward missing is quite expensive on large manifests, since
140 140 # it compares the entire manifests. We can optimize it in the common use
141 141 # case of computing what copies are in a commit versus its parent (like
142 142 # during a rebase or histedit). Note, we exclude merge commits from this
143 143 # optimization, since the ctx.files() for a merge commit is not correct for
144 144 # this comparison.
145 145 forwardmissingmatch = match
146 146 if b.p1() == a and b.p2().node() == node.nullid:
147 147 filesmatcher = matchmod.exact(b.files())
148 148 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
149 149 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
150 150
151 151 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
152 152
153 153 if debug:
154 154 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
155 155
156 156 for f in sorted(missing):
157 157 if debug:
158 158 dbg(b'debug.copies: tracing file: %s\n' % f)
159 159 fctx = b[f]
160 160 fctx._ancestrycontext = ancestrycontext
161 161
162 162 if debug:
163 163 start = util.timer()
164 164 opath = _tracefile(fctx, am, basemf)
165 165 if opath:
166 166 if debug:
167 167 dbg(b'debug.copies: rename of: %s\n' % opath)
168 168 cm[f] = opath
169 169 if debug:
170 170 dbg(
171 171 b'debug.copies: time: %f seconds\n'
172 172 % (util.timer() - start)
173 173 )
174 174 return cm
175 175
176 176
177 177 def _revinfogetter(repo):
178 178 """return a function that return multiple data given a <rev>"i
179 179
180 180 * p1: revision number of first parent
181 181 * p2: revision number of second parent
182 182 * p1copies: mapping of copies from p1
183 183 * p2copies: mapping of copies from p2
184 184 * removed: a list of removed files
185 185 """
186 186 cl = repo.changelog
187 187 parents = cl.parentrevs
188 188
189 189 if repo.filecopiesmode == b'changeset-sidedata':
190 190 changelogrevision = cl.changelogrevision
191 191 flags = cl.flags
192 192
193 193 # A small cache to avoid doing the work twice for merges
194 194 #
195 195 # In the vast majority of cases, if we ask for information about a revision
196 196 # regarding one parent, we'll later ask about the other. So it makes sense
197 197 # to keep the information around when reaching the first parent of a merge
198 198 # and to drop it once it has been provided for the second parent.
199 199 #
200 200 # There are cases where only one parent of a merge will be walked. This
201 201 # happens when the "destination" of the copy tracing is a descendant of a
202 202 # new root, not common with the "source". In that case, we will only walk
203 203 # through merge parents that are descendants of changesets common
204 204 # between "source" and "destination".
205 205 #
206 206 # With the current implementation, if such changesets carry copy
207 207 # information, we'll keep them in memory until the end of
208 208 # _changesetforwardcopies. We don't expect the case to be frequent
209 209 # enough to matter.
210 210 #
211 211 # In addition, it would be possible to reach a pathological case, where
212 212 # many first parents are met before any second parent is reached. In
213 213 # that case the cache could grow. If this ever becomes an issue, one can
214 214 # safely introduce a maximum cache size. This would trade extra CPU/IO
215 215 # time to save memory.
216 216 merge_caches = {}
217 217
218 218 def revinfo(rev):
219 219 p1, p2 = parents(rev)
220 220 if flags(rev) & REVIDX_SIDEDATA:
221 221 e = merge_caches.pop(rev, None)
222 222 if e is not None:
223 223 return e
224 224 c = changelogrevision(rev)
225 225 p1copies = c.p1copies
226 226 p2copies = c.p2copies
227 227 removed = c.filesremoved
228 228 if p1 != node.nullrev and p2 != node.nullrev:
229 229 # XXX in some cases we over-cache; this is deliberately ignored
230 230 merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
231 231 else:
232 232 p1copies = {}
233 233 p2copies = {}
234 removed = ()
234 removed = []
235 235 return p1, p2, p1copies, p2copies, removed
236 236
237 237 else:
238 238
239 239 def revinfo(rev):
240 240 p1, p2 = parents(rev)
241 241 ctx = repo[rev]
242 242 p1copies, p2copies = ctx._copies
243 243 removed = ctx.filesremoved()
244 244 return p1, p2, p1copies, p2copies, removed
245 245
246 246 return revinfo
247 247
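# Note: both revinfo() variants above return the same 5-tuple shape
# (p1, p2, p1copies, p2copies, removed). As of this changeset, 'removed' is
# a list on every path; the no-sidedata fallback branch previously returned
# an empty tuple.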
248 248
249 249 def _changesetforwardcopies(a, b, match):
250 250 if a.rev() in (node.nullrev, b.rev()):
251 251 return {}
252 252
253 253 repo = a.repo().unfiltered()
254 254 children = {}
255 255 revinfo = _revinfogetter(repo)
256 256
257 257 cl = repo.changelog
258 258 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
259 259 mrset = set(missingrevs)
260 260 roots = set()
261 261 for r in missingrevs:
262 262 for p in cl.parentrevs(r):
263 263 if p == node.nullrev:
264 264 continue
265 265 if p not in children:
266 266 children[p] = [r]
267 267 else:
268 268 children[p].append(r)
269 269 if p not in mrset:
270 270 roots.add(p)
271 271 if not roots:
272 272 # no common revision to track copies from
273 273 return {}
274 274 min_root = min(roots)
275 275
276 276 from_head = set(
277 277 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
278 278 )
279 279
280 280 iterrevs = set(from_head)
281 281 iterrevs &= mrset
282 282 iterrevs.update(roots)
283 283 iterrevs.remove(b.rev())
284 284 all_copies = {r: {} for r in roots}
285 285 alwaysmatch = match.always()
286 286 for r in sorted(iterrevs):
287 287 copies = all_copies.pop(r)
288 288 for i, c in enumerate(children[r]):
289 289 p1, p2, p1copies, p2copies, removed = revinfo(c)
290 290 if r == p1:
291 291 parent = 1
292 292 childcopies = p1copies
293 293 else:
294 294 assert r == p2
295 295 parent = 2
296 296 childcopies = p2copies
297 297 if not alwaysmatch:
298 298 childcopies = {
299 299 dst: src for dst, src in childcopies.items() if match(dst)
300 300 }
301 301 newcopies = copies
302 302 if childcopies:
303 303 newcopies = _chain(newcopies, childcopies)
304 304 # _chain makes a copy; we could avoid doing so in some
305 305 # simple/linear cases.
306 306 assert newcopies is not copies
307 307 for f in removed:
308 308 if f in newcopies:
309 309 if newcopies is copies:
310 310 # copy on write to avoid affecting potential other
311 311 # branches. when there are no other branches, this
312 312 # could be avoided.
313 313 newcopies = copies.copy()
314 314 del newcopies[f]
315 315 othercopies = all_copies.get(c)
316 316 if othercopies is None:
317 317 all_copies[c] = newcopies
318 318 else:
319 319 # we are the second parent to work on c; we need to merge our
320 320 # work with the other.
321 321 #
322 322 # Unlike when copies are stored in the filelog, we consider
323 323 # it a copy even if the destination already existed on the
324 324 # other branch. It's simply too expensive to check if the
325 325 # file existed in the manifest.
326 326 #
327 327 # In case of conflict, parent 1 takes precedence over parent 2.
328 328 # This is an arbitrary choice made anew when implementing
329 329 # changeset-based copies. It was made without regard to
330 330 # potential filelog-related behavior.
331 331 if parent == 1:
332 332 othercopies.update(newcopies)
333 333 else:
334 334 newcopies.update(othercopies)
335 335 all_copies[c] = newcopies
336 336 return all_copies[b.rev()]
337 337
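# The traversal above, in brief: copy mappings are propagated from the
# roots (revisions common to 'a' and 'b') child by child toward 'b'. A
# merge child combines the mappings of both parents, with parent-1 data
# winning on conflict, and files removed in a child drop their entries
# (using copy-on-write so sibling branches are unaffected).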
338 338
339 339 def _forwardcopies(a, b, base=None, match=None):
340 340 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
341 341
342 342 if base is None:
343 343 base = a
344 344 match = a.repo().narrowmatch(match)
345 345 # check for working copy
346 346 if b.rev() is None:
347 347 cm = _committedforwardcopies(a, b.p1(), base, match)
348 348 # combine copies from dirstate if necessary
349 349 copies = _chain(cm, _dirstatecopies(b._repo, match))
350 350 else:
351 351 copies = _committedforwardcopies(a, b, base, match)
352 352 return copies
353 353
354 354
355 355 def _backwardrenames(a, b, match):
356 356 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
357 357 return {}
358 358
359 359 # Even though we're not taking copies into account, 1:n rename situations
360 360 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
361 361 # arbitrarily pick one of the renames.
362 362 # We don't want to pass in "match" here, since that would filter
363 363 # the destination by it. Since we're reversing the copies, we want
364 364 # to filter the source instead.
365 365 f = _forwardcopies(b, a)
366 366 r = {}
367 367 for k, v in sorted(pycompat.iteritems(f)):
368 368 if match and not match(v):
369 369 continue
370 370 # remove copies
371 371 if v in a:
372 372 continue
373 373 r[v] = k
374 374 return r
375 375
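# A minimal sketch: if 'old' was renamed to 'new' between b and a, then
# _forwardcopies(b, a) == {b'new': b'old'} and b'old' is absent from a, so
# the reversed result is {b'old': b'new'}. Had b'old' survived in a (a copy
# rather than a rename), the 'v in a' check above would skip it.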
376 376
377 377 def pathcopies(x, y, match=None):
378 378 """find {dst@y: src@x} copy mapping for directed compare"""
379 379 repo = x._repo
380 380 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
381 381 if debug:
382 382 repo.ui.debug(
383 383 b'debug.copies: searching copies from %s to %s\n' % (x, y)
384 384 )
385 385 if x == y or not x or not y:
386 386 return {}
387 387 a = y.ancestor(x)
388 388 if a == x:
389 389 if debug:
390 390 repo.ui.debug(b'debug.copies: search mode: forward\n')
391 391 if y.rev() is None and x == y.p1():
392 392 # short-circuit to avoid issues with merge states
393 393 return _dirstatecopies(repo, match)
394 394 copies = _forwardcopies(x, y, match=match)
395 395 elif a == y:
396 396 if debug:
397 397 repo.ui.debug(b'debug.copies: search mode: backward\n')
398 398 copies = _backwardrenames(x, y, match=match)
399 399 else:
400 400 if debug:
401 401 repo.ui.debug(b'debug.copies: search mode: combined\n')
402 402 base = None
403 403 if a.rev() != node.nullrev:
404 404 base = x
405 405 copies = _chain(
406 406 _backwardrenames(x, a, match=match),
407 407 _forwardcopies(a, y, base, match=match),
408 408 )
409 409 _filter(x, y, copies)
410 410 return copies
411 411
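# Typical usage, as a sketch ('repo' being any local repository object):
#
#     copies = pathcopies(repo[b'.'], repo[None])
#
# maps each renamed or copied file in the working directory back to its
# source in the first parent, taking the dirstate short-circuit above.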
412 412
413 413 def mergecopies(repo, c1, c2, base):
414 414 """
415 415 Finds moves and copies between context c1 and c2 that are relevant for
416 416 merging. 'base' will be used as the merge base.
417 417
418 418 Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
419 419 files that were moved/copied in one merge parent and modified in another.
420 420 For example:
421 421
422 422 o ---> 4 another commit
423 423 |
424 424 | o ---> 3 commit that modifies a.txt
425 425 | /
426 426 o / ---> 2 commit that moves a.txt to b.txt
427 427 |/
428 428 o ---> 1 merge base
429 429
430 430 If we try to rebase revision 3 onto revision 4, since there is no a.txt in
431 431 revision 4 and the user has copytrace disabled, we print the following
432 432 message:
433 433
434 434 ```other changed <file> which local deleted```
435 435
436 436 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
437 437 "dirmove".
438 438
439 439 "copy" is a mapping from destination name -> source name,
440 440 where source is in c1 and destination is in c2 or vice-versa.
441 441
442 442 "movewithdir" is a mapping from source name -> destination name,
443 443 where the file at source, present in one context but not the other,
444 444 needs to be moved to destination by the merge process, because the
445 445 other context moved the directory it is in.
446 446
447 447 "diverge" is a mapping of source name -> list of destination names
448 448 for divergent renames.
449 449
450 450 "renamedelete" is a mapping of source name -> list of destination
451 451 names for files deleted in c1 that were renamed in c2 or vice-versa.
452 452
453 453 "dirmove" is a mapping of detected source dir -> destination dir renames.
454 454 This is needed for handling changes to new files previously grafted into
455 455 renamed directories.
456 456
457 457 This function calls different copytracing algorithms based on config.
458 458 """
459 459 # avoid silly behavior for update from empty dir
460 460 if not c1 or not c2 or c1 == c2:
461 461 return {}, {}, {}, {}, {}
462 462
463 463 narrowmatch = c1.repo().narrowmatch()
464 464
465 465 # avoid silly behavior for parent -> working dir
466 466 if c2.node() is None and c1.node() == repo.dirstate.p1():
467 467 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
468 468
469 469 copytracing = repo.ui.config(b'experimental', b'copytrace')
470 470 if stringutil.parsebool(copytracing) is False:
471 471 # stringutil.parsebool() returns None when it is unable to parse the
472 472 # value, so only a value that explicitly parses as False disables copytracing
473 473 return {}, {}, {}, {}, {}
474 474
475 475 if usechangesetcentricalgo(repo):
476 476 # The heuristics don't make sense when we need changeset-centric algos
477 477 return _fullcopytracing(repo, c1, c2, base)
478 478
479 479 # Copy trace disabling is explicitly below the node == p1 logic above
480 480 # because the logic above is required for a simple copy to be kept across a
481 481 # rebase.
482 482 if copytracing == b'heuristics':
483 483 # Do full copytracing if only non-public revisions are involved as
484 484 # that will be fast enough and will also cover the copies which could
485 485 # be missed by heuristics
486 486 if _isfullcopytraceable(repo, c1, base):
487 487 return _fullcopytracing(repo, c1, c2, base)
488 488 return _heuristicscopytracing(repo, c1, c2, base)
489 489 else:
490 490 return _fullcopytracing(repo, c1, c2, base)
491 491
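# A summary of the dispatch above, in hgrc syntax; any value that does not
# parse as false enables copytracing (a sketch, not an exhaustive list):
#
#     [experimental]
#     copytrace = on          # full copytracing
#     copytrace = off         # disabled: empty results returned above
#     copytrace = heuristics  # filename heuristics (_heuristicscopytracing)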
492 492
493 493 def _isfullcopytraceable(repo, c1, base):
494 494 """ Checks that if base, source and destination are all no-public branches,
495 495 if yes let's use the full copytrace algorithm for increased capabilities
496 496 since it will be fast enough.
497 497
498 498 `experimental.copytrace.sourcecommitlimit` can be used to set a limit on the
499 499 number of changesets between c1 and base; if there are more changesets than
500 500 the limit, the full copytracing algorithm won't be used.
501 501 """
502 502 if c1.rev() is None:
503 503 c1 = c1.p1()
504 504 if c1.mutable() and base.mutable():
505 505 sourcecommitlimit = repo.ui.configint(
506 506 b'experimental', b'copytrace.sourcecommitlimit'
507 507 )
508 508 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
509 509 return commits < sourcecommitlimit
510 510 return False
511 511
512 512
513 513 def _checksinglesidecopies(
514 514 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
515 515 ):
516 516 if src not in m2:
517 517 # deleted on side 2
518 518 if src not in m1:
519 519 # renamed on side 1, deleted on side 2
520 520 renamedelete[src] = dsts1
521 521 elif m2[src] != mb[src]:
522 522 if not _related(c2[src], base[src]):
523 523 return
524 524 # modified on side 2
525 525 for dst in dsts1:
526 526 if dst not in m2:
527 527 # dst not added on side 2 (handle as regular
528 528 # "both created" case in manifestmerge otherwise)
529 529 copy[dst] = src
530 530
531 531
532 532 def _fullcopytracing(repo, c1, c2, base):
533 533 """ The full copytracing algorithm which finds all the new files that were
534 534 added from merge base up to the top commit and for each file it checks if
535 535 this file was copied from another file.
536 536
537 537 This is pretty slow when a lot of changesets are involved but will track all
538 538 the copies.
539 539 """
540 540 m1 = c1.manifest()
541 541 m2 = c2.manifest()
542 542 mb = base.manifest()
543 543
544 544 copies1 = pathcopies(base, c1)
545 545 copies2 = pathcopies(base, c2)
546 546
547 547 inversecopies1 = {}
548 548 inversecopies2 = {}
549 549 for dst, src in copies1.items():
550 550 inversecopies1.setdefault(src, []).append(dst)
551 551 for dst, src in copies2.items():
552 552 inversecopies2.setdefault(src, []).append(dst)
553 553
554 554 copy = {}
555 555 diverge = {}
556 556 renamedelete = {}
557 557 allsources = set(inversecopies1) | set(inversecopies2)
558 558 for src in allsources:
559 559 dsts1 = inversecopies1.get(src)
560 560 dsts2 = inversecopies2.get(src)
561 561 if dsts1 and dsts2:
562 562 # copied/renamed on both sides
563 563 if src not in m1 and src not in m2:
564 564 # renamed on both sides
565 565 dsts1 = set(dsts1)
566 566 dsts2 = set(dsts2)
567 567 # If there's some overlap in the rename destinations, we
568 568 # consider it not divergent. For example, if side 1 copies 'a'
569 569 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
570 570 # and 'd' and deletes 'a', only the shared destination 'c' is kept as a copy.
571 571 if dsts1 & dsts2:
572 572 for dst in dsts1 & dsts2:
573 573 copy[dst] = src
574 574 else:
575 575 diverge[src] = sorted(dsts1 | dsts2)
576 576 elif src in m1 and src in m2:
577 577 # copied on both sides
578 578 dsts1 = set(dsts1)
579 579 dsts2 = set(dsts2)
580 580 for dst in dsts1 & dsts2:
581 581 copy[dst] = src
582 582 # TODO: Handle cases where it was renamed on one side and copied
583 583 # on the other side
584 584 elif dsts1:
585 585 # copied/renamed only on side 1
586 586 _checksinglesidecopies(
587 587 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
588 588 )
589 589 elif dsts2:
590 590 # copied/renamed only on side 2
591 591 _checksinglesidecopies(
592 592 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
593 593 )
594 594
595 595 renamedeleteset = set()
596 596 divergeset = set()
597 597 for dsts in diverge.values():
598 598 divergeset.update(dsts)
599 599 for dsts in renamedelete.values():
600 600 renamedeleteset.update(dsts)
601 601
602 602 # find interesting file sets from manifests
603 603 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
604 604 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
605 605 u1 = sorted(addedinm1 - addedinm2)
606 606 u2 = sorted(addedinm2 - addedinm1)
607 607
608 608 header = b" unmatched files in %s"
609 609 if u1:
610 610 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
611 611 if u2:
612 612 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
613 613
614 614 fullcopy = copies1.copy()
615 615 fullcopy.update(copies2)
616 616 if not fullcopy:
617 617 return copy, {}, diverge, renamedelete, {}
618 618
619 619 if repo.ui.debugflag:
620 620 repo.ui.debug(
621 621 b" all copies found (* = to merge, ! = divergent, "
622 622 b"% = renamed and deleted):\n"
623 623 )
624 624 for f in sorted(fullcopy):
625 625 note = b""
626 626 if f in copy:
627 627 note += b"*"
628 628 if f in divergeset:
629 629 note += b"!"
630 630 if f in renamedeleteset:
631 631 note += b"%"
632 632 repo.ui.debug(
633 633 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
634 634 )
635 635 del divergeset
636 636
637 637 repo.ui.debug(b" checking for directory renames\n")
638 638
639 639 # generate a directory move map
640 640 d1, d2 = c1.dirs(), c2.dirs()
641 641 invalid = set()
642 642 dirmove = {}
643 643
644 644 # examine each file copy for a potential directory move, which is
645 645 # when all the files in a directory are moved to a new directory
646 646 for dst, src in pycompat.iteritems(fullcopy):
647 647 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
648 648 if dsrc in invalid:
649 649 # already seen to be uninteresting
650 650 continue
651 651 elif dsrc in d1 and ddst in d1:
652 652 # directory wasn't entirely moved locally
653 653 invalid.add(dsrc)
654 654 elif dsrc in d2 and ddst in d2:
655 655 # directory wasn't entirely moved remotely
656 656 invalid.add(dsrc)
657 657 elif dsrc in dirmove and dirmove[dsrc] != ddst:
658 658 # files from the same directory moved to two different places
659 659 invalid.add(dsrc)
660 660 else:
661 661 # looks good so far
662 662 dirmove[dsrc] = ddst
663 663
664 664 for i in invalid:
665 665 if i in dirmove:
666 666 del dirmove[i]
667 667 del d1, d2, invalid
668 668
669 669 if not dirmove:
670 670 return copy, {}, diverge, renamedelete, {}
671 671
672 672 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
673 673
674 674 for d in dirmove:
675 675 repo.ui.debug(
676 676 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
677 677 )
678 678
679 679 movewithdir = {}
680 680 # check unaccounted nonoverlapping files against directory moves
681 681 for f in u1 + u2:
682 682 if f not in fullcopy:
683 683 for d in dirmove:
684 684 if f.startswith(d):
685 685 # new file added in a directory that was moved, move it
686 686 df = dirmove[d] + f[len(d) :]
687 687 if df not in copy:
688 688 movewithdir[f] = df
689 689 repo.ui.debug(
690 690 b" pending file src: '%s' -> dst: '%s'\n"
691 691 % (f, df)
692 692 )
693 693 break
694 694
695 695 return copy, movewithdir, diverge, renamedelete, dirmove
696 696
697 697
698 698 def _heuristicscopytracing(repo, c1, c2, base):
699 699 """ Fast copytracing using filename heuristics
700 700
701 701 Assumes that moves or renames are of the following two types:
702 702
703 703 1) Inside a directory only (same directory name but different filenames)
704 704 2) Move from one directory to another
705 705 (same filenames but different directory names)
706 706
707 707 Works only when there are no merge commits in the "source branch", i.e. the
708 708 commits from base up to c2, excluding base.
709 709
710 710 If a merge is involved, it falls back to _fullcopytracing().
711 711
712 712 Can be used by setting the following config:
713 713
714 714 [experimental]
715 715 copytrace = heuristics
716 716
717 717 In some cases the copy/move candidates found by heuristics can be very
718 718 numerous, which will make the algorithm slow. The number of possible
719 719 candidates to check can be limited by using the config
720 720 `experimental.copytrace.movecandidateslimit` which defaults to 100.
721 721 """
722 722
723 723 if c1.rev() is None:
724 724 c1 = c1.p1()
725 725 if c2.rev() is None:
726 726 c2 = c2.p1()
727 727
728 728 copies = {}
729 729
730 730 changedfiles = set()
731 731 m1 = c1.manifest()
732 732 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
733 733 # If base is not in c2 branch, we switch to fullcopytracing
734 734 repo.ui.debug(
735 735 b"switching to full copytracing as base is not "
736 736 b"an ancestor of c2\n"
737 737 )
738 738 return _fullcopytracing(repo, c1, c2, base)
739 739
740 740 ctx = c2
741 741 while ctx != base:
742 742 if len(ctx.parents()) == 2:
743 743 # To keep things simple let's not handle merges
744 744 repo.ui.debug(b"switching to full copytracing because of merges\n")
745 745 return _fullcopytracing(repo, c1, c2, base)
746 746 changedfiles.update(ctx.files())
747 747 ctx = ctx.p1()
748 748
749 749 cp = _forwardcopies(base, c2)
750 750 for dst, src in pycompat.iteritems(cp):
751 751 if src in m1:
752 752 copies[dst] = src
753 753
754 754 # file is missing if it isn't present in the destination, but is present in
755 755 # the base and present in the source.
756 756 # Presence in the base is important to exclude added files, presence in the
757 757 # source is important to exclude removed files.
758 758 filt = lambda f: f not in m1 and f in base and f in c2
759 759 missingfiles = [f for f in changedfiles if filt(f)]
760 760
761 761 if missingfiles:
762 762 basenametofilename = collections.defaultdict(list)
763 763 dirnametofilename = collections.defaultdict(list)
764 764
765 765 for f in m1.filesnotin(base.manifest()):
766 766 basename = os.path.basename(f)
767 767 dirname = os.path.dirname(f)
768 768 basenametofilename[basename].append(f)
769 769 dirnametofilename[dirname].append(f)
770 770
771 771 for f in missingfiles:
772 772 basename = os.path.basename(f)
773 773 dirname = os.path.dirname(f)
774 774 samebasename = basenametofilename[basename]
775 775 samedirname = dirnametofilename[dirname]
776 776 movecandidates = samebasename + samedirname
777 777 # f is guaranteed to be present in c2, that's why
778 778 # c2.filectx(f) won't fail
779 779 f2 = c2.filectx(f)
780 780 # we can have a lot of candidates, which can slow down the heuristics;
781 781 # a config value limits the number of candidate moves to check
782 782 maxcandidates = repo.ui.configint(
783 783 b'experimental', b'copytrace.movecandidateslimit'
784 784 )
785 785
786 786 if len(movecandidates) > maxcandidates:
787 787 repo.ui.status(
788 788 _(
789 789 b"skipping copytracing for '%s', more "
790 790 b"candidates than the limit: %d\n"
791 791 )
792 792 % (f, len(movecandidates))
793 793 )
794 794 continue
795 795
796 796 for candidate in movecandidates:
797 797 f1 = c1.filectx(candidate)
798 798 if _related(f1, f2):
799 799 # if there are multiple related copies then we'll merge
800 800 # changes into all of them. This matches the behaviour
801 801 # of upstream copytracing
802 802 copies[candidate] = f
803 803
804 804 return copies, {}, {}, {}, {}
805 805
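# A sketch of the heuristic above: if b'dir/a.txt' is present in base and
# c2 but missing from c1, then every file newly added in c1 that is either
# named b'a.txt' (in any directory) or located under b'dir/' (with any
# basename) becomes a move candidate, and each candidate sharing an
# ancestor with c2's b'dir/a.txt' (per _related) is recorded as a copy.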
806 806
807 807 def _related(f1, f2):
808 808 """return True if f1 and f2 filectx have a common ancestor
809 809
810 810 Walk back to common ancestor to see if the two files originate
811 811 from the same file. Since workingfilectx's rev() is None it messes
812 812 up the integer comparison logic, hence the pre-step check for
813 813 None (f1 and f2 can only be workingfilectx's initially).
814 814 """
815 815
816 816 if f1 == f2:
817 817 return True # a match
818 818
819 819 g1, g2 = f1.ancestors(), f2.ancestors()
820 820 try:
821 821 f1r, f2r = f1.linkrev(), f2.linkrev()
822 822
823 823 if f1r is None:
824 824 f1 = next(g1)
825 825 if f2r is None:
826 826 f2 = next(g2)
827 827
828 828 while True:
829 829 f1r, f2r = f1.linkrev(), f2.linkrev()
830 830 if f1r > f2r:
831 831 f1 = next(g1)
832 832 elif f2r > f1r:
833 833 f2 = next(g2)
834 834 else: # f1 and f2 point to files in the same linkrev
835 835 return f1 == f2 # true if they point to the same file
836 836 except StopIteration:
837 837 return False
838 838
839 839
840 840 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
841 841 """reproduce copies from fromrev to rev in the dirstate
842 842
843 843 If skiprev is specified, it's a revision that should be used to
844 844 filter copy records. Any copies that occur between fromrev and
845 845 skiprev will not be duplicated, even if they appear in the set of
846 846 copies between fromrev and rev.
847 847 """
848 848 exclude = {}
849 849 ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
850 850 bctrace = stringutil.parsebool(ctraceconfig)
851 851 if skiprev is not None and (
852 852 ctraceconfig == b'heuristics' or bctrace or bctrace is None
853 853 ):
854 854 # copytrace='off' skips this line, but not the entire function because
855 855 # the line below is O(size of the repo) during a rebase, while the rest
856 856 # of the function is much faster (and is required for carrying copy
857 857 # metadata across the rebase anyway).
858 858 exclude = pathcopies(repo[fromrev], repo[skiprev])
859 859 for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
860 860 if dst in exclude:
861 861 continue
862 862 if dst in wctx:
863 863 wctx[dst].markcopied(src)
864 864
865 865
866 866 def computechangesetfilesadded(ctx):
867 867 """return the list of files added in a changeset
868 868 """
869 869 added = []
870 870 for f in ctx.files():
871 871 if not any(f in p for p in ctx.parents()):
872 872 added.append(f)
873 873 return added
874 874
875 875
876 876 def computechangesetfilesremoved(ctx):
877 877 """return the list of files removed in a changeset
878 878 """
879 879 removed = []
880 880 for f in ctx.files():
881 881 if f not in ctx:
882 882 removed.append(f)
883 883 return removed
884 884
885 885
886 886 def computechangesetcopies(ctx):
887 887 """return the copies data for a changeset
888 888
889 889 The copies data are returned as a pair of dictionaries (p1copies, p2copies).
890 890
891 891 Each dictionary is of the form: `{newname: oldname}`
892 892 """
893 893 p1copies = {}
894 894 p2copies = {}
895 895 p1 = ctx.p1()
896 896 p2 = ctx.p2()
897 897 narrowmatch = ctx._repo.narrowmatch()
898 898 for dst in ctx.files():
899 899 if not narrowmatch(dst) or dst not in ctx:
900 900 continue
901 901 copied = ctx[dst].renamed()
902 902 if not copied:
903 903 continue
904 904 src, srcnode = copied
905 905 if src in p1 and p1[src].filenode() == srcnode:
906 906 p1copies[dst] = src
907 907 elif src in p2 and p2[src].filenode() == srcnode:
908 908 p2copies[dst] = src
909 909 return p1copies, p2copies
910 910
911 911
912 912 def encodecopies(files, copies):
913 913 items = []
914 914 for i, dst in enumerate(files):
915 915 if dst in copies:
916 916 items.append(b'%d\0%s' % (i, copies[dst]))
917 917 if len(items) != len(copies):
918 918 raise error.ProgrammingError(
919 919 b'some copy targets missing from file list'
920 920 )
921 921 return b"\n".join(items)
922 922
923 923
924 924 def decodecopies(files, data):
925 925 try:
926 926 copies = {}
927 927 if not data:
928 928 return copies
929 929 for l in data.split(b'\n'):
930 930 strindex, src = l.split(b'\0')
931 931 i = int(strindex)
932 932 dst = files[i]
933 933 copies[dst] = src
934 934 return copies
935 935 except (ValueError, IndexError):
936 936 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
937 937 # used different syntax for the value.
938 938 return None
939 939
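# A round-trip sketch of the index-based encoding; 'files' must be the
# sorted file list of the changeset, as used by _getsidedata() below:
#
#     >>> files = [b'a', b'b', b'c']
#     >>> encodecopies(files, {b'b': b'a'})
#     b'1\x00a'
#     >>> decodecopies(files, b'1\x00a')
#     {b'b': b'a'}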
940 940
941 941 def encodefileindices(files, subset):
942 942 subset = set(subset)
943 943 indices = []
944 944 for i, f in enumerate(files):
945 945 if f in subset:
946 946 indices.append(b'%d' % i)
947 947 return b'\n'.join(indices)
948 948
949 949
950 950 def decodefileindices(files, data):
951 951 try:
952 952 subset = []
953 953 if not data:
954 954 return subset
955 955 for strindex in data.split(b'\n'):
956 956 i = int(strindex)
957 957 if i < 0 or i >= len(files):
958 958 return None
959 959 subset.append(files[i])
960 960 return subset
961 961 except (ValueError, IndexError):
962 962 # Perhaps someone had chosen the same key name (e.g. "added") and
963 963 # used different syntax for the value.
964 964 return None
965 965
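# The same index scheme, for plain file subsets:
#
#     >>> files = [b'a', b'b', b'c']
#     >>> encodefileindices(files, [b'c'])
#     b'2'
#     >>> decodefileindices(files, b'2')
#     [b'c']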
966 966
967 967 def _getsidedata(srcrepo, rev):
968 968 ctx = srcrepo[rev]
969 969 filescopies = computechangesetcopies(ctx)
970 970 filesadded = computechangesetfilesadded(ctx)
971 971 filesremoved = computechangesetfilesremoved(ctx)
972 972 sidedata = {}
973 973 if any([filescopies, filesadded, filesremoved]):
974 974 sortedfiles = sorted(ctx.files())
975 975 p1copies, p2copies = filescopies
976 976 p1copies = encodecopies(sortedfiles, p1copies)
977 977 p2copies = encodecopies(sortedfiles, p2copies)
978 978 filesadded = encodefileindices(sortedfiles, filesadded)
979 979 filesremoved = encodefileindices(sortedfiles, filesremoved)
980 980 if p1copies:
981 981 sidedata[sidedatamod.SD_P1COPIES] = p1copies
982 982 if p2copies:
983 983 sidedata[sidedatamod.SD_P2COPIES] = p2copies
984 984 if filesadded:
985 985 sidedata[sidedatamod.SD_FILESADDED] = filesadded
986 986 if filesremoved:
987 987 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
988 988 return sidedata
989 989
990 990
991 991 def getsidedataadder(srcrepo, destrepo):
992 992 def sidedatacompanion(revlog, rev):
993 993 sidedata = {}
994 994 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
995 995 sidedata = _getsidedata(srcrepo, rev)
996 996 return False, (), sidedata
997 997
998 998 return sidedatacompanion
999 999
1000 1000
1001 1001 def getsidedataremover(srcrepo, destrepo):
1002 1002 def sidedatacompanion(revlog, rev):
1003 1003 f = ()
1004 1004 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1005 1005 if revlog.flags(rev) & REVIDX_SIDEDATA:
1006 1006 f = (
1007 1007 sidedatamod.SD_P1COPIES,
1008 1008 sidedatamod.SD_P2COPIES,
1009 1009 sidedatamod.SD_FILESADDED,
1010 1010 sidedatamod.SD_FILESREMOVED,
1011 1011 )
1012 1012 return False, f, {}
1013 1013
1014 1014 return sidedatacompanion