copies: do not initialize the dictionary with root in changeset copies...
marmoute
r44177:fb8f10c0 default draft
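This commit stops `_changesetforwardcopies()` from pre-seeding `all_copies` with an empty dictionary for every root; the dictionary is now created lazily when a root is first visited (see the `all_copies.pop(r, None)` hunk below). A minimal standalone sketch of the before/after pattern, using made-up `roots` and revision values purely for illustration:

```python
roots = {0, 1}                  # hypothetical root revisions
iterrevs = sorted(roots | {2})  # hypothetical revisions to walk

# Before: every root was eagerly seeded with an empty dict,
# even roots whose dict might never be needed:
#     all_copies = {r: {} for r in roots}

# After: an entry is created only when a root is actually visited.
all_copies = {}
for r in iterrevs:
    copies = all_copies.pop(r, None)
    if copies is None:
        # r is a root: start from an empty copy mapping
        copies = {}
    # ... chain per-child copies and store the result under each
    # child revision, as _changesetforwardcopies() does below ...
```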
@@ -1,1014 +1,1017 @@
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 13 from .i18n import _
14 14
15 15
16 16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 17
18 18 from . import (
19 19 error,
20 20 match as matchmod,
21 21 node,
22 22 pathutil,
23 23 pycompat,
24 24 util,
25 25 )
26 26
27 27 from .revlogutils import sidedata as sidedatamod
28 28
29 29 from .utils import stringutil
30 30
31 31
32 32 def _filter(src, dst, t):
33 33 """filters out invalid copies after chaining"""
34 34
35 35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 37 # in the following table (not including trivial cases). For example, case 2
38 38 # is where a file existed in 'src' and remained under that name in 'mid' and
39 39 # then was renamed between 'mid' and 'dst'.
40 40 #
41 41 # case src mid dst result
42 42 # 1 x y - -
43 43 # 2 x y y x->y
44 44 # 3 x y x -
45 45 # 4 x y z x->z
46 46 # 5 - x y -
47 47 # 6 x x y x->y
48 48 #
49 49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 51 # between 5 and 6, so it includes all cases in its result.
52 52 # Cases 1, 3, and 5 are then removed by _filter().
53 53
54 54 for k, v in list(t.items()):
55 55 # remove copies from files that didn't exist
56 56 if v not in src:
57 57 del t[k]
58 58 # remove criss-crossed copies
59 59 elif k in src and v in dst:
60 60 del t[k]
61 61 # remove copies to files that were then removed
62 62 elif k not in dst:
63 63 del t[k]
64 64
65 65
66 66 def _chain(prefix, suffix):
67 67 """chain two sets of copies 'prefix' and 'suffix'"""
68 68 result = prefix.copy()
69 69 for key, value in pycompat.iteritems(suffix):
70 70 result[key] = prefix.get(value, value)
71 71 return result
72 72
73 73
74 74 def _tracefile(fctx, am, basemf):
75 75 """return file context that is the ancestor of fctx present in ancestor
76 76 manifest am
77 77
78 78 Note: we used to try to stop after a given limit; however, checking whether
79 79 that limit was reached turned out to be very expensive, so we are better off
80 80 disabling that feature."""
81 81
82 82 for f in fctx.ancestors():
83 83 path = f.path()
84 84 if am.get(path, None) == f.filenode():
85 85 return path
86 86 if basemf and basemf.get(path, None) == f.filenode():
87 87 return path
88 88
89 89
90 90 def _dirstatecopies(repo, match=None):
91 91 ds = repo.dirstate
92 92 c = ds.copies().copy()
93 93 for k in list(c):
94 94 if ds[k] not in b'anm' or (match and not match(k)):
95 95 del c[k]
96 96 return c
97 97
98 98
99 99 def _computeforwardmissing(a, b, match=None):
100 100 """Computes which files are in b but not a.
101 101 This is its own function so extensions can easily wrap this call to see what
102 102 files _forwardcopies is about to process.
103 103 """
104 104 ma = a.manifest()
105 105 mb = b.manifest()
106 106 return mb.filesnotin(ma, match=match)
107 107
108 108
109 109 def usechangesetcentricalgo(repo):
110 110 """Checks if we should use changeset-centric copy algorithms"""
111 111 if repo.filecopiesmode == b'changeset-sidedata':
112 112 return True
113 113 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
114 114 changesetsource = (b'changeset-only', b'compatibility')
115 115 return readfrom in changesetsource
116 116
117 117
118 118 def _committedforwardcopies(a, b, base, match):
119 119 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
120 120 # files might have to be traced back to the fctx parent of the last
121 121 # one-side-only changeset, but not further back than that
122 122 repo = a._repo
123 123
124 124 if usechangesetcentricalgo(repo):
125 125 return _changesetforwardcopies(a, b, match)
126 126
127 127 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
128 128 dbg = repo.ui.debug
129 129 if debug:
130 130 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
131 131 am = a.manifest()
132 132 basemf = None if base is None else base.manifest()
133 133
134 134 # find where new files came from
135 135 # we currently don't try to find where old files went, too expensive
136 136 # this means we can miss a case like 'hg rm b; hg cp a b'
137 137 cm = {}
138 138
139 139 # Computing the forward missing is quite expensive on large manifests, since
140 140 # it compares the entire manifests. We can optimize it in the common use
141 141 # case of computing what copies are in a commit versus its parent (like
142 142 # during a rebase or histedit). Note, we exclude merge commits from this
143 143 # optimization, since the ctx.files() for a merge commit is not correct for
144 144 # this comparison.
145 145 forwardmissingmatch = match
146 146 if b.p1() == a and b.p2().node() == node.nullid:
147 147 filesmatcher = matchmod.exact(b.files())
148 148 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
149 149 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
150 150
151 151 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
152 152
153 153 if debug:
154 154 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
155 155
156 156 for f in sorted(missing):
157 157 if debug:
158 158 dbg(b'debug.copies: tracing file: %s\n' % f)
159 159 fctx = b[f]
160 160 fctx._ancestrycontext = ancestrycontext
161 161
162 162 if debug:
163 163 start = util.timer()
164 164 opath = _tracefile(fctx, am, basemf)
165 165 if opath:
166 166 if debug:
167 167 dbg(b'debug.copies: rename of: %s\n' % opath)
168 168 cm[f] = opath
169 169 if debug:
170 170 dbg(
171 171 b'debug.copies: time: %f seconds\n'
172 172 % (util.timer() - start)
173 173 )
174 174 return cm
175 175
176 176
177 177 def _revinfogetter(repo):
178 178 """return a function that return multiple data given a <rev>"i
179 179
180 180 * p1: revision number of first parent
181 181 * p2: revision number of second parent
182 182 * p1copies: mapping of copies from p1
183 183 * p2copies: mapping of copies from p2
184 184 * removed: a list of removed files
185 185 """
186 186 cl = repo.changelog
187 187 parents = cl.parentrevs
188 188
189 189 if repo.filecopiesmode == b'changeset-sidedata':
190 190 changelogrevision = cl.changelogrevision
191 191 flags = cl.flags
192 192
193 193 # A small cache to avoid doing the work twice for merges
194 194 #
195 195 # In the vast majority of cases, if we ask for information about one
196 196 # parent of a revision, we'll later ask about the other. So it makes
197 197 # sense to keep the information around when reaching the first parent
198 198 # of a merge and to drop it once it has been provided for the second.
199 199 #
200 200 # There exist cases where only one parent of the merge will be walked.
201 201 # This happens when the "destination" of the copy tracing is a
202 202 # descendant of a new root that is not common with the "source". In that
203 203 # case, we will only walk through merge parents that are descendants of
204 204 # changesets common to "source" and "destination".
205 205 #
206 206 # With the current implementation, if such changesets carry copy
207 207 # information, we'll keep it in memory until the end of
208 208 # _changesetforwardcopies. We don't expect this case to be frequent
209 209 # enough to matter.
210 210 #
211 211 # In addition, it would be possible to reach a pathological case where
212 212 # many first parents are met before any second parent is reached. In
213 213 # that case the cache could grow. If this ever becomes an issue, one can
214 214 # safely introduce a maximum cache size. This would trade extra CPU/IO
215 215 # time to save memory.
216 216 merge_caches = {}
217 217
218 218 def revinfo(rev):
219 219 p1, p2 = parents(rev)
220 220 if flags(rev) & REVIDX_SIDEDATA:
221 221 e = merge_caches.pop(rev, None)
222 222 if e is not None:
223 223 return e
224 224 c = changelogrevision(rev)
225 225 p1copies = c.p1copies
226 226 p2copies = c.p2copies
227 227 removed = c.filesremoved
228 228 if p1 != node.nullrev and p2 != node.nullrev:
229 229 # XXX in some cases we over-cache; ignore for now
230 230 merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
231 231 else:
232 232 p1copies = {}
233 233 p2copies = {}
234 234 removed = []
235 235 return p1, p2, p1copies, p2copies, removed
236 236
237 237 else:
238 238
239 239 def revinfo(rev):
240 240 p1, p2 = parents(rev)
241 241 ctx = repo[rev]
242 242 p1copies, p2copies = ctx._copies
243 243 removed = ctx.filesremoved()
244 244 return p1, p2, p1copies, p2copies, removed
245 245
246 246 return revinfo
247 247
248 248
249 249 def _changesetforwardcopies(a, b, match):
250 250 if a.rev() in (node.nullrev, b.rev()):
251 251 return {}
252 252
253 253 repo = a.repo().unfiltered()
254 254 children = {}
255 255 revinfo = _revinfogetter(repo)
256 256
257 257 cl = repo.changelog
258 258 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
259 259 mrset = set(missingrevs)
260 260 roots = set()
261 261 for r in missingrevs:
262 262 for p in cl.parentrevs(r):
263 263 if p == node.nullrev:
264 264 continue
265 265 if p not in children:
266 266 children[p] = [r]
267 267 else:
268 268 children[p].append(r)
269 269 if p not in mrset:
270 270 roots.add(p)
271 271 if not roots:
272 272 # no common revision to track copies from
273 273 return {}
274 274 min_root = min(roots)
275 275
276 276 from_head = set(
277 277 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
278 278 )
279 279
280 280 iterrevs = set(from_head)
281 281 iterrevs &= mrset
282 282 iterrevs.update(roots)
283 283 iterrevs.remove(b.rev())
284 all_copies = {r: {} for r in roots}
284 all_copies = {}
285 285 alwaysmatch = match.always()
286 286 for r in sorted(iterrevs):
287 copies = all_copies.pop(r)
287 copies = all_copies.pop(r, None)
288 if copies is None:
289 # this is a root
290 copies = {}
288 291 for i, c in enumerate(children[r]):
289 292 p1, p2, p1copies, p2copies, removed = revinfo(c)
290 293 if r == p1:
291 294 parent = 1
292 295 childcopies = p1copies
293 296 else:
294 297 assert r == p2
295 298 parent = 2
296 299 childcopies = p2copies
297 300 if not alwaysmatch:
298 301 childcopies = {
299 302 dst: src for dst, src in childcopies.items() if match(dst)
300 303 }
301 304 newcopies = copies
302 305 if childcopies:
303 306 newcopies = _chain(newcopies, childcopies)
304 307 # _chain makes a copy; we could avoid doing so in some
305 308 # simple/linear cases.
306 309 assert newcopies is not copies
307 310 for f in removed:
308 311 if f in newcopies:
309 312 if newcopies is copies:
310 313 # copy on write to avoid affecting potential other
311 314 # branches. when there are no other branches, this
312 315 # could be avoided.
313 316 newcopies = copies.copy()
314 317 del newcopies[f]
315 318 othercopies = all_copies.get(c)
316 319 if othercopies is None:
317 320 all_copies[c] = newcopies
318 321 else:
319 322 # we are the second parent to work on c; we need to merge our
320 323 # work with the other.
321 324 #
322 325 # Unlike when copies are stored in the filelog, we consider
323 326 # it a copy even if the destination already existed on the
324 327 # other branch. It's simply too expensive to check if the
325 328 # file existed in the manifest.
326 329 #
327 330 # In case of conflict, parent 1 takes precedence over parent 2.
328 331 # This is an arbitrary choice made anew when implementing
329 332 # changeset-based copies. It was made without regard to
330 333 # potential filelog-related behavior.
331 334 if parent == 1:
332 335 othercopies.update(newcopies)
333 336 else:
334 337 newcopies.update(othercopies)
335 338 all_copies[c] = newcopies
336 339 return all_copies[b.rev()]
337 340
338 341
339 342 def _forwardcopies(a, b, base=None, match=None):
340 343 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
341 344
342 345 if base is None:
343 346 base = a
344 347 match = a.repo().narrowmatch(match)
345 348 # check for working copy
346 349 if b.rev() is None:
347 350 cm = _committedforwardcopies(a, b.p1(), base, match)
348 351 # combine copies from dirstate if necessary
349 352 copies = _chain(cm, _dirstatecopies(b._repo, match))
350 353 else:
351 354 copies = _committedforwardcopies(a, b, base, match)
352 355 return copies
353 356
354 357
355 358 def _backwardrenames(a, b, match):
356 359 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
357 360 return {}
358 361
359 362 # Even though we're not taking copies into account, 1:n rename situations
360 363 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
361 364 # arbitrarily pick one of the renames.
362 365 # We don't want to pass in "match" here, since that would filter
363 366 # the destination by it. Since we're reversing the copies, we want
364 367 # to filter the source instead.
365 368 f = _forwardcopies(b, a)
366 369 r = {}
367 370 for k, v in sorted(pycompat.iteritems(f)):
368 371 if match and not match(v):
369 372 continue
370 373 # remove copies
371 374 if v in a:
372 375 continue
373 376 r[v] = k
374 377 return r
375 378
376 379
377 380 def pathcopies(x, y, match=None):
378 381 """find {dst@y: src@x} copy mapping for directed compare"""
379 382 repo = x._repo
380 383 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
381 384 if debug:
382 385 repo.ui.debug(
383 386 b'debug.copies: searching copies from %s to %s\n' % (x, y)
384 387 )
385 388 if x == y or not x or not y:
386 389 return {}
387 390 a = y.ancestor(x)
388 391 if a == x:
389 392 if debug:
390 393 repo.ui.debug(b'debug.copies: search mode: forward\n')
391 394 if y.rev() is None and x == y.p1():
392 395 # short-circuit to avoid issues with merge states
393 396 return _dirstatecopies(repo, match)
394 397 copies = _forwardcopies(x, y, match=match)
395 398 elif a == y:
396 399 if debug:
397 400 repo.ui.debug(b'debug.copies: search mode: backward\n')
398 401 copies = _backwardrenames(x, y, match=match)
399 402 else:
400 403 if debug:
401 404 repo.ui.debug(b'debug.copies: search mode: combined\n')
402 405 base = None
403 406 if a.rev() != node.nullrev:
404 407 base = x
405 408 copies = _chain(
406 409 _backwardrenames(x, a, match=match),
407 410 _forwardcopies(a, y, base, match=match),
408 411 )
409 412 _filter(x, y, copies)
410 413 return copies
411 414
412 415
413 416 def mergecopies(repo, c1, c2, base):
414 417 """
415 418 Finds moves and copies between context c1 and c2 that are relevant for
416 419 merging. 'base' will be used as the merge base.
417 420
418 421 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
419 422 files that were moved/ copied in one merge parent and modified in another.
420 423 For example:
421 424
422 425 o ---> 4 another commit
423 426 |
424 427 | o ---> 3 commit that modifies a.txt
425 428 | /
426 429 o / ---> 2 commit that moves a.txt to b.txt
427 430 |/
428 431 o ---> 1 merge base
429 432
430 433 If we try to rebase revision 3 onto revision 4, since there is no a.txt in
431 434 revision 4, and if the user has copytrace disabled, we print the following
432 435 message:
433 436
434 437 ```other changed <file> which local deleted```
435 438
436 439 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
437 440 "dirmove".
438 441
439 442 "copy" is a mapping from destination name -> source name,
440 443 where source is in c1 and destination is in c2 or vice-versa.
441 444
442 445 "movewithdir" is a mapping from source name -> destination name,
443 446 where the file at source present in one context but not the other
444 447 needs to be moved to destination by the merge process, because the
445 448 other context moved the directory it is in.
446 449
447 450 "diverge" is a mapping of source name -> list of destination names
448 451 for divergent renames.
449 452
450 453 "renamedelete" is a mapping of source name -> list of destination
451 454 names for files deleted in c1 that were renamed in c2 or vice-versa.
452 455
453 456 "dirmove" is a mapping of detected source dir -> destination dir renames.
454 457 This is needed for handling changes to new files previously grafted into
455 458 renamed directories.
456 459
457 460 This function calls different copytracing algorithms based on config.
458 461 """
459 462 # avoid silly behavior for update from empty dir
460 463 if not c1 or not c2 or c1 == c2:
461 464 return {}, {}, {}, {}, {}
462 465
463 466 narrowmatch = c1.repo().narrowmatch()
464 467
465 468 # avoid silly behavior for parent -> working dir
466 469 if c2.node() is None and c1.node() == repo.dirstate.p1():
467 470 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
468 471
469 472 copytracing = repo.ui.config(b'experimental', b'copytrace')
470 473 if stringutil.parsebool(copytracing) is False:
471 474 # stringutil.parsebool() returns None when it is unable to parse the
472 475 # value, so we keep copytracing enabled in such cases
473 476 return {}, {}, {}, {}, {}
474 477
475 478 if usechangesetcentricalgo(repo):
476 479 # The heuristics don't make sense when we need changeset-centric algos
477 480 return _fullcopytracing(repo, c1, c2, base)
478 481
479 482 # Copy trace disabling is explicitly below the node == p1 logic above
480 483 # because the logic above is required for a simple copy to be kept across a
481 484 # rebase.
482 485 if copytracing == b'heuristics':
483 486 # Do full copytracing if only non-public revisions are involved as
484 487 # that will be fast enough and will also cover the copies which could
485 488 # be missed by heuristics
486 489 if _isfullcopytraceable(repo, c1, base):
487 490 return _fullcopytracing(repo, c1, c2, base)
488 491 return _heuristicscopytracing(repo, c1, c2, base)
489 492 else:
490 493 return _fullcopytracing(repo, c1, c2, base)
491 494
492 495
493 496 def _isfullcopytraceable(repo, c1, base):
494 497 """ Checks that if base, source and destination are all no-public branches,
495 498 if yes let's use the full copytrace algorithm for increased capabilities
496 499 since it will be fast enough.
497 500
498 501 `experimental.copytrace.sourcecommitlimit` can be used to set a limit on
499 502 the number of changesets from c1 to base; if the number of changesets
500 503 exceeds the limit, the full copytracing algorithm won't be used.
501 504 """
502 505 if c1.rev() is None:
503 506 c1 = c1.p1()
504 507 if c1.mutable() and base.mutable():
505 508 sourcecommitlimit = repo.ui.configint(
506 509 b'experimental', b'copytrace.sourcecommitlimit'
507 510 )
508 511 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
509 512 return commits < sourcecommitlimit
510 513 return False
511 514
512 515
513 516 def _checksinglesidecopies(
514 517 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
515 518 ):
516 519 if src not in m2:
517 520 # deleted on side 2
518 521 if src not in m1:
519 522 # renamed on side 1, deleted on side 2
520 523 renamedelete[src] = dsts1
521 524 elif m2[src] != mb[src]:
522 525 if not _related(c2[src], base[src]):
523 526 return
524 527 # modified on side 2
525 528 for dst in dsts1:
526 529 if dst not in m2:
527 530 # dst not added on side 2 (handle as regular
528 531 # "both created" case in manifestmerge otherwise)
529 532 copy[dst] = src
530 533
531 534
532 535 def _fullcopytracing(repo, c1, c2, base):
533 536 """ The full copytracing algorithm which finds all the new files that were
534 537 added from merge base up to the top commit and for each file it checks if
535 538 this file was copied from another file.
536 539
537 540 This is pretty slow when a lot of changesets are involved but will track all
538 541 the copies.
539 542 """
540 543 m1 = c1.manifest()
541 544 m2 = c2.manifest()
542 545 mb = base.manifest()
543 546
544 547 copies1 = pathcopies(base, c1)
545 548 copies2 = pathcopies(base, c2)
546 549
547 550 inversecopies1 = {}
548 551 inversecopies2 = {}
549 552 for dst, src in copies1.items():
550 553 inversecopies1.setdefault(src, []).append(dst)
551 554 for dst, src in copies2.items():
552 555 inversecopies2.setdefault(src, []).append(dst)
553 556
554 557 copy = {}
555 558 diverge = {}
556 559 renamedelete = {}
557 560 allsources = set(inversecopies1) | set(inversecopies2)
558 561 for src in allsources:
559 562 dsts1 = inversecopies1.get(src)
560 563 dsts2 = inversecopies2.get(src)
561 564 if dsts1 and dsts2:
562 565 # copied/renamed on both sides
563 566 if src not in m1 and src not in m2:
564 567 # renamed on both sides
565 568 dsts1 = set(dsts1)
566 569 dsts2 = set(dsts2)
567 570 # If there's some overlap in the rename destinations, we
568 571 # consider it not divergent. For example, if side 1 copies 'a'
569 572 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
570 573 # and 'd' and deletes 'a'.
571 574 if dsts1 & dsts2:
572 575 for dst in dsts1 & dsts2:
573 576 copy[dst] = src
574 577 else:
575 578 diverge[src] = sorted(dsts1 | dsts2)
576 579 elif src in m1 and src in m2:
577 580 # copied on both sides
578 581 dsts1 = set(dsts1)
579 582 dsts2 = set(dsts2)
580 583 for dst in dsts1 & dsts2:
581 584 copy[dst] = src
582 585 # TODO: Handle cases where it was renamed on one side and copied
583 586 # on the other side
584 587 elif dsts1:
585 588 # copied/renamed only on side 1
586 589 _checksinglesidecopies(
587 590 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
588 591 )
589 592 elif dsts2:
590 593 # copied/renamed only on side 2
591 594 _checksinglesidecopies(
592 595 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
593 596 )
594 597
595 598 renamedeleteset = set()
596 599 divergeset = set()
597 600 for dsts in diverge.values():
598 601 divergeset.update(dsts)
599 602 for dsts in renamedelete.values():
600 603 renamedeleteset.update(dsts)
601 604
602 605 # find interesting file sets from manifests
603 606 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
604 607 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
605 608 u1 = sorted(addedinm1 - addedinm2)
606 609 u2 = sorted(addedinm2 - addedinm1)
607 610
608 611 header = b" unmatched files in %s"
609 612 if u1:
610 613 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
611 614 if u2:
612 615 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
613 616
614 617 fullcopy = copies1.copy()
615 618 fullcopy.update(copies2)
616 619 if not fullcopy:
617 620 return copy, {}, diverge, renamedelete, {}
618 621
619 622 if repo.ui.debugflag:
620 623 repo.ui.debug(
621 624 b" all copies found (* = to merge, ! = divergent, "
622 625 b"% = renamed and deleted):\n"
623 626 )
624 627 for f in sorted(fullcopy):
625 628 note = b""
626 629 if f in copy:
627 630 note += b"*"
628 631 if f in divergeset:
629 632 note += b"!"
630 633 if f in renamedeleteset:
631 634 note += b"%"
632 635 repo.ui.debug(
633 636 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
634 637 )
635 638 del divergeset
636 639
637 640 repo.ui.debug(b" checking for directory renames\n")
638 641
639 642 # generate a directory move map
640 643 d1, d2 = c1.dirs(), c2.dirs()
641 644 invalid = set()
642 645 dirmove = {}
643 646
644 647 # examine each file copy for a potential directory move, which is
645 648 # when all the files in a directory are moved to a new directory
646 649 for dst, src in pycompat.iteritems(fullcopy):
647 650 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
648 651 if dsrc in invalid:
649 652 # already seen to be uninteresting
650 653 continue
651 654 elif dsrc in d1 and ddst in d1:
652 655 # directory wasn't entirely moved locally
653 656 invalid.add(dsrc)
654 657 elif dsrc in d2 and ddst in d2:
655 658 # directory wasn't entirely moved remotely
656 659 invalid.add(dsrc)
657 660 elif dsrc in dirmove and dirmove[dsrc] != ddst:
658 661 # files from the same directory moved to two different places
659 662 invalid.add(dsrc)
660 663 else:
661 664 # looks good so far
662 665 dirmove[dsrc] = ddst
663 666
664 667 for i in invalid:
665 668 if i in dirmove:
666 669 del dirmove[i]
667 670 del d1, d2, invalid
668 671
669 672 if not dirmove:
670 673 return copy, {}, diverge, renamedelete, {}
671 674
672 675 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
673 676
674 677 for d in dirmove:
675 678 repo.ui.debug(
676 679 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
677 680 )
678 681
679 682 movewithdir = {}
680 683 # check unaccounted nonoverlapping files against directory moves
681 684 for f in u1 + u2:
682 685 if f not in fullcopy:
683 686 for d in dirmove:
684 687 if f.startswith(d):
685 688 # new file added in a directory that was moved, move it
686 689 df = dirmove[d] + f[len(d) :]
687 690 if df not in copy:
688 691 movewithdir[f] = df
689 692 repo.ui.debug(
690 693 b" pending file src: '%s' -> dst: '%s'\n"
691 694 % (f, df)
692 695 )
693 696 break
694 697
695 698 return copy, movewithdir, diverge, renamedelete, dirmove
696 699
697 700
698 701 def _heuristicscopytracing(repo, c1, c2, base):
699 702 """ Fast copytracing using filename heuristics
700 703
701 704 Assumes that moves or renames are of following two types:
702 705
703 706 1) Inside a directory only (same directory name but different filenames)
704 707 2) Move from one directory to another
705 708 (same filenames but different directory names)
706 709
707 710 Works only when there are no merge commits in the "source branch".
708 711 The source branch consists of the commits from base up to c2, excluding base.
709 712
710 713 If a merge is involved, it falls back to _fullcopytracing().
711 714
712 715 Can be used by setting the following config:
713 716
714 717 [experimental]
715 718 copytrace = heuristics
716 719
717 720 In some cases the number of copy/move candidates found by the heuristics
718 721 can be very large, which will make the algorithm slow. The number of
719 722 candidates to check can be limited by using the config option
720 723 `experimental.copytrace.movecandidateslimit`, which defaults to 100.
721 724 """
722 725
723 726 if c1.rev() is None:
724 727 c1 = c1.p1()
725 728 if c2.rev() is None:
726 729 c2 = c2.p1()
727 730
728 731 copies = {}
729 732
730 733 changedfiles = set()
731 734 m1 = c1.manifest()
732 735 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
733 736 # If base is not in c2 branch, we switch to fullcopytracing
734 737 repo.ui.debug(
735 738 b"switching to full copytracing as base is not "
736 739 b"an ancestor of c2\n"
737 740 )
738 741 return _fullcopytracing(repo, c1, c2, base)
739 742
740 743 ctx = c2
741 744 while ctx != base:
742 745 if len(ctx.parents()) == 2:
743 746 # To keep things simple let's not handle merges
744 747 repo.ui.debug(b"switching to full copytracing because of merges\n")
745 748 return _fullcopytracing(repo, c1, c2, base)
746 749 changedfiles.update(ctx.files())
747 750 ctx = ctx.p1()
748 751
749 752 cp = _forwardcopies(base, c2)
750 753 for dst, src in pycompat.iteritems(cp):
751 754 if src in m1:
752 755 copies[dst] = src
753 756
754 757 # file is missing if it isn't present in the destination, but is present in
755 758 # the base and present in the source.
756 759 # Presence in the base is important to exclude added files, presence in the
757 760 # source is important to exclude removed files.
758 761 filt = lambda f: f not in m1 and f in base and f in c2
759 762 missingfiles = [f for f in changedfiles if filt(f)]
760 763
761 764 if missingfiles:
762 765 basenametofilename = collections.defaultdict(list)
763 766 dirnametofilename = collections.defaultdict(list)
764 767
765 768 for f in m1.filesnotin(base.manifest()):
766 769 basename = os.path.basename(f)
767 770 dirname = os.path.dirname(f)
768 771 basenametofilename[basename].append(f)
769 772 dirnametofilename[dirname].append(f)
770 773
771 774 for f in missingfiles:
772 775 basename = os.path.basename(f)
773 776 dirname = os.path.dirname(f)
774 777 samebasename = basenametofilename[basename]
775 778 samedirname = dirnametofilename[dirname]
776 779 movecandidates = samebasename + samedirname
777 780 # f is guaranteed to be present in c2, which is why
778 781 # c2.filectx(f) won't fail
779 782 f2 = c2.filectx(f)
780 783 # we can have a lot of candidates, which can slow down the heuristics;
781 784 # a config value limits the number of candidate moves to check
782 785 maxcandidates = repo.ui.configint(
783 786 b'experimental', b'copytrace.movecandidateslimit'
784 787 )
785 788
786 789 if len(movecandidates) > maxcandidates:
787 790 repo.ui.status(
788 791 _(
789 792 b"skipping copytracing for '%s', more "
790 793 b"candidates than the limit: %d\n"
791 794 )
792 795 % (f, len(movecandidates))
793 796 )
794 797 continue
795 798
796 799 for candidate in movecandidates:
797 800 f1 = c1.filectx(candidate)
798 801 if _related(f1, f2):
799 802 # if there are a few related copies then we'll merge
800 803 # changes into all of them. This matches the behaviour
801 804 # of upstream copytracing
802 805 copies[candidate] = f
803 806
804 807 return copies, {}, {}, {}, {}
805 808
806 809
807 810 def _related(f1, f2):
808 811 """return True if f1 and f2 filectx have a common ancestor
809 812
810 813 Walk back to common ancestor to see if the two files originate
811 814 from the same file. Since workingfilectx's rev() is None it messes
812 815 up the integer comparison logic, hence the pre-step check for
813 816 None (f1 and f2 can only be workingfilectx's initially).
814 817 """
815 818
816 819 if f1 == f2:
817 820 return True # a match
818 821
819 822 g1, g2 = f1.ancestors(), f2.ancestors()
820 823 try:
821 824 f1r, f2r = f1.linkrev(), f2.linkrev()
822 825
823 826 if f1r is None:
824 827 f1 = next(g1)
825 828 if f2r is None:
826 829 f2 = next(g2)
827 830
828 831 while True:
829 832 f1r, f2r = f1.linkrev(), f2.linkrev()
830 833 if f1r > f2r:
831 834 f1 = next(g1)
832 835 elif f2r > f1r:
833 836 f2 = next(g2)
834 837 else: # f1 and f2 point to files in the same linkrev
835 838 return f1 == f2 # true if they point to the same file
836 839 except StopIteration:
837 840 return False
838 841
839 842
840 843 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
841 844 """reproduce copies from fromrev to rev in the dirstate
842 845
843 846 If skiprev is specified, it's a revision that should be used to
844 847 filter copy records. Any copies that occur between fromrev and
845 848 skiprev will not be duplicated, even if they appear in the set of
846 849 copies between fromrev and rev.
847 850 """
848 851 exclude = {}
849 852 ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
850 853 bctrace = stringutil.parsebool(ctraceconfig)
851 854 if skiprev is not None and (
852 855 ctraceconfig == b'heuristics' or bctrace or bctrace is None
853 856 ):
854 857 # copytrace='off' skips this line, but not the entire function because
855 858 # the line below is O(size of the repo) during a rebase, while the rest
856 859 # of the function is much faster (and is required for carrying copy
857 860 # metadata across the rebase anyway).
858 861 exclude = pathcopies(repo[fromrev], repo[skiprev])
859 862 for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
860 863 if dst in exclude:
861 864 continue
862 865 if dst in wctx:
863 866 wctx[dst].markcopied(src)
864 867
865 868
866 869 def computechangesetfilesadded(ctx):
867 870 """return the list of files added in a changeset
868 871 """
869 872 added = []
870 873 for f in ctx.files():
871 874 if not any(f in p for p in ctx.parents()):
872 875 added.append(f)
873 876 return added
874 877
875 878
876 879 def computechangesetfilesremoved(ctx):
877 880 """return the list of files removed in a changeset
878 881 """
879 882 removed = []
880 883 for f in ctx.files():
881 884 if f not in ctx:
882 885 removed.append(f)
883 886 return removed
884 887
885 888
886 889 def computechangesetcopies(ctx):
887 890 """return the copies data for a changeset
888 891
889 892 The copies data are returned as a pair of dictionaries (p1copies, p2copies).
890 893
891 894 Each dictionary is of the form `{newname: oldname}`.
892 895 """
893 896 p1copies = {}
894 897 p2copies = {}
895 898 p1 = ctx.p1()
896 899 p2 = ctx.p2()
897 900 narrowmatch = ctx._repo.narrowmatch()
898 901 for dst in ctx.files():
899 902 if not narrowmatch(dst) or dst not in ctx:
900 903 continue
901 904 copied = ctx[dst].renamed()
902 905 if not copied:
903 906 continue
904 907 src, srcnode = copied
905 908 if src in p1 and p1[src].filenode() == srcnode:
906 909 p1copies[dst] = src
907 910 elif src in p2 and p2[src].filenode() == srcnode:
908 911 p2copies[dst] = src
909 912 return p1copies, p2copies
910 913
911 914
912 915 def encodecopies(files, copies):
913 916 items = []
914 917 for i, dst in enumerate(files):
915 918 if dst in copies:
916 919 items.append(b'%d\0%s' % (i, copies[dst]))
917 920 if len(items) != len(copies):
918 921 raise error.ProgrammingError(
919 922 b'some copy targets missing from file list'
920 923 )
921 924 return b"\n".join(items)
922 925
923 926
924 927 def decodecopies(files, data):
925 928 try:
926 929 copies = {}
927 930 if not data:
928 931 return copies
929 932 for l in data.split(b'\n'):
930 933 strindex, src = l.split(b'\0')
931 934 i = int(strindex)
932 935 dst = files[i]
933 936 copies[dst] = src
934 937 return copies
935 938 except (ValueError, IndexError):
936 939 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
937 940 # used different syntax for the value.
938 941 return None
939 942
940 943
941 944 def encodefileindices(files, subset):
942 945 subset = set(subset)
943 946 indices = []
944 947 for i, f in enumerate(files):
945 948 if f in subset:
946 949 indices.append(b'%d' % i)
947 950 return b'\n'.join(indices)
948 951
949 952
950 953 def decodefileindices(files, data):
951 954 try:
952 955 subset = []
953 956 if not data:
954 957 return subset
955 958 for strindex in data.split(b'\n'):
956 959 i = int(strindex)
957 960 if i < 0 or i >= len(files):
958 961 return None
959 962 subset.append(files[i])
960 963 return subset
961 964 except (ValueError, IndexError):
962 965 # Perhaps someone had chosen the same key name (e.g. "added") and
963 966 # used different syntax for the value.
964 967 return None
965 968
966 969
967 970 def _getsidedata(srcrepo, rev):
968 971 ctx = srcrepo[rev]
969 972 filescopies = computechangesetcopies(ctx)
970 973 filesadded = computechangesetfilesadded(ctx)
971 974 filesremoved = computechangesetfilesremoved(ctx)
972 975 sidedata = {}
973 976 if any([filescopies, filesadded, filesremoved]):
974 977 sortedfiles = sorted(ctx.files())
975 978 p1copies, p2copies = filescopies
976 979 p1copies = encodecopies(sortedfiles, p1copies)
977 980 p2copies = encodecopies(sortedfiles, p2copies)
978 981 filesadded = encodefileindices(sortedfiles, filesadded)
979 982 filesremoved = encodefileindices(sortedfiles, filesremoved)
980 983 if p1copies:
981 984 sidedata[sidedatamod.SD_P1COPIES] = p1copies
982 985 if p2copies:
983 986 sidedata[sidedatamod.SD_P2COPIES] = p2copies
984 987 if filesadded:
985 988 sidedata[sidedatamod.SD_FILESADDED] = filesadded
986 989 if filesremoved:
987 990 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
988 991 return sidedata
989 992
990 993
991 994 def getsidedataadder(srcrepo, destrepo):
992 995 def sidedatacompanion(revlog, rev):
993 996 sidedata = {}
994 997 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
995 998 sidedata = _getsidedata(srcrepo, rev)
996 999 return False, (), sidedata
997 1000
998 1001 return sidedatacompanion
999 1002
1000 1003
1001 1004 def getsidedataremover(srcrepo, destrepo):
1002 1005 def sidedatacompanion(revlog, rev):
1003 1006 f = ()
1004 1007 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1005 1008 if revlog.flags(rev) & REVIDX_SIDEDATA:
1006 1009 f = (
1007 1010 sidedatamod.SD_P1COPIES,
1008 1011 sidedatamod.SD_P2COPIES,
1009 1012 sidedatamod.SD_FILESADDED,
1010 1013 sidedatamod.SD_FILESREMOVED,
1011 1014 )
1012 1015 return False, f, {}
1013 1016
1014 1017 return sidedatacompanion
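For reference, the sidedata copy encoding used by `encodecopies()`/`decodecopies()` above stores each copy as `<index>\0<source>`, with entries joined by newlines, where `<index>` points into the sorted file list of the changeset. A minimal standalone round trip, re-implemented here purely for illustration (the real functions live in this module and operate on bytes):

```python
def encodecopies(files, copies):
    # mirror of the encoder shown in the diff above
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append(b'%d\0%s' % (i, copies[dst]))
    return b"\n".join(items)

def decodecopies(files, data):
    # mirror of the decoder shown in the diff above
    copies = {}
    if not data:
        return copies
    for l in data.split(b'\n'):
        strindex, src = l.split(b'\0')
        copies[files[int(strindex)]] = src
    return copies

files = sorted([b'b.txt', b'c.txt'])              # e.g. sorted(ctx.files())
data = encodecopies(files, {b'b.txt': b'a.txt'})  # b.txt was copied from a.txt
assert data == b'0\x00a.txt'
assert decodecopies(files, data) == {b'b.txt': b'a.txt'}
```

Storing an index into the sorted file list rather than the destination path itself keeps the sidedata compact, which is why `_getsidedata()` sorts `ctx.files()` before encoding.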