##// END OF EJS Templates
copies: compute the exact set of revisions to walk...
marmoute -
r43593:83bb1e89 default
parent child Browse files
Show More
@@ -1,974 +1,982 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 import heapq
12 11 import os
13 12
14 13 from .i18n import _
15 14
16 15
17 16 from .revlogutils.flagutil import REVIDX_SIDEDATA
18 17
19 18 from . import (
20 19 error,
21 20 match as matchmod,
22 21 node,
23 22 pathutil,
24 23 pycompat,
25 24 util,
26 25 )
27 26
28 27 from .revlogutils import sidedata as sidedatamod
29 28
30 29 from .utils import stringutil
31 30
32 31
33 32 def _filter(src, dst, t):
34 33 """filters out invalid copies after chaining"""
35 34
36 35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 37 # in the following table (not including trivial cases). For example, case 2
39 38 # is where a file existed in 'src' and remained under that name in 'mid' and
40 39 # then was renamed between 'mid' and 'dst'.
41 40 #
42 41 # case src mid dst result
43 42 # 1 x y - -
44 43 # 2 x y y x->y
45 44 # 3 x y x -
46 45 # 4 x y z x->z
47 46 # 5 - x y -
48 47 # 6 x x y x->y
49 48 #
50 49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 51 # between 5 and 6, so it includes all cases in its result.
53 52 # Cases 1, 3, and 5 are then removed by _filter().
54 53
55 54 for k, v in list(t.items()):
56 55 # remove copies from files that didn't exist
57 56 if v not in src:
58 57 del t[k]
59 58 # remove criss-crossed copies
60 59 elif k in src and v in dst:
61 60 del t[k]
62 61 # remove copies to files that were then removed
63 62 elif k not in dst:
64 63 del t[k]
65 64
66 65
def _chain(a, b):
    """Chain two sets of copies 'a' (applied first) and 'b' (applied second).

    Both are {dst: src} mappings; the result maps each final destination to
    its ultimate source.
    """
    chained = a.copy()
    for dest, source in pycompat.iteritems(b):
        # follow the source through 'a' when it is itself a copy destination
        chained[dest] = chained.get(source, source)
    return chained
76 75
77 76
78 77 def _tracefile(fctx, am, basemf):
79 78 """return file context that is the ancestor of fctx present in ancestor
80 79 manifest am
81 80
82 81 Note: we used to try and stop after a given limit, however checking if that
83 82 limit is reached turned out to be very expensive. we are better off
84 83 disabling that feature."""
85 84
86 85 for f in fctx.ancestors():
87 86 path = f.path()
88 87 if am.get(path, None) == f.filenode():
89 88 return path
90 89 if basemf and basemf.get(path, None) == f.filenode():
91 90 return path
92 91
93 92
94 93 def _dirstatecopies(repo, match=None):
95 94 ds = repo.dirstate
96 95 c = ds.copies().copy()
97 96 for k in list(c):
98 97 if ds[k] not in b'anm' or (match and not match(k)):
99 98 del c[k]
100 99 return c
101 100
102 101
103 102 def _computeforwardmissing(a, b, match=None):
104 103 """Computes which files are in b but not a.
105 104 This is its own function so extensions can easily wrap this call to see what
106 105 files _forwardcopies is about to process.
107 106 """
108 107 ma = a.manifest()
109 108 mb = b.manifest()
110 109 return mb.filesnotin(ma, match=match)
111 110
112 111
def usechangesetcentricalgo(repo):
    """Check whether changeset-centric copy algorithms should be used."""
    if repo.filecopiesmode == b'changeset-sidedata':
        # sidedata storage mandates the changeset-centric algorithm
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
120 119
121 120
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    copies = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. We can optimize it in the common
    # use case of computing what copies are in a commit versus its parent
    # (like during a rebase or histedit). Note, we exclude merge commits from
    # this optimization, since the ctx.files() for a merge commit is not
    # correct for this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        # share one ancestry context across all traced files to avoid
        # recomputing the ancestor set for each of them
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            copies[f] = opath
        if debug:
            dbg(b'debug.copies: time: %f seconds\n' % (util.timer() - start))
    return copies
179 178
180 179
def _revinfogetter(repo):
    """Return a function that returns multiple data given a <rev>.

    The returned function takes a revision number and returns a tuple of:

    * p1: revision number of the first parent
    * p2: revision number of the second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    """
    cl = repo.changelog
    parents = cl.parentrevs

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & REVIDX_SIDEDATA:
                # copy metadata is stored in the changeset sidedata
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
            else:
                # revision predates sidedata storage: no copy information
                p1copies = {}
                p2copies = {}
                removed = ()
            return p1, p2, p1copies, p2copies, removed

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            ctx = repo[rev]
            # filelog/context based extraction of the copy information
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed

    return revinfo
220 219
221 220
def _changesetforwardcopies(a, b, match):
    """Find {dst@b: src@a} copies using changeset-stored copy information.

    ``a`` must be an ancestor of ``b``; only the revisions exclusive to
    ``b``'s side are walked.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            # record the child relationship for the walk below
            children.setdefault(p, []).append(r)
            if p not in mrset:
                # parent is on the common side: it is a starting point
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    # revisions reachable from a root while walking towards b's head
    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    # the exact set of revisions we need to walk, processed in ascending
    # (hence topological) order
    iterrevs = (from_head & mrset) | roots
    iterrevs.remove(b.rev())

    all_copies = {r: {} for r in roots}
    alwaysmatch = match.always()
    for r in sorted(iterrevs):
        copies = all_copies.pop(r)
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            for f in removed:
                if f in newcopies:
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 takes precedence over parent
                # 2. This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    return all_copies[b.rev()]
297 305
298 306
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is None:
        # working copy: search up to its parent, then fold in dirstate copies
        committed = _committedforwardcopies(a, b.p1(), base, match)
        return _chain(committed, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, base, match)
313 321
314 322
def _backwardrenames(a, b, match):
    """Find {src@b: dst@a} renames from b back to its descendant a."""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    backward = {}
    for dest, source in sorted(pycompat.iteritems(forward)):
        if match and not match(source):
            continue
        if source in a:
            # the source still exists in a: a copy, not a rename
            continue
        backward[source] = dest
    return backward
335 343
336 344
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        # trivial comparisons cannot carry any copy
        return {}
    a = y.ancestor(x)
    if a == x:
        # y descends from x: plain forward search
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # x descends from y: search the reverse direction
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated heads: go back to the common ancestor, then forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
371 379
372 380
def mergecopies(repo, c1, c2, base):
    """
    Find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    | /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing != b'heuristics':
        return _fullcopytracing(repo, c1, c2, base)
    # Do full copytracing if only non-public revisions are involved as
    # that will be fast enough and will also cover the copies which could
    # be missed by heuristics
    if _isfullcopytraceable(repo, c1, base):
        return _fullcopytracing(repo, c1, c2, base)
    return _heuristicscopytracing(repo, c1, c2, base)
451 459
452 460
def _isfullcopytraceable(repo, c1, base):
    """Check whether full copytracing is affordable because only non-public
    changesets are involved.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    the number of changesets from c1 to base such that if the number of
    changesets is more than the limit, full copytracing algorithm won't be
    used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        # a public changeset is involved: too expensive, use heuristics
        return False
    sourcecommitlimit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return commits < sourcecommitlimit
471 479
472 480
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    """Record copy information for a source copied/renamed on one side only.

    Mutates ``copy`` and ``renamedelete`` in place.
    """
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
        return
    if m2[src] != mb[src]:
        # modified on side 2
        if not _related(c2[src], base[src]):
            return
        for dst in dsts1:
            if dst not in m2:
                # dst not added on side 2 (handle as regular
                # "both created" case in manifestmerge otherwise)
                copy[dst] = src
490 498
491 499
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track
    all the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert the {dst: src} mappings into {src: [dst, ...]}
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        # the destination sets of diverge/renamedelete are only needed for
        # the debug output below, so only compute them when debugging
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
656 664
657 665
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very
    large in number and that will make the algorithm slow. The number of
    possible candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (loop-invariant: read the config once, not once per missing file)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
765 773
766 774
767 775 def _related(f1, f2):
768 776 """return True if f1 and f2 filectx have a common ancestor
769 777
770 778 Walk back to common ancestor to see if the two files originate
771 779 from the same file. Since workingfilectx's rev() is None it messes
772 780 up the integer comparison logic, hence the pre-step check for
773 781 None (f1 and f2 can only be workingfilectx's initially).
774 782 """
775 783
776 784 if f1 == f2:
777 785 return True # a match
778 786
779 787 g1, g2 = f1.ancestors(), f2.ancestors()
780 788 try:
781 789 f1r, f2r = f1.linkrev(), f2.linkrev()
782 790
783 791 if f1r is None:
784 792 f1 = next(g1)
785 793 if f2r is None:
786 794 f2 = next(g2)
787 795
788 796 while True:
789 797 f1r, f2r = f1.linkrev(), f2.linkrev()
790 798 if f1r > f2r:
791 799 f1 = next(g1)
792 800 elif f2r > f1r:
793 801 f2 = next(g2)
794 802 else: # f1 and f2 point to files in the same linkrev
795 803 return f1 == f2 # true if they point to the same file
796 804 except StopIteration:
797 805 return False
798 806
799 807
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    tracing = ctraceconfig == b'heuristics' or bctrace or bctrace is None
    if skiprev is not None and tracing:
        # copytrace='off' skips this line, but not the entire function
        # because the line below is O(size of the repo) during a rebase,
        # while the rest of the function is much faster (and is required
        # for carrying copy metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude or dst not in wctx:
            continue
        wctx[dst].markcopied(src)
824 832
825 833
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    parents = ctx.parents()
    # a touched file missing from every parent was added by this changeset
    return [f for f in ctx.files() if not any(f in p for p in parents)]
834 842
835 843
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    # a touched file no longer present in the changeset was removed by it
    return [f for f in ctx.files() if f not in ctx]
844 852
845 853
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries
    (p1copies, p2copies).

    Each dictionary is of the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst) or dst not in ctx:
            # outside the narrow spec, or removed by this changeset
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        # attribute the copy to the parent that actually holds that source
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
870 878
871 879
def encodecopies(files, copies):
    """Encode a {dst: src} copies dict as newline-separated
    ``<index>\\0<source>`` pairs, where <index> points into ``files``.

    Raises ProgrammingError when a copy destination is not in ``files``.
    """
    items = [
        b'%d\0%s' % (i, copies[dst])
        for i, dst in enumerate(files)
        if dst in copies
    ]
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)
882 890
883 891
def decodecopies(files, data):
    """Decode a copies mapping produced by encodecopies().

    Returns a {dst: src} dict, or None when the data cannot be parsed.
    """
    try:
        copies = {}
        if not data:
            return copies
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            copies[files[int(strindex)]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
899 907
900 908
def encodefileindices(files, subset):
    """Encode, newline-separated, the indices into ``files`` of the files
    that belong to ``subset``."""
    members = set(subset)
    return b'\n'.join(
        b'%d' % i for i, f in enumerate(files) if f in members
    )
908 916
909 917
def decodefileindices(files, data):
    """Decode a file subset produced by encodefileindices().

    Returns the list of file names, or None when the data is malformed or
    an index is out of range.
    """
    try:
        subset = []
        if not data:
            return subset
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                # out-of-range index: reject the whole record
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
925 933
926 934
def _getsidedata(srcrepo, rev):
    """Compute the copies-related sidedata entries for ``rev`` in ``srcrepo``.

    Returns a (possibly empty) {sidedata-key: encoded-bytes} dict; empty
    encodings are omitted.
    """
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        encoded = {
            sidedatamod.SD_P1COPIES: encodecopies(sortedfiles, p1copies),
            sidedatamod.SD_P2COPIES: encodecopies(sortedfiles, p2copies),
            sidedatamod.SD_FILESADDED: encodefileindices(
                sortedfiles, filesadded
            ),
            sidedatamod.SD_FILESREMOVED: encodefileindices(
                sortedfiles, filesremoved
            ),
        }
        # only keep the entries that actually encode something
        sidedata = {key: value for key, value in encoded.items() if value}
    return sidedata
949 957
950 958
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion that adds copy sidedata during upgrade."""

    def sidedatacompanion(revlog, rev):
        sidedata = {}
        # only the changelog (which has `filteredrevs`) carries copy sidedata
        if util.safehasattr(revlog, 'filteredrevs'):
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion
959 967
960 968
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion that strips copy sidedata on downgrade."""

    def sidedatacompanion(revlog, rev):
        removals = ()
        # only the changelog (which has `filteredrevs`) carries copy sidedata
        if util.safehasattr(revlog, 'filteredrevs'):
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                removals = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, removals, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now