##// END OF EJS Templates
copies: get copies information directly from _copies...
marmoute -
r43547:82dabad5 default
parent child Browse files
Show More
@@ -1,931 +1,932 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16
17 17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18 18
19 19 from . import (
20 20 error,
21 21 match as matchmod,
22 22 node,
23 23 pathutil,
24 24 pycompat,
25 25 util,
26 26 )
27 27
28 28 from .revlogutils import sidedata as sidedatamod
29 29
30 30 from .utils import stringutil
31 31
32 32
33 33 def _filter(src, dst, t):
34 34 """filters out invalid copies after chaining"""
35 35
36 36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 38 # in the following table (not including trivial cases). For example, case 2
39 39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 40 # then was renamed between 'mid' and 'dst'.
41 41 #
42 42 # case src mid dst result
43 43 # 1 x y - -
44 44 # 2 x y y x->y
45 45 # 3 x y x -
46 46 # 4 x y z x->z
47 47 # 5 - x y -
48 48 # 6 x x y x->y
49 49 #
50 50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 52 # between 5 and 6, so it includes all cases in its result.
53 53 # Cases 1, 3, and 5 are then removed by _filter().
54 54
55 55 for k, v in list(t.items()):
56 56 # remove copies from files that didn't exist
57 57 if v not in src:
58 58 del t[k]
59 59 # remove criss-crossed copies
60 60 elif k in src and v in dst:
61 61 del t[k]
62 62 # remove copies to files that were then removed
63 63 elif k not in dst:
64 64 del t[k]
65 65
66 66
def _chain(a, b):
    """chain two sets of copies 'a' and 'b'"""
    chained = a.copy()
    for dst, src in pycompat.iteritems(b):
        # if the source of a 'b' copy is itself a destination in 'a', the
        # copy traces all the way back to the original 'a' source
        chained[dst] = chained[src] if src in chained else src
    return chained
76 76
77 77
def _tracefile(fctx, am, basemf):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am

    Note: we used to try and stop after a given limit, however checking if that
    limit is reached turned out to be very expensive. we are better off
    disabling that feature."""

    for ancfctx in fctx.ancestors():
        ancpath = ancfctx.path()
        ancnode = ancfctx.filenode()
        # found an ancestor recorded under this path in the target manifest
        if am.get(ancpath, None) == ancnode:
            return ancpath
        # otherwise accept a match against the optional base manifest
        if basemf and basemf.get(ancpath, None) == ancnode:
            return ancpath
92 92
93 93
def _dirstatecopies(repo, match=None):
    """return copies recorded in the dirstate, restricted to tracked files

    Only files in state added/normal/merged are kept; an optional matcher
    further narrows the result.
    """
    ds = repo.dirstate
    copies = ds.copies().copy()
    for dst in list(copies):
        tracked = ds[dst] in b'anm'
        if not tracked or (match and not match(dst)):
            del copies[dst]
    return copies
101 101
102 102
def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    return b.manifest().filesnotin(a.manifest(), match=match)
111 111
112 112
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage always implies changeset-centric copy data
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
120 120
121 121
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst@b: src@a} mapping of copies.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # changeset-centric repos store copy data on the changeset itself;
    # delegate entirely in that case
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # restrict the file-ancestry walks below to b's ancestry, which keeps
    # _tracefile() cheap
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
179 179
180 180
def _changesetforwardcopies(a, b, match):
    """find {dst@b: src@a} copies using changeset-stored copy metadata

    Walks every revision only reachable from ``b`` (not from ``a``), parents
    before children via a min-heap of revision numbers, chaining each
    changeset's recorded copy information along the way.

    NOTE: the scraped source carried both sides of the r43547 diff, leaving
    two conflicting assignments to ``childcopies`` per branch
    (``childctx.p1copies()`` followed by ``p1copies``); this keeps only the
    post-change version that reads both dicts at once from ``_copies``.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo()
    children = {}
    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    # build a parent -> children map restricted to the missing revisions
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    # min-heap over revision numbers guarantees a parent is processed
    # before any of its descendants
    work = list(roots)
    all_copies = {r: {} for r in roots}
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        r = heapq.heappop(work)
        copies = all_copies.pop(r)
        if r == b.rev():
            return copies
        for i, c in enumerate(children[r]):
            childctx = repo[c]
            # fetch both parents' copy dicts in a single changeset read
            p1copies, p2copies = childctx._copies
            if r == childctx.p1().rev():
                parent = 1
                childcopies = p1copies
            else:
                assert r == childctx.p2().rev()
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            for f in childctx.filesremoved():
                if f in newcopies:
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                heapq.heappush(work, c)
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    # b.rev() is always reachable from the roots, so the loop must return
    assert False
254 255
255 256
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is None:
        # working copy: trace committed copies up to b's parent, then layer
        # the dirstate-recorded copies on top
        committed = _committedforwardcopies(a, b.p1(), base, match)
        return _chain(committed, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, base, match)
270 271
271 272
def _backwardrenames(a, b, match):
    """find {src@b: dst@a} renames by inverting the forward copies b -> a"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    backward = {}
    # sorted iteration makes the arbitrary 1:n pick deterministic
    for dst, src in sorted(pycompat.iteritems(forward)):
        if match and not match(src):
            continue
        # remove copies (source still exists, so it was a copy, not a rename)
        if src in a:
            continue
        backward[src] = dst
    return backward
292 293
293 294
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Dispatches on the relationship between x and y: forward when x is an
    ancestor of y, backward when y is an ancestor of x, and a chained
    backward+forward walk through their common ancestor otherwise.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    # trivial cases: identical or null contexts have no copies
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        # only pass a base when the common ancestor is a real revision
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    # drop chained copies that are invalid between x and y (see _filter)
    _filter(x, y, copies)
    return copies
328 329
329 330
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
408 409
409 410
def _isfullcopytraceable(repo, c1, base):
    """ Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        # working copy: judge based on its first parent instead
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        # public changesets involved: assume too expensive
        return False
    limit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return commits < limit
428 429
429 430
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    """handle a file copied/renamed on only one side of a merge

    ``dsts1`` are the destinations of ``src`` on side 1; ``m1``/``m2``/``mb``
    are the manifests of side 1, side 2 and the merge base.  Mutates ``copy``
    and ``renamedelete`` in place; returns nothing.
    """
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif m2[src] != mb[src]:
        # bail out if the side-2 version does not share history with the base
        if not _related(c2[src], base[src]):
            return
        # modified on side 2
        for dst in dsts1:
            if dst not in m2:
                # dst not added on side 2 (handle as regular
                # "both created" case in manifestmerge otherwise)
                copy[dst] = src
447 448
448 449
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the same five dicts as mergecopies():
    (copy, movewithdir, diverge, renamedelete, dirmove).
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert the {dst: src} copy dicts to {src: [dst, ...]} so we can reason
    # per source file about what happened on each side
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flat destination sets, used only for the debug output markers below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash directory keys for prefix matching below
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
613 614
614 615
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working copies are compared through their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    # collect every file touched between base and c2 (linear history only)
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index files added on side 1 by basename and by dirname, so we can
        # cheaply find candidates matching heuristics (1) and (2) above
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
722 723
723 724
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # step past the working-copy contexts (linkrev None) to their first
        # committed ancestor before comparing revision numbers
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        # lock-step walk: always advance the side with the larger linkrev
        # until both sides land on the same revision
        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        # one side ran out of ancestors: the files are unrelated
        return False
755 756
756 757
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        # only mark copies for files actually present in the working context
        if dst in wctx:
            wctx[dst].markcopied(src)
781 782
782 783
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset

    A file counts as added when the changeset touches it and it is absent
    from every parent.
    """
    return [f for f in ctx.files() if not any(f in p for p in ctx.parents())]
791 792
792 793
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset

    A file counts as removed when the changeset touches it but it is no
    longer present in the changeset itself.
    """
    return [f for f in ctx.files() if f not in ctx]
801 802
802 803
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries (p1copies, p2copies).

    Each dictionary is in the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrowspec or no longer present in ctx
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        # attribute the copy to whichever parent holds the exact source node
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
827 828
828 829
def encodecopies(files, copies):
    """encode a {dst: src} copy dict against the sorted ``files`` list

    Each entry becomes ``<index-of-dst>\\0<src>``; entries are joined with
    newlines.  Raises ProgrammingError if a copy target is not in ``files``.
    """
    items = [
        b'%d\0%s' % (i, copies[dst])
        for i, dst in enumerate(files)
        if dst in copies
    ]
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)
839 840
840 841
def decodecopies(files, data):
    """decode copies data produced by encodecopies() back to {dst: src}

    Returns None when the data does not parse (wrong syntax or index out of
    range), so callers can fall back gracefully.
    """
    try:
        copies = {}
        if not data:
            return copies
        for entry in data.split(b'\n'):
            strindex, src = entry.split(b'\0')
            copies[files[int(strindex)]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
856 857
857 858
def encodefileindices(files, subset):
    """encode ``subset`` as newline-joined decimal indices into ``files``"""
    members = set(subset)
    return b'\n'.join(
        b'%d' % i for i, f in enumerate(files) if f in members
    )
865 866
866 867
def decodefileindices(files, data):
    """decode data produced by encodefileindices() back to a file list

    Returns None when the data does not parse or an index falls outside
    ``files``, so callers can fall back gracefully.
    """
    try:
        if not data:
            return []
        subset = []
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if not (0 <= i < len(files)):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
882 883
883 884
def _getsidedata(srcrepo, rev):
    """compute the copies-related sidedata for revision ``rev`` of ``srcrepo``

    Returns a {sidedata-key: encoded-bytes} dict; empty when the revision
    carries no copy/add/remove information.
    """
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        # all encoders index into the same sorted file list
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        # only store non-empty encodings
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
906 907
907 908
def getsidedataadder(srcrepo, destrepo):
    """return a sidedata companion that adds copies sidedata during upgrade"""

    def sidedatacompanion(revlog, rev):
        # only the changelog has a 'filteredrevs' attribute
        if util.safehasattr(revlog, 'filteredrevs'):
            return False, (), _getsidedata(srcrepo, rev)
        return False, (), {}

    return sidedatacompanion
916 917
917 918
def getsidedataremover(srcrepo, destrepo):
    """return a sidedata companion that strips copies sidedata during upgrade"""

    def sidedatacompanion(revlog, rev):
        toremove = ()
        # only the changelog has a 'filteredrevs' attribute
        ischangelog = util.safehasattr(revlog, 'filteredrevs')
        if ischangelog and revlog.flags(rev) & REVIDX_SIDEDATA:
            toremove = (
                sidedatamod.SD_P1COPIES,
                sidedatamod.SD_P2COPIES,
                sidedatamod.SD_FILESADDED,
                sidedatamod.SD_FILESREMOVED,
            )
        return False, toremove, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now