##// END OF EJS Templates
copies: avoid instantiating more changectx to access parent revisions...
marmoute -
r43548:181d28ba default
parent child Browse files
Show More
@@ -1,932 +1,933 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16
17 17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18 18
19 19 from . import (
20 20 error,
21 21 match as matchmod,
22 22 node,
23 23 pathutil,
24 24 pycompat,
25 25 util,
26 26 )
27 27
28 28 from .revlogutils import sidedata as sidedatamod
29 29
30 30 from .utils import stringutil
31 31
32 32
33 33 def _filter(src, dst, t):
34 34 """filters out invalid copies after chaining"""
35 35
36 36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 38 # in the following table (not including trivial cases). For example, case 2
39 39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 40 # then was renamed between 'mid' and 'dst'.
41 41 #
42 42 # case src mid dst result
43 43 # 1 x y - -
44 44 # 2 x y y x->y
45 45 # 3 x y x -
46 46 # 4 x y z x->z
47 47 # 5 - x y -
48 48 # 6 x x y x->y
49 49 #
50 50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 52 # between 5 and 6, so it includes all cases in its result.
53 53 # Cases 1, 3, and 5 are then removed by _filter().
54 54
55 55 for k, v in list(t.items()):
56 56 # remove copies from files that didn't exist
57 57 if v not in src:
58 58 del t[k]
59 59 # remove criss-crossed copies
60 60 elif k in src and v in dst:
61 61 del t[k]
62 62 # remove copies to files that were then removed
63 63 elif k not in dst:
64 64 del t[k]
65 65
66 66
def _chain(a, b):
    """chain two sets of copies 'a' and 'b'"""
    chained = a.copy()
    for dst, src in pycompat.iteritems(b):
        # if 'b' copies from a file that 'a' itself copied, follow the
        # chain through to the original source
        chained[dst] = chained.get(src, src)
    return chained
76 76
77 77
78 78 def _tracefile(fctx, am, basemf):
79 79 """return file context that is the ancestor of fctx present in ancestor
80 80 manifest am
81 81
82 82 Note: we used to try and stop after a given limit, however checking if that
83 83 limit is reached turned out to be very expensive. we are better off
84 84 disabling that feature."""
85 85
86 86 for f in fctx.ancestors():
87 87 path = f.path()
88 88 if am.get(path, None) == f.filenode():
89 89 return path
90 90 if basemf and basemf.get(path, None) == f.filenode():
91 91 return path
92 92
93 93
94 94 def _dirstatecopies(repo, match=None):
95 95 ds = repo.dirstate
96 96 c = ds.copies().copy()
97 97 for k in list(c):
98 98 if ds[k] not in b'anm' or (match and not match(k)):
99 99 del c[k]
100 100 return c
101 101
102 102
103 103 def _computeforwardmissing(a, b, match=None):
104 104 """Computes which files are in b but not a.
105 105 This is its own function so extensions can easily wrap this call to see what
106 106 files _forwardcopies is about to process.
107 107 """
108 108 ma = a.manifest()
109 109 mb = b.manifest()
110 110 return mb.filesnotin(ma, match=match)
111 111
112 112
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage always implies changeset-centric copies
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
120 120
121 121
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst@b: src@a} mapping computed either via the
    changeset-centric algorithm or by tracing each missing file's
    filelog ancestry back to a's manifest."""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # a single ancestry context is shared by every traced file context
    # (presumably to avoid recomputing ancestors per file — see the
    # fctx._ancestrycontext assignment below)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
179 179
180 180
def _changesetforwardcopies(a, b, match):
    """return {dst@b: src@a} using changeset-stored copy metadata

    'a' must be an ancestor of 'b'. Copy information is propagated revision
    by revision from the roots towards b, merging the data coming from both
    parents when a merge changeset is reached.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo()
    children = {}
    cl = repo.changelog
    # all revisions between a (exclusive) and b (inclusive)
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    # use a heap so revisions are processed in topological (ascending) order
    work = list(roots)
    all_copies = {r: {} for r in roots}
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        r = heapq.heappop(work)
        copies = all_copies.pop(r)
        if r == b.rev():
            return copies
        for i, c in enumerate(children[r]):
            childctx = repo[c]
            # read the parent revisions straight from the changelog index;
            # this avoids instantiating two extra changectx per child just
            # to figure out which parent 'r' is
            p1, p2 = cl.parentrevs(c)
            p1copies, p2copies = childctx._copies
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            # a removed file breaks any copy chain ending on it
            for f in childctx.filesremoved():
                if f in newcopies:
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                heapq.heappush(work, c)
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    # b.rev() is part of missingrevs, so the loop must return before draining
    assert False
255 256
256 257
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is None:
        # 'b' is the working copy: trace copies up to its first parent,
        # then layer the dirstate-recorded copies on top
        committed = _committedforwardcopies(a, b.p1(), base, match)
        return _chain(committed, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, base, match)
271 272
272 273
def _backwardrenames(a, b, match):
    """find renames from descendant 'b' back to ancestor 'a'"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    backward = {}
    # sorted() makes the arbitrary 1:n pick deterministic
    for fwd_dst, fwd_src in sorted(pycompat.iteritems(forward)):
        if match and not match(fwd_src):
            continue
        # a copy (not a rename): the source still exists in 'a'
        if fwd_src in a:
            continue
        backward[fwd_src] = fwd_dst
    return backward
293 294
294 295
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    'x' and 'y' are arbitrary changectxs; the search runs forward,
    backward, or combined through their common ancestor depending on how
    they are related. 'match', when given, restricts the destinations
    considered."""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    # identical or null revisions carry no copies
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        # go back from x to the ancestor, then forward to y
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
329 330
330 331
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, the following message
    is printed:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
409 410
410 411
411 412 def _isfullcopytraceable(repo, c1, base):
412 413 """ Checks that if base, source and destination are all no-public branches,
413 414 if yes let's use the full copytrace algorithm for increased capabilities
414 415 since it will be fast enough.
415 416
416 417 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
417 418 number of changesets from c1 to base such that if number of changesets are
418 419 more than the limit, full copytracing algorithm won't be used.
419 420 """
420 421 if c1.rev() is None:
421 422 c1 = c1.p1()
422 423 if c1.mutable() and base.mutable():
423 424 sourcecommitlimit = repo.ui.configint(
424 425 b'experimental', b'copytrace.sourcecommitlimit'
425 426 )
426 427 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
427 428 return commits < sourcecommitlimit
428 429 return False
429 430
430 431
431 432 def _checksinglesidecopies(
432 433 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
433 434 ):
434 435 if src not in m2:
435 436 # deleted on side 2
436 437 if src not in m1:
437 438 # renamed on side 1, deleted on side 2
438 439 renamedelete[src] = dsts1
439 440 elif m2[src] != mb[src]:
440 441 if not _related(c2[src], base[src]):
441 442 return
442 443 # modified on side 2
443 444 for dst in dsts1:
444 445 if dst not in m2:
445 446 # dst not added on side 2 (handle as regular
446 447 # "both created" case in manifestmerge otherwise)
447 448 copy[dst] = src
448 449
449 450
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the same five dicts as mergecopies():
    (copy, movewithdir, diverge, renamedelete, dirmove).
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # copies from the merge base to each side
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert to {source: [destinations]} so divergence can be detected
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flat destination sets, used only for the debug output below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
614 615
615 616
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same five dicts as mergecopies(); only the first ("copy")
    is ever populated by this algorithm.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep the copies whose source still exists on side 1
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index the files added on side 1 by basename and by directory,
        # so rename candidates can be looked up cheaply
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
723 724
724 725
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # step working file contexts back to their first real ancestor
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        # walk the side with the higher linkrev backwards until both sides
        # land on the same linkrev
        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        # one side ran out of ancestors: the files are unrelated
        return False
756 757
757 758
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        # only mark files that actually exist in the working context
        if dst in wctx:
            wctx[dst].markcopied(src)
782 783
783 784
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset

    A file counts as added when it exists in none of the parents of ctx.
    """
    return [
        f for f in ctx.files() if not any(f in p for p in ctx.parents())
    ]
792 793
793 794
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset

    A file counts as removed when it is listed as touched by the changeset
    but no longer present in it.
    """
    return [f for f in ctx.files() if f not in ctx]
802 803
803 804
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries (p1copies, p2copies).

    Each dictionary is in the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    p1 = ctx.p1()
    p2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrowspec or no longer present in ctx
        if not narrowmatch(dst) or dst not in ctx:
            continue
        copied = ctx[dst].renamed()
        if not copied:
            continue
        src, srcnode = copied
        # attribute the copy to whichever parent holds the source file
        # with the recorded filenode
        if src in p1 and p1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in p2 and p2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
828 829
829 830
def encodecopies(files, copies):
    """encode a {dst: src} copy mapping relative to the ``files`` list

    Each entry becomes "<index>\\0<source>" where <index> is the position
    of the destination within ``files``; entries are newline-joined.
    Raises a ProgrammingError when a copy destination is not in ``files``.
    """
    items = [
        b'%d\0%s' % (i, copies[dst])
        for i, dst in enumerate(files)
        if dst in copies
    ]
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)
840 841
841 842
def decodecopies(files, data):
    """decode a copy mapping produced by encodecopies()

    Returns a {dst: src} dict, or None when the payload cannot be parsed.
    """
    try:
        copies = {}
        if not data:
            return copies
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            copies[files[int(strindex)]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
857 858
858 859
def encodefileindices(files, subset):
    """encode the members of ``subset`` as newline-separated decimal
    indices into the ``files`` list"""
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % i for i, f in enumerate(files) if f in wanted
    )
866 867
867 868
def decodefileindices(files, data):
    """decode indices produced by encodefileindices() back to file names

    Returns None when the payload is not a valid index list for ``files``.
    """
    try:
        if not data:
            return []
        subset = []
        for strindex in data.split(b'\n'):
            i = int(strindex)
            if not (0 <= i < len(files)):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
883 884
884 885
def _getsidedata(srcrepo, rev):
    """compute the copy-related sidedata for revision ``rev``

    Returns a dict mapping sidedata keys (SD_P1COPIES, SD_P2COPIES,
    SD_FILESADDED, SD_FILESREMOVED) to their encoded payloads; keys with
    empty payloads are omitted."""
    ctx = srcrepo[rev]
    filescopies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if any([filescopies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies, p2copies = filescopies
        # all payloads are encoded relative to the sorted file list
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
907 908
908 909
def getsidedataadder(srcrepo, destrepo):
    """build a sidedata companion that adds copy sidedata during an upgrade

    The returned callable is invoked per (revlog, rev); it computes sidedata
    only for the changelog (detected via the 'filteredrevs' attribute) and
    never requests removal of existing sidedata."""
    def sidedatacompanion(revlog, rev):
        sidedata = {}
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            sidedata = _getsidedata(srcrepo, rev)
        return False, (), sidedata

    return sidedatacompanion
917 918
918 919
def getsidedataremover(srcrepo, destrepo):
    """build a sidedata companion that strips copy sidedata during a downgrade

    For changelog revisions flagged as carrying sidedata, the companion
    requests removal of all four copy-related sidedata keys."""
    def sidedatacompanion(revlog, rev):
        f = ()
        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                f = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, f, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now