##// END OF EJS Templates
copies: move early return for "no copies" case a little earlier...
Martin von Zweigbergk -
r42342:85f59340 default
parent child Browse files
Show More
@@ -1,1026 +1,1027
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 util,
21 21 )
22 22 from .utils import (
23 23 stringutil,
24 24 )
25 25
def _findlimit(repo, ctxa, ctxb):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.

    ctxa/ctxb are changectx-like objects; either (but not both) may be the
    working directory (rev() is None).  Returns a revision number.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    wdirparents = None
    a = ctxa.rev()
    b = ctxb.rev()
    # a rev of None means the working directory: substitute the wdirrev
    # pseudo-revision and remember its real parents for parent lookups below
    if a is None:
        wdirparents = (ctxa.p1(), ctxa.p2())
        a = node.wdirrev
    if b is None:
        # at most one of the two endpoints may be the working directory
        assert not wdirparents
        wdirparents = (ctxb.p1(), ctxb.p2())
        b = node.wdirrev

    side = {a: -1, b: 1}
    # heapq is a min-heap; revs are negated so the highest rev pops first,
    # giving a reverse-topological walk
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(visit)
        if r == node.wdirrev:
            # wdirrev is not in the changelog; use the remembered parents
            parents = [pctx.rev() for pctx in wdirparents]
        else:
            parents = cl.parentrevs(r)
        # drop a null second parent so it is never visited
        if parents[1] == node.nullrev:
            parents = parents[:1]
        for p in parents:
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
109 109
110 110 def _chain(src, dst, a, b):
111 111 """chain two sets of copies a->b"""
112 112 t = a.copy()
113 113 for k, v in b.iteritems():
114 114 if v in t:
115 115 # found a chain
116 116 if t[v] != k:
117 117 # file wasn't renamed back to itself
118 118 t[k] = t[v]
119 119 if v not in dst:
120 120 # chain was a rename, not a copy
121 121 del t[v]
122 122 if v in src:
123 123 # file is a copy of an existing file
124 124 t[k] = v
125 125
126 126 for k, v in list(t.items()):
127 127 # remove criss-crossed copies
128 128 if k in src and v in dst:
129 129 del t[k]
130 130 # remove copies to files that were then removed
131 131 elif k not in dst:
132 132 del t[k]
133 133
134 134 return t
135 135
def _tracefile(fctx, am, limit=node.nullrev):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am, stopping after the first ancestor lower than limit"""

    # walk the file's ancestry, newest first
    for ancestor in fctx.ancestors():
        # same path with the same filenode in the ancestor manifest: found it
        if am.get(ancestor.path(), None) == ancestor.filenode():
            return ancestor
        # crossed below the limit without a match: stop looking
        # (falls through to an implicit None return)
        if limit >= 0 and not ancestor.isintroducedafter(limit):
            return None
145 145
146 146 def _dirstatecopies(repo, match=None):
147 147 ds = repo.dirstate
148 148 c = ds.copies().copy()
149 149 for k in list(c):
150 150 if ds[k] not in 'anm' or (match and not match(k)):
151 151 del c[k]
152 152 return c
153 153
154 154 def _computeforwardmissing(a, b, match=None):
155 155 """Computes which files are in b but not a.
156 156 This is its own function so extensions can easily wrap this call to see what
157 157 files _forwardcopies is about to process.
158 158 """
159 159 ma = a.manifest()
160 160 mb = b.manifest()
161 161 return mb.filesnotin(ma, match=match)
162 162
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    readfrom = repo.ui.config('experimental', 'copies.read-from')
    # both settings cause copy metadata to be read from the changeset
    return readfrom in ('changeset-only', 'compatibility')
167 167
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # changeset-centric mode reads copy metadata from changesets instead of
    # tracing through filelogs
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    # revisions below this limit cannot hold relevant copy information
    limit = _findlimit(repo, a, b)
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # one shared ancestry context keeps per-file ancestor walks cheap
    # (see _makegetfctx's docstring for the performance rationale)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing file to search: %d\n' % len(missing))

    for f in missing:
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            # an ancestor of f exists in a's manifest: record the copy
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            cm[f] = ofctx.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm
226 226
def _changesetforwardcopies(a, b, match):
    """find {dst@b: src@a} copies using changeset-stored copy metadata

    Walks revisions from a towards b in topological order (via a min-heap),
    chaining each changeset's p1copies()/p2copies() onto the copies found so
    far, and returns the accumulated copies when b is reached.
    """
    if a.rev() == node.nullrev:
        # nothing can have been copied from the null revision
        return {}

    repo = a.repo()
    children = {}
    cl = repo.changelog
    # revisions reachable from b but not from a: the ones we must walk
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    for r in missingrevs:
        # build a parent -> children map restricted to the missing set
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    # parents that are not themselves missing are where the walk starts
    roots = set(children) - set(missingrevs)
    # 'work' contains 3-tuples of a (revision number, parent number, copies).
    # The parent number is only used for knowing which parent the copies dict
    # came from.
    work = [(r, 1, {}) for r in roots]
    heapq.heapify(work)
    while work:
        r, i1, copies1 = heapq.heappop(work)
        if work and work[0][0] == r:
            # We are tracing copies from both parents
            r, i2, copies2 = heapq.heappop(work)
            copies = {}
            ctx = repo[r]
            p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
            allcopies = set(copies1) | set(copies2)
            # TODO: perhaps this filtering should be done as long as ctx
            # is merge, whether or not we're tracing from both parent.
            for dst in allcopies:
                if not match(dst):
                    continue
                if dst not in copies2:
                    # Copied on p1 side: mark as copy from p1 side if it didn't
                    # already exist on p2 side
                    if dst not in p2man:
                        copies[dst] = copies1[dst]
                elif dst not in copies1:
                    # Copied on p2 side: mark as copy from p2 side if it didn't
                    # already exist on p1 side
                    if dst not in p1man:
                        copies[dst] = copies2[dst]
                else:
                    # Copied on both sides: mark as copy from p1 side
                    copies[dst] = copies1[dst]
        else:
            copies = copies1
        if r == b.rev():
            # reached the target revision: done
            return copies
        for c in children[r]:
            childctx = repo[c]
            if r == childctx.p1().rev():
                parent = 1
                childcopies = childctx.p1copies()
            else:
                assert r == childctx.p2().rev()
                parent = 2
                childcopies = childctx.p2copies()
            if not match.always():
                childcopies = {dst: src for dst, src in childcopies.items()
                               if match(dst)}
            # compose copies found so far with this child's own copies
            childcopies = _chain(a, childctx, copies, childcopies)
            heapq.heappush(work, (c, parent, childcopies))
    # b is always reachable from the roots, so the loop must return above
    assert False
296 296
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    match = a.repo().narrowmatch(match)
    # committed-to-committed comparison: the common case
    if b.rev() is not None:
        return _committedforwardcopies(a, b, match)
    # b is the working copy from here on
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b._repo, match)
    # trace up to b's parent, then layer the dirstate copies on top
    committed = _committedforwardcopies(a, b.p1(), match)
    return _chain(a, b, committed, _dirstatecopies(b._repo, match))
311 311
def _backwardrenames(a, b, match):
    """find renames (copies are dropped) from a back to its descendant b,
    by reversing the copies found from b forward to a"""
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    backward = {}
    for fdst, fsrc in sorted(forward.iteritems()):
        if match and not match(fsrc):
            continue
        # remove copies (the source still exists in a, so it wasn't a rename)
        if fsrc in a:
            continue
        backward[fsrc] = fdst
    return backward
332 332
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    # trivial cases: identical or empty endpoints have no copies
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: trace copies forward
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if a == y:
        # y is an ancestor of x: trace renames backward
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y, match=match)
    if debug:
        repo.ui.debug('debug.copies: search mode: combined\n')
    # unrelated endpoints: go backward to the common ancestor, then forward
    return _chain(x, y, _backwardrenames(x, a, match=match),
                  _forwardcopies(a, y, match=match))
355 355
356 356 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
357 357 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
358 358 and c2. This is its own function so extensions can easily wrap this call
359 359 to see what files mergecopies is about to process.
360 360
361 361 Even though c1 and c2 are not used in this function, they are useful in
362 362 other extensions for being able to read the file nodes of the changed files.
363 363
364 364 "baselabel" can be passed to help distinguish the multiple computations
365 365 done in the graft case.
366 366 """
367 367 u1 = sorted(addedinm1 - addedinm2)
368 368 u2 = sorted(addedinm2 - addedinm1)
369 369
370 370 header = " unmatched files in %s"
371 371 if baselabel:
372 372 header += ' (from %s)' % baselabel
373 373 if u1:
374 374 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
375 375 if u2:
376 376 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
377 377
378 378 return u1, u2
379 379
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    # reuse a cached ancestry context when the changectx already has one
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            # working directory: seed the ancestry from its parents
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        # cache it on the changectx for subsequent calls
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirfilenodeids:  # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize so repeated (f, n) lookups don't rebuild filectxs
    return util.lrucachefunc(makectx)
414 414
415 415 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
416 416 """combine partial copy paths"""
417 417 remainder = {}
418 418 for f in copyfrom:
419 419 if f in copyto:
420 420 finalcopy[copyto[f]] = copyfrom[f]
421 421 del copyto[f]
422 422 for f in incompletediverge:
423 423 assert f not in diverge
424 424 ic = incompletediverge[f]
425 425 if ic[0] in copyto:
426 426 diverge[f] = [copyto[ic[0]], ic[1]]
427 427 else:
428 428 remainder[f] = ic
429 429 return remainder
430 430
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    boolctrace = stringutil.parsebool(copytracing)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    elif boolctrace is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}
    else:
        return _fullcopytracing(repo, c1, c2, base)
506 506
507 507 def _isfullcopytraceable(repo, c1, base):
508 508 """ Checks that if base, source and destination are all no-public branches,
509 509 if yes let's use the full copytrace algorithm for increased capabilities
510 510 since it will be fast enough.
511 511
512 512 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
513 513 number of changesets from c1 to base such that if number of changesets are
514 514 more than the limit, full copytracing algorithm won't be used.
515 515 """
516 516 if c1.rev() is None:
517 517 c1 = c1.p1()
518 518 if c1.mutable() and base.mutable():
519 519 sourcecommitlimit = repo.ui.configint('experimental',
520 520 'copytrace.sourcecommitlimit')
521 521 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
522 522 return commits < sourcecommitlimit
523 523 return False
524 524
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the same five dicts as mergecopies():
    (copy, movewithdir, diverge, renamedelete, dirmove).
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        # use the topological common ancestor instead of the overridden base
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1, c2)
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2,
                                      m1.filesnotin(mta, repo.narrowmatch()),
                                      m2.filesnotin(mta, repo.narrowmatch()),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    # stitch together partial copies found on the dirty side(s)
    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    if dirtyc2:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1 and dirtyc2:
        remainder = _combinecopies(both2['incomplete'], both1['incomplete'],
                                   copy, bothdiverge, bothincompletediverge)
        remainder1 = _combinecopies(both1['incomplete'], both2['incomplete'],
                                    copy, bothdiverge, bothincompletediverge)
        remainder.update(remainder1)
    elif dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    # Sometimes we get invalid copies here (the "and not remotebase" in
    # _checkcopies() seems suspicious). Filter them out.
    for dst, src in fullcopy.copy().items():
        if src not in mb:
            del fullcopy[dst]
    # Sometimes we forget to add entries from "copy" to "fullcopy", so fix
    # that up here
    for dst, src in copy.items():
        fullcopy[dst] = src
    # Sometimes we forget to add entries from "diverge" to "fullcopy", so fix
    # that up here
    for src, dsts in diverge.items():
        for dst in dsts:
            fullcopy[dst] = src

    # no copies at all: directory-move detection below can't find anything
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
776 777
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple as _fullcopytracing: (copies, {}, {}, {}, {});
    only the first element is ever populated by the heuristic path.
    """

    # The working directory has no revision number; substitute its first
    # parent so the rev-based checks below work.
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # Walk first parents from c2 back to base, collecting every file touched;
    # bail out to the full algorithm as soon as a merge commit is seen.
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # Seed with the recorded copies base->c2, keeping only those whose source
    # still exists on the c1 side.
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # Index c1's new files (relative to base) by basename and by dirname,
        # so each missing file can be matched against same-name or
        # same-directory candidates cheaply.
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
877 878
878 879 def _related(f1, f2):
879 880 """return True if f1 and f2 filectx have a common ancestor
880 881
881 882 Walk back to common ancestor to see if the two files originate
882 883 from the same file. Since workingfilectx's rev() is None it messes
883 884 up the integer comparison logic, hence the pre-step check for
884 885 None (f1 and f2 can only be workingfilectx's initially).
885 886 """
886 887
887 888 if f1 == f2:
888 889 return True # a match
889 890
890 891 g1, g2 = f1.ancestors(), f2.ancestors()
891 892 try:
892 893 f1r, f2r = f1.linkrev(), f2.linkrev()
893 894
894 895 if f1r is None:
895 896 f1 = next(g1)
896 897 if f2r is None:
897 898 f2 = next(g2)
898 899
899 900 while True:
900 901 f1r, f2r = f1.linkrev(), f2.linkrev()
901 902 if f1r > f2r:
902 903 f1 = next(g1)
903 904 elif f2r > f1r:
904 905 f2 = next(g2)
905 906 else: # f1 and f2 point to files in the same linkrev
906 907 return f1 == f2 # true if they point to the same file
907 908 except StopIteration:
908 909 return False
909 910
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    Mutates data['copy'], data['fullcopy'], data['diverge'],
    data['incomplete'] and data['incompletediverge'] in place; returns None.

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    # Walk f's ancestry in the source, following renames; 'seen' records every
    # path name encountered so the walk can both detect revisits and later
    # report which of those names exist in the merge base.
    of = None
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    # The walk ended (limit reached or ancestry exhausted) without resolving a
    # copy; 'of' is the last path seen.  If it exists in the topological CA,
    # record it as incomplete so a later pass across the other side of the DAG
    # can finish the chain, or as (incomplete) divergence when appropriate.
    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return
1002 1003
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # copy tracing is active for 'heuristics', for an explicit truthy value,
    # and for an unset/unparsable value (the default)
    tracingactive = (ctraceconfig == 'heuristics' or bctrace
                     or bctrace is None)
    if skiprev is not None and tracingactive:
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    else:
        exclude = {}
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst not in exclude:
            wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now