##// END OF EJS Templates
copies: move comment about implementation of mergecopies() to end...
Martin von Zweigbergk -
r42287:967c098e default
parent child Browse files
Show More
@@ -1,1011 +1,1012 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 util,
21 21 )
22 22 from .utils import (
23 23 stringutil,
24 24 )
25 25
def _findlimit(repo, ctxa, ctxb):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    wdirparents = None
    a = ctxa.rev()
    b = ctxb.rev()
    if a is None:
        # ctxa is the working directory; use node.wdirrev as its stand-in
        # rev and remember its parents so the walk can expand them later
        wdirparents = (ctxa.p1(), ctxa.p2())
        a = node.wdirrev
    if b is None:
        # at most one of the two endpoints may be the working directory
        assert not wdirparents
        wdirparents = (ctxb.p1(), ctxb.p2())
        b = node.wdirrev

    side = {a: -1, b: 1}
    # min-heap of negated revs, so the highest rev pops first
    # (i.e. we walk in reverse topological order)
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(visit)
        if r == node.wdirrev:
            # the synthetic working-dir rev: its parents come from the
            # contexts captured above, not from the changelog
            parents = [pctx.rev() for pctx in wdirparents]
        else:
            parents = cl.parentrevs(r)
        if parents[1] == node.nullrev:
            # drop the null second parent of non-merge revisions
            parents = parents[:1]
        for p in parents:
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
109 109
110 110 def _chain(src, dst, a, b):
111 111 """chain two sets of copies a->b"""
112 112 t = a.copy()
113 113 for k, v in b.iteritems():
114 114 if v in t:
115 115 # found a chain
116 116 if t[v] != k:
117 117 # file wasn't renamed back to itself
118 118 t[k] = t[v]
119 119 if v not in dst:
120 120 # chain was a rename, not a copy
121 121 del t[v]
122 122 if v in src:
123 123 # file is a copy of an existing file
124 124 t[k] = v
125 125
126 126 for k, v in list(t.items()):
127 127 # remove criss-crossed copies
128 128 if k in src and v in dst:
129 129 del t[k]
130 130 # remove copies to files that were then removed
131 131 elif k not in dst:
132 132 del t[k]
133 133
134 134 return t
135 135
def _tracefile(fctx, am, limit=node.nullrev):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am, stopping after the first ancestor lower than limit"""

    for ancestor in fctx.ancestors():
        # a filenode match means this ancestor is the exact version
        # recorded in the ancestor manifest
        if am.get(ancestor.path(), None) == ancestor.filenode():
            return ancestor
        # stop once we walk below the search limit (limit < 0 disables this)
        if limit >= 0 and not ancestor.isintroducedafter(limit):
            return None
145 145
146 146 def _dirstatecopies(repo, match=None):
147 147 ds = repo.dirstate
148 148 c = ds.copies().copy()
149 149 for k in list(c):
150 150 if ds[k] not in 'anm' or (match and not match(k)):
151 151 del c[k]
152 152 return c
153 153
154 154 def _computeforwardmissing(a, b, match=None):
155 155 """Computes which files are in b but not a.
156 156 This is its own function so extensions can easily wrap this call to see what
157 157 files _forwardcopies is about to process.
158 158 """
159 159 ma = a.manifest()
160 160 mb = b.manifest()
161 161 return mb.filesnotin(ma, match=match)
162 162
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    readfrom = repo.ui.config('experimental', 'copies.read-from')
    return readfrom == 'compatibility'
167 167
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst@b: src@a} dict of detected copies.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a, b)
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files so each file's
    # ancestor walk stays bounded to the relevant revisions (linkrev
    # adjustment performance)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing file to search: %d\n' % len(missing))

    for f in missing:
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            # an ancestor of f exists in a's manifest: record the copy
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            cm[f] = ofctx.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm
226 226
def _changesetforwardcopies(a, b, match):
    """Changeset-centric version of _committedforwardcopies()

    Walks the changesets between a and b, chaining each changeset's
    recorded copy information (childctx.p1copies()/p2copies()) into a
    cumulative {dst@b: src@a} mapping.
    """
    if a.rev() == node.nullrev:
        return {}

    repo = a.repo()
    # map of rev -> list of child revs within the a::b range
    children = {}
    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    # 'work' contains 3-tuples of a (revision number, parent number, copies).
    # The parent number is only used for knowing which parent the copies dict
    # came from.
    work = [(r, 1, {}) for r in roots]
    heapq.heapify(work)
    while work:
        r, i1, copies1 = heapq.heappop(work)
        if work and work[0][0] == r:
            # We are tracing copies from both parents
            r, i2, copies2 = heapq.heappop(work)
            copies = {}
            ctx = repo[r]
            p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
            allcopies = set(copies1) | set(copies2)
            # TODO: perhaps this filtering should be done as long as ctx
            # is merge, whether or not we're tracing from both parent.
            for dst in allcopies:
                if not match(dst):
                    continue
                if dst not in copies2:
                    # Copied on p1 side: mark as copy from p1 side if it didn't
                    # already exist on p2 side
                    if dst not in p2man:
                        copies[dst] = copies1[dst]
                elif dst not in copies1:
                    # Copied on p2 side: mark as copy from p2 side if it didn't
                    # already exist on p1 side
                    if dst not in p1man:
                        copies[dst] = copies2[dst]
                else:
                    # Copied on both sides: mark as copy from p1 side
                    copies[dst] = copies1[dst]
        else:
            copies = copies1
        if r == b.rev():
            # reached the target changeset: 'copies' is the final answer
            return copies
        # extend the chain into each child of r
        for c in children[r]:
            childctx = repo[c]
            if r == childctx.p1().rev():
                parent = 1
                childcopies = childctx.p1copies()
            else:
                assert r == childctx.p2().rev()
                parent = 2
                childcopies = childctx.p2copies()
            if not match.always():
                childcopies = {dst: src for dst, src in childcopies.items()
                               if match(dst)}
            childcopies = _chain(a, childctx, copies, childcopies)
            heapq.heappush(work, (c, parent, childcopies))
    # b.rev() is reachable from the roots, so the loop must return above
    assert False
296 296
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        # both endpoints are committed changesets
        return _committedforwardcopies(a, b, match)
    # b is the working copy
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b._repo, match)
    # trace copies up to b's parent, then chain in dirstate copies
    committed = _committedforwardcopies(a, b.p1(), match)
    return _chain(a, b, committed, _dirstatecopies(b._repo, match))
311 311
def _backwardrenames(a, b, match):
    """find {src@b: dst@a} rename mapping, tracing backwards from b to a"""
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    renames = {}
    for dst, src in sorted(forward.iteritems()):
        if match and not match(src):
            continue
        if src in a:
            # source still exists in a, so it was a copy, not a rename
            continue
        renames[src] = dst
    return renames
332 332
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    if x == y or not x or not y:
        # identical or null endpoints: nothing to trace
        return {}
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: trace forward
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if a == y:
        # y is an ancestor of x: trace backward
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y, match=match)
    # unrelated endpoints: go back to the common ancestor, then forward
    if debug:
        repo.ui.debug('debug.copies: search mode: combined\n')
    backward = _backwardrenames(x, a, match=match)
    forward = _forwardcopies(a, y, match=match)
    return _chain(x, y, backward, forward)
355 355
356 356 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
357 357 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
358 358 and c2. This is its own function so extensions can easily wrap this call
359 359 to see what files mergecopies is about to process.
360 360
361 361 Even though c1 and c2 are not used in this function, they are useful in
362 362 other extensions for being able to read the file nodes of the changed files.
363 363
364 364 "baselabel" can be passed to help distinguish the multiple computations
365 365 done in the graft case.
366 366 """
367 367 u1 = sorted(addedinm1 - addedinm2)
368 368 u2 = sorted(addedinm2 - addedinm1)
369 369
370 370 header = " unmatched files in %s"
371 371 if baselabel:
372 372 header += ' (from %s)' % baselabel
373 373 if u1:
374 374 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
375 375 if u2:
376 376 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
377 377
378 378 return u1, u2
379 379
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # no cached ancestry context yet: build one from ctx's rev (or,
        # for the working directory, from its parents) and cache it on ctx
        revs = [rev]
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        # build the filectx for file f at filenode n, sharing the
        # ancestry context computed above
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize: the same (f, n) pair may be requested repeatedly
    return util.lrucachefunc(makectx)
414 414
415 415 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
416 416 """combine partial copy paths"""
417 417 remainder = {}
418 418 for f in copyfrom:
419 419 if f in copyto:
420 420 finalcopy[copyto[f]] = copyfrom[f]
421 421 del copyto[f]
422 422 for f in incompletediverge:
423 423 assert f not in diverge
424 424 ic = incompletediverge[f]
425 425 if ic[0] in copyto:
426 426 diverge[f] = [copyto[ic[0]], ic[1]]
427 427 else:
428 428 remainder[f] = ic
429 429 return remainder
430 430
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    boolctrace = stringutil.parsebool(copytracing)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    elif boolctrace is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}
    else:
        return _fullcopytracing(repo, c1, c2, base)
505 506
506 507 def _isfullcopytraceable(repo, c1, base):
507 508 """ Checks that if base, source and destination are all no-public branches,
508 509 if yes let's use the full copytrace algorithm for increased capabilities
509 510 since it will be fast enough.
510 511
511 512 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
512 513 number of changesets from c1 to base such that if number of changesets are
513 514 more than the limit, full copytracing algorithm won't be used.
514 515 """
515 516 if c1.rev() is None:
516 517 c1 = c1.p1()
517 518 if c1.mutable() and base.mutable():
518 519 sourcecommitlimit = repo.ui.configint('experimental',
519 520 'copytrace.sourcecommitlimit')
520 521 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
521 522 return commits < sourcecommitlimit
522 523 return False
523 524
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the same five dicts (copy, movewithdir, diverge, renamedelete,
    dirmove) documented on mergecopies().
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    # tca = topological common ancestor, distinct from 'base' when grafting
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1, c2)
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2,
                                      m1.filesnotin(mta, repo.narrowmatch()),
                                      m2.filesnotin(mta, repo.narrowmatch()),
                                      baselabel='topological common ancestor')

    # trace each side's unmatched files back toward the common ancestor
    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    # in the graft case, combine the partial copy paths found on each side
    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    if dirtyc2:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    # files added on both sides may still be copies of a common source
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1 and dirtyc2:
        remainder = _combinecopies(both2['incomplete'], both1['incomplete'],
                                   copy, bothdiverge, bothincompletediverge)
        remainder1 = _combinecopies(both1['incomplete'], both2['incomplete'],
                                    copy, bothdiverge, bothincompletediverge)
        remainder.update(remainder1)
    elif dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
761 762
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # for a working-directory endpoint, reason about its first parent instead
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    # {dst: src} copies detected so far
    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # collect the files touched anywhere on the base..c2 branch
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep the recorded copies whose source is still present in c1
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index files added in c1 (relative to base) by basename and dirname
        # so move candidates for each missing file can be found cheaply
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
862 863
863 864 def _related(f1, f2):
864 865 """return True if f1 and f2 filectx have a common ancestor
865 866
866 867 Walk back to common ancestor to see if the two files originate
867 868 from the same file. Since workingfilectx's rev() is None it messes
868 869 up the integer comparison logic, hence the pre-step check for
869 870 None (f1 and f2 can only be workingfilectx's initially).
870 871 """
871 872
872 873 if f1 == f2:
873 874 return True # a match
874 875
875 876 g1, g2 = f1.ancestors(), f2.ancestors()
876 877 try:
877 878 f1r, f2r = f1.linkrev(), f2.linkrev()
878 879
879 880 if f1r is None:
880 881 f1 = next(g1)
881 882 if f2r is None:
882 883 f2 = next(g2)
883 884
884 885 while True:
885 886 f1r, f2r = f1.linkrev(), f2.linkrev()
886 887 if f1r > f2r:
887 888 f1 = next(g1)
888 889 elif f2r > f1r:
889 890 f2 = next(g2)
890 891 else: # f1 and f2 point to files in the same linkrev
891 892 return f1 == f2 # true if they point to the same file
892 893 except StopIteration:
893 894 return False
894 895
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    Mutates data['copy'], data['fullcopy'], data['diverge'] and
    data['incomplete'] / data['incompletediverge'] in place; returns None.

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    # manifests of the four contexts involved
    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # f has the same node in source and base: nothing to merge
        return

    # walk f's ancestry on the source side, following renames; 'oc' is the
    # ancestor filectx, 'of' its path at that point
    of = None
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file added on the destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        if of in mta:
            if backwards or remotebase:
                # rename crosses the topological CA but the chain is not
                # fully resolvable from this side; record for later fixup
                data['incomplete'][of] = f
            else:
                for sf in seen:
                    if sf in mb:
                        if tca == base:
                            data['diverge'].setdefault(sf, []).append(f)
                        else:
                            data['incompletediverge'][sf] = [of, f]
                        return
987 988
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    copytraceconfig = repo.ui.config('experimental', 'copytrace')
    parsedbool = stringutil.parsebool(copytraceconfig)
    tracingenabled = (copytraceconfig == 'heuristics' or parsedbool
                      or parsedbool is None)
    if skiprev is not None and tracingenabled:
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    else:
        exclude = {}
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        if dst not in exclude:
            # copies.pathcopies returns backward renames, so dst might not
            # actually be in the dirstate
            wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now