##// END OF EJS Templates
copies: correctly skip directories that have already been considered...
Kyle Lippincott -
r39299:eebd5918 default
parent child Browse files
Show More
@@ -1,883 +1,885
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 scmutil,
21 21 util,
22 22 )
23 23
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.

    'a' and 'b' are revision numbers; None stands for the working directory
    (represented internally by the pseudo rev len(changelog)).
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    # revisions are negated so the min-heap pops the *highest* revision
    # first, i.e. we walk the graph from the tips downwards
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            # the working directory has no revlog entry; use dirstate parents
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @ 3 temporary amend commit for a1-amend
    # |
    # o 2 a1-amend
    # |
    # | o 1 a1
    # |/
    # o 0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
109 109 def _chain(src, dst, a, b):
110 110 """chain two sets of copies a->b"""
111 111 t = a.copy()
112 112 for k, v in b.iteritems():
113 113 if v in t:
114 114 # found a chain
115 115 if t[v] != k:
116 116 # file wasn't renamed back to itself
117 117 t[k] = t[v]
118 118 if v not in dst:
119 119 # chain was a rename, not a copy
120 120 del t[v]
121 121 if v in src:
122 122 # file is a copy of an existing file
123 123 t[k] = v
124 124
125 125 # remove criss-crossed copies
126 126 for k, v in list(t.items()):
127 127 if k in src and v in dst:
128 128 del t[k]
129 129
130 130 return t
131 131
def _tracefile(fctx, am, limit=-1):
    """walk fctx's ancestry for the version recorded in manifest 'am'

    Returns the ancestor file context whose (path, filenode) appears in
    'am', or None once an ancestor drops below 'limit' (when limit >= 0).
    """
    for ancestor in fctx.ancestors():
        path = ancestor.path()
        if am.get(path, None) == ancestor.filenode():
            return ancestor
        if 0 <= limit and ancestor.linkrev() < limit and ancestor.rev() < limit:
            return None
def _dirstatecopies(d, match=None):
    """return the copy records from d's dirstate

    Records whose file state is not added/normal/merged ('anm'), or that a
    given matcher rejects, are dropped.
    """
    ds = d._repo.dirstate
    return dict((k, v) for k, v in ds.copies().items()
                if ds[k] in 'anm' and (not match or match(k)))
def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    return b.manifest().filesnotin(a.manifest(), match=match)
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        # no common ancestor: disable the limit optimization
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files so linkrev
    # adjustment stays cheap (see _makegetfctx for the rationale)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()
    return cm
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if b.rev() is not None:
        # both endpoints are committed changesets
        return _committedforwardcopies(a, b, match)

    # b is the working copy
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b, match)

    committed = _committedforwardcopies(a, b.p1(), match)
    # combine copies from dirstate if necessary
    return _chain(a, b, committed, _dirstatecopies(b, match))
def _backwardrenames(a, b):
    """trace renames from b backwards to a, ignoring plain copies

    Even though copies are not taken into account, 1:n rename situations
    can still exist (e.g. hg cp a b; hg mv a c); one of the renames is
    then picked arbitrarily but deterministically (via sorting).
    """
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    renames = {}
    for dest, source in sorted(_forwardcopies(b, a).iteritems()):
        if source in a:
            # source still exists in a: this was a copy, skip it
            continue
        renames[source] = dest
    return renames
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    if x == y or not x or not y:
        # degenerate comparison: nothing to trace
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: trace forward only
        return _forwardcopies(x, y, match=match)
    if anc == y:
        # y is an ancestor of x: trace backward only
        return _backwardrenames(x, y)
    # otherwise walk back to the common ancestor, then forward to y
    return _chain(x, y, _backwardrenames(x, anc),
                  _forwardcopies(anc, y, match=match))
236 236 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
237 237 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
238 238 and c2. This is its own function so extensions can easily wrap this call
239 239 to see what files mergecopies is about to process.
240 240
241 241 Even though c1 and c2 are not used in this function, they are useful in
242 242 other extensions for being able to read the file nodes of the changed files.
243 243
244 244 "baselabel" can be passed to help distinguish the multiple computations
245 245 done in the graft case.
246 246 """
247 247 u1 = sorted(addedinm1 - addedinm2)
248 248 u2 = sorted(addedinm2 - addedinm1)
249 249
250 250 header = " unmatched files in %s"
251 251 if baselabel:
252 252 header += ' (from %s)' % baselabel
253 253 if u1:
254 254 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
255 255 if u2:
256 256 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
257 257
258 258 narrowmatch = repo.narrowmatch()
259 259 if not narrowmatch.always():
260 260 u1 = [f for f in u1 if narrowmatch(f)]
261 261 u2 = [f for f in u2 if narrowmatch(f)]
262 262 return u1, u2
263 263
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            # working directory: seed the ancestry from its parents instead
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        # cache it on the context so subsequent calls reuse it
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize: _checkcopies may request the same (f, n) repeatedly
    return util.lrucachefunc(makectx)
299 299 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
300 300 """combine partial copy paths"""
301 301 remainder = {}
302 302 for f in copyfrom:
303 303 if f in copyto:
304 304 finalcopy[copyto[f]] = copyfrom[f]
305 305 del copyto[f]
306 306 for f in incompletediverge:
307 307 assert f not in diverge
308 308 ic = incompletediverge[f]
309 309 if ic[0] in copyto:
310 310 diverge[f] = [copyto[ic[0]], ic[1]]
311 311 else:
312 312 remainder[f] = ic
313 313 return remainder
314 314
def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'off':
        return {}, {}, {}, {}, {}
    elif copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
def _isfullcopytraceable(repo, c1, base):
    """Decide whether full copytracing is affordable despite heuristics mode.

    Full copytracing is chosen when base and c1 (the source and destination)
    are both mutable (non-public) and the number of changesets from base to
    c1 stays under `experimental.copytrace.sourcecommitlimit`, since the
    full algorithm is then fast enough and covers copies the heuristics
    could miss.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        return False
    sourcecommitlimit = repo.ui.configint('experimental',
                                          'copytrace.sourcecommitlimit')
    commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
    return commits < sourcecommitlimit
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb)
    addedinm2 = m2.filesnotin(mb)
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
                                      m2.filesnotin(mta),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    # join the partial copy paths found on each side; the "dirty" side
    # holds the 'copyfrom' half of each pair
    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # append the trailing slash only now, so that the invalid-set membership
    # tests above compared plain directory names consistently
    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # work on committed revisions only
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # collect the files touched anywhere between base and c2
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep the forward copies whose source still exists in c1
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index c1-only files by basename and by dirname so candidates
        # matching either heuristic can be found cheaply
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
736 738 def _related(f1, f2):
737 739 """return True if f1 and f2 filectx have a common ancestor
738 740
739 741 Walk back to common ancestor to see if the two files originate
740 742 from the same file. Since workingfilectx's rev() is None it messes
741 743 up the integer comparison logic, hence the pre-step check for
742 744 None (f1 and f2 can only be workingfilectx's initially).
743 745 """
744 746
745 747 if f1 == f2:
746 748 return f1 # a match
747 749
748 750 g1, g2 = f1.ancestors(), f2.ancestors()
749 751 try:
750 752 f1r, f2r = f1.linkrev(), f2.linkrev()
751 753
752 754 if f1r is None:
753 755 f1 = next(g1)
754 756 if f2r is None:
755 757 f2 = next(g2)
756 758
757 759 while True:
758 760 f1r, f2r = f1.linkrev(), f2.linkrev()
759 761 if f1r > f2r:
760 762 f1 = next(g1)
761 763 elif f2r > f1r:
762 764 f2 = next(g2)
763 765 else: # f1 and f2 point to files in the same linkrev
764 766 return f1 == f2 # true if they point to the same file
765 767 except StopIteration:
766 768 return False
767 769
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    # 'seen' records every path the file has been known under along the walk,
    # starting with f itself
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    # the walk ended without finding a complete copy; record a partial one
    # if the last name seen exists in the topological common ancestor
    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    if skiprev is None:
        exclude = {}
    elif repo.ui.config('experimental', 'copytrace') == 'off':
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = {}
    else:
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst not in exclude:
            wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now