# copies.py - copy detection for Mercurial
# Changeset r35421:7ddc1e96 (branch: default) by Martin von Zweigbergk:
# "copies: always respect matcher arg to _forwardcopies()"
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 scmutil,
21 21 util,
22 22 )
23 23
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.

    a and b are revision numbers; None stands for the working directory.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    # revisions are negated so the min-heap pops the *highest* revision
    # first, i.e. we walk the graph from the tips toward the roots
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            # the working directory's parents come from the dirstate
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                # nullrev sentinel: not a real parent
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
108 108
109 109 def _chain(src, dst, a, b):
110 110 '''chain two sets of copies a->b'''
111 111 t = a.copy()
112 112 for k, v in b.iteritems():
113 113 if v in t:
114 114 # found a chain
115 115 if t[v] != k:
116 116 # file wasn't renamed back to itself
117 117 t[k] = t[v]
118 118 if v not in dst:
119 119 # chain was a rename, not a copy
120 120 del t[v]
121 121 if v in src:
122 122 # file is a copy of an existing file
123 123 t[k] = v
124 124
125 125 # remove criss-crossed copies
126 126 for k, v in t.items():
127 127 if k in src and v in dst:
128 128 del t[k]
129 129
130 130 return t
131 131
132 132 def _tracefile(fctx, am, limit=-1):
133 133 '''return file context that is the ancestor of fctx present in ancestor
134 134 manifest am, stopping after the first ancestor lower than limit'''
135 135
136 136 for f in fctx.ancestors():
137 137 if am.get(f.path(), None) == f.filenode():
138 138 return f
139 139 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
140 140 return None
141 141
142 def _dirstatecopies(d):
142 def _dirstatecopies(d, match=None):
143 143 ds = d._repo.dirstate
144 144 c = ds.copies().copy()
145 145 for k in list(c):
146 if ds[k] not in 'anm':
146 if ds[k] not in 'anm' or (match and not match(k)):
147 147 del c[k]
148 148 return c
149 149
150 150 def _computeforwardmissing(a, b, match=None):
151 151 """Computes which files are in b but not a.
152 152 This is its own function so extensions can easily wrap this call to see what
153 153 files _forwardcopies is about to process.
154 154 """
155 155 ma = a.manifest()
156 156 mb = b.manifest()
157 157 return mb.filesnotin(ma, match=match)
158 158
def _forwardcopies(a, b, match=None):
    '''find {dst@b: src@a} copy mapping where a is an ancestor of b

    The optional matcher restricts which destination files are considered.
    '''
    # Note: the scraped source carried leftover diff residue (both the old
    # _dirstatecopies(w) calls and the new _dirstatecopies(w, match) calls);
    # this is the post-change version, which respects the matcher argument.

    # check for working copy
    w = None
    if b.rev() is None:
        w = b
        b = w.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(w, match)

    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        # share one ancestry context across all traced files to keep the
        # linkrev adjustment cheap (see _makegetfctx for details)
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()

    # combine copies from dirstate if necessary
    if w is not None:
        cm = _chain(a, w, cm, _dirstatecopies(w, match))

    return cm
208 208
209 209 def _backwardrenames(a, b):
210 210 if a._repo.ui.config('experimental', 'copytrace') == 'off':
211 211 return {}
212 212
213 213 # Even though we're not taking copies into account, 1:n rename situations
214 214 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
215 215 # arbitrarily pick one of the renames.
216 216 f = _forwardcopies(b, a)
217 217 r = {}
218 218 for k, v in sorted(f.iteritems()):
219 219 # remove copies
220 220 if v in a:
221 221 continue
222 222 r[v] = k
223 223 return r
224 224
def pathcopies(x, y, match=None):
    '''find {dst@y: src@x} copy mapping for directed compare'''
    if x == y or not x or not y:
        # identical or empty endpoints: nothing can have been copied
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # y descends from x: a forward walk is enough
        return _forwardcopies(x, y, match=match)
    if anc == y:
        # x descends from y: walk backward only
        return _backwardrenames(x, y)
    # diverged: walk back to the common ancestor, then forward to y
    return _chain(x, y, _backwardrenames(x, anc),
                  _forwardcopies(anc, y, match=match))
236 236
237 237 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
238 238 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
239 239 and c2. This is its own function so extensions can easily wrap this call
240 240 to see what files mergecopies is about to process.
241 241
242 242 Even though c1 and c2 are not used in this function, they are useful in
243 243 other extensions for being able to read the file nodes of the changed files.
244 244
245 245 "baselabel" can be passed to help distinguish the multiple computations
246 246 done in the graft case.
247 247 """
248 248 u1 = sorted(addedinm1 - addedinm2)
249 249 u2 = sorted(addedinm2 - addedinm1)
250 250
251 251 header = " unmatched files in %s"
252 252 if baselabel:
253 253 header += ' (from %s)' % baselabel
254 254 if u1:
255 255 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
256 256 if u2:
257 257 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
258 258 return u1, u2
259 259
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # no cached ancestry context yet: build one rooted at this
        # context's revision (or at its parents for the working context,
        # whose rev() is None)
        revs = [rev]
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        # cache it on the context so later lookups reuse the same object
        ctx._ancestrycontext = ac
    def makectx(f, n):
        # returns the filectx for file f at node n, wired up with the
        # shared ancestry context so linkrev adjustment stays cheap
        if n in node.wdirnodes:  # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize: the same (f, n) pair is frequently requested repeatedly
    return util.lrucachefunc(makectx)
294 294
295 295 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
296 296 """combine partial copy paths"""
297 297 remainder = {}
298 298 for f in copyfrom:
299 299 if f in copyto:
300 300 finalcopy[copyto[f]] = copyfrom[f]
301 301 del copyto[f]
302 302 for f in incompletediverge:
303 303 assert f not in diverge
304 304 ic = incompletediverge[f]
305 305 if ic[0] in copyto:
306 306 diverge[f] = [copyto[ic[0]], ic[1]]
307 307 else:
308 308 remainder[f] = ic
309 309 return remainder
310 310
def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'off':
        return {}, {}, {}, {}, {}
    elif copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        # default: the exhaustive (and slower) algorithm
        return _fullcopytracing(repo, c1, c2, base)
380 380
381 381 def _isfullcopytraceable(repo, c1, base):
382 382 """ Checks that if base, source and destination are all no-public branches,
383 383 if yes let's use the full copytrace algorithm for increased capabilities
384 384 since it will be fast enough.
385 385
386 386 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
387 387 number of changesets from c1 to base such that if number of changesets are
388 388 more than the limit, full copytracing algorithm won't be used.
389 389 """
390 390 if c1.rev() is None:
391 391 c1 = c1.p1()
392 392 if c1.mutable() and base.mutable():
393 393 sourcecommitlimit = repo.ui.configint('experimental',
394 394 'copytrace.sourcecommitlimit')
395 395 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
396 396 return commits < sourcecommitlimit
397 397 return False
398 398
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the same five dicts as mergecopies():
    (copy, movewithdir, diverge, renamedelete, dirmove).
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.descendant(wc) and base.descendant(base) are False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not (base == _c1 or base.descendant(_c1))
    dirtyc2 = not (base == _c2 or base.descendant(_c2))
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug("  searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb)
    addedinm2 = m2.filesnotin(mb)
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
                                      m2.filesnotin(mta),
                                      baselabel='topological common ancestor')

    # trace each unmatched file on each side back toward the ancestor
    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    # iterate over a list copy because entries may be deleted from diverge
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug("  unmatched files new in both:\n   %s\n"
                      % "\n   ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                              note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug("  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc + "/")
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc + "/")
        elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
            # files from the same directory moved to two different places
            invalid.add(dsrc + "/")
        else:
            # looks good so far
            dirmove[dsrc + "/"] = ddst + "/"

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    for d in dirmove:
        repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(("   pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
630 630
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
                    (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same five dicts as mergecopies(); only the first one
    (copies) is ever populated by this algorithm.
    """

    if c1.rev() is None:
        # working context: judge by its first parent
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # start from the copies recorded between base and c2, keeping only
    # those whose source also exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    # NOTE(review): 'filter' returning a list here matches Python 2 (this file
    # uses iteritems); under Python 3 it would be a one-shot iterator — confirm
    # the target runtime before reusing this pattern.
    missingfiles = filter(lambda f: f not in m1 and f in base and f in c2,
                          changedfiles)

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index the files added on the c1 side by basename and by dirname,
        # the two axes along which the heuristics look for candidates
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # in case of a rebase/graft, base may not be a common ancestor
        anc = c1.ancestor(c2)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2, anc.rev()):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
734 734
def _related(f1, f2, limit):
    """return a truthy value (the common filectx) if f1 and f2 originate
    from the same file, and False otherwise

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).

    limit is a revision number: once both walks drop below it, the copy
    is considered no longer relevant.
    """

    if f1 == f2:
        return f1 # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # a working-copy filectx has no linkrev: step down to its first
        # real (committed) ancestor before comparing revisions
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            # walk the two ancestor chains in lockstep, always advancing
            # whichever side sits at the higher revision, until they meet
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            elif f1 == f2:
                return f1 # a match
            elif f1r == f2r or f1r < limit or f2r < limit:
                return False # copy no longer relevant
    except StopIteration:
        # one chain ran out of ancestors without a meeting point
        return False
768 768
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    # 'seen' accumulates every name f has had along its ancestry
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2, tca.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    # the ancestry walk ended without a full match; record partial results
    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return
862 862
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    '''reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    '''
    exclude = {}
    if skiprev is not None:
        if repo.ui.config('experimental', 'copytrace') != 'off':
            # this pathcopies() call is O(size of the repo) during a rebase;
            # it is skipped when copytracing is off, but the rest of the
            # function still runs since it carries copy metadata across the
            # rebase anyway (and is much cheaper).
            exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # pathcopies can return backward renames, so dst might not actually
        # be in the dirstate; skip anything filtered out via skiprev
        if dst not in exclude:
            wctx[dst].markcopied(src)
# (removed: code-review web page footer residue)