##// END OF EJS Templates
copies: clean up _related logic...
Gábor Stefanik -
r37410:a4f02a17 default
parent child Browse files
Show More
@@ -1,883 +1,878 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 scmutil,
21 21 util,
22 22 )
23 23
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendant
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists (i.e. a and b share no common ancestor).
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    # on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    # - add unseen parents to side map
    # - clear side of any parent that has children on different sides
    # - track number of interesting revs that might still be on a side
    # - track the lowest interesting rev seen
    # - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    # store negated revs so the min-heap pops the highest rev first
    # (i.e. we walk the graph from the tips downwards)
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            # the working directory's parents come from the dirstate,
            # not from the changelog
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                # nullrev parent; nothing to walk
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @ 3 temporary amend commit for a1-amend
    # |
    # o 2 a1-amend
    # |
    # | o 1 a1
    # |/
    # o 0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendant of b or visa-versa.
    return min(limit, a, b)
109 109 def _chain(src, dst, a, b):
110 110 """chain two sets of copies a->b"""
111 111 t = a.copy()
112 112 for k, v in b.iteritems():
113 113 if v in t:
114 114 # found a chain
115 115 if t[v] != k:
116 116 # file wasn't renamed back to itself
117 117 t[k] = t[v]
118 118 if v not in dst:
119 119 # chain was a rename, not a copy
120 120 del t[v]
121 121 if v in src:
122 122 # file is a copy of an existing file
123 123 t[k] = v
124 124
125 125 # remove criss-crossed copies
126 126 for k, v in list(t.items()):
127 127 if k in src and v in dst:
128 128 del t[k]
129 129
130 130 return t
131 131
def _tracefile(fctx, am, limit=-1):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am, stopping after the first ancestor lower than limit

    Returns None if no such ancestor is found before the limit cutoff
    (or if the ancestor walk is exhausted).
    """

    for f in fctx.ancestors():
        # the ancestor is "present in am" when am records the same filenode
        # for this path
        if am.get(f.path(), None) == f.filenode():
            return f
        # limit is an optimization only; both linkrev and (adjusted) rev
        # must fall below it before we give up searching
        if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
            return None
141 141
142 142 def _dirstatecopies(d, match=None):
143 143 ds = d._repo.dirstate
144 144 c = ds.copies().copy()
145 145 for k in list(c):
146 146 if ds[k] not in 'anm' or (match and not match(k)):
147 147 del c[k]
148 148 return c
149 149
def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    # delegate the set difference to the manifest, which can do it lazily
    return mb.filesnotin(ma, match=match)
158 158
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        # no common ancestor found; disable the limit optimization
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}  # {dst in b: src in a}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        # b is a non-merge child of a: only b.files() can differ
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files to keep linkrev
    # adjustment cheap (see _makegetfctx for the rationale)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()
    return cm
193 193
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    # check for working copy
    if b.rev() is None:
        if a == b.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(b, match)

        cm = _committedforwardcopies(a, b.p1(), match)
        # combine copies from dirstate if necessary
        return _chain(a, b, cm, _dirstatecopies(b, match))
    return _committedforwardcopies(a, b, match)
207 207
def _backwardrenames(a, b):
    """find {src@b: dst@a} rename mapping where b is a descendant of a

    Copies (as opposed to renames) are deliberately dropped: if the
    source still exists in a, the relationship is not a rename.
    """
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    # sort for a deterministic pick among 1:n renames
    for k, v in sorted(f.iteritems()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
223 223
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    if x == y or not x or not y:
        # same context or missing endpoint: no copies by definition
        return {}
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: simple forward walk
        return _forwardcopies(x, y, match=match)
    if a == y:
        # y is an ancestor of x: walk backwards (renames only)
        return _backwardrenames(x, y)
    # diverged: go back to the common ancestor, then forward again
    return _chain(x, y, _backwardrenames(x, a),
                  _forwardcopies(a, y, match=match))
235 235
236 236 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
237 237 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
238 238 and c2. This is its own function so extensions can easily wrap this call
239 239 to see what files mergecopies is about to process.
240 240
241 241 Even though c1 and c2 are not used in this function, they are useful in
242 242 other extensions for being able to read the file nodes of the changed files.
243 243
244 244 "baselabel" can be passed to help distinguish the multiple computations
245 245 done in the graft case.
246 246 """
247 247 u1 = sorted(addedinm1 - addedinm2)
248 248 u2 = sorted(addedinm2 - addedinm1)
249 249
250 250 header = " unmatched files in %s"
251 251 if baselabel:
252 252 header += ' (from %s)' % baselabel
253 253 if u1:
254 254 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
255 255 if u2:
256 256 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
257 257 return u1, u2
258 258
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    # reuse an existing ancestry context when the caller set one up
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            # working context: anchor the ancestry on its parents instead
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirnodes: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # cache: _checkcopies may ask for the same (f, n) repeatedly
    return util.lrucachefunc(makectx)
293 293
294 294 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
295 295 """combine partial copy paths"""
296 296 remainder = {}
297 297 for f in copyfrom:
298 298 if f in copyto:
299 299 finalcopy[copyto[f]] = copyfrom[f]
300 300 del copyto[f]
301 301 for f in incompletediverge:
302 302 assert f not in diverge
303 303 ic = incompletediverge[f]
304 304 if ic[0] in copyto:
305 305 diverge[f] = [copyto[ic[0]], ic[1]]
306 306 else:
307 307 remainder[f] = ic
308 308 return remainder
309 309
def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o ---> 4 another commit
    |
    | o ---> 3 commit that modifies a.txt
    | /
    o / ---> 2 commit that moves a.txt to b.txt
    |/
    o ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'off':
        return {}, {}, {}, {}, {}
    elif copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        # default ('on' or anything else): full, exact copytracing
        return _fullcopytracing(repo, c1, c2, base)
379 379
def _isfullcopytraceable(repo, c1, base):
    """ Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        # working context: judge by its committed parent
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        # mutable implies non-public, so the range base::c1 is small enough
        # to consider full copytracing -- but still cap it by config
        sourcecommitlimit = repo.ui.configint('experimental',
                                              'copytrace.sourcecommitlimit')
        commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False
397 397
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the same five dicts as mergecopies().
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.descendant(wc) and base.descendant(base) are False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not (base == _c1 or base.descendant(_c1))
    dirtyc2 = not (base == _c2 or base.descendant(_c2))
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        # topological common ancestor, distinct from the overridden base
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb)
    addedinm2 = m2.filesnotin(mb)
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
                                      m2.filesnotin(mta),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    if dirtyc1:
        # incomplete copies found on c1's side complete against c2's, and
        # vice-versa below
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc + "/")
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc + "/")
        elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
            # files from the same directory moved to two different places
            invalid.add(dsrc + "/")
        else:
            # looks good so far
            dirmove[dsrc + "/"] = ddst + "/"

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(("   pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
629 629
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # work on committed revisions only
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    # collect all files touched between base and c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep exact copies recorded in base..c2 whose source still exists in c1
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index files added on c1's side by basename and dirname, so missing
        # files can be matched against rename candidates cheaply
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
733 730
734 def _related(f1, f2, limit):
731 def _related(f1, f2):
735 732 """return True if f1 and f2 filectx have a common ancestor
736 733
737 734 Walk back to common ancestor to see if the two files originate
738 735 from the same file. Since workingfilectx's rev() is None it messes
739 736 up the integer comparison logic, hence the pre-step check for
740 737 None (f1 and f2 can only be workingfilectx's initially).
741 738 """
742 739
743 740 if f1 == f2:
744 741 return f1 # a match
745 742
746 743 g1, g2 = f1.ancestors(), f2.ancestors()
747 744 try:
748 745 f1r, f2r = f1.linkrev(), f2.linkrev()
749 746
750 747 if f1r is None:
751 748 f1 = next(g1)
752 749 if f2r is None:
753 750 f2 = next(g2)
754 751
755 752 while True:
756 753 f1r, f2r = f1.linkrev(), f2.linkrev()
757 754 if f1r > f2r:
758 755 f1 = next(g1)
759 756 elif f2r > f1r:
760 757 f2 = next(g2)
761 elif f1 == f2:
762 return f1 # a match
763 elif f1r == f2r or f1r < limit or f2r < limit:
764 return False # copy no longer relevant
758 else: # f1 and f2 point to files in the same linkrev
759 return f1 == f2 # true if they point to the same file
765 760 except StopIteration:
766 761 return False
767 762
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    seen = {f}  # paths already encountered along the rename chain
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        if of in mta:
            if backwards or remotebase:
                data['incomplete'][of] = f
            else:
                for sf in seen:
                    if sf in mb:
                        if tca == base:
                            data['diverge'].setdefault(sf, []).append(f)
                        else:
                            data['incompletediverge'][sf] = [of, f]
                        return
861 856
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    if (skiprev is not None and
        repo.ui.config('experimental', 'copytrace') != 'off'):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now