##// END OF EJS Templates
copies: delete misplaced comment...
Martin von Zweigbergk -
r42445:d1c7446d default
parent child Browse files
Show More
@@ -1,803 +1,801 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 util,
21 21 )
22 22 from .utils import (
23 23 stringutil,
24 24 )
25 25
26 26 def _findlimit(repo, ctxa, ctxb):
27 27 """
28 28 Find the last revision that needs to be checked to ensure that a full
29 29 transitive closure for file copies can be properly calculated.
30 30 Generally, this means finding the earliest revision number that's an
31 31 ancestor of a or b but not both, except when a or b is a direct descendent
32 32 of the other, in which case we can return the minimum revnum of a and b.
33 33 """
34 34
35 35 # basic idea:
36 36 # - mark a and b with different sides
37 37 # - if a parent's children are all on the same side, the parent is
38 38 # on that side, otherwise it is on no side
39 39 # - walk the graph in topological order with the help of a heap;
40 40 # - add unseen parents to side map
41 41 # - clear side of any parent that has children on different sides
42 42 # - track number of interesting revs that might still be on a side
43 43 # - track the lowest interesting rev seen
44 44 # - quit when interesting revs is zero
45 45
46 46 cl = repo.changelog
47 47 wdirparents = None
48 48 a = ctxa.rev()
49 49 b = ctxb.rev()
50 50 if a is None:
51 51 wdirparents = (ctxa.p1(), ctxa.p2())
52 52 a = node.wdirrev
53 53 if b is None:
54 54 assert not wdirparents
55 55 wdirparents = (ctxb.p1(), ctxb.p2())
56 56 b = node.wdirrev
57 57
58 58 side = {a: -1, b: 1}
59 59 visit = [-a, -b]
60 60 heapq.heapify(visit)
61 61 interesting = len(visit)
62 62 limit = node.wdirrev
63 63
64 64 while interesting:
65 65 r = -heapq.heappop(visit)
66 66 if r == node.wdirrev:
67 67 parents = [pctx.rev() for pctx in wdirparents]
68 68 else:
69 69 parents = cl.parentrevs(r)
70 70 if parents[1] == node.nullrev:
71 71 parents = parents[:1]
72 72 for p in parents:
73 73 if p not in side:
74 74 # first time we see p; add it to visit
75 75 side[p] = side[r]
76 76 if side[p]:
77 77 interesting += 1
78 78 heapq.heappush(visit, -p)
79 79 elif side[p] and side[p] != side[r]:
80 80 # p was interesting but now we know better
81 81 side[p] = 0
82 82 interesting -= 1
83 83 if side[r]:
84 84 limit = r # lowest rev visited
85 85 interesting -= 1
86 86
87 87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 88 # 1/ File 'a0' committed
89 89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 90 # 3/ Move back to first commit
91 91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 93 #
94 94 # During the amend in step five, we will be in this state:
95 95 #
96 96 # @ 3 temporary amend commit for a1-amend
97 97 # |
98 98 # o 2 a1-amend
99 99 # |
100 100 # | o 1 a1
101 101 # |/
102 102 # o 0 a0
103 103 #
104 104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 105 # yet the filelog has the copy information in rev 1 and we will not look
106 106 # back far enough unless we also look at the a and b as candidates.
107 107 # This only occurs when a is a descendent of b or visa-versa.
108 108 return min(limit, a, b)
109 109
110 110 def _chain(src, dst, a, b):
111 111 """chain two sets of copies 'a' and 'b'"""
112 112
113 113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
114 114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
115 115 # following table (not including trivial cases). For example, case 2 is
116 116 # where a file existed in 'src' and remained under that name in 'mid' and
117 117 # then was renamed between 'mid' and 'dst'.
118 118 #
119 119 # case src mid dst result
120 120 # 1 x y - -
121 121 # 2 x y y x->y
122 122 # 3 x y x -
123 123 # 4 x y z x->z
124 124 # 5 - x y -
125 125 # 6 x x y x->y
126 126
127 127 # Initialize result ('t') from 'a'. This catches cases 1 & 2. We'll remove
128 128 # case 1 later. We'll also catch cases 3 & 4 here. Case 4 will be
129 129 # overwritten later, and case 3 will be removed later.
130 130 t = a.copy()
131 131 for k, v in b.iteritems():
132 132 if v in t:
133 133 # Found a chain, i.e. cases 3 & 4. We'll remove case 3 later.
134 134 t[k] = t[v]
135 135 else:
136 136 # Renamed only in 'b', i.e. cases 5 & 6. We'll remove case 5 later.
137 137 t[k] = v
138 138
139 139 for k, v in list(t.items()):
140 140 # remove copies from files that didn't exist, i.e. case 5
141 141 if v not in src:
142 142 del t[k]
143 143 # remove criss-crossed copies, i.e. case 3
144 144 elif k in src and v in dst:
145 145 del t[k]
146 146 # remove copies to files that were then removed, i.e. case 1
147 147 # and file 'y' in cases 3 & 4 (in case of rename)
148 148 elif k not in dst:
149 149 del t[k]
150 150
151 151 return t
152 152
153 153 def _tracefile(fctx, am, limit):
154 154 """return file context that is the ancestor of fctx present in ancestor
155 155 manifest am, stopping after the first ancestor lower than limit"""
156 156
157 157 for f in fctx.ancestors():
158 158 if am.get(f.path(), None) == f.filenode():
159 159 return f
160 160 if not f.isintroducedafter(limit):
161 161 return None
162 162
163 163 def _dirstatecopies(repo, match=None):
164 164 ds = repo.dirstate
165 165 c = ds.copies().copy()
166 166 for k in list(c):
167 167 if ds[k] not in 'anm' or (match and not match(k)):
168 168 del c[k]
169 169 return c
170 170
171 171 def _computeforwardmissing(a, b, match=None):
172 172 """Computes which files are in b but not a.
173 173 This is its own function so extensions can easily wrap this call to see what
174 174 files _forwardcopies is about to process.
175 175 """
176 176 ma = a.manifest()
177 177 mb = b.manifest()
178 178 return mb.filesnotin(ma, match=match)
179 179
180 180 def usechangesetcentricalgo(repo):
181 181 """Checks if we should use changeset-centric copy algorithms"""
182 182 return (repo.ui.config('experimental', 'copies.read-from') in
183 183 ('changeset-only', 'compatibility'))
184 184
185 185 def _committedforwardcopies(a, b, match):
186 186 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
187 187 # files might have to be traced back to the fctx parent of the last
188 188 # one-side-only changeset, but not further back than that
189 189 repo = a._repo
190 190
191 191 if usechangesetcentricalgo(repo):
192 192 return _changesetforwardcopies(a, b, match)
193 193
194 194 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
195 195 dbg = repo.ui.debug
196 196 if debug:
197 197 dbg('debug.copies: looking into rename from %s to %s\n'
198 198 % (a, b))
199 199 limit = _findlimit(repo, a, b)
200 200 if debug:
201 201 dbg('debug.copies: search limit: %d\n' % limit)
202 202 am = a.manifest()
203 203
204 204 # find where new files came from
205 205 # we currently don't try to find where old files went, too expensive
206 206 # this means we can miss a case like 'hg rm b; hg cp a b'
207 207 cm = {}
208 208
209 209 # Computing the forward missing is quite expensive on large manifests, since
210 210 # it compares the entire manifests. We can optimize it in the common use
211 211 # case of computing what copies are in a commit versus its parent (like
212 212 # during a rebase or histedit). Note, we exclude merge commits from this
213 213 # optimization, since the ctx.files() for a merge commit is not correct for
214 214 # this comparison.
215 215 forwardmissingmatch = match
216 216 if b.p1() == a and b.p2().node() == node.nullid:
217 217 filesmatcher = matchmod.exact(b.files())
218 218 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
219 219 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
220 220
221 221 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
222 222
223 223 if debug:
224 224 dbg('debug.copies: missing files to search: %d\n' % len(missing))
225 225
226 226 for f in sorted(missing):
227 227 if debug:
228 228 dbg('debug.copies: tracing file: %s\n' % f)
229 229 fctx = b[f]
230 230 fctx._ancestrycontext = ancestrycontext
231 231
232 232 if debug:
233 233 start = util.timer()
234 234 ofctx = _tracefile(fctx, am, limit)
235 235 if ofctx:
236 236 if debug:
237 237 dbg('debug.copies: rename of: %s\n' % ofctx._path)
238 238 cm[f] = ofctx.path()
239 239 if debug:
240 240 dbg('debug.copies: time: %f seconds\n'
241 241 % (util.timer() - start))
242 242 return cm
243 243
244 244 def _changesetforwardcopies(a, b, match):
245 245 if a.rev() == node.nullrev:
246 246 return {}
247 247
248 248 repo = a.repo()
249 249 children = {}
250 250 cl = repo.changelog
251 251 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
252 252 for r in missingrevs:
253 253 for p in cl.parentrevs(r):
254 254 if p == node.nullrev:
255 255 continue
256 256 if p not in children:
257 257 children[p] = [r]
258 258 else:
259 259 children[p].append(r)
260 260
261 261 roots = set(children) - set(missingrevs)
262 262 # 'work' contains 3-tuples of a (revision number, parent number, copies).
263 263 # The parent number is only used for knowing which parent the copies dict
264 264 # came from.
265 265 work = [(r, 1, {}) for r in roots]
266 266 heapq.heapify(work)
267 267 while work:
268 268 r, i1, copies1 = heapq.heappop(work)
269 269 if work and work[0][0] == r:
270 270 # We are tracing copies from both parents
271 271 r, i2, copies2 = heapq.heappop(work)
272 272 copies = {}
273 273 ctx = repo[r]
274 274 p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
275 275 allcopies = set(copies1) | set(copies2)
276 276 # TODO: perhaps this filtering should be done as long as ctx
277 277 # is merge, whether or not we're tracing from both parent.
278 278 for dst in allcopies:
279 279 if not match(dst):
280 280 continue
281 281 if dst not in copies2:
282 282 # Copied on p1 side: mark as copy from p1 side if it didn't
283 283 # already exist on p2 side
284 284 if dst not in p2man:
285 285 copies[dst] = copies1[dst]
286 286 elif dst not in copies1:
287 287 # Copied on p2 side: mark as copy from p2 side if it didn't
288 288 # already exist on p1 side
289 289 if dst not in p1man:
290 290 copies[dst] = copies2[dst]
291 291 else:
292 292 # Copied on both sides: mark as copy from p1 side
293 293 copies[dst] = copies1[dst]
294 294 else:
295 295 copies = copies1
296 296 if r == b.rev():
297 297 return copies
298 298 for c in children[r]:
299 299 childctx = repo[c]
300 300 if r == childctx.p1().rev():
301 301 parent = 1
302 302 childcopies = childctx.p1copies()
303 303 else:
304 304 assert r == childctx.p2().rev()
305 305 parent = 2
306 306 childcopies = childctx.p2copies()
307 307 if not match.always():
308 308 childcopies = {dst: src for dst, src in childcopies.items()
309 309 if match(dst)}
310 310 childcopies = _chain(a, childctx, copies, childcopies)
311 311 heapq.heappush(work, (c, parent, childcopies))
312 312 assert False
313 313
314 314 def _forwardcopies(a, b, match=None):
315 315 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
316 316
317 317 match = a.repo().narrowmatch(match)
318 318 # check for working copy
319 319 if b.rev() is None:
320 320 if a == b.p1():
321 321 # short-circuit to avoid issues with merge states
322 322 return _dirstatecopies(b._repo, match)
323 323
324 324 cm = _committedforwardcopies(a, b.p1(), match)
325 325 # combine copies from dirstate if necessary
326 326 return _chain(a, b, cm, _dirstatecopies(b._repo, match))
327 327 return _committedforwardcopies(a, b, match)
328 328
329 329 def _backwardrenames(a, b, match):
330 330 if a._repo.ui.config('experimental', 'copytrace') == 'off':
331 331 return {}
332 332
333 333 # Even though we're not taking copies into account, 1:n rename situations
334 334 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
335 335 # arbitrarily pick one of the renames.
336 336 # We don't want to pass in "match" here, since that would filter
337 337 # the destination by it. Since we're reversing the copies, we want
338 338 # to filter the source instead.
339 339 f = _forwardcopies(b, a)
340 340 r = {}
341 341 for k, v in sorted(f.iteritems()):
342 342 if match and not match(v):
343 343 continue
344 344 # remove copies
345 345 if v in a:
346 346 continue
347 347 r[v] = k
348 348 return r
349 349
350 350 def pathcopies(x, y, match=None):
351 351 """find {dst@y: src@x} copy mapping for directed compare"""
352 352 repo = x._repo
353 353 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
354 354 if debug:
355 355 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
356 356 % (x, y))
357 357 if x == y or not x or not y:
358 358 return {}
359 359 a = y.ancestor(x)
360 360 if a == x:
361 361 if debug:
362 362 repo.ui.debug('debug.copies: search mode: forward\n')
363 363 return _forwardcopies(x, y, match=match)
364 364 if a == y:
365 365 if debug:
366 366 repo.ui.debug('debug.copies: search mode: backward\n')
367 367 return _backwardrenames(x, y, match=match)
368 368 if debug:
369 369 repo.ui.debug('debug.copies: search mode: combined\n')
370 370 return _chain(x, y, _backwardrenames(x, a, match=match),
371 371 _forwardcopies(a, y, match=match))
372 372
373 373 def mergecopies(repo, c1, c2, base):
374 374 """
375 375 Finds moves and copies between context c1 and c2 that are relevant for
376 376 merging. 'base' will be used as the merge base.
377 377
378 378 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
379 379 files that were moved/ copied in one merge parent and modified in another.
380 380 For example:
381 381
382 382 o ---> 4 another commit
383 383 |
384 384 | o ---> 3 commit that modifies a.txt
385 385 | /
386 386 o / ---> 2 commit that moves a.txt to b.txt
387 387 |/
388 388 o ---> 1 merge base
389 389
390 390 If we try to rebase revision 3 on revision 4, since there is no a.txt in
391 391 revision 4, and if user have copytrace disabled, we prints the following
392 392 message:
393 393
394 394 ```other changed <file> which local deleted```
395 395
396 396 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
397 397 "dirmove".
398 398
399 399 "copy" is a mapping from destination name -> source name,
400 400 where source is in c1 and destination is in c2 or vice-versa.
401 401
402 402 "movewithdir" is a mapping from source name -> destination name,
403 403 where the file at source present in one context but not the other
404 404 needs to be moved to destination by the merge process, because the
405 405 other context moved the directory it is in.
406 406
407 407 "diverge" is a mapping of source name -> list of destination names
408 408 for divergent renames.
409 409
410 410 "renamedelete" is a mapping of source name -> list of destination
411 411 names for files deleted in c1 that were renamed in c2 or vice-versa.
412 412
413 413 "dirmove" is a mapping of detected source dir -> destination dir renames.
414 414 This is needed for handling changes to new files previously grafted into
415 415 renamed directories.
416 416
417 417 This function calls different copytracing algorithms based on config.
418 418 """
419 419 # avoid silly behavior for update from empty dir
420 420 if not c1 or not c2 or c1 == c2:
421 421 return {}, {}, {}, {}, {}
422 422
423 423 narrowmatch = c1.repo().narrowmatch()
424 424
425 425 # avoid silly behavior for parent -> working dir
426 426 if c2.node() is None and c1.node() == repo.dirstate.p1():
427 427 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
428 428
429 429 copytracing = repo.ui.config('experimental', 'copytrace')
430 430 if stringutil.parsebool(copytracing) is False:
431 431 # stringutil.parsebool() returns None when it is unable to parse the
432 432 # value, so we should rely on making sure copytracing is on such cases
433 433 return {}, {}, {}, {}, {}
434 434
435 435 if usechangesetcentricalgo(repo):
436 436 # The heuristics don't make sense when we need changeset-centric algos
437 437 return _fullcopytracing(repo, c1, c2, base)
438 438
439 439 # Copy trace disabling is explicitly below the node == p1 logic above
440 440 # because the logic above is required for a simple copy to be kept across a
441 441 # rebase.
442 442 if copytracing == 'heuristics':
443 443 # Do full copytracing if only non-public revisions are involved as
444 444 # that will be fast enough and will also cover the copies which could
445 445 # be missed by heuristics
446 446 if _isfullcopytraceable(repo, c1, base):
447 447 return _fullcopytracing(repo, c1, c2, base)
448 448 return _heuristicscopytracing(repo, c1, c2, base)
449 449 else:
450 450 return _fullcopytracing(repo, c1, c2, base)
451 451
452 452 def _isfullcopytraceable(repo, c1, base):
453 453 """ Checks that if base, source and destination are all no-public branches,
454 454 if yes let's use the full copytrace algorithm for increased capabilities
455 455 since it will be fast enough.
456 456
457 457 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
458 458 number of changesets from c1 to base such that if number of changesets are
459 459 more than the limit, full copytracing algorithm won't be used.
460 460 """
461 461 if c1.rev() is None:
462 462 c1 = c1.p1()
463 463 if c1.mutable() and base.mutable():
464 464 sourcecommitlimit = repo.ui.configint('experimental',
465 465 'copytrace.sourcecommitlimit')
466 466 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
467 467 return commits < sourcecommitlimit
468 468 return False
469 469
470 470 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
471 471 copy, renamedelete):
472 472 if src not in m2:
473 473 # deleted on side 2
474 474 if src not in m1:
475 475 # renamed on side 1, deleted on side 2
476 476 renamedelete[src] = dsts1
477 477 elif m2[src] != mb[src]:
478 478 if not _related(c2[src], base[src]):
479 479 return
480 480 # modified on side 2
481 481 for dst in dsts1:
482 482 if dst not in m2:
483 483 # dst not added on side 2 (handle as regular
484 484 # "both created" case in manifestmerge otherwise)
485 485 copy[dst] = src
486 486
487 487 def _fullcopytracing(repo, c1, c2, base):
488 488 """ The full copytracing algorithm which finds all the new files that were
489 489 added from merge base up to the top commit and for each file it checks if
490 490 this file was copied from another file.
491 491
492 492 This is pretty slow when a lot of changesets are involved but will track all
493 493 the copies.
494 494 """
495 495 m1 = c1.manifest()
496 496 m2 = c2.manifest()
497 497 mb = base.manifest()
498 498
499 499 copies1 = pathcopies(base, c1)
500 500 copies2 = pathcopies(base, c2)
501 501
502 502 inversecopies1 = {}
503 503 inversecopies2 = {}
504 504 for dst, src in copies1.items():
505 505 inversecopies1.setdefault(src, []).append(dst)
506 506 for dst, src in copies2.items():
507 507 inversecopies2.setdefault(src, []).append(dst)
508 508
509 509 copy = {}
510 510 diverge = {}
511 511 renamedelete = {}
512 512 allsources = set(inversecopies1) | set(inversecopies2)
513 513 for src in allsources:
514 514 dsts1 = inversecopies1.get(src)
515 515 dsts2 = inversecopies2.get(src)
516 516 if dsts1 and dsts2:
517 517 # copied/renamed on both sides
518 518 if src not in m1 and src not in m2:
519 519 # renamed on both sides
520 520 dsts1 = set(dsts1)
521 521 dsts2 = set(dsts2)
522 522 # If there's some overlap in the rename destinations, we
523 523 # consider it not divergent. For example, if side 1 copies 'a'
524 524 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
525 525 # and 'd' and deletes 'a'.
526 526 if dsts1 & dsts2:
527 527 for dst in (dsts1 & dsts2):
528 528 copy[dst] = src
529 529 else:
530 530 diverge[src] = sorted(dsts1 | dsts2)
531 531 elif src in m1 and src in m2:
532 532 # copied on both sides
533 533 dsts1 = set(dsts1)
534 534 dsts2 = set(dsts2)
535 535 for dst in (dsts1 & dsts2):
536 536 copy[dst] = src
537 537 # TODO: Handle cases where it was renamed on one side and copied
538 538 # on the other side
539 539 elif dsts1:
540 540 # copied/renamed only on side 1
541 541 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
542 542 copy, renamedelete)
543 543 elif dsts2:
544 544 # copied/renamed only on side 2
545 545 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
546 546 copy, renamedelete)
547 547
548 548 renamedeleteset = set()
549 549 divergeset = set()
550 550 for dsts in diverge.values():
551 551 divergeset.update(dsts)
552 552 for dsts in renamedelete.values():
553 553 renamedeleteset.update(dsts)
554 554
555 555 # find interesting file sets from manifests
556 556 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
557 557 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
558 558 u1 = sorted(addedinm1 - addedinm2)
559 559 u2 = sorted(addedinm2 - addedinm1)
560 560
561 561 header = " unmatched files in %s"
562 562 if u1:
563 563 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
564 564 if u2:
565 565 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
566 566
567 567 fullcopy = copies1.copy()
568 568 fullcopy.update(copies2)
569 569 if not fullcopy:
570 570 return copy, {}, diverge, renamedelete, {}
571 571
572 572 if repo.ui.debugflag:
573 573 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
574 574 "% = renamed and deleted):\n")
575 575 for f in sorted(fullcopy):
576 576 note = ""
577 577 if f in copy:
578 578 note += "*"
579 579 if f in divergeset:
580 580 note += "!"
581 581 if f in renamedeleteset:
582 582 note += "%"
583 583 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
584 584 note))
585 585 del divergeset
586 586
587 587 repo.ui.debug(" checking for directory renames\n")
588 588
589 589 # generate a directory move map
590 590 d1, d2 = c1.dirs(), c2.dirs()
591 591 # Hack for adding '', which is not otherwise added, to d1 and d2
592 592 d1.addpath('/')
593 593 d2.addpath('/')
594 594 invalid = set()
595 595 dirmove = {}
596 596
597 597 # examine each file copy for a potential directory move, which is
598 598 # when all the files in a directory are moved to a new directory
599 599 for dst, src in fullcopy.iteritems():
600 600 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
601 601 if dsrc in invalid:
602 602 # already seen to be uninteresting
603 603 continue
604 604 elif dsrc in d1 and ddst in d1:
605 605 # directory wasn't entirely moved locally
606 606 invalid.add(dsrc)
607 607 elif dsrc in d2 and ddst in d2:
608 608 # directory wasn't entirely moved remotely
609 609 invalid.add(dsrc)
610 610 elif dsrc in dirmove and dirmove[dsrc] != ddst:
611 611 # files from the same directory moved to two different places
612 612 invalid.add(dsrc)
613 613 else:
614 614 # looks good so far
615 615 dirmove[dsrc] = ddst
616 616
617 617 for i in invalid:
618 618 if i in dirmove:
619 619 del dirmove[i]
620 620 del d1, d2, invalid
621 621
622 622 if not dirmove:
623 623 return copy, {}, diverge, renamedelete, {}
624 624
625 625 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
626 626
627 627 for d in dirmove:
628 628 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
629 629 (d, dirmove[d]))
630 630
631 631 movewithdir = {}
632 632 # check unaccounted nonoverlapping files against directory moves
633 633 for f in u1 + u2:
634 634 if f not in fullcopy:
635 635 for d in dirmove:
636 636 if f.startswith(d):
637 637 # new file added in a directory that was moved, move it
638 638 df = dirmove[d] + f[len(d):]
639 639 if df not in copy:
640 640 movewithdir[f] = df
641 641 repo.ui.debug((" pending file src: '%s' -> "
642 642 "dst: '%s'\n") % (f, df))
643 643 break
644 644
645 645 return copy, movewithdir, diverge, renamedelete, dirmove
646 646
647 647 def _heuristicscopytracing(repo, c1, c2, base):
648 648 """ Fast copytracing using filename heuristics
649 649
650 650 Assumes that moves or renames are of following two types:
651 651
652 652 1) Inside a directory only (same directory name but different filenames)
653 653 2) Move from one directory to another
654 654 (same filenames but different directory names)
655 655
656 656 Works only when there are no merge commits in the "source branch".
657 657 Source branch is commits from base up to c2 not including base.
658 658
659 659 If merge is involved it fallbacks to _fullcopytracing().
660 660
661 661 Can be used by setting the following config:
662 662
663 663 [experimental]
664 664 copytrace = heuristics
665 665
666 666 In some cases the copy/move candidates found by heuristics can be very large
667 667 in number and that will make the algorithm slow. The number of possible
668 668 candidates to check can be limited by using the config
669 669 `experimental.copytrace.movecandidateslimit` which defaults to 100.
670 670 """
671 671
672 672 if c1.rev() is None:
673 673 c1 = c1.p1()
674 674 if c2.rev() is None:
675 675 c2 = c2.p1()
676 676
677 677 copies = {}
678 678
679 679 changedfiles = set()
680 680 m1 = c1.manifest()
681 681 if not repo.revs('%d::%d', base.rev(), c2.rev()):
682 682 # If base is not in c2 branch, we switch to fullcopytracing
683 683 repo.ui.debug("switching to full copytracing as base is not "
684 684 "an ancestor of c2\n")
685 685 return _fullcopytracing(repo, c1, c2, base)
686 686
687 687 ctx = c2
688 688 while ctx != base:
689 689 if len(ctx.parents()) == 2:
690 690 # To keep things simple let's not handle merges
691 691 repo.ui.debug("switching to full copytracing because of merges\n")
692 692 return _fullcopytracing(repo, c1, c2, base)
693 693 changedfiles.update(ctx.files())
694 694 ctx = ctx.p1()
695 695
696 696 cp = _forwardcopies(base, c2)
697 697 for dst, src in cp.iteritems():
698 698 if src in m1:
699 699 copies[dst] = src
700 700
701 701 # file is missing if it isn't present in the destination, but is present in
702 702 # the base and present in the source.
703 703 # Presence in the base is important to exclude added files, presence in the
704 704 # source is important to exclude removed files.
705 705 filt = lambda f: f not in m1 and f in base and f in c2
706 706 missingfiles = [f for f in changedfiles if filt(f)]
707 707
708 708 if missingfiles:
709 709 basenametofilename = collections.defaultdict(list)
710 710 dirnametofilename = collections.defaultdict(list)
711 711
712 712 for f in m1.filesnotin(base.manifest()):
713 713 basename = os.path.basename(f)
714 714 dirname = os.path.dirname(f)
715 715 basenametofilename[basename].append(f)
716 716 dirnametofilename[dirname].append(f)
717 717
718 718 for f in missingfiles:
719 719 basename = os.path.basename(f)
720 720 dirname = os.path.dirname(f)
721 721 samebasename = basenametofilename[basename]
722 722 samedirname = dirnametofilename[dirname]
723 723 movecandidates = samebasename + samedirname
724 724 # f is guaranteed to be present in c2, that's why
725 725 # c2.filectx(f) won't fail
726 726 f2 = c2.filectx(f)
727 727 # we can have a lot of candidates which can slow down the heuristics
728 728 # config value to limit the number of candidates moves to check
729 729 maxcandidates = repo.ui.configint('experimental',
730 730 'copytrace.movecandidateslimit')
731 731
732 732 if len(movecandidates) > maxcandidates:
733 733 repo.ui.status(_("skipping copytracing for '%s', more "
734 734 "candidates than the limit: %d\n")
735 735 % (f, len(movecandidates)))
736 736 continue
737 737
738 738 for candidate in movecandidates:
739 739 f1 = c1.filectx(candidate)
740 740 if _related(f1, f2):
741 741 # if there are a few related copies then we'll merge
742 742 # changes into all of them. This matches the behaviour
743 743 # of upstream copytracing
744 744 copies[candidate] = f
745 745
746 746 return copies, {}, {}, {}, {}
747 747
748 748 def _related(f1, f2):
749 749 """return True if f1 and f2 filectx have a common ancestor
750 750
751 751 Walk back to common ancestor to see if the two files originate
752 752 from the same file. Since workingfilectx's rev() is None it messes
753 753 up the integer comparison logic, hence the pre-step check for
754 754 None (f1 and f2 can only be workingfilectx's initially).
755 755 """
756 756
757 757 if f1 == f2:
758 758 return True # a match
759 759
760 760 g1, g2 = f1.ancestors(), f2.ancestors()
761 761 try:
762 762 f1r, f2r = f1.linkrev(), f2.linkrev()
763 763
764 764 if f1r is None:
765 765 f1 = next(g1)
766 766 if f2r is None:
767 767 f2 = next(g2)
768 768
769 769 while True:
770 770 f1r, f2r = f1.linkrev(), f2.linkrev()
771 771 if f1r > f2r:
772 772 f1 = next(g1)
773 773 elif f2r > f1r:
774 774 f2 = next(g2)
775 775 else: # f1 and f2 point to files in the same linkrev
776 776 return f1 == f2 # true if they point to the same file
777 777 except StopIteration:
778 778 return False
779 779
780 780 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
781 781 """reproduce copies from fromrev to rev in the dirstate
782 782
783 783 If skiprev is specified, it's a revision that should be used to
784 784 filter copy records. Any copies that occur between fromrev and
785 785 skiprev will not be duplicated, even if they appear in the set of
786 786 copies between fromrev and rev.
787 787 """
788 788 exclude = {}
789 789 ctraceconfig = repo.ui.config('experimental', 'copytrace')
790 790 bctrace = stringutil.parsebool(ctraceconfig)
791 791 if (skiprev is not None and
792 792 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
793 793 # copytrace='off' skips this line, but not the entire function because
794 794 # the line below is O(size of the repo) during a rebase, while the rest
795 795 # of the function is much faster (and is required for carrying copy
796 796 # metadata across the rebase anyway).
797 797 exclude = pathcopies(repo[fromrev], repo[skiprev])
798 798 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
799 # copies.pathcopies returns backward renames, so dst might not
800 # actually be in the dirstate
801 799 if dst in exclude:
802 800 continue
803 801 wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now