##// END OF EJS Templates
copies: document cases in _chain()...
Martin von Zweigbergk -
r42413:d1c2688e default
parent child Browse files
Show More
@@ -1,786 +1,806 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 util,
21 21 )
22 22 from .utils import (
23 23 stringutil,
24 24 )
25 25
26 26 def _findlimit(repo, ctxa, ctxb):
27 27 """
28 28 Find the last revision that needs to be checked to ensure that a full
29 29 transitive closure for file copies can be properly calculated.
30 30 Generally, this means finding the earliest revision number that's an
31 31 ancestor of a or b but not both, except when a or b is a direct descendent
32 32 of the other, in which case we can return the minimum revnum of a and b.
33 33 """
34 34
35 35 # basic idea:
36 36 # - mark a and b with different sides
37 37 # - if a parent's children are all on the same side, the parent is
38 38 # on that side, otherwise it is on no side
39 39 # - walk the graph in topological order with the help of a heap;
40 40 # - add unseen parents to side map
41 41 # - clear side of any parent that has children on different sides
42 42 # - track number of interesting revs that might still be on a side
43 43 # - track the lowest interesting rev seen
44 44 # - quit when interesting revs is zero
45 45
46 46 cl = repo.changelog
47 47 wdirparents = None
48 48 a = ctxa.rev()
49 49 b = ctxb.rev()
50 50 if a is None:
51 51 wdirparents = (ctxa.p1(), ctxa.p2())
52 52 a = node.wdirrev
53 53 if b is None:
54 54 assert not wdirparents
55 55 wdirparents = (ctxb.p1(), ctxb.p2())
56 56 b = node.wdirrev
57 57
58 58 side = {a: -1, b: 1}
59 59 visit = [-a, -b]
60 60 heapq.heapify(visit)
61 61 interesting = len(visit)
62 62 limit = node.wdirrev
63 63
64 64 while interesting:
65 65 r = -heapq.heappop(visit)
66 66 if r == node.wdirrev:
67 67 parents = [pctx.rev() for pctx in wdirparents]
68 68 else:
69 69 parents = cl.parentrevs(r)
70 70 if parents[1] == node.nullrev:
71 71 parents = parents[:1]
72 72 for p in parents:
73 73 if p not in side:
74 74 # first time we see p; add it to visit
75 75 side[p] = side[r]
76 76 if side[p]:
77 77 interesting += 1
78 78 heapq.heappush(visit, -p)
79 79 elif side[p] and side[p] != side[r]:
80 80 # p was interesting but now we know better
81 81 side[p] = 0
82 82 interesting -= 1
83 83 if side[r]:
84 84 limit = r # lowest rev visited
85 85 interesting -= 1
86 86
87 87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 88 # 1/ File 'a0' committed
89 89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 90 # 3/ Move back to first commit
91 91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 93 #
94 94 # During the amend in step five, we will be in this state:
95 95 #
96 96 # @ 3 temporary amend commit for a1-amend
97 97 # |
98 98 # o 2 a1-amend
99 99 # |
100 100 # | o 1 a1
101 101 # |/
102 102 # o 0 a0
103 103 #
104 104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 105 # yet the filelog has the copy information in rev 1 and we will not look
106 106 # back far enough unless we also look at the a and b as candidates.
107 107 # This only occurs when a is a descendent of b or visa-versa.
108 108 return min(limit, a, b)
109 109
110 110 def _chain(src, dst, a, b):
111 """chain two sets of copies a->b"""
111 """chain two sets of copies 'a' and 'b'"""
112
113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
115 # following table (not including trivial cases). For example, case 2 is
116 # where a file existed in 'src' and remained under that name in 'mid' and
117 # then was renamed between 'mid' and 'dst'.
118 #
119 # case src mid dst result
120 # 1 x y - -
121 # 2 x y y x->y
122 # 3 x y x -
123 # 4 x y z x->z
124 # 5 - x y -
125 # 6 x x y x->y
126
127 # Initialize result ('t') from 'a'. This catches cases 1 & 2. We'll remove
128 # case 1 later. We'll also catch cases 3 & 4 here. Case 4 will be
129 # overwritten later, and case 3 will be removed later.
112 130 t = a.copy()
113 131 for k, v in b.iteritems():
114 132 if v in t:
115 # found a chain
133 # found a chain, i.e. cases 3 & 4.
116 134 if t[v] != k:
117 # file wasn't renamed back to itself
135 # file wasn't renamed back to itself (i.e. case 4, not 3)
118 136 t[k] = t[v]
119 137 if v not in dst:
120 138 # chain was a rename, not a copy
139 # this deletes the copy for 'y' in case 4
121 140 del t[v]
122 141 if v in src:
123 # file is a copy of an existing file
142 # file is a copy of an existing file, i.e. case 6.
124 143 t[k] = v
125 144
126 145 for k, v in list(t.items()):
127 # remove criss-crossed copies
146 # remove criss-crossed copies, i.e. case 3
128 147 if k in src and v in dst:
129 148 del t[k]
130 # remove copies to files that were then removed
149 # remove copies to files that were then removed, i.e. case 1
150 # and file 'y' in cases 3 & 4 (in case of rename)
131 151 elif k not in dst:
132 152 del t[k]
133 153
134 154 return t
135 155
136 156 def _tracefile(fctx, am, limit=node.nullrev):
137 157 """return file context that is the ancestor of fctx present in ancestor
138 158 manifest am, stopping after the first ancestor lower than limit"""
139 159
140 160 for f in fctx.ancestors():
141 161 if am.get(f.path(), None) == f.filenode():
142 162 return f
143 163 if limit >= 0 and not f.isintroducedafter(limit):
144 164 return None
145 165
146 166 def _dirstatecopies(repo, match=None):
147 167 ds = repo.dirstate
148 168 c = ds.copies().copy()
149 169 for k in list(c):
150 170 if ds[k] not in 'anm' or (match and not match(k)):
151 171 del c[k]
152 172 return c
153 173
154 174 def _computeforwardmissing(a, b, match=None):
155 175 """Computes which files are in b but not a.
156 176 This is its own function so extensions can easily wrap this call to see what
157 177 files _forwardcopies is about to process.
158 178 """
159 179 ma = a.manifest()
160 180 mb = b.manifest()
161 181 return mb.filesnotin(ma, match=match)
162 182
163 183 def usechangesetcentricalgo(repo):
164 184 """Checks if we should use changeset-centric copy algorithms"""
165 185 return (repo.ui.config('experimental', 'copies.read-from') in
166 186 ('changeset-only', 'compatibility'))
167 187
168 188 def _committedforwardcopies(a, b, match):
169 189 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
170 190 # files might have to be traced back to the fctx parent of the last
171 191 # one-side-only changeset, but not further back than that
172 192 repo = a._repo
173 193
174 194 if usechangesetcentricalgo(repo):
175 195 return _changesetforwardcopies(a, b, match)
176 196
177 197 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
178 198 dbg = repo.ui.debug
179 199 if debug:
180 200 dbg('debug.copies: looking into rename from %s to %s\n'
181 201 % (a, b))
182 202 limit = _findlimit(repo, a, b)
183 203 if debug:
184 204 dbg('debug.copies: search limit: %d\n' % limit)
185 205 am = a.manifest()
186 206
187 207 # find where new files came from
188 208 # we currently don't try to find where old files went, too expensive
189 209 # this means we can miss a case like 'hg rm b; hg cp a b'
190 210 cm = {}
191 211
192 212 # Computing the forward missing is quite expensive on large manifests, since
193 213 # it compares the entire manifests. We can optimize it in the common use
194 214 # case of computing what copies are in a commit versus its parent (like
195 215 # during a rebase or histedit). Note, we exclude merge commits from this
196 216 # optimization, since the ctx.files() for a merge commit is not correct for
197 217 # this comparison.
198 218 forwardmissingmatch = match
199 219 if b.p1() == a and b.p2().node() == node.nullid:
200 220 filesmatcher = matchmod.exact(b.files())
201 221 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
202 222 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
203 223
204 224 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
205 225
206 226 if debug:
207 227 dbg('debug.copies: missing files to search: %d\n' % len(missing))
208 228
209 229 for f in sorted(missing):
210 230 if debug:
211 231 dbg('debug.copies: tracing file: %s\n' % f)
212 232 fctx = b[f]
213 233 fctx._ancestrycontext = ancestrycontext
214 234
215 235 if debug:
216 236 start = util.timer()
217 237 ofctx = _tracefile(fctx, am, limit)
218 238 if ofctx:
219 239 if debug:
220 240 dbg('debug.copies: rename of: %s\n' % ofctx._path)
221 241 cm[f] = ofctx.path()
222 242 if debug:
223 243 dbg('debug.copies: time: %f seconds\n'
224 244 % (util.timer() - start))
225 245 return cm
226 246
227 247 def _changesetforwardcopies(a, b, match):
228 248 if a.rev() == node.nullrev:
229 249 return {}
230 250
231 251 repo = a.repo()
232 252 children = {}
233 253 cl = repo.changelog
234 254 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
235 255 for r in missingrevs:
236 256 for p in cl.parentrevs(r):
237 257 if p == node.nullrev:
238 258 continue
239 259 if p not in children:
240 260 children[p] = [r]
241 261 else:
242 262 children[p].append(r)
243 263
244 264 roots = set(children) - set(missingrevs)
245 265 # 'work' contains 3-tuples of a (revision number, parent number, copies).
246 266 # The parent number is only used for knowing which parent the copies dict
247 267 # came from.
248 268 work = [(r, 1, {}) for r in roots]
249 269 heapq.heapify(work)
250 270 while work:
251 271 r, i1, copies1 = heapq.heappop(work)
252 272 if work and work[0][0] == r:
253 273 # We are tracing copies from both parents
254 274 r, i2, copies2 = heapq.heappop(work)
255 275 copies = {}
256 276 ctx = repo[r]
257 277 p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
258 278 allcopies = set(copies1) | set(copies2)
259 279 # TODO: perhaps this filtering should be done as long as ctx
260 280 # is merge, whether or not we're tracing from both parent.
261 281 for dst in allcopies:
262 282 if not match(dst):
263 283 continue
264 284 if dst not in copies2:
265 285 # Copied on p1 side: mark as copy from p1 side if it didn't
266 286 # already exist on p2 side
267 287 if dst not in p2man:
268 288 copies[dst] = copies1[dst]
269 289 elif dst not in copies1:
270 290 # Copied on p2 side: mark as copy from p2 side if it didn't
271 291 # already exist on p1 side
272 292 if dst not in p1man:
273 293 copies[dst] = copies2[dst]
274 294 else:
275 295 # Copied on both sides: mark as copy from p1 side
276 296 copies[dst] = copies1[dst]
277 297 else:
278 298 copies = copies1
279 299 if r == b.rev():
280 300 return copies
281 301 for c in children[r]:
282 302 childctx = repo[c]
283 303 if r == childctx.p1().rev():
284 304 parent = 1
285 305 childcopies = childctx.p1copies()
286 306 else:
287 307 assert r == childctx.p2().rev()
288 308 parent = 2
289 309 childcopies = childctx.p2copies()
290 310 if not match.always():
291 311 childcopies = {dst: src for dst, src in childcopies.items()
292 312 if match(dst)}
293 313 childcopies = _chain(a, childctx, copies, childcopies)
294 314 heapq.heappush(work, (c, parent, childcopies))
295 315 assert False
296 316
297 317 def _forwardcopies(a, b, match=None):
298 318 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
299 319
300 320 match = a.repo().narrowmatch(match)
301 321 # check for working copy
302 322 if b.rev() is None:
303 323 if a == b.p1():
304 324 # short-circuit to avoid issues with merge states
305 325 return _dirstatecopies(b._repo, match)
306 326
307 327 cm = _committedforwardcopies(a, b.p1(), match)
308 328 # combine copies from dirstate if necessary
309 329 return _chain(a, b, cm, _dirstatecopies(b._repo, match))
310 330 return _committedforwardcopies(a, b, match)
311 331
312 332 def _backwardrenames(a, b, match):
313 333 if a._repo.ui.config('experimental', 'copytrace') == 'off':
314 334 return {}
315 335
316 336 # Even though we're not taking copies into account, 1:n rename situations
317 337 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
318 338 # arbitrarily pick one of the renames.
319 339 # We don't want to pass in "match" here, since that would filter
320 340 # the destination by it. Since we're reversing the copies, we want
321 341 # to filter the source instead.
322 342 f = _forwardcopies(b, a)
323 343 r = {}
324 344 for k, v in sorted(f.iteritems()):
325 345 if match and not match(v):
326 346 continue
327 347 # remove copies
328 348 if v in a:
329 349 continue
330 350 r[v] = k
331 351 return r
332 352
333 353 def pathcopies(x, y, match=None):
334 354 """find {dst@y: src@x} copy mapping for directed compare"""
335 355 repo = x._repo
336 356 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
337 357 if debug:
338 358 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
339 359 % (x, y))
340 360 if x == y or not x or not y:
341 361 return {}
342 362 a = y.ancestor(x)
343 363 if a == x:
344 364 if debug:
345 365 repo.ui.debug('debug.copies: search mode: forward\n')
346 366 return _forwardcopies(x, y, match=match)
347 367 if a == y:
348 368 if debug:
349 369 repo.ui.debug('debug.copies: search mode: backward\n')
350 370 return _backwardrenames(x, y, match=match)
351 371 if debug:
352 372 repo.ui.debug('debug.copies: search mode: combined\n')
353 373 return _chain(x, y, _backwardrenames(x, a, match=match),
354 374 _forwardcopies(a, y, match=match))
355 375
356 376 def mergecopies(repo, c1, c2, base):
357 377 """
358 378 Finds moves and copies between context c1 and c2 that are relevant for
359 379 merging. 'base' will be used as the merge base.
360 380
361 381 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
362 382 files that were moved/ copied in one merge parent and modified in another.
363 383 For example:
364 384
365 385 o ---> 4 another commit
366 386 |
367 387 | o ---> 3 commit that modifies a.txt
368 388 | /
369 389 o / ---> 2 commit that moves a.txt to b.txt
370 390 |/
371 391 o ---> 1 merge base
372 392
373 393 If we try to rebase revision 3 on revision 4, since there is no a.txt in
374 394 revision 4, and if user have copytrace disabled, we prints the following
375 395 message:
376 396
377 397 ```other changed <file> which local deleted```
378 398
379 399 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
380 400 "dirmove".
381 401
382 402 "copy" is a mapping from destination name -> source name,
383 403 where source is in c1 and destination is in c2 or vice-versa.
384 404
385 405 "movewithdir" is a mapping from source name -> destination name,
386 406 where the file at source present in one context but not the other
387 407 needs to be moved to destination by the merge process, because the
388 408 other context moved the directory it is in.
389 409
390 410 "diverge" is a mapping of source name -> list of destination names
391 411 for divergent renames.
392 412
393 413 "renamedelete" is a mapping of source name -> list of destination
394 414 names for files deleted in c1 that were renamed in c2 or vice-versa.
395 415
396 416 "dirmove" is a mapping of detected source dir -> destination dir renames.
397 417 This is needed for handling changes to new files previously grafted into
398 418 renamed directories.
399 419
400 420 This function calls different copytracing algorithms based on config.
401 421 """
402 422 # avoid silly behavior for update from empty dir
403 423 if not c1 or not c2 or c1 == c2:
404 424 return {}, {}, {}, {}, {}
405 425
406 426 narrowmatch = c1.repo().narrowmatch()
407 427
408 428 # avoid silly behavior for parent -> working dir
409 429 if c2.node() is None and c1.node() == repo.dirstate.p1():
410 430 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
411 431
412 432 copytracing = repo.ui.config('experimental', 'copytrace')
413 433 if stringutil.parsebool(copytracing) is False:
414 434 # stringutil.parsebool() returns None when it is unable to parse the
415 435 # value, so we should rely on making sure copytracing is on such cases
416 436 return {}, {}, {}, {}, {}
417 437
418 438 if usechangesetcentricalgo(repo):
419 439 # The heuristics don't make sense when we need changeset-centric algos
420 440 return _fullcopytracing(repo, c1, c2, base)
421 441
422 442 # Copy trace disabling is explicitly below the node == p1 logic above
423 443 # because the logic above is required for a simple copy to be kept across a
424 444 # rebase.
425 445 if copytracing == 'heuristics':
426 446 # Do full copytracing if only non-public revisions are involved as
427 447 # that will be fast enough and will also cover the copies which could
428 448 # be missed by heuristics
429 449 if _isfullcopytraceable(repo, c1, base):
430 450 return _fullcopytracing(repo, c1, c2, base)
431 451 return _heuristicscopytracing(repo, c1, c2, base)
432 452 else:
433 453 return _fullcopytracing(repo, c1, c2, base)
434 454
435 455 def _isfullcopytraceable(repo, c1, base):
436 456 """ Checks that if base, source and destination are all no-public branches,
437 457 if yes let's use the full copytrace algorithm for increased capabilities
438 458 since it will be fast enough.
439 459
440 460 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
441 461 number of changesets from c1 to base such that if number of changesets are
442 462 more than the limit, full copytracing algorithm won't be used.
443 463 """
444 464 if c1.rev() is None:
445 465 c1 = c1.p1()
446 466 if c1.mutable() and base.mutable():
447 467 sourcecommitlimit = repo.ui.configint('experimental',
448 468 'copytrace.sourcecommitlimit')
449 469 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
450 470 return commits < sourcecommitlimit
451 471 return False
452 472
453 473 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
454 474 copy, renamedelete):
455 475 if src not in m2:
456 476 # deleted on side 2
457 477 if src not in m1:
458 478 # renamed on side 1, deleted on side 2
459 479 renamedelete[src] = dsts1
460 480 elif m2[src] != mb[src]:
461 481 if not _related(c2[src], base[src]):
462 482 return
463 483 # modified on side 2
464 484 for dst in dsts1:
465 485 if dst not in m2:
466 486 # dst not added on side 2 (handle as regular
467 487 # "both created" case in manifestmerge otherwise)
468 488 copy[dst] = src
469 489
470 490 def _fullcopytracing(repo, c1, c2, base):
471 491 """ The full copytracing algorithm which finds all the new files that were
472 492 added from merge base up to the top commit and for each file it checks if
473 493 this file was copied from another file.
474 494
475 495 This is pretty slow when a lot of changesets are involved but will track all
476 496 the copies.
477 497 """
478 498 m1 = c1.manifest()
479 499 m2 = c2.manifest()
480 500 mb = base.manifest()
481 501
482 502 copies1 = pathcopies(base, c1)
483 503 copies2 = pathcopies(base, c2)
484 504
485 505 inversecopies1 = {}
486 506 inversecopies2 = {}
487 507 for dst, src in copies1.items():
488 508 inversecopies1.setdefault(src, []).append(dst)
489 509 for dst, src in copies2.items():
490 510 inversecopies2.setdefault(src, []).append(dst)
491 511
492 512 copy = {}
493 513 diverge = {}
494 514 renamedelete = {}
495 515 allsources = set(inversecopies1) | set(inversecopies2)
496 516 for src in allsources:
497 517 dsts1 = inversecopies1.get(src)
498 518 dsts2 = inversecopies2.get(src)
499 519 if dsts1 and dsts2:
500 520 # copied/renamed on both sides
501 521 if src not in m1 and src not in m2:
502 522 # renamed on both sides
503 523 dsts1 = set(dsts1)
504 524 dsts2 = set(dsts2)
505 525 # If there's some overlap in the rename destinations, we
506 526 # consider it not divergent. For example, if side 1 copies 'a'
507 527 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
508 528 # and 'd' and deletes 'a'.
509 529 if dsts1 & dsts2:
510 530 for dst in (dsts1 & dsts2):
511 531 copy[dst] = src
512 532 else:
513 533 diverge[src] = sorted(dsts1 | dsts2)
514 534 elif src in m1 and src in m2:
515 535 # copied on both sides
516 536 dsts1 = set(dsts1)
517 537 dsts2 = set(dsts2)
518 538 for dst in (dsts1 & dsts2):
519 539 copy[dst] = src
520 540 # TODO: Handle cases where it was renamed on one side and copied
521 541 # on the other side
522 542 elif dsts1:
523 543 # copied/renamed only on side 1
524 544 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
525 545 copy, renamedelete)
526 546 elif dsts2:
527 547 # copied/renamed only on side 2
528 548 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
529 549 copy, renamedelete)
530 550
531 551 renamedeleteset = set()
532 552 divergeset = set()
533 553 for dsts in diverge.values():
534 554 divergeset.update(dsts)
535 555 for dsts in renamedelete.values():
536 556 renamedeleteset.update(dsts)
537 557
538 558 # find interesting file sets from manifests
539 559 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
540 560 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
541 561 u1 = sorted(addedinm1 - addedinm2)
542 562 u2 = sorted(addedinm2 - addedinm1)
543 563
544 564 header = " unmatched files in %s"
545 565 if u1:
546 566 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
547 567 if u2:
548 568 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
549 569
550 570 fullcopy = copies1.copy()
551 571 fullcopy.update(copies2)
552 572 if not fullcopy:
553 573 return copy, {}, diverge, renamedelete, {}
554 574
555 575 if repo.ui.debugflag:
556 576 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
557 577 "% = renamed and deleted):\n")
558 578 for f in sorted(fullcopy):
559 579 note = ""
560 580 if f in copy:
561 581 note += "*"
562 582 if f in divergeset:
563 583 note += "!"
564 584 if f in renamedeleteset:
565 585 note += "%"
566 586 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
567 587 note))
568 588 del divergeset
569 589
570 590 repo.ui.debug(" checking for directory renames\n")
571 591
572 592 # generate a directory move map
573 593 d1, d2 = c1.dirs(), c2.dirs()
574 594 # Hack for adding '', which is not otherwise added, to d1 and d2
575 595 d1.addpath('/')
576 596 d2.addpath('/')
577 597 invalid = set()
578 598 dirmove = {}
579 599
580 600 # examine each file copy for a potential directory move, which is
581 601 # when all the files in a directory are moved to a new directory
582 602 for dst, src in fullcopy.iteritems():
583 603 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
584 604 if dsrc in invalid:
585 605 # already seen to be uninteresting
586 606 continue
587 607 elif dsrc in d1 and ddst in d1:
588 608 # directory wasn't entirely moved locally
589 609 invalid.add(dsrc)
590 610 elif dsrc in d2 and ddst in d2:
591 611 # directory wasn't entirely moved remotely
592 612 invalid.add(dsrc)
593 613 elif dsrc in dirmove and dirmove[dsrc] != ddst:
594 614 # files from the same directory moved to two different places
595 615 invalid.add(dsrc)
596 616 else:
597 617 # looks good so far
598 618 dirmove[dsrc] = ddst
599 619
600 620 for i in invalid:
601 621 if i in dirmove:
602 622 del dirmove[i]
603 623 del d1, d2, invalid
604 624
605 625 if not dirmove:
606 626 return copy, {}, diverge, renamedelete, {}
607 627
608 628 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
609 629
610 630 for d in dirmove:
611 631 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
612 632 (d, dirmove[d]))
613 633
614 634 movewithdir = {}
615 635 # check unaccounted nonoverlapping files against directory moves
616 636 for f in u1 + u2:
617 637 if f not in fullcopy:
618 638 for d in dirmove:
619 639 if f.startswith(d):
620 640 # new file added in a directory that was moved, move it
621 641 df = dirmove[d] + f[len(d):]
622 642 if df not in copy:
623 643 movewithdir[f] = df
624 644 repo.ui.debug((" pending file src: '%s' -> "
625 645 "dst: '%s'\n") % (f, df))
626 646 break
627 647
628 648 return copy, movewithdir, diverge, renamedelete, dirmove
629 649
630 650 def _heuristicscopytracing(repo, c1, c2, base):
631 651 """ Fast copytracing using filename heuristics
632 652
633 653 Assumes that moves or renames are of following two types:
634 654
635 655 1) Inside a directory only (same directory name but different filenames)
636 656 2) Move from one directory to another
637 657 (same filenames but different directory names)
638 658
639 659 Works only when there are no merge commits in the "source branch".
640 660 Source branch is commits from base up to c2 not including base.
641 661
642 662 If merge is involved it fallbacks to _fullcopytracing().
643 663
644 664 Can be used by setting the following config:
645 665
646 666 [experimental]
647 667 copytrace = heuristics
648 668
649 669 In some cases the copy/move candidates found by heuristics can be very large
650 670 in number and that will make the algorithm slow. The number of possible
651 671 candidates to check can be limited by using the config
652 672 `experimental.copytrace.movecandidateslimit` which defaults to 100.
653 673 """
654 674
655 675 if c1.rev() is None:
656 676 c1 = c1.p1()
657 677 if c2.rev() is None:
658 678 c2 = c2.p1()
659 679
660 680 copies = {}
661 681
662 682 changedfiles = set()
663 683 m1 = c1.manifest()
664 684 if not repo.revs('%d::%d', base.rev(), c2.rev()):
665 685 # If base is not in c2 branch, we switch to fullcopytracing
666 686 repo.ui.debug("switching to full copytracing as base is not "
667 687 "an ancestor of c2\n")
668 688 return _fullcopytracing(repo, c1, c2, base)
669 689
670 690 ctx = c2
671 691 while ctx != base:
672 692 if len(ctx.parents()) == 2:
673 693 # To keep things simple let's not handle merges
674 694 repo.ui.debug("switching to full copytracing because of merges\n")
675 695 return _fullcopytracing(repo, c1, c2, base)
676 696 changedfiles.update(ctx.files())
677 697 ctx = ctx.p1()
678 698
679 699 cp = _forwardcopies(base, c2)
680 700 for dst, src in cp.iteritems():
681 701 if src in m1:
682 702 copies[dst] = src
683 703
684 704 # file is missing if it isn't present in the destination, but is present in
685 705 # the base and present in the source.
686 706 # Presence in the base is important to exclude added files, presence in the
687 707 # source is important to exclude removed files.
688 708 filt = lambda f: f not in m1 and f in base and f in c2
689 709 missingfiles = [f for f in changedfiles if filt(f)]
690 710
691 711 if missingfiles:
692 712 basenametofilename = collections.defaultdict(list)
693 713 dirnametofilename = collections.defaultdict(list)
694 714
695 715 for f in m1.filesnotin(base.manifest()):
696 716 basename = os.path.basename(f)
697 717 dirname = os.path.dirname(f)
698 718 basenametofilename[basename].append(f)
699 719 dirnametofilename[dirname].append(f)
700 720
701 721 for f in missingfiles:
702 722 basename = os.path.basename(f)
703 723 dirname = os.path.dirname(f)
704 724 samebasename = basenametofilename[basename]
705 725 samedirname = dirnametofilename[dirname]
706 726 movecandidates = samebasename + samedirname
707 727 # f is guaranteed to be present in c2, that's why
708 728 # c2.filectx(f) won't fail
709 729 f2 = c2.filectx(f)
710 730 # we can have a lot of candidates which can slow down the heuristics
711 731 # config value to limit the number of candidates moves to check
712 732 maxcandidates = repo.ui.configint('experimental',
713 733 'copytrace.movecandidateslimit')
714 734
715 735 if len(movecandidates) > maxcandidates:
716 736 repo.ui.status(_("skipping copytracing for '%s', more "
717 737 "candidates than the limit: %d\n")
718 738 % (f, len(movecandidates)))
719 739 continue
720 740
721 741 for candidate in movecandidates:
722 742 f1 = c1.filectx(candidate)
723 743 if _related(f1, f2):
724 744 # if there are a few related copies then we'll merge
725 745 # changes into all of them. This matches the behaviour
726 746 # of upstream copytracing
727 747 copies[candidate] = f
728 748
729 749 return copies, {}, {}, {}, {}
730 750
731 751 def _related(f1, f2):
732 752 """return True if f1 and f2 filectx have a common ancestor
733 753
734 754 Walk back to common ancestor to see if the two files originate
735 755 from the same file. Since workingfilectx's rev() is None it messes
736 756 up the integer comparison logic, hence the pre-step check for
737 757 None (f1 and f2 can only be workingfilectx's initially).
738 758 """
739 759
740 760 if f1 == f2:
741 761 return True # a match
742 762
743 763 g1, g2 = f1.ancestors(), f2.ancestors()
744 764 try:
745 765 f1r, f2r = f1.linkrev(), f2.linkrev()
746 766
747 767 if f1r is None:
748 768 f1 = next(g1)
749 769 if f2r is None:
750 770 f2 = next(g2)
751 771
752 772 while True:
753 773 f1r, f2r = f1.linkrev(), f2.linkrev()
754 774 if f1r > f2r:
755 775 f1 = next(g1)
756 776 elif f2r > f1r:
757 777 f2 = next(g2)
758 778 else: # f1 and f2 point to files in the same linkrev
759 779 return f1 == f2 # true if they point to the same file
760 780 except StopIteration:
761 781 return False
762 782
763 783 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
764 784 """reproduce copies from fromrev to rev in the dirstate
765 785
766 786 If skiprev is specified, it's a revision that should be used to
767 787 filter copy records. Any copies that occur between fromrev and
768 788 skiprev will not be duplicated, even if they appear in the set of
769 789 copies between fromrev and rev.
770 790 """
771 791 exclude = {}
772 792 ctraceconfig = repo.ui.config('experimental', 'copytrace')
773 793 bctrace = stringutil.parsebool(ctraceconfig)
774 794 if (skiprev is not None and
775 795 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
776 796 # copytrace='off' skips this line, but not the entire function because
777 797 # the line below is O(size of the repo) during a rebase, while the rest
778 798 # of the function is much faster (and is required for carrying copy
779 799 # metadata across the rebase anyway).
780 800 exclude = pathcopies(repo[fromrev], repo[skiprev])
781 801 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
782 802 # copies.pathcopies returns backward renames, so dst might not
783 803 # actually be in the dirstate
784 804 if dst in exclude:
785 805 continue
786 806 wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now