##// END OF EJS Templates
copies: ignore heuristics copytracing when using changeset-centric algos...
Martin von Zweigbergk -
r42412:a6be3af3 default
parent child Browse files
Show More
@@ -1,782 +1,786 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 util,
21 21 )
22 22 from .utils import (
23 23 stringutil,
24 24 )
25 25
26 26 def _findlimit(repo, ctxa, ctxb):
27 27 """
28 28 Find the last revision that needs to be checked to ensure that a full
29 29 transitive closure for file copies can be properly calculated.
30 30 Generally, this means finding the earliest revision number that's an
31 31 ancestor of a or b but not both, except when a or b is a direct descendent
32 32 of the other, in which case we can return the minimum revnum of a and b.
33 33 """
34 34
35 35 # basic idea:
36 36 # - mark a and b with different sides
37 37 # - if a parent's children are all on the same side, the parent is
38 38 # on that side, otherwise it is on no side
39 39 # - walk the graph in topological order with the help of a heap;
40 40 # - add unseen parents to side map
41 41 # - clear side of any parent that has children on different sides
42 42 # - track number of interesting revs that might still be on a side
43 43 # - track the lowest interesting rev seen
44 44 # - quit when interesting revs is zero
45 45
46 46 cl = repo.changelog
47 47 wdirparents = None
48 48 a = ctxa.rev()
49 49 b = ctxb.rev()
50 50 if a is None:
51 51 wdirparents = (ctxa.p1(), ctxa.p2())
52 52 a = node.wdirrev
53 53 if b is None:
54 54 assert not wdirparents
55 55 wdirparents = (ctxb.p1(), ctxb.p2())
56 56 b = node.wdirrev
57 57
58 58 side = {a: -1, b: 1}
59 59 visit = [-a, -b]
60 60 heapq.heapify(visit)
61 61 interesting = len(visit)
62 62 limit = node.wdirrev
63 63
64 64 while interesting:
65 65 r = -heapq.heappop(visit)
66 66 if r == node.wdirrev:
67 67 parents = [pctx.rev() for pctx in wdirparents]
68 68 else:
69 69 parents = cl.parentrevs(r)
70 70 if parents[1] == node.nullrev:
71 71 parents = parents[:1]
72 72 for p in parents:
73 73 if p not in side:
74 74 # first time we see p; add it to visit
75 75 side[p] = side[r]
76 76 if side[p]:
77 77 interesting += 1
78 78 heapq.heappush(visit, -p)
79 79 elif side[p] and side[p] != side[r]:
80 80 # p was interesting but now we know better
81 81 side[p] = 0
82 82 interesting -= 1
83 83 if side[r]:
84 84 limit = r # lowest rev visited
85 85 interesting -= 1
86 86
87 87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 88 # 1/ File 'a0' committed
89 89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 90 # 3/ Move back to first commit
91 91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 93 #
94 94 # During the amend in step five, we will be in this state:
95 95 #
96 96 # @ 3 temporary amend commit for a1-amend
97 97 # |
98 98 # o 2 a1-amend
99 99 # |
100 100 # | o 1 a1
101 101 # |/
102 102 # o 0 a0
103 103 #
104 104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 105 # yet the filelog has the copy information in rev 1 and we will not look
106 106 # back far enough unless we also look at the a and b as candidates.
107 107 # This only occurs when a is a descendent of b or visa-versa.
108 108 return min(limit, a, b)
109 109
110 110 def _chain(src, dst, a, b):
111 111 """chain two sets of copies a->b"""
112 112 t = a.copy()
113 113 for k, v in b.iteritems():
114 114 if v in t:
115 115 # found a chain
116 116 if t[v] != k:
117 117 # file wasn't renamed back to itself
118 118 t[k] = t[v]
119 119 if v not in dst:
120 120 # chain was a rename, not a copy
121 121 del t[v]
122 122 if v in src:
123 123 # file is a copy of an existing file
124 124 t[k] = v
125 125
126 126 for k, v in list(t.items()):
127 127 # remove criss-crossed copies
128 128 if k in src and v in dst:
129 129 del t[k]
130 130 # remove copies to files that were then removed
131 131 elif k not in dst:
132 132 del t[k]
133 133
134 134 return t
135 135
136 136 def _tracefile(fctx, am, limit=node.nullrev):
137 137 """return file context that is the ancestor of fctx present in ancestor
138 138 manifest am, stopping after the first ancestor lower than limit"""
139 139
140 140 for f in fctx.ancestors():
141 141 if am.get(f.path(), None) == f.filenode():
142 142 return f
143 143 if limit >= 0 and not f.isintroducedafter(limit):
144 144 return None
145 145
146 146 def _dirstatecopies(repo, match=None):
147 147 ds = repo.dirstate
148 148 c = ds.copies().copy()
149 149 for k in list(c):
150 150 if ds[k] not in 'anm' or (match and not match(k)):
151 151 del c[k]
152 152 return c
153 153
154 154 def _computeforwardmissing(a, b, match=None):
155 155 """Computes which files are in b but not a.
156 156 This is its own function so extensions can easily wrap this call to see what
157 157 files _forwardcopies is about to process.
158 158 """
159 159 ma = a.manifest()
160 160 mb = b.manifest()
161 161 return mb.filesnotin(ma, match=match)
162 162
163 163 def usechangesetcentricalgo(repo):
164 164 """Checks if we should use changeset-centric copy algorithms"""
165 165 return (repo.ui.config('experimental', 'copies.read-from') in
166 166 ('changeset-only', 'compatibility'))
167 167
168 168 def _committedforwardcopies(a, b, match):
169 169 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
170 170 # files might have to be traced back to the fctx parent of the last
171 171 # one-side-only changeset, but not further back than that
172 172 repo = a._repo
173 173
174 174 if usechangesetcentricalgo(repo):
175 175 return _changesetforwardcopies(a, b, match)
176 176
177 177 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
178 178 dbg = repo.ui.debug
179 179 if debug:
180 180 dbg('debug.copies: looking into rename from %s to %s\n'
181 181 % (a, b))
182 182 limit = _findlimit(repo, a, b)
183 183 if debug:
184 184 dbg('debug.copies: search limit: %d\n' % limit)
185 185 am = a.manifest()
186 186
187 187 # find where new files came from
188 188 # we currently don't try to find where old files went, too expensive
189 189 # this means we can miss a case like 'hg rm b; hg cp a b'
190 190 cm = {}
191 191
192 192 # Computing the forward missing is quite expensive on large manifests, since
193 193 # it compares the entire manifests. We can optimize it in the common use
194 194 # case of computing what copies are in a commit versus its parent (like
195 195 # during a rebase or histedit). Note, we exclude merge commits from this
196 196 # optimization, since the ctx.files() for a merge commit is not correct for
197 197 # this comparison.
198 198 forwardmissingmatch = match
199 199 if b.p1() == a and b.p2().node() == node.nullid:
200 200 filesmatcher = matchmod.exact(b.files())
201 201 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
202 202 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
203 203
204 204 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
205 205
206 206 if debug:
207 207 dbg('debug.copies: missing files to search: %d\n' % len(missing))
208 208
209 209 for f in sorted(missing):
210 210 if debug:
211 211 dbg('debug.copies: tracing file: %s\n' % f)
212 212 fctx = b[f]
213 213 fctx._ancestrycontext = ancestrycontext
214 214
215 215 if debug:
216 216 start = util.timer()
217 217 ofctx = _tracefile(fctx, am, limit)
218 218 if ofctx:
219 219 if debug:
220 220 dbg('debug.copies: rename of: %s\n' % ofctx._path)
221 221 cm[f] = ofctx.path()
222 222 if debug:
223 223 dbg('debug.copies: time: %f seconds\n'
224 224 % (util.timer() - start))
225 225 return cm
226 226
227 227 def _changesetforwardcopies(a, b, match):
228 228 if a.rev() == node.nullrev:
229 229 return {}
230 230
231 231 repo = a.repo()
232 232 children = {}
233 233 cl = repo.changelog
234 234 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
235 235 for r in missingrevs:
236 236 for p in cl.parentrevs(r):
237 237 if p == node.nullrev:
238 238 continue
239 239 if p not in children:
240 240 children[p] = [r]
241 241 else:
242 242 children[p].append(r)
243 243
244 244 roots = set(children) - set(missingrevs)
245 245 # 'work' contains 3-tuples of a (revision number, parent number, copies).
246 246 # The parent number is only used for knowing which parent the copies dict
247 247 # came from.
248 248 work = [(r, 1, {}) for r in roots]
249 249 heapq.heapify(work)
250 250 while work:
251 251 r, i1, copies1 = heapq.heappop(work)
252 252 if work and work[0][0] == r:
253 253 # We are tracing copies from both parents
254 254 r, i2, copies2 = heapq.heappop(work)
255 255 copies = {}
256 256 ctx = repo[r]
257 257 p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
258 258 allcopies = set(copies1) | set(copies2)
259 259 # TODO: perhaps this filtering should be done as long as ctx
260 260 # is merge, whether or not we're tracing from both parent.
261 261 for dst in allcopies:
262 262 if not match(dst):
263 263 continue
264 264 if dst not in copies2:
265 265 # Copied on p1 side: mark as copy from p1 side if it didn't
266 266 # already exist on p2 side
267 267 if dst not in p2man:
268 268 copies[dst] = copies1[dst]
269 269 elif dst not in copies1:
270 270 # Copied on p2 side: mark as copy from p2 side if it didn't
271 271 # already exist on p1 side
272 272 if dst not in p1man:
273 273 copies[dst] = copies2[dst]
274 274 else:
275 275 # Copied on both sides: mark as copy from p1 side
276 276 copies[dst] = copies1[dst]
277 277 else:
278 278 copies = copies1
279 279 if r == b.rev():
280 280 return copies
281 281 for c in children[r]:
282 282 childctx = repo[c]
283 283 if r == childctx.p1().rev():
284 284 parent = 1
285 285 childcopies = childctx.p1copies()
286 286 else:
287 287 assert r == childctx.p2().rev()
288 288 parent = 2
289 289 childcopies = childctx.p2copies()
290 290 if not match.always():
291 291 childcopies = {dst: src for dst, src in childcopies.items()
292 292 if match(dst)}
293 293 childcopies = _chain(a, childctx, copies, childcopies)
294 294 heapq.heappush(work, (c, parent, childcopies))
295 295 assert False
296 296
297 297 def _forwardcopies(a, b, match=None):
298 298 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
299 299
300 300 match = a.repo().narrowmatch(match)
301 301 # check for working copy
302 302 if b.rev() is None:
303 303 if a == b.p1():
304 304 # short-circuit to avoid issues with merge states
305 305 return _dirstatecopies(b._repo, match)
306 306
307 307 cm = _committedforwardcopies(a, b.p1(), match)
308 308 # combine copies from dirstate if necessary
309 309 return _chain(a, b, cm, _dirstatecopies(b._repo, match))
310 310 return _committedforwardcopies(a, b, match)
311 311
312 312 def _backwardrenames(a, b, match):
313 313 if a._repo.ui.config('experimental', 'copytrace') == 'off':
314 314 return {}
315 315
316 316 # Even though we're not taking copies into account, 1:n rename situations
317 317 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
318 318 # arbitrarily pick one of the renames.
319 319 # We don't want to pass in "match" here, since that would filter
320 320 # the destination by it. Since we're reversing the copies, we want
321 321 # to filter the source instead.
322 322 f = _forwardcopies(b, a)
323 323 r = {}
324 324 for k, v in sorted(f.iteritems()):
325 325 if match and not match(v):
326 326 continue
327 327 # remove copies
328 328 if v in a:
329 329 continue
330 330 r[v] = k
331 331 return r
332 332
333 333 def pathcopies(x, y, match=None):
334 334 """find {dst@y: src@x} copy mapping for directed compare"""
335 335 repo = x._repo
336 336 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
337 337 if debug:
338 338 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
339 339 % (x, y))
340 340 if x == y or not x or not y:
341 341 return {}
342 342 a = y.ancestor(x)
343 343 if a == x:
344 344 if debug:
345 345 repo.ui.debug('debug.copies: search mode: forward\n')
346 346 return _forwardcopies(x, y, match=match)
347 347 if a == y:
348 348 if debug:
349 349 repo.ui.debug('debug.copies: search mode: backward\n')
350 350 return _backwardrenames(x, y, match=match)
351 351 if debug:
352 352 repo.ui.debug('debug.copies: search mode: combined\n')
353 353 return _chain(x, y, _backwardrenames(x, a, match=match),
354 354 _forwardcopies(a, y, match=match))
355 355
356 356 def mergecopies(repo, c1, c2, base):
357 357 """
358 358 Finds moves and copies between context c1 and c2 that are relevant for
359 359 merging. 'base' will be used as the merge base.
360 360
361 361 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
362 362 files that were moved/ copied in one merge parent and modified in another.
363 363 For example:
364 364
365 365 o ---> 4 another commit
366 366 |
367 367 | o ---> 3 commit that modifies a.txt
368 368 | /
369 369 o / ---> 2 commit that moves a.txt to b.txt
370 370 |/
371 371 o ---> 1 merge base
372 372
373 373 If we try to rebase revision 3 on revision 4, since there is no a.txt in
374 374 revision 4, and if user have copytrace disabled, we prints the following
375 375 message:
376 376
377 377 ```other changed <file> which local deleted```
378 378
379 379 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
380 380 "dirmove".
381 381
382 382 "copy" is a mapping from destination name -> source name,
383 383 where source is in c1 and destination is in c2 or vice-versa.
384 384
385 385 "movewithdir" is a mapping from source name -> destination name,
386 386 where the file at source present in one context but not the other
387 387 needs to be moved to destination by the merge process, because the
388 388 other context moved the directory it is in.
389 389
390 390 "diverge" is a mapping of source name -> list of destination names
391 391 for divergent renames.
392 392
393 393 "renamedelete" is a mapping of source name -> list of destination
394 394 names for files deleted in c1 that were renamed in c2 or vice-versa.
395 395
396 396 "dirmove" is a mapping of detected source dir -> destination dir renames.
397 397 This is needed for handling changes to new files previously grafted into
398 398 renamed directories.
399 399
400 400 This function calls different copytracing algorithms based on config.
401 401 """
402 402 # avoid silly behavior for update from empty dir
403 403 if not c1 or not c2 or c1 == c2:
404 404 return {}, {}, {}, {}, {}
405 405
406 406 narrowmatch = c1.repo().narrowmatch()
407 407
408 408 # avoid silly behavior for parent -> working dir
409 409 if c2.node() is None and c1.node() == repo.dirstate.p1():
410 410 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
411 411
412 412 copytracing = repo.ui.config('experimental', 'copytrace')
413 413 if stringutil.parsebool(copytracing) is False:
414 414 # stringutil.parsebool() returns None when it is unable to parse the
415 415 # value, so we should rely on making sure copytracing is on such cases
416 416 return {}, {}, {}, {}, {}
417 417
418 if usechangesetcentricalgo(repo):
419 # The heuristics don't make sense when we need changeset-centric algos
420 return _fullcopytracing(repo, c1, c2, base)
421
418 422 # Copy trace disabling is explicitly below the node == p1 logic above
419 423 # because the logic above is required for a simple copy to be kept across a
420 424 # rebase.
421 425 if copytracing == 'heuristics':
422 426 # Do full copytracing if only non-public revisions are involved as
423 427 # that will be fast enough and will also cover the copies which could
424 428 # be missed by heuristics
425 429 if _isfullcopytraceable(repo, c1, base):
426 430 return _fullcopytracing(repo, c1, c2, base)
427 431 return _heuristicscopytracing(repo, c1, c2, base)
428 432 else:
429 433 return _fullcopytracing(repo, c1, c2, base)
430 434
431 435 def _isfullcopytraceable(repo, c1, base):
432 436 """ Checks that if base, source and destination are all no-public branches,
433 437 if yes let's use the full copytrace algorithm for increased capabilities
434 438 since it will be fast enough.
435 439
436 440 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
437 441 number of changesets from c1 to base such that if number of changesets are
438 442 more than the limit, full copytracing algorithm won't be used.
439 443 """
440 444 if c1.rev() is None:
441 445 c1 = c1.p1()
442 446 if c1.mutable() and base.mutable():
443 447 sourcecommitlimit = repo.ui.configint('experimental',
444 448 'copytrace.sourcecommitlimit')
445 449 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
446 450 return commits < sourcecommitlimit
447 451 return False
448 452
449 453 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
450 454 copy, renamedelete):
451 455 if src not in m2:
452 456 # deleted on side 2
453 457 if src not in m1:
454 458 # renamed on side 1, deleted on side 2
455 459 renamedelete[src] = dsts1
456 460 elif m2[src] != mb[src]:
457 461 if not _related(c2[src], base[src]):
458 462 return
459 463 # modified on side 2
460 464 for dst in dsts1:
461 465 if dst not in m2:
462 466 # dst not added on side 2 (handle as regular
463 467 # "both created" case in manifestmerge otherwise)
464 468 copy[dst] = src
465 469
466 470 def _fullcopytracing(repo, c1, c2, base):
467 471 """ The full copytracing algorithm which finds all the new files that were
468 472 added from merge base up to the top commit and for each file it checks if
469 473 this file was copied from another file.
470 474
471 475 This is pretty slow when a lot of changesets are involved but will track all
472 476 the copies.
473 477 """
474 478 m1 = c1.manifest()
475 479 m2 = c2.manifest()
476 480 mb = base.manifest()
477 481
478 482 copies1 = pathcopies(base, c1)
479 483 copies2 = pathcopies(base, c2)
480 484
481 485 inversecopies1 = {}
482 486 inversecopies2 = {}
483 487 for dst, src in copies1.items():
484 488 inversecopies1.setdefault(src, []).append(dst)
485 489 for dst, src in copies2.items():
486 490 inversecopies2.setdefault(src, []).append(dst)
487 491
488 492 copy = {}
489 493 diverge = {}
490 494 renamedelete = {}
491 495 allsources = set(inversecopies1) | set(inversecopies2)
492 496 for src in allsources:
493 497 dsts1 = inversecopies1.get(src)
494 498 dsts2 = inversecopies2.get(src)
495 499 if dsts1 and dsts2:
496 500 # copied/renamed on both sides
497 501 if src not in m1 and src not in m2:
498 502 # renamed on both sides
499 503 dsts1 = set(dsts1)
500 504 dsts2 = set(dsts2)
501 505 # If there's some overlap in the rename destinations, we
502 506 # consider it not divergent. For example, if side 1 copies 'a'
503 507 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
504 508 # and 'd' and deletes 'a'.
505 509 if dsts1 & dsts2:
506 510 for dst in (dsts1 & dsts2):
507 511 copy[dst] = src
508 512 else:
509 513 diverge[src] = sorted(dsts1 | dsts2)
510 514 elif src in m1 and src in m2:
511 515 # copied on both sides
512 516 dsts1 = set(dsts1)
513 517 dsts2 = set(dsts2)
514 518 for dst in (dsts1 & dsts2):
515 519 copy[dst] = src
516 520 # TODO: Handle cases where it was renamed on one side and copied
517 521 # on the other side
518 522 elif dsts1:
519 523 # copied/renamed only on side 1
520 524 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
521 525 copy, renamedelete)
522 526 elif dsts2:
523 527 # copied/renamed only on side 2
524 528 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
525 529 copy, renamedelete)
526 530
527 531 renamedeleteset = set()
528 532 divergeset = set()
529 533 for dsts in diverge.values():
530 534 divergeset.update(dsts)
531 535 for dsts in renamedelete.values():
532 536 renamedeleteset.update(dsts)
533 537
534 538 # find interesting file sets from manifests
535 539 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
536 540 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
537 541 u1 = sorted(addedinm1 - addedinm2)
538 542 u2 = sorted(addedinm2 - addedinm1)
539 543
540 544 header = " unmatched files in %s"
541 545 if u1:
542 546 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
543 547 if u2:
544 548 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
545 549
546 550 fullcopy = copies1.copy()
547 551 fullcopy.update(copies2)
548 552 if not fullcopy:
549 553 return copy, {}, diverge, renamedelete, {}
550 554
551 555 if repo.ui.debugflag:
552 556 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
553 557 "% = renamed and deleted):\n")
554 558 for f in sorted(fullcopy):
555 559 note = ""
556 560 if f in copy:
557 561 note += "*"
558 562 if f in divergeset:
559 563 note += "!"
560 564 if f in renamedeleteset:
561 565 note += "%"
562 566 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
563 567 note))
564 568 del divergeset
565 569
566 570 repo.ui.debug(" checking for directory renames\n")
567 571
568 572 # generate a directory move map
569 573 d1, d2 = c1.dirs(), c2.dirs()
570 574 # Hack for adding '', which is not otherwise added, to d1 and d2
571 575 d1.addpath('/')
572 576 d2.addpath('/')
573 577 invalid = set()
574 578 dirmove = {}
575 579
576 580 # examine each file copy for a potential directory move, which is
577 581 # when all the files in a directory are moved to a new directory
578 582 for dst, src in fullcopy.iteritems():
579 583 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
580 584 if dsrc in invalid:
581 585 # already seen to be uninteresting
582 586 continue
583 587 elif dsrc in d1 and ddst in d1:
584 588 # directory wasn't entirely moved locally
585 589 invalid.add(dsrc)
586 590 elif dsrc in d2 and ddst in d2:
587 591 # directory wasn't entirely moved remotely
588 592 invalid.add(dsrc)
589 593 elif dsrc in dirmove and dirmove[dsrc] != ddst:
590 594 # files from the same directory moved to two different places
591 595 invalid.add(dsrc)
592 596 else:
593 597 # looks good so far
594 598 dirmove[dsrc] = ddst
595 599
596 600 for i in invalid:
597 601 if i in dirmove:
598 602 del dirmove[i]
599 603 del d1, d2, invalid
600 604
601 605 if not dirmove:
602 606 return copy, {}, diverge, renamedelete, {}
603 607
604 608 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
605 609
606 610 for d in dirmove:
607 611 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
608 612 (d, dirmove[d]))
609 613
610 614 movewithdir = {}
611 615 # check unaccounted nonoverlapping files against directory moves
612 616 for f in u1 + u2:
613 617 if f not in fullcopy:
614 618 for d in dirmove:
615 619 if f.startswith(d):
616 620 # new file added in a directory that was moved, move it
617 621 df = dirmove[d] + f[len(d):]
618 622 if df not in copy:
619 623 movewithdir[f] = df
620 624 repo.ui.debug((" pending file src: '%s' -> "
621 625 "dst: '%s'\n") % (f, df))
622 626 break
623 627
624 628 return copy, movewithdir, diverge, renamedelete, dirmove
625 629
626 630 def _heuristicscopytracing(repo, c1, c2, base):
627 631 """ Fast copytracing using filename heuristics
628 632
629 633 Assumes that moves or renames are of following two types:
630 634
631 635 1) Inside a directory only (same directory name but different filenames)
632 636 2) Move from one directory to another
633 637 (same filenames but different directory names)
634 638
635 639 Works only when there are no merge commits in the "source branch".
636 640 Source branch is commits from base up to c2 not including base.
637 641
638 642 If merge is involved it fallbacks to _fullcopytracing().
639 643
640 644 Can be used by setting the following config:
641 645
642 646 [experimental]
643 647 copytrace = heuristics
644 648
645 649 In some cases the copy/move candidates found by heuristics can be very large
646 650 in number and that will make the algorithm slow. The number of possible
647 651 candidates to check can be limited by using the config
648 652 `experimental.copytrace.movecandidateslimit` which defaults to 100.
649 653 """
650 654
651 655 if c1.rev() is None:
652 656 c1 = c1.p1()
653 657 if c2.rev() is None:
654 658 c2 = c2.p1()
655 659
656 660 copies = {}
657 661
658 662 changedfiles = set()
659 663 m1 = c1.manifest()
660 664 if not repo.revs('%d::%d', base.rev(), c2.rev()):
661 665 # If base is not in c2 branch, we switch to fullcopytracing
662 666 repo.ui.debug("switching to full copytracing as base is not "
663 667 "an ancestor of c2\n")
664 668 return _fullcopytracing(repo, c1, c2, base)
665 669
666 670 ctx = c2
667 671 while ctx != base:
668 672 if len(ctx.parents()) == 2:
669 673 # To keep things simple let's not handle merges
670 674 repo.ui.debug("switching to full copytracing because of merges\n")
671 675 return _fullcopytracing(repo, c1, c2, base)
672 676 changedfiles.update(ctx.files())
673 677 ctx = ctx.p1()
674 678
675 679 cp = _forwardcopies(base, c2)
676 680 for dst, src in cp.iteritems():
677 681 if src in m1:
678 682 copies[dst] = src
679 683
680 684 # file is missing if it isn't present in the destination, but is present in
681 685 # the base and present in the source.
682 686 # Presence in the base is important to exclude added files, presence in the
683 687 # source is important to exclude removed files.
684 688 filt = lambda f: f not in m1 and f in base and f in c2
685 689 missingfiles = [f for f in changedfiles if filt(f)]
686 690
687 691 if missingfiles:
688 692 basenametofilename = collections.defaultdict(list)
689 693 dirnametofilename = collections.defaultdict(list)
690 694
691 695 for f in m1.filesnotin(base.manifest()):
692 696 basename = os.path.basename(f)
693 697 dirname = os.path.dirname(f)
694 698 basenametofilename[basename].append(f)
695 699 dirnametofilename[dirname].append(f)
696 700
697 701 for f in missingfiles:
698 702 basename = os.path.basename(f)
699 703 dirname = os.path.dirname(f)
700 704 samebasename = basenametofilename[basename]
701 705 samedirname = dirnametofilename[dirname]
702 706 movecandidates = samebasename + samedirname
703 707 # f is guaranteed to be present in c2, that's why
704 708 # c2.filectx(f) won't fail
705 709 f2 = c2.filectx(f)
706 710 # we can have a lot of candidates which can slow down the heuristics
707 711 # config value to limit the number of candidates moves to check
708 712 maxcandidates = repo.ui.configint('experimental',
709 713 'copytrace.movecandidateslimit')
710 714
711 715 if len(movecandidates) > maxcandidates:
712 716 repo.ui.status(_("skipping copytracing for '%s', more "
713 717 "candidates than the limit: %d\n")
714 718 % (f, len(movecandidates)))
715 719 continue
716 720
717 721 for candidate in movecandidates:
718 722 f1 = c1.filectx(candidate)
719 723 if _related(f1, f2):
720 724 # if there are a few related copies then we'll merge
721 725 # changes into all of them. This matches the behaviour
722 726 # of upstream copytracing
723 727 copies[candidate] = f
724 728
725 729 return copies, {}, {}, {}, {}
726 730
727 731 def _related(f1, f2):
728 732 """return True if f1 and f2 filectx have a common ancestor
729 733
730 734 Walk back to common ancestor to see if the two files originate
731 735 from the same file. Since workingfilectx's rev() is None it messes
732 736 up the integer comparison logic, hence the pre-step check for
733 737 None (f1 and f2 can only be workingfilectx's initially).
734 738 """
735 739
736 740 if f1 == f2:
737 741 return True # a match
738 742
739 743 g1, g2 = f1.ancestors(), f2.ancestors()
740 744 try:
741 745 f1r, f2r = f1.linkrev(), f2.linkrev()
742 746
743 747 if f1r is None:
744 748 f1 = next(g1)
745 749 if f2r is None:
746 750 f2 = next(g2)
747 751
748 752 while True:
749 753 f1r, f2r = f1.linkrev(), f2.linkrev()
750 754 if f1r > f2r:
751 755 f1 = next(g1)
752 756 elif f2r > f1r:
753 757 f2 = next(g2)
754 758 else: # f1 and f2 point to files in the same linkrev
755 759 return f1 == f2 # true if they point to the same file
756 760 except StopIteration:
757 761 return False
758 762
759 763 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
760 764 """reproduce copies from fromrev to rev in the dirstate
761 765
762 766 If skiprev is specified, it's a revision that should be used to
763 767 filter copy records. Any copies that occur between fromrev and
764 768 skiprev will not be duplicated, even if they appear in the set of
765 769 copies between fromrev and rev.
766 770 """
767 771 exclude = {}
768 772 ctraceconfig = repo.ui.config('experimental', 'copytrace')
769 773 bctrace = stringutil.parsebool(ctraceconfig)
770 774 if (skiprev is not None and
771 775 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
772 776 # copytrace='off' skips this line, but not the entire function because
773 777 # the line below is O(size of the repo) during a rebase, while the rest
774 778 # of the function is much faster (and is required for carrying copy
775 779 # metadata across the rebase anyway).
776 780 exclude = pathcopies(repo[fromrev], repo[skiprev])
777 781 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
778 782 # copies.pathcopies returns backward renames, so dst might not
779 783 # actually be in the dirstate
780 784 if dst in exclude:
781 785 continue
782 786 wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now