copies: make "limit" argument to _tracefile() mandatory...
Martin von Zweigbergk
r42427:c7422691 default
@@ -1,805 +1,805 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 util,
21 21 )
22 22 from .utils import (
23 23 stringutil,
24 24 )
25 25
26 26 def _findlimit(repo, ctxa, ctxb):
27 27 """
28 28 Find the last revision that needs to be checked to ensure that a full
29 29 transitive closure for file copies can be properly calculated.
30 30 Generally, this means finding the earliest revision number that's an
31 31 ancestor of a or b but not both, except when a or b is a direct descendant
32 32 of the other, in which case we can return the minimum revnum of a and b.
33 33 """
34 34
35 35 # basic idea:
36 36 # - mark a and b with different sides
37 37 # - if a parent's children are all on the same side, the parent is
38 38 # on that side, otherwise it is on no side
39 39 # - walk the graph in topological order with the help of a heap;
40 40 # - add unseen parents to side map
41 41 # - clear side of any parent that has children on different sides
42 42 # - track number of interesting revs that might still be on a side
43 43 # - track the lowest interesting rev seen
44 44 # - quit when interesting revs is zero
45 45
46 46 cl = repo.changelog
47 47 wdirparents = None
48 48 a = ctxa.rev()
49 49 b = ctxb.rev()
50 50 if a is None:
51 51 wdirparents = (ctxa.p1(), ctxa.p2())
52 52 a = node.wdirrev
53 53 if b is None:
54 54 assert not wdirparents
55 55 wdirparents = (ctxb.p1(), ctxb.p2())
56 56 b = node.wdirrev
57 57
58 58 side = {a: -1, b: 1}
59 59 visit = [-a, -b]
60 60 heapq.heapify(visit)
61 61 interesting = len(visit)
62 62 limit = node.wdirrev
63 63
64 64 while interesting:
65 65 r = -heapq.heappop(visit)
66 66 if r == node.wdirrev:
67 67 parents = [pctx.rev() for pctx in wdirparents]
68 68 else:
69 69 parents = cl.parentrevs(r)
70 70 if parents[1] == node.nullrev:
71 71 parents = parents[:1]
72 72 for p in parents:
73 73 if p not in side:
74 74 # first time we see p; add it to visit
75 75 side[p] = side[r]
76 76 if side[p]:
77 77 interesting += 1
78 78 heapq.heappush(visit, -p)
79 79 elif side[p] and side[p] != side[r]:
80 80 # p was interesting but now we know better
81 81 side[p] = 0
82 82 interesting -= 1
83 83 if side[r]:
84 84 limit = r # lowest rev visited
85 85 interesting -= 1
86 86
87 87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 88 # 1/ File 'a0' committed
89 89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 90 # 3/ Move back to first commit
91 91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 93 #
94 94 # During the amend in step five, we will be in this state:
95 95 #
96 96 # @ 3 temporary amend commit for a1-amend
97 97 # |
98 98 # o 2 a1-amend
99 99 # |
100 100 # | o 1 a1
101 101 # |/
102 102 # o 0 a0
103 103 #
104 104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 105 # yet the filelog has the copy information in rev 1 and we will not look
106 106 # back far enough unless we also look at a and b as candidates.
107 107 # This only occurs when a is a descendant of b or vice versa.
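# (Illustrative note, not part of this change) For the graph above: a == 3,
# b == 0, and the side walk alone yields limit == 2, so min(limit, a, b) == 0,
# which reaches back far enough to see the copy information stored in rev 1.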
108 108 return min(limit, a, b)
109 109
110 110 def _chain(src, dst, a, b):
111 111 """chain two sets of copies 'a' and 'b'"""
112 112
113 113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
114 114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
115 115 # following table (not including trivial cases). For example, case 2 is
116 116 # where a file existed in 'src' and remained under that name in 'mid' and
117 117 # then was renamed between 'mid' and 'dst'.
118 118 #
119 119 # case src mid dst result
120 120 # 1 x y - -
121 121 # 2 x y y x->y
122 122 # 3 x y x -
123 123 # 4 x y z x->z
124 124 # 5 - x y -
125 125 # 6 x x y x->y
126 126
127 127 # Initialize result ('t') from 'a'. This catches cases 1 & 2. We'll remove
128 128 # case 1 later. We'll also catch cases 3 & 4 here. Case 4 will be
129 129 # overwritten later, and case 3 will be removed later.
130 130 t = a.copy()
131 131 for k, v in b.iteritems():
132 132 if v in t:
133 133 # found a chain, i.e. cases 3 & 4.
134 134 if t[v] != k:
135 135 # file wasn't renamed back to itself (i.e. case 4, not 3)
136 136 t[k] = t[v]
137 137 else:
138 138 # Renamed only in 'b', i.e. cases 5 & 6. We'll remove case 5 later.
139 139 t[k] = v
140 140
141 141 for k, v in list(t.items()):
142 142 # remove copies from files that didn't exist, i.e. case 5
143 143 if v not in src:
144 144 del t[k]
145 145 # remove criss-crossed copies, i.e. case 3
146 146 elif k in src and v in dst:
147 147 del t[k]
148 148 # remove copies to files that were then removed, i.e. case 1
149 149 # and file 'y' in cases 3 & 4 (in case of rename)
150 150 elif k not in dst:
151 151 del t[k]
152 152
153 153 return t
154 154
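# Illustrative sketch (not part of this change): chaining the copy dicts from
# the case table above. Plain sets stand in for the 'src' and 'dst' contexts,
# since _chain() only needs membership tests from them; filenames are made up.
#
#   >>> src, dst = {'x'}, {'z'}
#   >>> a = {'y': 'x'}            # src -> mid: 'x' renamed to 'y'
#   >>> b = {'z': 'y'}            # mid -> dst: 'y' renamed to 'z'
#   >>> _chain(src, dst, a, b)    # case 4: x -> z
#   {'z': 'x'}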
155 def _tracefile(fctx, am, limit=node.nullrev):
155 def _tracefile(fctx, am, limit):
156 156 """return file context that is the ancestor of fctx present in ancestor
157 157 manifest am, stopping after the first ancestor lower than limit"""
158 158
159 159 for f in fctx.ancestors():
160 160 if am.get(f.path(), None) == f.filenode():
161 161 return f
162 if limit >= 0 and not f.isintroducedafter(limit):
162 if not f.isintroducedafter(limit):
163 163 return None
164 164
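# Typical call pattern after this change (sketch, mirroring
# _committedforwardcopies() below): callers now compute and pass a limit
# explicitly instead of relying on the old default of node.nullrev.
#
#   limit = _findlimit(repo, a, b)       # lowest revision worth scanning
#   am = a.manifest()
#   ofctx = _tracefile(fctx, am, limit)  # 'limit' is now mandatory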
165 165 def _dirstatecopies(repo, match=None):
166 166 ds = repo.dirstate
167 167 c = ds.copies().copy()
168 168 for k in list(c):
169 169 if ds[k] not in 'anm' or (match and not match(k)):
170 170 del c[k]
171 171 return c
172 172
173 173 def _computeforwardmissing(a, b, match=None):
174 174 """Computes which files are in b but not a.
175 175 This is its own function so extensions can easily wrap this call to see what
176 176 files _forwardcopies is about to process.
177 177 """
178 178 ma = a.manifest()
179 179 mb = b.manifest()
180 180 return mb.filesnotin(ma, match=match)
181 181
182 182 def usechangesetcentricalgo(repo):
183 183 """Checks if we should use changeset-centric copy algorithms"""
184 184 return (repo.ui.config('experimental', 'copies.read-from') in
185 185 ('changeset-only', 'compatibility'))
186 186
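# Example configuration (sketch) that makes usechangesetcentricalgo() return
# True, using the values checked above:
#
#   [experimental]
#   copies.read-from = compatibility    # or: changeset-only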
187 187 def _committedforwardcopies(a, b, match):
188 188 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
189 189 # files might have to be traced back to the fctx parent of the last
190 190 # one-side-only changeset, but not further back than that
191 191 repo = a._repo
192 192
193 193 if usechangesetcentricalgo(repo):
194 194 return _changesetforwardcopies(a, b, match)
195 195
196 196 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
197 197 dbg = repo.ui.debug
198 198 if debug:
199 199 dbg('debug.copies: looking into rename from %s to %s\n'
200 200 % (a, b))
201 201 limit = _findlimit(repo, a, b)
202 202 if debug:
203 203 dbg('debug.copies: search limit: %d\n' % limit)
204 204 am = a.manifest()
205 205
206 206 # find where new files came from
207 207 # we currently don't try to find where old files went, too expensive
208 208 # this means we can miss a case like 'hg rm b; hg cp a b'
209 209 cm = {}
210 210
211 211 # Computing the forward missing is quite expensive on large manifests, since
212 212 # it compares the entire manifests. We can optimize it in the common use
213 213 # case of computing what copies are in a commit versus its parent (like
214 214 # during a rebase or histedit). Note, we exclude merge commits from this
215 215 # optimization, since the ctx.files() for a merge commit is not correct for
216 216 # this comparison.
217 217 forwardmissingmatch = match
218 218 if b.p1() == a and b.p2().node() == node.nullid:
219 219 filesmatcher = matchmod.exact(b.files())
220 220 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
221 221 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
222 222
223 223 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
224 224
225 225 if debug:
226 226 dbg('debug.copies: missing files to search: %d\n' % len(missing))
227 227
228 228 for f in sorted(missing):
229 229 if debug:
230 230 dbg('debug.copies: tracing file: %s\n' % f)
231 231 fctx = b[f]
232 232 fctx._ancestrycontext = ancestrycontext
233 233
234 234 if debug:
235 235 start = util.timer()
236 236 ofctx = _tracefile(fctx, am, limit)
237 237 if ofctx:
238 238 if debug:
239 239 dbg('debug.copies: rename of: %s\n' % ofctx._path)
240 240 cm[f] = ofctx.path()
241 241 if debug:
242 242 dbg('debug.copies: time: %f seconds\n'
243 243 % (util.timer() - start))
244 244 return cm
245 245
246 246 def _changesetforwardcopies(a, b, match):
247 247 if a.rev() == node.nullrev:
248 248 return {}
249 249
250 250 repo = a.repo()
251 251 children = {}
252 252 cl = repo.changelog
253 253 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
254 254 for r in missingrevs:
255 255 for p in cl.parentrevs(r):
256 256 if p == node.nullrev:
257 257 continue
258 258 if p not in children:
259 259 children[p] = [r]
260 260 else:
261 261 children[p].append(r)
262 262
263 263 roots = set(children) - set(missingrevs)
264 264 # 'work' contains 3-tuples of a (revision number, parent number, copies).
265 265 # The parent number is only used for knowing which parent the copies dict
266 266 # came from.
267 267 work = [(r, 1, {}) for r in roots]
268 268 heapq.heapify(work)
269 269 while work:
270 270 r, i1, copies1 = heapq.heappop(work)
271 271 if work and work[0][0] == r:
272 272 # We are tracing copies from both parents
273 273 r, i2, copies2 = heapq.heappop(work)
274 274 copies = {}
275 275 ctx = repo[r]
276 276 p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
277 277 allcopies = set(copies1) | set(copies2)
278 278 # TODO: perhaps this filtering should be done as long as ctx
279 279 # is a merge, whether or not we're tracing from both parents.
280 280 for dst in allcopies:
281 281 if not match(dst):
282 282 continue
283 283 if dst not in copies2:
284 284 # Copied on p1 side: mark as copy from p1 side if it didn't
285 285 # already exist on p2 side
286 286 if dst not in p2man:
287 287 copies[dst] = copies1[dst]
288 288 elif dst not in copies1:
289 289 # Copied on p2 side: mark as copy from p2 side if it didn't
290 290 # already exist on p1 side
291 291 if dst not in p1man:
292 292 copies[dst] = copies2[dst]
293 293 else:
294 294 # Copied on both sides: mark as copy from p1 side
295 295 copies[dst] = copies1[dst]
296 296 else:
297 297 copies = copies1
298 298 if r == b.rev():
299 299 return copies
300 300 for c in children[r]:
301 301 childctx = repo[c]
302 302 if r == childctx.p1().rev():
303 303 parent = 1
304 304 childcopies = childctx.p1copies()
305 305 else:
306 306 assert r == childctx.p2().rev()
307 307 parent = 2
308 308 childcopies = childctx.p2copies()
309 309 if not match.always():
310 310 childcopies = {dst: src for dst, src in childcopies.items()
311 311 if match(dst)}
312 312 childcopies = _chain(a, childctx, copies, childcopies)
313 313 heapq.heappush(work, (c, parent, childcopies))
314 314 assert False
315 315
316 316 def _forwardcopies(a, b, match=None):
317 317 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
318 318
319 319 match = a.repo().narrowmatch(match)
320 320 # check for working copy
321 321 if b.rev() is None:
322 322 if a == b.p1():
323 323 # short-circuit to avoid issues with merge states
324 324 return _dirstatecopies(b._repo, match)
325 325
326 326 cm = _committedforwardcopies(a, b.p1(), match)
327 327 # combine copies from dirstate if necessary
328 328 return _chain(a, b, cm, _dirstatecopies(b._repo, match))
329 329 return _committedforwardcopies(a, b, match)
330 330
331 331 def _backwardrenames(a, b, match):
332 332 if a._repo.ui.config('experimental', 'copytrace') == 'off':
333 333 return {}
334 334
335 335 # Even though we're not taking copies into account, 1:n rename situations
336 336 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
337 337 # arbitrarily pick one of the renames.
338 338 # We don't want to pass in "match" here, since that would filter
339 339 # the destination by it. Since we're reversing the copies, we want
340 340 # to filter the source instead.
341 341 f = _forwardcopies(b, a)
342 342 r = {}
343 343 for k, v in sorted(f.iteritems()):
344 344 if match and not match(v):
345 345 continue
346 346 # remove copies
347 347 if v in a:
348 348 continue
349 349 r[v] = k
350 350 return r
351 351
352 352 def pathcopies(x, y, match=None):
353 353 """find {dst@y: src@x} copy mapping for directed compare"""
354 354 repo = x._repo
355 355 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
356 356 if debug:
357 357 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
358 358 % (x, y))
359 359 if x == y or not x or not y:
360 360 return {}
361 361 a = y.ancestor(x)
362 362 if a == x:
363 363 if debug:
364 364 repo.ui.debug('debug.copies: search mode: forward\n')
365 365 return _forwardcopies(x, y, match=match)
366 366 if a == y:
367 367 if debug:
368 368 repo.ui.debug('debug.copies: search mode: backward\n')
369 369 return _backwardrenames(x, y, match=match)
370 370 if debug:
371 371 repo.ui.debug('debug.copies: search mode: combined\n')
372 372 return _chain(x, y, _backwardrenames(x, a, match=match),
373 373 _forwardcopies(a, y, match=match))
374 374
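# Usage sketch (hypothetical revisions and filenames): find which files in
# revision 'tip' were copied or renamed, directly or transitively, from files
# in revision 42.
#
#   renames = pathcopies(repo[42], repo['tip'])
#   # e.g. {'new/name.py': 'old/name.py'}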
375 375 def mergecopies(repo, c1, c2, base):
376 376 """
377 377 Finds moves and copies between context c1 and c2 that are relevant for
378 378 merging. 'base' will be used as the merge base.
379 379
380 380 Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
381 381 files that were moved/copied in one merge parent and modified in another.
382 382 For example:
383 383
384 384 o ---> 4 another commit
385 385 |
386 386 | o ---> 3 commit that modifies a.txt
387 387 | /
388 388 o / ---> 2 commit that moves a.txt to b.txt
389 389 |/
390 390 o ---> 1 merge base
391 391
392 392 If we try to rebase revision 3 on revision 4, since there is no a.txt in
393 393 revision 4, and if the user has copytrace disabled, we print the following
394 394 message:
395 395
396 396 ```other changed <file> which local deleted```
397 397
398 398 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
399 399 "dirmove".
400 400
401 401 "copy" is a mapping from destination name -> source name,
402 402 where source is in c1 and destination is in c2 or vice-versa.
403 403
404 404 "movewithdir" is a mapping from source name -> destination name,
405 405 where the file at source present in one context but not the other
406 406 needs to be moved to destination by the merge process, because the
407 407 other context moved the directory it is in.
408 408
409 409 "diverge" is a mapping of source name -> list of destination names
410 410 for divergent renames.
411 411
412 412 "renamedelete" is a mapping of source name -> list of destination
413 413 names for files deleted in c1 that were renamed in c2 or vice-versa.
414 414
415 415 "dirmove" is a mapping of detected source dir -> destination dir renames.
416 416 This is needed for handling changes to new files previously grafted into
417 417 renamed directories.
418 418
419 419 This function calls different copytracing algorithms based on config.
420 420 """
421 421 # avoid silly behavior for update from empty dir
422 422 if not c1 or not c2 or c1 == c2:
423 423 return {}, {}, {}, {}, {}
424 424
425 425 narrowmatch = c1.repo().narrowmatch()
426 426
427 427 # avoid silly behavior for parent -> working dir
428 428 if c2.node() is None and c1.node() == repo.dirstate.p1():
429 429 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
430 430
431 431 copytracing = repo.ui.config('experimental', 'copytrace')
432 432 if stringutil.parsebool(copytracing) is False:
433 433 # stringutil.parsebool() returns None when it is unable to parse the
434 434 # value, so we keep copytracing on in such cases
435 435 return {}, {}, {}, {}, {}
436 436
437 437 if usechangesetcentricalgo(repo):
438 438 # The heuristics don't make sense when we need changeset-centric algos
439 439 return _fullcopytracing(repo, c1, c2, base)
440 440
441 441 # Copy trace disabling is explicitly below the node == p1 logic above
442 442 # because the logic above is required for a simple copy to be kept across a
443 443 # rebase.
444 444 if copytracing == 'heuristics':
445 445 # Do full copytracing if only non-public revisions are involved as
446 446 # that will be fast enough and will also cover the copies which could
447 447 # be missed by heuristics
448 448 if _isfullcopytraceable(repo, c1, base):
449 449 return _fullcopytracing(repo, c1, c2, base)
450 450 return _heuristicscopytracing(repo, c1, c2, base)
451 451 else:
452 452 return _fullcopytracing(repo, c1, c2, base)
453 453
454 454 def _isfullcopytraceable(repo, c1, base):
455 455 """ Checks that if base, source and destination are all no-public branches,
456 456 if yes let's use the full copytrace algorithm for increased capabilities
457 457 since it will be fast enough.
458 458
459 459 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
460 460 the number of changesets from c1 to base; if the number of changesets is
461 461 more than the limit, the full copytracing algorithm won't be used.
462 462 """
463 463 if c1.rev() is None:
464 464 c1 = c1.p1()
465 465 if c1.mutable() and base.mutable():
466 466 sourcecommitlimit = repo.ui.configint('experimental',
467 467 'copytrace.sourcecommitlimit')
468 468 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
469 469 return commits < sourcecommitlimit
470 470 return False
471 471
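# Example configuration (sketch, hypothetical limit value) for the check
# above: heuristics mode falls back to full copytracing only when fewer than
# this many changesets lie between base and c1.
#
#   [experimental]
#   copytrace = heuristics
#   copytrace.sourcecommitlimit = 50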
472 472 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
473 473 copy, renamedelete):
474 474 if src not in m2:
475 475 # deleted on side 2
476 476 if src not in m1:
477 477 # renamed on side 1, deleted on side 2
478 478 renamedelete[src] = dsts1
479 479 elif m2[src] != mb[src]:
480 480 if not _related(c2[src], base[src]):
481 481 return
482 482 # modified on side 2
483 483 for dst in dsts1:
484 484 if dst not in m2:
485 485 # dst not added on side 2 (handle as regular
486 486 # "both created" case in manifestmerge otherwise)
487 487 copy[dst] = src
488 488
489 489 def _fullcopytracing(repo, c1, c2, base):
490 490 """ The full copytracing algorithm which finds all the new files that were
491 491 added from the merge base up to the top commit, and for each such file
492 492 checks whether it was copied from another file.
493 493
494 494 This is pretty slow when a lot of changesets are involved but will track all
495 495 the copies.
496 496 """
497 497 m1 = c1.manifest()
498 498 m2 = c2.manifest()
499 499 mb = base.manifest()
500 500
501 501 copies1 = pathcopies(base, c1)
502 502 copies2 = pathcopies(base, c2)
503 503
504 504 inversecopies1 = {}
505 505 inversecopies2 = {}
506 506 for dst, src in copies1.items():
507 507 inversecopies1.setdefault(src, []).append(dst)
508 508 for dst, src in copies2.items():
509 509 inversecopies2.setdefault(src, []).append(dst)
510 510
511 511 copy = {}
512 512 diverge = {}
513 513 renamedelete = {}
514 514 allsources = set(inversecopies1) | set(inversecopies2)
515 515 for src in allsources:
516 516 dsts1 = inversecopies1.get(src)
517 517 dsts2 = inversecopies2.get(src)
518 518 if dsts1 and dsts2:
519 519 # copied/renamed on both sides
520 520 if src not in m1 and src not in m2:
521 521 # renamed on both sides
522 522 dsts1 = set(dsts1)
523 523 dsts2 = set(dsts2)
524 524 # If there's some overlap in the rename destinations, we
525 525 # consider it not divergent. For example, if side 1 copies 'a'
526 526 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
527 527 # and 'd' and deletes 'a'.
528 528 if dsts1 & dsts2:
529 529 for dst in (dsts1 & dsts2):
530 530 copy[dst] = src
531 531 else:
532 532 diverge[src] = sorted(dsts1 | dsts2)
533 533 elif src in m1 and src in m2:
534 534 # copied on both sides
535 535 dsts1 = set(dsts1)
536 536 dsts2 = set(dsts2)
537 537 for dst in (dsts1 & dsts2):
538 538 copy[dst] = src
539 539 # TODO: Handle cases where it was renamed on one side and copied
540 540 # on the other side
541 541 elif dsts1:
542 542 # copied/renamed only on side 1
543 543 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
544 544 copy, renamedelete)
545 545 elif dsts2:
546 546 # copied/renamed only on side 2
547 547 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
548 548 copy, renamedelete)
549 549
550 550 renamedeleteset = set()
551 551 divergeset = set()
552 552 for dsts in diverge.values():
553 553 divergeset.update(dsts)
554 554 for dsts in renamedelete.values():
555 555 renamedeleteset.update(dsts)
556 556
557 557 # find interesting file sets from manifests
558 558 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
559 559 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
560 560 u1 = sorted(addedinm1 - addedinm2)
561 561 u2 = sorted(addedinm2 - addedinm1)
562 562
563 563 header = " unmatched files in %s"
564 564 if u1:
565 565 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
566 566 if u2:
567 567 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
568 568
569 569 fullcopy = copies1.copy()
570 570 fullcopy.update(copies2)
571 571 if not fullcopy:
572 572 return copy, {}, diverge, renamedelete, {}
573 573
574 574 if repo.ui.debugflag:
575 575 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
576 576 "% = renamed and deleted):\n")
577 577 for f in sorted(fullcopy):
578 578 note = ""
579 579 if f in copy:
580 580 note += "*"
581 581 if f in divergeset:
582 582 note += "!"
583 583 if f in renamedeleteset:
584 584 note += "%"
585 585 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
586 586 note))
587 587 del divergeset
588 588
589 589 repo.ui.debug(" checking for directory renames\n")
590 590
591 591 # generate a directory move map
592 592 d1, d2 = c1.dirs(), c2.dirs()
593 593 # Hack for adding '', which is not otherwise added, to d1 and d2
594 594 d1.addpath('/')
595 595 d2.addpath('/')
596 596 invalid = set()
597 597 dirmove = {}
598 598
599 599 # examine each file copy for a potential directory move, which is
600 600 # when all the files in a directory are moved to a new directory
601 601 for dst, src in fullcopy.iteritems():
602 602 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
603 603 if dsrc in invalid:
604 604 # already seen to be uninteresting
605 605 continue
606 606 elif dsrc in d1 and ddst in d1:
607 607 # directory wasn't entirely moved locally
608 608 invalid.add(dsrc)
609 609 elif dsrc in d2 and ddst in d2:
610 610 # directory wasn't entirely moved remotely
611 611 invalid.add(dsrc)
612 612 elif dsrc in dirmove and dirmove[dsrc] != ddst:
613 613 # files from the same directory moved to two different places
614 614 invalid.add(dsrc)
615 615 else:
616 616 # looks good so far
617 617 dirmove[dsrc] = ddst
618 618
619 619 for i in invalid:
620 620 if i in dirmove:
621 621 del dirmove[i]
622 622 del d1, d2, invalid
623 623
624 624 if not dirmove:
625 625 return copy, {}, diverge, renamedelete, {}
626 626
627 627 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
628 628
629 629 for d in dirmove:
630 630 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
631 631 (d, dirmove[d]))
632 632
633 633 movewithdir = {}
634 634 # check unaccounted nonoverlapping files against directory moves
635 635 for f in u1 + u2:
636 636 if f not in fullcopy:
637 637 for d in dirmove:
638 638 if f.startswith(d):
639 639 # new file added in a directory that was moved, move it
640 640 df = dirmove[d] + f[len(d):]
641 641 if df not in copy:
642 642 movewithdir[f] = df
643 643 repo.ui.debug((" pending file src: '%s' -> "
644 644 "dst: '%s'\n") % (f, df))
645 645 break
646 646
647 647 return copy, movewithdir, diverge, renamedelete, dirmove
648 648
649 649 def _heuristicscopytracing(repo, c1, c2, base):
650 650 """ Fast copytracing using filename heuristics
651 651
652 652 Assumes that moves or renames are of following two types:
653 653
654 654 1) Inside a directory only (same directory name but different filenames)
655 655 2) Move from one directory to another
656 656 (same filenames but different directory names)
657 657
658 658 Works only when there are no merge commits in the "source branch".
659 659 The "source branch" is the set of commits from base up to c2, not including base.
660 660
661 661 If a merge is involved it falls back to _fullcopytracing().
662 662
663 663 Can be used by setting the following config:
664 664
665 665 [experimental]
666 666 copytrace = heuristics
667 667
668 668 In some cases the copy/move candidates found by heuristics can be very large
669 669 in number and that will make the algorithm slow. The number of possible
670 670 candidates to check can be limited by using the config
671 671 `experimental.copytrace.movecandidateslimit` which defaults to 100.
672 672 """
673 673
674 674 if c1.rev() is None:
675 675 c1 = c1.p1()
676 676 if c2.rev() is None:
677 677 c2 = c2.p1()
678 678
679 679 copies = {}
680 680
681 681 changedfiles = set()
682 682 m1 = c1.manifest()
683 683 if not repo.revs('%d::%d', base.rev(), c2.rev()):
684 684 # If base is not in c2 branch, we switch to fullcopytracing
685 685 repo.ui.debug("switching to full copytracing as base is not "
686 686 "an ancestor of c2\n")
687 687 return _fullcopytracing(repo, c1, c2, base)
688 688
689 689 ctx = c2
690 690 while ctx != base:
691 691 if len(ctx.parents()) == 2:
692 692 # To keep things simple let's not handle merges
693 693 repo.ui.debug("switching to full copytracing because of merges\n")
694 694 return _fullcopytracing(repo, c1, c2, base)
695 695 changedfiles.update(ctx.files())
696 696 ctx = ctx.p1()
697 697
698 698 cp = _forwardcopies(base, c2)
699 699 for dst, src in cp.iteritems():
700 700 if src in m1:
701 701 copies[dst] = src
702 702
703 703 # file is missing if it isn't present in the destination, but is present in
704 704 # the base and present in the source.
705 705 # Presence in the base is important to exclude added files, presence in the
706 706 # source is important to exclude removed files.
707 707 filt = lambda f: f not in m1 and f in base and f in c2
708 708 missingfiles = [f for f in changedfiles if filt(f)]
709 709
710 710 if missingfiles:
711 711 basenametofilename = collections.defaultdict(list)
712 712 dirnametofilename = collections.defaultdict(list)
713 713
714 714 for f in m1.filesnotin(base.manifest()):
715 715 basename = os.path.basename(f)
716 716 dirname = os.path.dirname(f)
717 717 basenametofilename[basename].append(f)
718 718 dirnametofilename[dirname].append(f)
719 719
720 720 for f in missingfiles:
721 721 basename = os.path.basename(f)
722 722 dirname = os.path.dirname(f)
723 723 samebasename = basenametofilename[basename]
724 724 samedirname = dirnametofilename[dirname]
725 725 movecandidates = samebasename + samedirname
726 726 # f is guaranteed to be present in c2, that's why
727 727 # c2.filectx(f) won't fail
728 728 f2 = c2.filectx(f)
729 729 # we can have a lot of candidates, which can slow down the heuristics;
730 730 # this config value limits the number of candidate moves to check
731 731 maxcandidates = repo.ui.configint('experimental',
732 732 'copytrace.movecandidateslimit')
733 733
734 734 if len(movecandidates) > maxcandidates:
735 735 repo.ui.status(_("skipping copytracing for '%s', more "
736 736 "candidates than the limit: %d\n")
737 737 % (f, len(movecandidates)))
738 738 continue
739 739
740 740 for candidate in movecandidates:
741 741 f1 = c1.filectx(candidate)
742 742 if _related(f1, f2):
743 743 # if there are a few related copies then we'll merge
744 744 # changes into all of them. This matches the behaviour
745 745 # of upstream copytracing
746 746 copies[candidate] = f
747 747
748 748 return copies, {}, {}, {}, {}
749 749
750 750 def _related(f1, f2):
751 751 """return True if f1 and f2 filectx have a common ancestor
752 752
753 753 Walk back to common ancestor to see if the two files originate
754 754 from the same file. Since workingfilectx's rev() is None it messes
755 755 up the integer comparison logic, hence the pre-step check for
756 756 None (f1 and f2 can only be workingfilectx's initially).
757 757 """
758 758
759 759 if f1 == f2:
760 760 return True # a match
761 761
762 762 g1, g2 = f1.ancestors(), f2.ancestors()
763 763 try:
764 764 f1r, f2r = f1.linkrev(), f2.linkrev()
765 765
766 766 if f1r is None:
767 767 f1 = next(g1)
768 768 if f2r is None:
769 769 f2 = next(g2)
770 770
771 771 while True:
772 772 f1r, f2r = f1.linkrev(), f2.linkrev()
773 773 if f1r > f2r:
774 774 f1 = next(g1)
775 775 elif f2r > f1r:
776 776 f2 = next(g2)
777 777 else: # f1 and f2 point to files in the same linkrev
778 778 return f1 == f2 # true if they point to the same file
779 779 except StopIteration:
780 780 return False
781 781
782 782 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
783 783 """reproduce copies from fromrev to rev in the dirstate
784 784
785 785 If skiprev is specified, it's a revision that should be used to
786 786 filter copy records. Any copies that occur between fromrev and
787 787 skiprev will not be duplicated, even if they appear in the set of
788 788 copies between fromrev and rev.
789 789 """
790 790 exclude = {}
791 791 ctraceconfig = repo.ui.config('experimental', 'copytrace')
792 792 bctrace = stringutil.parsebool(ctraceconfig)
793 793 if (skiprev is not None and
794 794 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
795 795 # copytrace='off' skips this line, but not the entire function because
796 796 # the line below is O(size of the repo) during a rebase, while the rest
797 797 # of the function is much faster (and is required for carrying copy
798 798 # metadata across the rebase anyway).
799 799 exclude = pathcopies(repo[fromrev], repo[skiprev])
800 800 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
801 801 # copies.pathcopies returns backward renames, so dst might not
802 802 # actually be in the dirstate
803 803 if dst in exclude:
804 804 continue
805 805 wctx[dst].markcopied(src)
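# Usage sketch (hypothetical revision numbers): reproduce in the working copy
# the copy records made between rev 3 and rev 5, e.g. while grafting or
# rebasing the changes of rev 5:
#
#   duplicatecopies(repo, repo[None], 5, 3)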