copies: return only path from _tracefile() since that's all caller needs...
Martin von Zweigbergk
r42751:4ebbd7c4 default
@@ -1,805 +1,806 @@
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 util,
21 21 )
22 22 from .utils import (
23 23 stringutil,
24 24 )
25 25
26 26 def _findlimit(repo, ctxa, ctxb):
27 27 """
28 28 Find the last revision that needs to be checked to ensure that a full
29 29 transitive closure for file copies can be properly calculated.
30 30 Generally, this means finding the earliest revision number that's an
31 31 ancestor of a or b but not both, except when a or b is a direct descendant
32 32 of the other, in which case we can return the minimum revnum of a and b.
33 33 """
34 34
35 35 # basic idea:
36 36 # - mark a and b with different sides
37 37 # - if a parent's children are all on the same side, the parent is
38 38 # on that side, otherwise it is on no side
39 39 # - walk the graph in topological order with the help of a heap;
40 40 # - add unseen parents to side map
41 41 # - clear side of any parent that has children on different sides
42 42 # - track number of interesting revs that might still be on a side
43 43 # - track the lowest interesting rev seen
44 44 # - quit when interesting revs is zero
45 45
46 46 cl = repo.changelog
47 47 wdirparents = None
48 48 a = ctxa.rev()
49 49 b = ctxb.rev()
50 50 if a is None:
51 51 wdirparents = (ctxa.p1(), ctxa.p2())
52 52 a = node.wdirrev
53 53 if b is None:
54 54 assert not wdirparents
55 55 wdirparents = (ctxb.p1(), ctxb.p2())
56 56 b = node.wdirrev
57 57
58 58 side = {a: -1, b: 1}
59 59 visit = [-a, -b]
60 60 heapq.heapify(visit)
61 61 interesting = len(visit)
62 62 limit = node.wdirrev
63 63
64 64 while interesting:
65 65 r = -heapq.heappop(visit)
66 66 if r == node.wdirrev:
67 67 parents = [pctx.rev() for pctx in wdirparents]
68 68 else:
69 69 parents = cl.parentrevs(r)
70 70 if parents[1] == node.nullrev:
71 71 parents = parents[:1]
72 72 for p in parents:
73 73 if p not in side:
74 74 # first time we see p; add it to visit
75 75 side[p] = side[r]
76 76 if side[p]:
77 77 interesting += 1
78 78 heapq.heappush(visit, -p)
79 79 elif side[p] and side[p] != side[r]:
80 80 # p was interesting but now we know better
81 81 side[p] = 0
82 82 interesting -= 1
83 83 if side[r]:
84 84 limit = r # lowest rev visited
85 85 interesting -= 1
86 86
87 87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 88 # 1/ File 'a0' committed
89 89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 90 # 3/ Move back to first commit
91 91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 93 #
94 94 # During the amend in step five, we will be in this state:
95 95 #
96 96 # @ 3 temporary amend commit for a1-amend
97 97 # |
98 98 # o 2 a1-amend
99 99 # |
100 100 # | o 1 a1
101 101 # |/
102 102 # o 0 a0
103 103 #
104 104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 105 # yet the filelog has the copy information in rev 1 and we will not look
106 106 # back far enough unless we also look at the a and b as candidates.
107 107 # This only occurs when a is a descendant of b or vice-versa.
108 108 return min(limit, a, b)
109 109
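To make the side-marking walk above concrete, here is a minimal standalone sketch that runs the same heap-based traversal over a toy DAG supplied as a {rev: (p1, p2)} dict; NULLREV, toy_findlimit and the example graph are invented for illustration and are not Mercurial API.

import heapq

NULLREV = -1

def toy_findlimit(parentrevs, a, b):
    # parentrevs: {rev: (p1, p2)} with NULLREV for missing parents (a toy
    # stand-in for changelog.parentrevs()); a and b are the revs to compare.
    side = {a: -1, b: 1}
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = max(parentrevs) + 1  # "infinity" for this toy graph

    while interesting:
        r = -heapq.heappop(visit)
        for p in parentrevs[r]:
            if p == NULLREV:
                continue
            if p not in side:
                # first time we see p; it inherits r's side
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p is reachable from both sides, so it is no longer interesting
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r  # lowest single-sided rev seen so far
            interesting -= 1
    return min(limit, a, b)

# The amend example from the comments above: 0 <- 1, 0 <- 2, 2 <- 3
parents = {0: (NULLREV, NULLREV), 1: (0, NULLREV), 2: (0, NULLREV), 3: (2, NULLREV)}
print(toy_findlimit(parents, 3, 0))  # 0, because a (rev 3) descends from b (rev 0)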
110 110 def _chainandfilter(src, dst, a, b):
111 111 """chain two sets of copies 'a' and 'b' and filter result"""
112 112
113 113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
114 114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
115 115 # following table (not including trivial cases). For example, case 2 is
116 116 # where a file existed in 'src' and remained under that name in 'mid' and
117 117 # then was renamed between 'mid' and 'dst'.
118 118 #
119 119 # case src mid dst result
120 120 # 1 x y - -
121 121 # 2 x y y x->y
122 122 # 3 x y x -
123 123 # 4 x y z x->z
124 124 # 5 - x y -
125 125 # 6 x x y x->y
126 126 #
127 127 # _chain() takes care of chaining the copies in 'a' and 'b', but it
128 128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
129 129 # between 5 and 6, so it includes all cases in its result.
130 130 # Cases 1, 3, and 5 are then removed by _filter().
131 131
132 132 t = _chain(a, b)
133 133 _filter(src, dst, t)
134 134 return t
135 135
136 136 def _filter(src, dst, t):
137 137 """filters out invalid copies after chaining"""
138 138 for k, v in list(t.items()):
139 139 # remove copies from files that didn't exist
140 140 if v not in src:
141 141 del t[k]
142 142 # remove criss-crossed copies
143 143 elif k in src and v in dst:
144 144 del t[k]
145 145 # remove copies to files that were then removed
146 146 elif k not in dst:
147 147 del t[k]
148 148
149 149 def _chain(a, b):
150 150 """chain two sets of copies 'a' and 'b'"""
151 151 t = a.copy()
152 152 for k, v in b.iteritems():
153 153 if v in t:
154 154 t[k] = t[v]
155 155 else:
156 156 t[k] = v
157 157 return t
158 158
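The case table above can be exercised in isolation; the helpers below mirror _chain() and _filter(), but use plain sets of filenames where the real code tests membership in changectx objects, so the names and inputs are illustrative only.

def chain(a, b):
    """Chain copies a (src -> mid) with copies b (mid -> dst), keyed dst -> src."""
    t = dict(a)
    for k, v in b.items():
        # if the intermediate name was itself copied, follow it back to src
        t[k] = t.get(v, v)
    return t

def filter_copies(srcfiles, dstfiles, t):
    """Drop chained entries that are not real src -> dst copies."""
    for k, v in list(t.items()):
        if v not in srcfiles:                  # source never existed (case 5)
            del t[k]
        elif k in srcfiles and v in dstfiles:  # criss-crossed copy (case 3)
            del t[k]
        elif k not in dstfiles:                # destination removed again (case 1)
            del t[k]

# case 6: 'x' kept its name between src and mid, then mid -> dst renamed it to 'y'
t = chain({}, {'y': 'x'})
filter_copies({'x'}, {'y'}, t)
print(t)  # {'y': 'x'}, i.e. the x->y result from the table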
159 159 def _tracefile(fctx, am, limit):
160 160 """return file context that is the ancestor of fctx present in ancestor
161 161 manifest am, stopping after the first ancestor lower than limit"""
162 162
163 163 for f in fctx.ancestors():
164 if am.get(f.path(), None) == f.filenode():
165 return f
164 path = f.path()
165 if am.get(path, None) == f.filenode():
166 return path
166 167 if not f.isintroducedafter(limit):
167 168 return None
168 169
169 170 def _dirstatecopies(repo, match=None):
170 171 ds = repo.dirstate
171 172 c = ds.copies().copy()
172 173 for k in list(c):
173 174 if ds[k] not in 'anm' or (match and not match(k)):
174 175 del c[k]
175 176 return c
176 177
177 178 def _computeforwardmissing(a, b, match=None):
178 179 """Computes which files are in b but not a.
179 180 This is its own function so extensions can easily wrap this call to see what
180 181 files _forwardcopies is about to process.
181 182 """
182 183 ma = a.manifest()
183 184 mb = b.manifest()
184 185 return mb.filesnotin(ma, match=match)
185 186
186 187 def usechangesetcentricalgo(repo):
187 188 """Checks if we should use changeset-centric copy algorithms"""
188 189 return (repo.ui.config('experimental', 'copies.read-from') in
189 190 ('changeset-only', 'compatibility'))
190 191
191 192 def _committedforwardcopies(a, b, match):
192 193 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
193 194 # files might have to be traced back to the fctx parent of the last
194 195 # one-side-only changeset, but not further back than that
195 196 repo = a._repo
196 197
197 198 if usechangesetcentricalgo(repo):
198 199 return _changesetforwardcopies(a, b, match)
199 200
200 201 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
201 202 dbg = repo.ui.debug
202 203 if debug:
203 204 dbg('debug.copies: looking into rename from %s to %s\n'
204 205 % (a, b))
205 206 limit = _findlimit(repo, a, b)
206 207 if debug:
207 208 dbg('debug.copies: search limit: %d\n' % limit)
208 209 am = a.manifest()
209 210
210 211 # find where new files came from
211 212 # we currently don't try to find where old files went, too expensive
212 213 # this means we can miss a case like 'hg rm b; hg cp a b'
213 214 cm = {}
214 215
215 216 # Computing the forward missing is quite expensive on large manifests, since
216 217 # it compares the entire manifests. We can optimize it in the common use
217 218 # case of computing what copies are in a commit versus its parent (like
218 219 # during a rebase or histedit). Note, we exclude merge commits from this
219 220 # optimization, since the ctx.files() for a merge commit is not correct for
220 221 # this comparison.
221 222 forwardmissingmatch = match
222 223 if b.p1() == a and b.p2().node() == node.nullid:
223 224 filesmatcher = matchmod.exact(b.files())
224 225 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
225 226 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
226 227
227 228 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
228 229
229 230 if debug:
230 231 dbg('debug.copies: missing files to search: %d\n' % len(missing))
231 232
232 233 for f in sorted(missing):
233 234 if debug:
234 235 dbg('debug.copies: tracing file: %s\n' % f)
235 236 fctx = b[f]
236 237 fctx._ancestrycontext = ancestrycontext
237 238
238 239 if debug:
239 240 start = util.timer()
240 ofctx = _tracefile(fctx, am, limit)
241 if ofctx:
241 opath = _tracefile(fctx, am, limit)
242 if opath:
242 243 if debug:
243 dbg('debug.copies: rename of: %s\n' % ofctx._path)
244 cm[f] = ofctx.path()
244 dbg('debug.copies: rename of: %s\n' % opath)
245 cm[f] = opath
245 246 if debug:
246 247 dbg('debug.copies: time: %f seconds\n'
247 248 % (util.timer() - start))
248 249 return cm
249 250
250 251 def _changesetforwardcopies(a, b, match):
251 252 if a.rev() == node.nullrev:
252 253 return {}
253 254
254 255 repo = a.repo()
255 256 children = {}
256 257 cl = repo.changelog
257 258 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
258 259 for r in missingrevs:
259 260 for p in cl.parentrevs(r):
260 261 if p == node.nullrev:
261 262 continue
262 263 if p not in children:
263 264 children[p] = [r]
264 265 else:
265 266 children[p].append(r)
266 267
267 268 roots = set(children) - set(missingrevs)
268 269 # 'work' contains 3-tuples of a (revision number, parent number, copies).
269 270 # The parent number is only used for knowing which parent the copies dict
270 271 # came from.
271 272 # NOTE: To reduce costly copying the 'copies' dicts, we reuse the same
272 273 # instance for *one* of the child nodes (the last one). Once an instance
273 274 # has been put on the queue, it is thus no longer safe to modify it.
274 275 # Conversely, it *is* safe to modify an instance popped off the queue.
275 276 work = [(r, 1, {}) for r in roots]
276 277 heapq.heapify(work)
277 278 alwaysmatch = match.always()
278 279 while work:
279 280 r, i1, copies = heapq.heappop(work)
280 281 if work and work[0][0] == r:
281 282 # We are tracing copies from both parents
282 283 r, i2, copies2 = heapq.heappop(work)
283 284 for dst, src in copies2.items():
284 285 # Unlike when copies are stored in the filelog, we consider
285 286 # it a copy even if the destination already existed on the
286 287 # other branch. It's simply too expensive to check if the
287 288 # file existed in the manifest.
288 289 if dst not in copies:
289 290 # If it was copied on the p1 side, leave it as copied from
290 291 # that side, even if it was also copied on the p2 side.
291 292 copies[dst] = copies2[dst]
292 293 if r == b.rev():
293 294 _filter(a, b, copies)
294 295 return copies
295 296 for i, c in enumerate(children[r]):
296 297 childctx = repo[c]
297 298 if r == childctx.p1().rev():
298 299 parent = 1
299 300 childcopies = childctx.p1copies()
300 301 else:
301 302 assert r == childctx.p2().rev()
302 303 parent = 2
303 304 childcopies = childctx.p2copies()
304 305 if not alwaysmatch:
305 306 childcopies = {dst: src for dst, src in childcopies.items()
306 307 if match(dst)}
307 308 # Copy the dict only if later iterations will also need it
308 309 if i != len(children[r]) - 1:
309 310 newcopies = copies.copy()
310 311 else:
311 312 newcopies = copies
312 313 if childcopies:
313 314 newcopies = _chain(newcopies, childcopies)
314 315 for f in childctx.filesremoved():
315 316 if f in newcopies:
316 317 del newcopies[f]
317 318 heapq.heappush(work, (c, parent, newcopies))
318 319 assert False
319 320
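When both parents of a merge changeset have been reached, the two queued copies dicts are combined with the p1 entry winning, exactly as in the loop above; the fragment below replays just that merge step on invented queue entries.

import heapq

# (rev, parent index, copies) entries, as described in the comments above;
# the revision numbers and copy maps are made up for this illustration.
work = [(2, 1, {'b': 'a'}), (2, 2, {'b': 'x', 'c': 'a'})]
heapq.heapify(work)

r, i1, copies = heapq.heappop(work)
if work and work[0][0] == r:
    # both parents reached rev r: fold the p2 copies in, keeping the p1
    # source when both sides copied to the same destination
    r, i2, copies2 = heapq.heappop(work)
    for dst, src in copies2.items():
        if dst not in copies:
            copies[dst] = src

print(copies)  # {'b': 'a', 'c': 'a'} - 'b' keeps its p1 source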
320 321 def _forwardcopies(a, b, match=None):
321 322 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
322 323
323 324 match = a.repo().narrowmatch(match)
324 325 # check for working copy
325 326 if b.rev() is None:
326 327 if a == b.p1():
327 328 # short-circuit to avoid issues with merge states
328 329 return _dirstatecopies(b._repo, match)
329 330
330 331 cm = _committedforwardcopies(a, b.p1(), match)
331 332 # combine copies from dirstate if necessary
332 333 return _chainandfilter(a, b, cm, _dirstatecopies(b._repo, match))
333 334 return _committedforwardcopies(a, b, match)
334 335
335 336 def _backwardrenames(a, b, match):
336 337 if a._repo.ui.config('experimental', 'copytrace') == 'off':
337 338 return {}
338 339
339 340 # Even though we're not taking copies into account, 1:n rename situations
340 341 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
341 342 # arbitrarily pick one of the renames.
342 343 # We don't want to pass in "match" here, since that would filter
343 344 # the destination by it. Since we're reversing the copies, we want
344 345 # to filter the source instead.
345 346 f = _forwardcopies(b, a)
346 347 r = {}
347 348 for k, v in sorted(f.iteritems()):
348 349 if match and not match(v):
349 350 continue
350 351 # remove copies
351 352 if v in a:
352 353 continue
353 354 r[v] = k
354 355 return r
355 356
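Reversing the forward map is what collapses 1:n rename situations into a single arbitrary (but, thanks to the sort, deterministic) pick; a toy illustration with invented filenames:

# forward copies from b back to a, keyed destination -> source,
# e.g. after 'hg cp a.txt b.txt; hg mv a.txt c.txt'
forward = {'b.txt': 'a.txt', 'c.txt': 'a.txt'}

backward = {}
for k, v in sorted(forward.items()):
    # later keys overwrite earlier ones, so the last name in sort order wins
    backward[v] = k

print(backward)  # {'a.txt': 'c.txt'}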
356 357 def pathcopies(x, y, match=None):
357 358 """find {dst@y: src@x} copy mapping for directed compare"""
358 359 repo = x._repo
359 360 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
360 361 if debug:
361 362 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
362 363 % (x, y))
363 364 if x == y or not x or not y:
364 365 return {}
365 366 a = y.ancestor(x)
366 367 if a == x:
367 368 if debug:
368 369 repo.ui.debug('debug.copies: search mode: forward\n')
369 370 return _forwardcopies(x, y, match=match)
370 371 if a == y:
371 372 if debug:
372 373 repo.ui.debug('debug.copies: search mode: backward\n')
373 374 return _backwardrenames(x, y, match=match)
374 375 if debug:
375 376 repo.ui.debug('debug.copies: search mode: combined\n')
376 377 return _chainandfilter(x, y, _backwardrenames(x, a, match=match),
377 378 _forwardcopies(a, y, match=match))
378 379
379 380 def mergecopies(repo, c1, c2, base):
380 381 """
381 382 Finds moves and copies between context c1 and c2 that are relevant for
382 383 merging. 'base' will be used as the merge base.
383 384
384 385 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
385 386 files that were moved/copied in one merge parent and modified in another.
386 387 For example:
387 388
388 389 o ---> 4 another commit
389 390 |
390 391 | o ---> 3 commit that modifies a.txt
391 392 | /
392 393 o / ---> 2 commit that moves a.txt to b.txt
393 394 |/
394 395 o ---> 1 merge base
395 396
396 397 If we try to rebase revision 3 on revision 4, since there is no a.txt in
397 398 revision 4, and if the user has copytrace disabled, we print the following
398 399 message:
399 400
400 401 ```other changed <file> which local deleted```
401 402
402 403 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
403 404 "dirmove".
404 405
405 406 "copy" is a mapping from destination name -> source name,
406 407 where source is in c1 and destination is in c2 or vice-versa.
407 408
408 409 "movewithdir" is a mapping from source name -> destination name,
409 410 where the file at source, present in one context but not the other,
410 411 needs to be moved to destination by the merge process, because the
411 412 other context moved the directory it is in.
412 413
413 414 "diverge" is a mapping of source name -> list of destination names
414 415 for divergent renames.
415 416
416 417 "renamedelete" is a mapping of source name -> list of destination
417 418 names for files deleted in c1 that were renamed in c2 or vice-versa.
418 419
419 420 "dirmove" is a mapping of detected source dir -> destination dir renames.
420 421 This is needed for handling changes to new files previously grafted into
421 422 renamed directories.
422 423
423 424 This function calls different copytracing algorithms based on config.
424 425 """
425 426 # avoid silly behavior for update from empty dir
426 427 if not c1 or not c2 or c1 == c2:
427 428 return {}, {}, {}, {}, {}
428 429
429 430 narrowmatch = c1.repo().narrowmatch()
430 431
431 432 # avoid silly behavior for parent -> working dir
432 433 if c2.node() is None and c1.node() == repo.dirstate.p1():
433 434 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
434 435
435 436 copytracing = repo.ui.config('experimental', 'copytrace')
436 437 if stringutil.parsebool(copytracing) is False:
437 438 # stringutil.parsebool() returns None when it is unable to parse the
438 439 # value, so we treat copytracing as on in such cases
439 440 return {}, {}, {}, {}, {}
440 441
441 442 if usechangesetcentricalgo(repo):
442 443 # The heuristics don't make sense when we need changeset-centric algos
443 444 return _fullcopytracing(repo, c1, c2, base)
444 445
445 446 # Copy trace disabling is explicitly below the node == p1 logic above
446 447 # because the logic above is required for a simple copy to be kept across a
447 448 # rebase.
448 449 if copytracing == 'heuristics':
449 450 # Do full copytracing if only non-public revisions are involved as
450 451 # that will be fast enough and will also cover the copies which could
451 452 # be missed by heuristics
452 453 if _isfullcopytraceable(repo, c1, base):
453 454 return _fullcopytracing(repo, c1, c2, base)
454 455 return _heuristicscopytracing(repo, c1, c2, base)
455 456 else:
456 457 return _fullcopytracing(repo, c1, c2, base)
457 458
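A hedged usage sketch of the five return values; repo, c1, c2 and base are assumed to be a repository object and three changectxs obtained elsewhere, and the reporting is purely illustrative.

from mercurial import copies

def describemergecopies(repo, c1, c2, base):
    # c1, c2: the contexts being merged; base: the merge base context
    copy, movewithdir, diverge, renamedelete, dirmove = copies.mergecopies(
        repo, c1, c2, base)
    for dst, src in sorted(copy.items()):
        print('%s is a copy/rename of %s' % (dst, src))
    for src, dsts in sorted(diverge.items()):
        print('%s was renamed divergently to %s' % (src, ', '.join(sorted(dsts))))
    for src, dsts in sorted(renamedelete.items()):
        print('%s was renamed on one side, deleted on the other (%s)'
              % (src, ', '.join(sorted(dsts))))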
458 459 def _isfullcopytraceable(repo, c1, base):
459 460 """ Checks that if base, source and destination are all no-public branches,
460 461 if yes let's use the full copytrace algorithm for increased capabilities
461 462 since it will be fast enough.
462 463
463 464 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
464 465 the number of changesets from c1 to base; if the number of changesets is
465 466 more than the limit, the full copytracing algorithm won't be used.
466 467 """
467 468 if c1.rev() is None:
468 469 c1 = c1.p1()
469 470 if c1.mutable() and base.mutable():
470 471 sourcecommitlimit = repo.ui.configint('experimental',
471 472 'copytrace.sourcecommitlimit')
472 473 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
473 474 return commits < sourcecommitlimit
474 475 return False
475 476
476 477 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
477 478 copy, renamedelete):
478 479 if src not in m2:
479 480 # deleted on side 2
480 481 if src not in m1:
481 482 # renamed on side 1, deleted on side 2
482 483 renamedelete[src] = dsts1
483 484 elif m2[src] != mb[src]:
484 485 if not _related(c2[src], base[src]):
485 486 return
486 487 # modified on side 2
487 488 for dst in dsts1:
488 489 if dst not in m2:
489 490 # dst not added on side 2 (handle as regular
490 491 # "both created" case in manifestmerge otherwise)
491 492 copy[dst] = src
492 493
493 494 def _fullcopytracing(repo, c1, c2, base):
494 495 """ The full copytracing algorithm which finds all the new files that were
495 496 added from the merge base up to the top commit and, for each file, checks
496 497 whether it was copied from another file.
497 498
498 499 This is pretty slow when a lot of changesets are involved but will track all
499 500 the copies.
500 501 """
501 502 m1 = c1.manifest()
502 503 m2 = c2.manifest()
503 504 mb = base.manifest()
504 505
505 506 copies1 = pathcopies(base, c1)
506 507 copies2 = pathcopies(base, c2)
507 508
508 509 inversecopies1 = {}
509 510 inversecopies2 = {}
510 511 for dst, src in copies1.items():
511 512 inversecopies1.setdefault(src, []).append(dst)
512 513 for dst, src in copies2.items():
513 514 inversecopies2.setdefault(src, []).append(dst)
514 515
515 516 copy = {}
516 517 diverge = {}
517 518 renamedelete = {}
518 519 allsources = set(inversecopies1) | set(inversecopies2)
519 520 for src in allsources:
520 521 dsts1 = inversecopies1.get(src)
521 522 dsts2 = inversecopies2.get(src)
522 523 if dsts1 and dsts2:
523 524 # copied/renamed on both sides
524 525 if src not in m1 and src not in m2:
525 526 # renamed on both sides
526 527 dsts1 = set(dsts1)
527 528 dsts2 = set(dsts2)
528 529 # If there's some overlap in the rename destinations, we
529 530 # consider it not divergent. For example, if side 1 copies 'a'
530 531 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
531 532 # and 'd' and deletes 'a'.
532 533 if dsts1 & dsts2:
533 534 for dst in (dsts1 & dsts2):
534 535 copy[dst] = src
535 536 else:
536 537 diverge[src] = sorted(dsts1 | dsts2)
537 538 elif src in m1 and src in m2:
538 539 # copied on both sides
539 540 dsts1 = set(dsts1)
540 541 dsts2 = set(dsts2)
541 542 for dst in (dsts1 & dsts2):
542 543 copy[dst] = src
543 544 # TODO: Handle cases where it was renamed on one side and copied
544 545 # on the other side
545 546 elif dsts1:
546 547 # copied/renamed only on side 1
547 548 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
548 549 copy, renamedelete)
549 550 elif dsts2:
550 551 # copied/renamed only on side 2
551 552 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
552 553 copy, renamedelete)
553 554
554 555 renamedeleteset = set()
555 556 divergeset = set()
556 557 for dsts in diverge.values():
557 558 divergeset.update(dsts)
558 559 for dsts in renamedelete.values():
559 560 renamedeleteset.update(dsts)
560 561
561 562 # find interesting file sets from manifests
562 563 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
563 564 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
564 565 u1 = sorted(addedinm1 - addedinm2)
565 566 u2 = sorted(addedinm2 - addedinm1)
566 567
567 568 header = " unmatched files in %s"
568 569 if u1:
569 570 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
570 571 if u2:
571 572 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
572 573
573 574 fullcopy = copies1.copy()
574 575 fullcopy.update(copies2)
575 576 if not fullcopy:
576 577 return copy, {}, diverge, renamedelete, {}
577 578
578 579 if repo.ui.debugflag:
579 580 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
580 581 "% = renamed and deleted):\n")
581 582 for f in sorted(fullcopy):
582 583 note = ""
583 584 if f in copy:
584 585 note += "*"
585 586 if f in divergeset:
586 587 note += "!"
587 588 if f in renamedeleteset:
588 589 note += "%"
589 590 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
590 591 note))
591 592 del divergeset
592 593
593 594 repo.ui.debug(" checking for directory renames\n")
594 595
595 596 # generate a directory move map
596 597 d1, d2 = c1.dirs(), c2.dirs()
597 598 invalid = set()
598 599 dirmove = {}
599 600
600 601 # examine each file copy for a potential directory move, which is
601 602 # when all the files in a directory are moved to a new directory
602 603 for dst, src in fullcopy.iteritems():
603 604 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
604 605 if dsrc in invalid:
605 606 # already seen to be uninteresting
606 607 continue
607 608 elif dsrc in d1 and ddst in d1:
608 609 # directory wasn't entirely moved locally
609 610 invalid.add(dsrc)
610 611 elif dsrc in d2 and ddst in d2:
611 612 # directory wasn't entirely moved remotely
612 613 invalid.add(dsrc)
613 614 elif dsrc in dirmove and dirmove[dsrc] != ddst:
614 615 # files from the same directory moved to two different places
615 616 invalid.add(dsrc)
616 617 else:
617 618 # looks good so far
618 619 dirmove[dsrc] = ddst
619 620
620 621 for i in invalid:
621 622 if i in dirmove:
622 623 del dirmove[i]
623 624 del d1, d2, invalid
624 625
625 626 if not dirmove:
626 627 return copy, {}, diverge, renamedelete, {}
627 628
628 629 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
629 630
630 631 for d in dirmove:
631 632 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
632 633 (d, dirmove[d]))
633 634
634 635 movewithdir = {}
635 636 # check unaccounted nonoverlapping files against directory moves
636 637 for f in u1 + u2:
637 638 if f not in fullcopy:
638 639 for d in dirmove:
639 640 if f.startswith(d):
640 641 # new file added in a directory that was moved, move it
641 642 df = dirmove[d] + f[len(d):]
642 643 if df not in copy:
643 644 movewithdir[f] = df
644 645 repo.ui.debug((" pending file src: '%s' -> "
645 646 "dst: '%s'\n") % (f, df))
646 647 break
647 648
648 649 return copy, movewithdir, diverge, renamedelete, dirmove
649 650
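The overlap rule for files renamed away on both sides ('some overlap in the rename destinations' means not divergent) can be seen in isolation; the destination sets below reproduce the 'b'/'c' versus 'c'/'d' example from the comments above.

src = 'a'
dsts1 = {'b', 'c'}   # side 1 renamed 'a' to 'b' and 'c'
dsts2 = {'c', 'd'}   # side 2 renamed 'a' to 'c' and 'd'

copy, diverge = {}, {}
if dsts1 & dsts2:
    # overlapping destinations: treat the common target(s) as plain copies
    for dst in dsts1 & dsts2:
        copy[dst] = src
else:
    # no overlap: record a divergent rename
    diverge[src] = sorted(dsts1 | dsts2)

print(copy)     # {'c': 'a'}
print(diverge)  # {}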
650 651 def _heuristicscopytracing(repo, c1, c2, base):
651 652 """ Fast copytracing using filename heuristics
652 653
653 654 Assumes that moves or renames are of following two types:
654 655
655 656 1) Inside a directory only (same directory name but different filenames)
656 657 2) Move from one directory to another
657 658 (same filenames but different directory names)
658 659
659 660 Works only when there are no merge commits in the "source branch".
660 661 The source branch is the commits from base up to c2, not including base.
661 662
662 663 If a merge is involved it falls back to _fullcopytracing().
663 664
664 665 Can be used by setting the following config:
665 666
666 667 [experimental]
667 668 copytrace = heuristics
668 669
669 670 In some cases the copy/move candidates found by the heuristics can be very
670 671 numerous, which will make the algorithm slow. The number of possible
671 672 candidates to check can be limited by using the config
672 673 `experimental.copytrace.movecandidateslimit` which defaults to 100.
673 674 """
674 675
675 676 if c1.rev() is None:
676 677 c1 = c1.p1()
677 678 if c2.rev() is None:
678 679 c2 = c2.p1()
679 680
680 681 copies = {}
681 682
682 683 changedfiles = set()
683 684 m1 = c1.manifest()
684 685 if not repo.revs('%d::%d', base.rev(), c2.rev()):
685 686 # If base is not in c2 branch, we switch to fullcopytracing
686 687 repo.ui.debug("switching to full copytracing as base is not "
687 688 "an ancestor of c2\n")
688 689 return _fullcopytracing(repo, c1, c2, base)
689 690
690 691 ctx = c2
691 692 while ctx != base:
692 693 if len(ctx.parents()) == 2:
693 694 # To keep things simple let's not handle merges
694 695 repo.ui.debug("switching to full copytracing because of merges\n")
695 696 return _fullcopytracing(repo, c1, c2, base)
696 697 changedfiles.update(ctx.files())
697 698 ctx = ctx.p1()
698 699
699 700 cp = _forwardcopies(base, c2)
700 701 for dst, src in cp.iteritems():
701 702 if src in m1:
702 703 copies[dst] = src
703 704
704 705 # file is missing if it isn't present in the destination, but is present in
705 706 # the base and present in the source.
706 707 # Presence in the base is important to exclude added files, presence in the
707 708 # source is important to exclude removed files.
708 709 filt = lambda f: f not in m1 and f in base and f in c2
709 710 missingfiles = [f for f in changedfiles if filt(f)]
710 711
711 712 if missingfiles:
712 713 basenametofilename = collections.defaultdict(list)
713 714 dirnametofilename = collections.defaultdict(list)
714 715
715 716 for f in m1.filesnotin(base.manifest()):
716 717 basename = os.path.basename(f)
717 718 dirname = os.path.dirname(f)
718 719 basenametofilename[basename].append(f)
719 720 dirnametofilename[dirname].append(f)
720 721
721 722 for f in missingfiles:
722 723 basename = os.path.basename(f)
723 724 dirname = os.path.dirname(f)
724 725 samebasename = basenametofilename[basename]
725 726 samedirname = dirnametofilename[dirname]
726 727 movecandidates = samebasename + samedirname
727 728 # f is guaranteed to be present in c2, that's why
728 729 # c2.filectx(f) won't fail
729 730 f2 = c2.filectx(f)
730 731 # we can have a lot of candidates which can slow down the heuristics
731 732 # config value to limit the number of candidate moves to check
732 733 maxcandidates = repo.ui.configint('experimental',
733 734 'copytrace.movecandidateslimit')
734 735
735 736 if len(movecandidates) > maxcandidates:
736 737 repo.ui.status(_("skipping copytracing for '%s', more "
737 738 "candidates than the limit: %d\n")
738 739 % (f, len(movecandidates)))
739 740 continue
740 741
741 742 for candidate in movecandidates:
742 743 f1 = c1.filectx(candidate)
743 744 if _related(f1, f2):
744 745 # if there are a few related copies then we'll merge
745 746 # changes into all of them. This matches the behaviour
746 747 # of upstream copytracing
747 748 copies[candidate] = f
748 749
749 750 return copies, {}, {}, {}, {}
750 751
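A standalone sketch of the basename/dirname candidate grouping that the heuristics use; the file lists are invented, and in the real code each candidate is then checked against the file in c2 with _related().

import collections
import os

# files added on the c1 side (not in the merge base) ...
added_in_m1 = ['src/new/a.txt', 'docs/b.txt']
# ... and files missing from c1 but present in both base and c2
missingfiles = ['src/old/a.txt']

basenametofilename = collections.defaultdict(list)
dirnametofilename = collections.defaultdict(list)
for f in added_in_m1:
    basenametofilename[os.path.basename(f)].append(f)
    dirnametofilename[os.path.dirname(f)].append(f)

for f in missingfiles:
    candidates = (basenametofilename[os.path.basename(f)] +
                  dirnametofilename[os.path.dirname(f)])
    print('%s -> candidates %r' % (f, candidates))
    # src/old/a.txt -> candidates ['src/new/a.txt']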
751 752 def _related(f1, f2):
752 753 """return True if f1 and f2 filectx have a common ancestor
753 754
754 755 Walk back to common ancestor to see if the two files originate
755 756 from the same file. Since workingfilectx's rev() is None it messes
756 757 up the integer comparison logic, hence the pre-step check for
757 758 None (f1 and f2 can only be workingfilectx's initially).
758 759 """
759 760
760 761 if f1 == f2:
761 762 return True # a match
762 763
763 764 g1, g2 = f1.ancestors(), f2.ancestors()
764 765 try:
765 766 f1r, f2r = f1.linkrev(), f2.linkrev()
766 767
767 768 if f1r is None:
768 769 f1 = next(g1)
769 770 if f2r is None:
770 771 f2 = next(g2)
771 772
772 773 while True:
773 774 f1r, f2r = f1.linkrev(), f2.linkrev()
774 775 if f1r > f2r:
775 776 f1 = next(g1)
776 777 elif f2r > f1r:
777 778 f2 = next(g2)
778 779 else: # f1 and f2 point to files in the same linkrev
779 780 return f1 == f2 # true if they point to the same file
780 781 except StopIteration:
781 782 return False
782 783
783 784 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
784 785 """reproduce copies from fromrev to rev in the dirstate
785 786
786 787 If skiprev is specified, it's a revision that should be used to
787 788 filter copy records. Any copies that occur between fromrev and
788 789 skiprev will not be duplicated, even if they appear in the set of
789 790 copies between fromrev and rev.
790 791 """
791 792 exclude = {}
792 793 ctraceconfig = repo.ui.config('experimental', 'copytrace')
793 794 bctrace = stringutil.parsebool(ctraceconfig)
794 795 if (skiprev is not None and
795 796 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
796 797 # copytrace='off' skips this line, but not the entire function because
797 798 # the line below is O(size of the repo) during a rebase, while the rest
798 799 # of the function is much faster (and is required for carrying copy
799 800 # metadata across the rebase anyway).
800 801 exclude = pathcopies(repo[fromrev], repo[skiprev])
801 802 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
802 803 if dst in exclude:
803 804 continue
804 805 if dst in wctx:
805 806 wctx[dst].markcopied(src)