##// END OF EJS Templates
copies: expand the logic of usechangesetcentricalgo...
marmoute -
r43290:f3bcae1e default
parent child Browse files
Show More
@@ -1,836 +1,837 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 util,
21 21 )
22 22 from .utils import (
23 23 stringutil,
24 24 )
25 25
def _findlimit(repo, ctxa, ctxb):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.

    'ctxa' and 'ctxb' are changectx objects; either may be the working
    directory context (rev() is None), in which case its parents stand in
    for it during the walk.  Returns an integer revision number.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    wdirparents = None
    a = ctxa.rev()
    b = ctxb.rev()
    if a is None:
        # working directory: substitute the (pseudo) wdir revision and
        # remember the real parents for the walk below
        wdirparents = (ctxa.p1(), ctxa.p2())
        a = node.wdirrev
    if b is None:
        # at most one of ctxa/ctxb may be the working directory
        assert not wdirparents
        wdirparents = (ctxb.p1(), ctxb.p2())
        b = node.wdirrev

    side = {a: -1, b: 1}
    # revs are negated so the min-heap pops the *highest* revision first,
    # i.e. we walk from descendants toward ancestors
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(visit)
        if r == node.wdirrev:
            parents = [pctx.rev() for pctx in wdirparents]
        else:
            parents = cl.parentrevs(r)
            if parents[1] == node.nullrev:
                # linear changeset: only one real parent
                parents = parents[:1]
        for p in parents:
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or vice-versa.
    return min(limit, a, b)
109 109
110 110 def _filter(src, dst, t):
111 111 """filters out invalid copies after chaining"""
112 112
113 113 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
114 114 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
115 115 # in the following table (not including trivial cases). For example, case 2
116 116 # is where a file existed in 'src' and remained under that name in 'mid' and
117 117 # then was renamed between 'mid' and 'dst'.
118 118 #
119 119 # case src mid dst result
120 120 # 1 x y - -
121 121 # 2 x y y x->y
122 122 # 3 x y x -
123 123 # 4 x y z x->z
124 124 # 5 - x y -
125 125 # 6 x x y x->y
126 126 #
127 127 # _chain() takes care of chaining the copies in 'a' and 'b', but it
128 128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
129 129 # between 5 and 6, so it includes all cases in its result.
130 130 # Cases 1, 3, and 5 are then removed by _filter().
131 131
132 132 for k, v in list(t.items()):
133 133 # remove copies from files that didn't exist
134 134 if v not in src:
135 135 del t[k]
136 136 # remove criss-crossed copies
137 137 elif k in src and v in dst:
138 138 del t[k]
139 139 # remove copies to files that were then removed
140 140 elif k not in dst:
141 141 del t[k]
142 142
143 143 def _chain(a, b):
144 144 """chain two sets of copies 'a' and 'b'"""
145 145 t = a.copy()
146 146 for k, v in b.iteritems():
147 147 if v in t:
148 148 t[k] = t[v]
149 149 else:
150 150 t[k] = v
151 151 return t
152 152
153 153 def _tracefile(fctx, am, basemf, limit):
154 154 """return file context that is the ancestor of fctx present in ancestor
155 155 manifest am, stopping after the first ancestor lower than limit"""
156 156
157 157 for f in fctx.ancestors():
158 158 path = f.path()
159 159 if am.get(path, None) == f.filenode():
160 160 return path
161 161 if basemf and basemf.get(path, None) == f.filenode():
162 162 return path
163 163 if not f.isintroducedafter(limit):
164 164 return None
165 165
166 166 def _dirstatecopies(repo, match=None):
167 167 ds = repo.dirstate
168 168 c = ds.copies().copy()
169 169 for k in list(c):
170 170 if ds[k] not in 'anm' or (match and not match(k)):
171 171 del c[k]
172 172 return c
173 173
174 174 def _computeforwardmissing(a, b, match=None):
175 175 """Computes which files are in b but not a.
176 176 This is its own function so extensions can easily wrap this call to see what
177 177 files _forwardcopies is about to process.
178 178 """
179 179 ma = a.manifest()
180 180 mb = b.manifest()
181 181 return mb.filesnotin(ma, match=match)
182 182
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # Both 'changeset-only' and 'compatibility' store copy metadata in the
    # changeset itself, so either setting enables the changeset-centric
    # code paths.
    readfrom = repo.ui.config('experimental', 'copies.read-from')
    return readfrom in ('changeset-only', 'compatibility')
187 188
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns {dst@b: src@a}.  'base' (a changectx or None) contributes an
    extra manifest against which ancestors are matched in _tracefile().
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        # copy metadata lives in the changesets; use the dedicated algorithm
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a, b)
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared ancestry context for every traced file context below
    # (presumably to avoid recomputing ancestor sets per file — see
    # _tracefile; TODO confirm)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf, limit)
        if opath:
            if debug:
                dbg('debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm
247 248
def _changesetforwardcopies(a, b, match):
    """changeset-centric variant of _committedforwardcopies()

    Computes {dst@b: src@a} purely from the p1copies()/p2copies() metadata
    stored on each changeset between a and b, walking the revision graph
    forward from a's descendants to b.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo()
    # map of parent rev -> list of child revs, restricted to the revs that
    # are ancestors of b but not of a
    children = {}
    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    # 'work' contains 3-tuples of a (revision number, parent number, copies).
    # The parent number is only used for knowing which parent the copies dict
    # came from.
    # NOTE: To reduce costly copying the 'copies' dicts, we reuse the same
    # instance for *one* of the child nodes (the last one). Once an instance
    # has been put on the queue, it is thus no longer safe to modify it.
    # Conversely, it *is* safe to modify an instance popped off the queue.
    work = [(r, 1, {}) for r in roots]
    # the min-heap pops revisions in ascending order, so a revision is
    # always processed after all of its ancestors in 'missingrevs'
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        r, i1, copies = heapq.heappop(work)
        if work and work[0][0] == r:
            # We are tracing copies from both parents
            r, i2, copies2 = heapq.heappop(work)
            for dst, src in copies2.items():
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                if dst not in copies:
                    # If it was copied on the p1 side, leave it as copied from
                    # that side, even if it was also copied on the p2 side.
                    copies[dst] = copies2[dst]
        if r == b.rev():
            return copies
        for i, c in enumerate(children[r]):
            childctx = repo[c]
            if r == childctx.p1().rev():
                parent = 1
                childcopies = childctx.p1copies()
            else:
                assert r == childctx.p2().rev()
                parent = 2
                childcopies = childctx.p2copies()
            if not alwaysmatch:
                childcopies = {dst: src for dst, src in childcopies.items()
                               if match(dst)}
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            # a removed file cannot be a copy destination any more
            for f in childctx.filesremoved():
                if f in newcopies:
                    del newcopies[f]
            heapq.heappush(work, (c, parent, newcopies))
    # b.rev() is always reachable from the roots, so the loop must return
    assert False
316 317
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        # committed revision: stored metadata is all we need
        return _committedforwardcopies(a, b, base, match)
    # working copy: trace to its first parent, then layer the uncommitted
    # copies recorded in the dirstate on top
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
331 332
def _backwardrenames(a, b, match):
    """find renames from b back to a, i.e. reversed _forwardcopies()

    Returns {src@b: dst@a}.  Copies (where the source still exists in 'a')
    are dropped, and 1:n renames are collapsed to a single arbitrary entry.
    """
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    # dict.items() (rather than the Python 2-only iteritems()) keeps this
    # working on Python 3 too; sorting makes the arbitrary 1:n pick
    # deterministic.
    for k, v in sorted(f.items()):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
352 353
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    'x' and 'y' can be in any graph relationship; the function dispatches
    to forward, backward, or combined (through the common ancestor)
    tracing depending on how they relate.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    if x == y or not x or not y:
        # trivial cases: nothing can have been copied
        return {}
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: trace forward
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # y is an ancestor of x: trace backward
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated branches: go back from x to the common ancestor, then
        # forward from the ancestor to y, and chain the two mappings
        if debug:
            repo.ui.debug('debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(_backwardrenames(x, a, match=match),
                        _forwardcopies(a, y, base, match=match))
    # drop chaining artifacts that cannot be real x->y copies
    _filter(x, y, copies)
    return copies
384 385
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    |/
    o          ---> 2 commit that moves a.txt to b.txt
    |
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
463 464
464 465 def _isfullcopytraceable(repo, c1, base):
465 466 """ Checks that if base, source and destination are all no-public branches,
466 467 if yes let's use the full copytrace algorithm for increased capabilities
467 468 since it will be fast enough.
468 469
469 470 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
470 471 number of changesets from c1 to base such that if number of changesets are
471 472 more than the limit, full copytracing algorithm won't be used.
472 473 """
473 474 if c1.rev() is None:
474 475 c1 = c1.p1()
475 476 if c1.mutable() and base.mutable():
476 477 sourcecommitlimit = repo.ui.configint('experimental',
477 478 'copytrace.sourcecommitlimit')
478 479 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
479 480 return commits < sourcecommitlimit
480 481 return False
481 482
482 483 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
483 484 copy, renamedelete):
484 485 if src not in m2:
485 486 # deleted on side 2
486 487 if src not in m1:
487 488 # renamed on side 1, deleted on side 2
488 489 renamedelete[src] = dsts1
489 490 elif m2[src] != mb[src]:
490 491 if not _related(c2[src], base[src]):
491 492 return
492 493 # modified on side 2
493 494 for dst in dsts1:
494 495 if dst not in m2:
495 496 # dst not added on side 2 (handle as regular
496 497 # "both created" case in manifestmerge otherwise)
497 498 copy[dst] = src
498 499
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the same five dicts as mergecopies():
    (copy, movewithdir, diverge, renamedelete, dirmove).
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert both mappings to source -> [destinations], so each source can
    # be classified by what happened to it on each side
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in (dsts1 & dsts2):
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in (dsts1 & dsts2):
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
                                   copy, renamedelete)
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
                                   copy, renamedelete)

    # flattened destination sets, used only for debug annotation below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = "  unmatched files in %s"
    if u1:
        repo.ui.debug("%s:\n   %s\n" % (header % 'local', "\n   ".join(u1)))
    if u2:
        repo.ui.debug("%s:\n   %s\n" % (header % 'other', "\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                              note))
    del divergeset

    repo.ui.debug("  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so startswith() below matches whole
    # path components only
    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(("   pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
655 656
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same five dicts as mergecopies(); only the first ("copy")
    is ever populated here.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep the copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index the files added on the c1 side by basename and by dirname,
        # the two axes the heuristics match on
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
756 757
757 758 def _related(f1, f2):
758 759 """return True if f1 and f2 filectx have a common ancestor
759 760
760 761 Walk back to common ancestor to see if the two files originate
761 762 from the same file. Since workingfilectx's rev() is None it messes
762 763 up the integer comparison logic, hence the pre-step check for
763 764 None (f1 and f2 can only be workingfilectx's initially).
764 765 """
765 766
766 767 if f1 == f2:
767 768 return True # a match
768 769
769 770 g1, g2 = f1.ancestors(), f2.ancestors()
770 771 try:
771 772 f1r, f2r = f1.linkrev(), f2.linkrev()
772 773
773 774 if f1r is None:
774 775 f1 = next(g1)
775 776 if f2r is None:
776 777 f2 = next(g2)
777 778
778 779 while True:
779 780 f1r, f2r = f1.linkrev(), f2.linkrev()
780 781 if f1r > f2r:
781 782 f1 = next(g1)
782 783 elif f2r > f1r:
783 784 f2 = next(g2)
784 785 else: # f1 and f2 point to files in the same linkrev
785 786 return f1 == f2 # true if they point to the same file
786 787 except StopIteration:
787 788 return False
788 789
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if (skiprev is not None and
        (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    # dict.items() (rather than the Python 2-only iteritems()) keeps this
    # working on Python 3 as well; iteration results are identical.
    for dst, src in pathcopies(repo[fromrev], repo[rev]).items():
        if dst in exclude:
            continue
        if dst in wctx:
            wctx[dst].markcopied(src)
812 813
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionaries
    (p1copies, p2copies), each of the form `{newname: oldname}`.
    """
    p1copies, p2copies = {}, {}
    parent1, parent2 = ctx.p1(), ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        if not narrowmatch(dst):
            continue
        if dst not in ctx:
            continue
        copysource = ctx[dst].renamed()
        if not copysource:
            continue
        src, srcnode = copysource
        # attribute the copy to whichever parent actually holds the source
        # file at the recorded node, preferring p1
        if src in parent1 and parent1[src].filenode() == srcnode:
            p1copies[dst] = src
        elif src in parent2 and parent2[src].filenode() == srcnode:
            p2copies[dst] = src
    return p1copies, p2copies
General Comments 0
You need to be logged in to leave comments. Login now