##// END OF EJS Templates
copies: avoid calling matcher if matcher.always()...
Martin von Zweigbergk -
r42688:c0b51449 default
parent child Browse files
Show More
@@ -1,810 +1,811 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 util,
21 21 )
22 22 from .utils import (
23 23 stringutil,
24 24 )
25 25
26 26 def _findlimit(repo, ctxa, ctxb):
27 27 """
28 28 Find the last revision that needs to be checked to ensure that a full
29 29 transitive closure for file copies can be properly calculated.
30 30 Generally, this means finding the earliest revision number that's an
31 31 ancestor of a or b but not both, except when a or b is a direct descendent
32 32 of the other, in which case we can return the minimum revnum of a and b.
33 33 """
34 34
35 35 # basic idea:
36 36 # - mark a and b with different sides
37 37 # - if a parent's children are all on the same side, the parent is
38 38 # on that side, otherwise it is on no side
39 39 # - walk the graph in topological order with the help of a heap;
40 40 # - add unseen parents to side map
41 41 # - clear side of any parent that has children on different sides
42 42 # - track number of interesting revs that might still be on a side
43 43 # - track the lowest interesting rev seen
44 44 # - quit when interesting revs is zero
45 45
46 46 cl = repo.changelog
47 47 wdirparents = None
48 48 a = ctxa.rev()
49 49 b = ctxb.rev()
50 50 if a is None:
51 51 wdirparents = (ctxa.p1(), ctxa.p2())
52 52 a = node.wdirrev
53 53 if b is None:
54 54 assert not wdirparents
55 55 wdirparents = (ctxb.p1(), ctxb.p2())
56 56 b = node.wdirrev
57 57
58 58 side = {a: -1, b: 1}
59 59 visit = [-a, -b]
60 60 heapq.heapify(visit)
61 61 interesting = len(visit)
62 62 limit = node.wdirrev
63 63
64 64 while interesting:
65 65 r = -heapq.heappop(visit)
66 66 if r == node.wdirrev:
67 67 parents = [pctx.rev() for pctx in wdirparents]
68 68 else:
69 69 parents = cl.parentrevs(r)
70 70 if parents[1] == node.nullrev:
71 71 parents = parents[:1]
72 72 for p in parents:
73 73 if p not in side:
74 74 # first time we see p; add it to visit
75 75 side[p] = side[r]
76 76 if side[p]:
77 77 interesting += 1
78 78 heapq.heappush(visit, -p)
79 79 elif side[p] and side[p] != side[r]:
80 80 # p was interesting but now we know better
81 81 side[p] = 0
82 82 interesting -= 1
83 83 if side[r]:
84 84 limit = r # lowest rev visited
85 85 interesting -= 1
86 86
87 87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 88 # 1/ File 'a0' committed
89 89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 90 # 3/ Move back to first commit
91 91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 93 #
94 94 # During the amend in step five, we will be in this state:
95 95 #
96 96 # @ 3 temporary amend commit for a1-amend
97 97 # |
98 98 # o 2 a1-amend
99 99 # |
100 100 # | o 1 a1
101 101 # |/
102 102 # o 0 a0
103 103 #
104 104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 105 # yet the filelog has the copy information in rev 1 and we will not look
106 106 # back far enough unless we also look at the a and b as candidates.
107 107 # This only occurs when a is a descendent of b or visa-versa.
108 108 return min(limit, a, b)
109 109
110 110 def _chainandfilter(src, dst, a, b):
111 111 """chain two sets of copies 'a' and 'b' and filter result"""
112 112
113 113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
114 114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
115 115 # following table (not including trivial cases). For example, case 2 is
116 116 # where a file existed in 'src' and remained under that name in 'mid' and
117 117 # then was renamed between 'mid' and 'dst'.
118 118 #
119 119 # case src mid dst result
120 120 # 1 x y - -
121 121 # 2 x y y x->y
122 122 # 3 x y x -
123 123 # 4 x y z x->z
124 124 # 5 - x y -
125 125 # 6 x x y x->y
126 126 #
127 127 # _chain() takes care of chaining the copies in 'a' and 'b', but it
128 128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
129 129 # between 5 and 6, so it includes all cases in its result.
130 130 # Cases 1, 3, and 5 are then removed by _filter().
131 131
132 132 t = _chain(a, b)
133 133 _filter(src, dst, t)
134 134 return t
135 135
136 136 def _filter(src, dst, t):
137 137 """filters out invalid copies after chaining"""
138 138 for k, v in list(t.items()):
139 139 # remove copies from files that didn't exist
140 140 if v not in src:
141 141 del t[k]
142 142 # remove criss-crossed copies
143 143 elif k in src and v in dst:
144 144 del t[k]
145 145 # remove copies to files that were then removed
146 146 elif k not in dst:
147 147 del t[k]
148 148
149 149 def _chain(a, b):
150 150 """chain two sets of copies 'a' and 'b'"""
151 151 t = a.copy()
152 152 for k, v in b.iteritems():
153 153 if v in t:
154 154 t[k] = t[v]
155 155 else:
156 156 t[k] = v
157 157 return t
158 158
159 159 def _tracefile(fctx, am, limit):
160 160 """return file context that is the ancestor of fctx present in ancestor
161 161 manifest am, stopping after the first ancestor lower than limit"""
162 162
163 163 for f in fctx.ancestors():
164 164 if am.get(f.path(), None) == f.filenode():
165 165 return f
166 166 if not f.isintroducedafter(limit):
167 167 return None
168 168
169 169 def _dirstatecopies(repo, match=None):
170 170 ds = repo.dirstate
171 171 c = ds.copies().copy()
172 172 for k in list(c):
173 173 if ds[k] not in 'anm' or (match and not match(k)):
174 174 del c[k]
175 175 return c
176 176
177 177 def _computeforwardmissing(a, b, match=None):
178 178 """Computes which files are in b but not a.
179 179 This is its own function so extensions can easily wrap this call to see what
180 180 files _forwardcopies is about to process.
181 181 """
182 182 ma = a.manifest()
183 183 mb = b.manifest()
184 184 return mb.filesnotin(ma, match=match)
185 185
186 186 def usechangesetcentricalgo(repo):
187 187 """Checks if we should use changeset-centric copy algorithms"""
188 188 return (repo.ui.config('experimental', 'copies.read-from') in
189 189 ('changeset-only', 'compatibility'))
190 190
191 191 def _committedforwardcopies(a, b, match):
192 192 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
193 193 # files might have to be traced back to the fctx parent of the last
194 194 # one-side-only changeset, but not further back than that
195 195 repo = a._repo
196 196
197 197 if usechangesetcentricalgo(repo):
198 198 return _changesetforwardcopies(a, b, match)
199 199
200 200 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
201 201 dbg = repo.ui.debug
202 202 if debug:
203 203 dbg('debug.copies: looking into rename from %s to %s\n'
204 204 % (a, b))
205 205 limit = _findlimit(repo, a, b)
206 206 if debug:
207 207 dbg('debug.copies: search limit: %d\n' % limit)
208 208 am = a.manifest()
209 209
210 210 # find where new files came from
211 211 # we currently don't try to find where old files went, too expensive
212 212 # this means we can miss a case like 'hg rm b; hg cp a b'
213 213 cm = {}
214 214
215 215 # Computing the forward missing is quite expensive on large manifests, since
216 216 # it compares the entire manifests. We can optimize it in the common use
217 217 # case of computing what copies are in a commit versus its parent (like
218 218 # during a rebase or histedit). Note, we exclude merge commits from this
219 219 # optimization, since the ctx.files() for a merge commit is not correct for
220 220 # this comparison.
221 221 forwardmissingmatch = match
222 222 if b.p1() == a and b.p2().node() == node.nullid:
223 223 filesmatcher = matchmod.exact(b.files())
224 224 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
225 225 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
226 226
227 227 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
228 228
229 229 if debug:
230 230 dbg('debug.copies: missing files to search: %d\n' % len(missing))
231 231
232 232 for f in sorted(missing):
233 233 if debug:
234 234 dbg('debug.copies: tracing file: %s\n' % f)
235 235 fctx = b[f]
236 236 fctx._ancestrycontext = ancestrycontext
237 237
238 238 if debug:
239 239 start = util.timer()
240 240 ofctx = _tracefile(fctx, am, limit)
241 241 if ofctx:
242 242 if debug:
243 243 dbg('debug.copies: rename of: %s\n' % ofctx._path)
244 244 cm[f] = ofctx.path()
245 245 if debug:
246 246 dbg('debug.copies: time: %f seconds\n'
247 247 % (util.timer() - start))
248 248 return cm
249 249
250 250 def _changesetforwardcopies(a, b, match):
251 251 if a.rev() == node.nullrev:
252 252 return {}
253 253
254 254 repo = a.repo()
255 255 children = {}
256 256 cl = repo.changelog
257 257 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
258 258 for r in missingrevs:
259 259 for p in cl.parentrevs(r):
260 260 if p == node.nullrev:
261 261 continue
262 262 if p not in children:
263 263 children[p] = [r]
264 264 else:
265 265 children[p].append(r)
266 266
267 267 roots = set(children) - set(missingrevs)
268 268 # 'work' contains 3-tuples of a (revision number, parent number, copies).
269 269 # The parent number is only used for knowing which parent the copies dict
270 270 # came from.
271 271 work = [(r, 1, {}) for r in roots]
272 272 heapq.heapify(work)
273 alwaysmatch = match.always()
273 274 while work:
274 275 r, i1, copies1 = heapq.heappop(work)
275 276 if work and work[0][0] == r:
276 277 # We are tracing copies from both parents
277 278 r, i2, copies2 = heapq.heappop(work)
278 279 copies = {}
279 280 allcopies = set(copies1) | set(copies2)
280 281 # TODO: perhaps this filtering should be done as long as ctx
281 282 # is merge, whether or not we're tracing from both parent.
282 283 for dst in allcopies:
283 if not match(dst):
284 if not alwaysmatch and not match(dst):
284 285 continue
285 286 # Unlike when copies are stored in the filelog, we consider
286 287 # it a copy even if the destination already existed on the
287 288 # other branch. It's simply too expensive to check if the
288 289 # file existed in the manifest.
289 290 if dst in copies1:
290 291 # If it was copied on the p1 side, mark it as copied from
291 292 # that side, even if it was also copied on the p2 side.
292 293 copies[dst] = copies1[dst]
293 294 else:
294 295 copies[dst] = copies2[dst]
295 296 else:
296 297 copies = copies1
297 298 if r == b.rev():
298 299 _filter(a, b, copies)
299 300 return copies
300 301 for i, c in enumerate(children[r]):
301 302 childctx = repo[c]
302 303 if r == childctx.p1().rev():
303 304 parent = 1
304 305 childcopies = childctx.p1copies()
305 306 else:
306 307 assert r == childctx.p2().rev()
307 308 parent = 2
308 309 childcopies = childctx.p2copies()
309 if not match.always():
310 if not alwaysmatch:
310 311 childcopies = {dst: src for dst, src in childcopies.items()
311 312 if match(dst)}
312 313 # Copy the dict only if later iterations will also need it
313 314 if i != len(children[r]) - 1:
314 315 copies = copies.copy()
315 316 if childcopies:
316 317 childcopies = _chain(copies, childcopies)
317 318 else:
318 319 childcopies = copies
319 320 for f in childctx.filesremoved():
320 321 if f in childcopies:
321 322 del childcopies[f]
322 323 heapq.heappush(work, (c, parent, childcopies))
323 324 assert False
324 325
325 326 def _forwardcopies(a, b, match=None):
326 327 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
327 328
328 329 match = a.repo().narrowmatch(match)
329 330 # check for working copy
330 331 if b.rev() is None:
331 332 if a == b.p1():
332 333 # short-circuit to avoid issues with merge states
333 334 return _dirstatecopies(b._repo, match)
334 335
335 336 cm = _committedforwardcopies(a, b.p1(), match)
336 337 # combine copies from dirstate if necessary
337 338 return _chainandfilter(a, b, cm, _dirstatecopies(b._repo, match))
338 339 return _committedforwardcopies(a, b, match)
339 340
340 341 def _backwardrenames(a, b, match):
341 342 if a._repo.ui.config('experimental', 'copytrace') == 'off':
342 343 return {}
343 344
344 345 # Even though we're not taking copies into account, 1:n rename situations
345 346 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
346 347 # arbitrarily pick one of the renames.
347 348 # We don't want to pass in "match" here, since that would filter
348 349 # the destination by it. Since we're reversing the copies, we want
349 350 # to filter the source instead.
350 351 f = _forwardcopies(b, a)
351 352 r = {}
352 353 for k, v in sorted(f.iteritems()):
353 354 if match and not match(v):
354 355 continue
355 356 # remove copies
356 357 if v in a:
357 358 continue
358 359 r[v] = k
359 360 return r
360 361
361 362 def pathcopies(x, y, match=None):
362 363 """find {dst@y: src@x} copy mapping for directed compare"""
363 364 repo = x._repo
364 365 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
365 366 if debug:
366 367 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
367 368 % (x, y))
368 369 if x == y or not x or not y:
369 370 return {}
370 371 a = y.ancestor(x)
371 372 if a == x:
372 373 if debug:
373 374 repo.ui.debug('debug.copies: search mode: forward\n')
374 375 return _forwardcopies(x, y, match=match)
375 376 if a == y:
376 377 if debug:
377 378 repo.ui.debug('debug.copies: search mode: backward\n')
378 379 return _backwardrenames(x, y, match=match)
379 380 if debug:
380 381 repo.ui.debug('debug.copies: search mode: combined\n')
381 382 return _chainandfilter(x, y, _backwardrenames(x, a, match=match),
382 383 _forwardcopies(a, y, match=match))
383 384
384 385 def mergecopies(repo, c1, c2, base):
385 386 """
386 387 Finds moves and copies between context c1 and c2 that are relevant for
387 388 merging. 'base' will be used as the merge base.
388 389
389 390 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
390 391 files that were moved/ copied in one merge parent and modified in another.
391 392 For example:
392 393
393 394 o ---> 4 another commit
394 395 |
395 396 | o ---> 3 commit that modifies a.txt
396 397 | /
397 398 o / ---> 2 commit that moves a.txt to b.txt
398 399 |/
399 400 o ---> 1 merge base
400 401
401 402 If we try to rebase revision 3 on revision 4, since there is no a.txt in
402 403 revision 4, and if user have copytrace disabled, we prints the following
403 404 message:
404 405
405 406 ```other changed <file> which local deleted```
406 407
407 408 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
408 409 "dirmove".
409 410
410 411 "copy" is a mapping from destination name -> source name,
411 412 where source is in c1 and destination is in c2 or vice-versa.
412 413
413 414 "movewithdir" is a mapping from source name -> destination name,
414 415 where the file at source present in one context but not the other
415 416 needs to be moved to destination by the merge process, because the
416 417 other context moved the directory it is in.
417 418
418 419 "diverge" is a mapping of source name -> list of destination names
419 420 for divergent renames.
420 421
421 422 "renamedelete" is a mapping of source name -> list of destination
422 423 names for files deleted in c1 that were renamed in c2 or vice-versa.
423 424
424 425 "dirmove" is a mapping of detected source dir -> destination dir renames.
425 426 This is needed for handling changes to new files previously grafted into
426 427 renamed directories.
427 428
428 429 This function calls different copytracing algorithms based on config.
429 430 """
430 431 # avoid silly behavior for update from empty dir
431 432 if not c1 or not c2 or c1 == c2:
432 433 return {}, {}, {}, {}, {}
433 434
434 435 narrowmatch = c1.repo().narrowmatch()
435 436
436 437 # avoid silly behavior for parent -> working dir
437 438 if c2.node() is None and c1.node() == repo.dirstate.p1():
438 439 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
439 440
440 441 copytracing = repo.ui.config('experimental', 'copytrace')
441 442 if stringutil.parsebool(copytracing) is False:
442 443 # stringutil.parsebool() returns None when it is unable to parse the
443 444 # value, so we should rely on making sure copytracing is on such cases
444 445 return {}, {}, {}, {}, {}
445 446
446 447 if usechangesetcentricalgo(repo):
447 448 # The heuristics don't make sense when we need changeset-centric algos
448 449 return _fullcopytracing(repo, c1, c2, base)
449 450
450 451 # Copy trace disabling is explicitly below the node == p1 logic above
451 452 # because the logic above is required for a simple copy to be kept across a
452 453 # rebase.
453 454 if copytracing == 'heuristics':
454 455 # Do full copytracing if only non-public revisions are involved as
455 456 # that will be fast enough and will also cover the copies which could
456 457 # be missed by heuristics
457 458 if _isfullcopytraceable(repo, c1, base):
458 459 return _fullcopytracing(repo, c1, c2, base)
459 460 return _heuristicscopytracing(repo, c1, c2, base)
460 461 else:
461 462 return _fullcopytracing(repo, c1, c2, base)
462 463
463 464 def _isfullcopytraceable(repo, c1, base):
464 465 """ Checks that if base, source and destination are all no-public branches,
465 466 if yes let's use the full copytrace algorithm for increased capabilities
466 467 since it will be fast enough.
467 468
468 469 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
469 470 number of changesets from c1 to base such that if number of changesets are
470 471 more than the limit, full copytracing algorithm won't be used.
471 472 """
472 473 if c1.rev() is None:
473 474 c1 = c1.p1()
474 475 if c1.mutable() and base.mutable():
475 476 sourcecommitlimit = repo.ui.configint('experimental',
476 477 'copytrace.sourcecommitlimit')
477 478 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
478 479 return commits < sourcecommitlimit
479 480 return False
480 481
481 482 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
482 483 copy, renamedelete):
483 484 if src not in m2:
484 485 # deleted on side 2
485 486 if src not in m1:
486 487 # renamed on side 1, deleted on side 2
487 488 renamedelete[src] = dsts1
488 489 elif m2[src] != mb[src]:
489 490 if not _related(c2[src], base[src]):
490 491 return
491 492 # modified on side 2
492 493 for dst in dsts1:
493 494 if dst not in m2:
494 495 # dst not added on side 2 (handle as regular
495 496 # "both created" case in manifestmerge otherwise)
496 497 copy[dst] = src
497 498
498 499 def _fullcopytracing(repo, c1, c2, base):
499 500 """ The full copytracing algorithm which finds all the new files that were
500 501 added from merge base up to the top commit and for each file it checks if
501 502 this file was copied from another file.
502 503
503 504 This is pretty slow when a lot of changesets are involved but will track all
504 505 the copies.
505 506 """
506 507 m1 = c1.manifest()
507 508 m2 = c2.manifest()
508 509 mb = base.manifest()
509 510
510 511 copies1 = pathcopies(base, c1)
511 512 copies2 = pathcopies(base, c2)
512 513
513 514 inversecopies1 = {}
514 515 inversecopies2 = {}
515 516 for dst, src in copies1.items():
516 517 inversecopies1.setdefault(src, []).append(dst)
517 518 for dst, src in copies2.items():
518 519 inversecopies2.setdefault(src, []).append(dst)
519 520
520 521 copy = {}
521 522 diverge = {}
522 523 renamedelete = {}
523 524 allsources = set(inversecopies1) | set(inversecopies2)
524 525 for src in allsources:
525 526 dsts1 = inversecopies1.get(src)
526 527 dsts2 = inversecopies2.get(src)
527 528 if dsts1 and dsts2:
528 529 # copied/renamed on both sides
529 530 if src not in m1 and src not in m2:
530 531 # renamed on both sides
531 532 dsts1 = set(dsts1)
532 533 dsts2 = set(dsts2)
533 534 # If there's some overlap in the rename destinations, we
534 535 # consider it not divergent. For example, if side 1 copies 'a'
535 536 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
536 537 # and 'd' and deletes 'a'.
537 538 if dsts1 & dsts2:
538 539 for dst in (dsts1 & dsts2):
539 540 copy[dst] = src
540 541 else:
541 542 diverge[src] = sorted(dsts1 | dsts2)
542 543 elif src in m1 and src in m2:
543 544 # copied on both sides
544 545 dsts1 = set(dsts1)
545 546 dsts2 = set(dsts2)
546 547 for dst in (dsts1 & dsts2):
547 548 copy[dst] = src
548 549 # TODO: Handle cases where it was renamed on one side and copied
549 550 # on the other side
550 551 elif dsts1:
551 552 # copied/renamed only on side 1
552 553 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
553 554 copy, renamedelete)
554 555 elif dsts2:
555 556 # copied/renamed only on side 2
556 557 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
557 558 copy, renamedelete)
558 559
559 560 renamedeleteset = set()
560 561 divergeset = set()
561 562 for dsts in diverge.values():
562 563 divergeset.update(dsts)
563 564 for dsts in renamedelete.values():
564 565 renamedeleteset.update(dsts)
565 566
566 567 # find interesting file sets from manifests
567 568 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
568 569 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
569 570 u1 = sorted(addedinm1 - addedinm2)
570 571 u2 = sorted(addedinm2 - addedinm1)
571 572
572 573 header = " unmatched files in %s"
573 574 if u1:
574 575 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
575 576 if u2:
576 577 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
577 578
578 579 fullcopy = copies1.copy()
579 580 fullcopy.update(copies2)
580 581 if not fullcopy:
581 582 return copy, {}, diverge, renamedelete, {}
582 583
583 584 if repo.ui.debugflag:
584 585 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
585 586 "% = renamed and deleted):\n")
586 587 for f in sorted(fullcopy):
587 588 note = ""
588 589 if f in copy:
589 590 note += "*"
590 591 if f in divergeset:
591 592 note += "!"
592 593 if f in renamedeleteset:
593 594 note += "%"
594 595 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
595 596 note))
596 597 del divergeset
597 598
598 599 repo.ui.debug(" checking for directory renames\n")
599 600
600 601 # generate a directory move map
601 602 d1, d2 = c1.dirs(), c2.dirs()
602 603 invalid = set()
603 604 dirmove = {}
604 605
605 606 # examine each file copy for a potential directory move, which is
606 607 # when all the files in a directory are moved to a new directory
607 608 for dst, src in fullcopy.iteritems():
608 609 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
609 610 if dsrc in invalid:
610 611 # already seen to be uninteresting
611 612 continue
612 613 elif dsrc in d1 and ddst in d1:
613 614 # directory wasn't entirely moved locally
614 615 invalid.add(dsrc)
615 616 elif dsrc in d2 and ddst in d2:
616 617 # directory wasn't entirely moved remotely
617 618 invalid.add(dsrc)
618 619 elif dsrc in dirmove and dirmove[dsrc] != ddst:
619 620 # files from the same directory moved to two different places
620 621 invalid.add(dsrc)
621 622 else:
622 623 # looks good so far
623 624 dirmove[dsrc] = ddst
624 625
625 626 for i in invalid:
626 627 if i in dirmove:
627 628 del dirmove[i]
628 629 del d1, d2, invalid
629 630
630 631 if not dirmove:
631 632 return copy, {}, diverge, renamedelete, {}
632 633
633 634 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
634 635
635 636 for d in dirmove:
636 637 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
637 638 (d, dirmove[d]))
638 639
639 640 movewithdir = {}
640 641 # check unaccounted nonoverlapping files against directory moves
641 642 for f in u1 + u2:
642 643 if f not in fullcopy:
643 644 for d in dirmove:
644 645 if f.startswith(d):
645 646 # new file added in a directory that was moved, move it
646 647 df = dirmove[d] + f[len(d):]
647 648 if df not in copy:
648 649 movewithdir[f] = df
649 650 repo.ui.debug((" pending file src: '%s' -> "
650 651 "dst: '%s'\n") % (f, df))
651 652 break
652 653
653 654 return copy, movewithdir, diverge, renamedelete, dirmove
654 655
655 656 def _heuristicscopytracing(repo, c1, c2, base):
656 657 """ Fast copytracing using filename heuristics
657 658
658 659 Assumes that moves or renames are of following two types:
659 660
660 661 1) Inside a directory only (same directory name but different filenames)
661 662 2) Move from one directory to another
662 663 (same filenames but different directory names)
663 664
664 665 Works only when there are no merge commits in the "source branch".
665 666 Source branch is commits from base up to c2 not including base.
666 667
667 668 If merge is involved it fallbacks to _fullcopytracing().
668 669
669 670 Can be used by setting the following config:
670 671
671 672 [experimental]
672 673 copytrace = heuristics
673 674
674 675 In some cases the copy/move candidates found by heuristics can be very large
675 676 in number and that will make the algorithm slow. The number of possible
676 677 candidates to check can be limited by using the config
677 678 `experimental.copytrace.movecandidateslimit` which defaults to 100.
678 679 """
679 680
680 681 if c1.rev() is None:
681 682 c1 = c1.p1()
682 683 if c2.rev() is None:
683 684 c2 = c2.p1()
684 685
685 686 copies = {}
686 687
687 688 changedfiles = set()
688 689 m1 = c1.manifest()
689 690 if not repo.revs('%d::%d', base.rev(), c2.rev()):
690 691 # If base is not in c2 branch, we switch to fullcopytracing
691 692 repo.ui.debug("switching to full copytracing as base is not "
692 693 "an ancestor of c2\n")
693 694 return _fullcopytracing(repo, c1, c2, base)
694 695
695 696 ctx = c2
696 697 while ctx != base:
697 698 if len(ctx.parents()) == 2:
698 699 # To keep things simple let's not handle merges
699 700 repo.ui.debug("switching to full copytracing because of merges\n")
700 701 return _fullcopytracing(repo, c1, c2, base)
701 702 changedfiles.update(ctx.files())
702 703 ctx = ctx.p1()
703 704
704 705 cp = _forwardcopies(base, c2)
705 706 for dst, src in cp.iteritems():
706 707 if src in m1:
707 708 copies[dst] = src
708 709
709 710 # file is missing if it isn't present in the destination, but is present in
710 711 # the base and present in the source.
711 712 # Presence in the base is important to exclude added files, presence in the
712 713 # source is important to exclude removed files.
713 714 filt = lambda f: f not in m1 and f in base and f in c2
714 715 missingfiles = [f for f in changedfiles if filt(f)]
715 716
716 717 if missingfiles:
717 718 basenametofilename = collections.defaultdict(list)
718 719 dirnametofilename = collections.defaultdict(list)
719 720
720 721 for f in m1.filesnotin(base.manifest()):
721 722 basename = os.path.basename(f)
722 723 dirname = os.path.dirname(f)
723 724 basenametofilename[basename].append(f)
724 725 dirnametofilename[dirname].append(f)
725 726
726 727 for f in missingfiles:
727 728 basename = os.path.basename(f)
728 729 dirname = os.path.dirname(f)
729 730 samebasename = basenametofilename[basename]
730 731 samedirname = dirnametofilename[dirname]
731 732 movecandidates = samebasename + samedirname
732 733 # f is guaranteed to be present in c2, that's why
733 734 # c2.filectx(f) won't fail
734 735 f2 = c2.filectx(f)
735 736 # we can have a lot of candidates which can slow down the heuristics
736 737 # config value to limit the number of candidates moves to check
737 738 maxcandidates = repo.ui.configint('experimental',
738 739 'copytrace.movecandidateslimit')
739 740
740 741 if len(movecandidates) > maxcandidates:
741 742 repo.ui.status(_("skipping copytracing for '%s', more "
742 743 "candidates than the limit: %d\n")
743 744 % (f, len(movecandidates)))
744 745 continue
745 746
746 747 for candidate in movecandidates:
747 748 f1 = c1.filectx(candidate)
748 749 if _related(f1, f2):
749 750 # if there are a few related copies then we'll merge
750 751 # changes into all of them. This matches the behaviour
751 752 # of upstream copytracing
752 753 copies[candidate] = f
753 754
754 755 return copies, {}, {}, {}, {}
755 756
756 757 def _related(f1, f2):
757 758 """return True if f1 and f2 filectx have a common ancestor
758 759
759 760 Walk back to common ancestor to see if the two files originate
760 761 from the same file. Since workingfilectx's rev() is None it messes
761 762 up the integer comparison logic, hence the pre-step check for
762 763 None (f1 and f2 can only be workingfilectx's initially).
763 764 """
764 765
765 766 if f1 == f2:
766 767 return True # a match
767 768
768 769 g1, g2 = f1.ancestors(), f2.ancestors()
769 770 try:
770 771 f1r, f2r = f1.linkrev(), f2.linkrev()
771 772
772 773 if f1r is None:
773 774 f1 = next(g1)
774 775 if f2r is None:
775 776 f2 = next(g2)
776 777
777 778 while True:
778 779 f1r, f2r = f1.linkrev(), f2.linkrev()
779 780 if f1r > f2r:
780 781 f1 = next(g1)
781 782 elif f2r > f1r:
782 783 f2 = next(g2)
783 784 else: # f1 and f2 point to files in the same linkrev
784 785 return f1 == f2 # true if they point to the same file
785 786 except StopIteration:
786 787 return False
787 788
788 789 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
789 790 """reproduce copies from fromrev to rev in the dirstate
790 791
791 792 If skiprev is specified, it's a revision that should be used to
792 793 filter copy records. Any copies that occur between fromrev and
793 794 skiprev will not be duplicated, even if they appear in the set of
794 795 copies between fromrev and rev.
795 796 """
796 797 exclude = {}
797 798 ctraceconfig = repo.ui.config('experimental', 'copytrace')
798 799 bctrace = stringutil.parsebool(ctraceconfig)
799 800 if (skiprev is not None and
800 801 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
801 802 # copytrace='off' skips this line, but not the entire function because
802 803 # the line below is O(size of the repo) during a rebase, while the rest
803 804 # of the function is much faster (and is required for carrying copy
804 805 # metadata across the rebase anyway).
805 806 exclude = pathcopies(repo[fromrev], repo[skiprev])
806 807 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
807 808 if dst in exclude:
808 809 continue
809 810 if dst in wctx:
810 811 wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now