copies: avoid reusing the same variable for two different copy dicts...
Martin von Zweigbergk
r42714:e7c55e24 default
@@ -1,811 +1,811 @@
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 util,
21 21 )
22 22 from .utils import (
23 23 stringutil,
24 24 )
25 25
26 26 def _findlimit(repo, ctxa, ctxb):
27 27 """
28 28 Find the last revision that needs to be checked to ensure that a full
29 29 transitive closure for file copies can be properly calculated.
30 30 Generally, this means finding the earliest revision number that's an
31 31 ancestor of a or b but not both, except when a or b is a direct descendant
32 32 of the other, in which case we can return the minimum revnum of a and b.
33 33 """
34 34
35 35 # basic idea:
36 36 # - mark a and b with different sides
37 37 # - if a parent's children are all on the same side, the parent is
38 38 # on that side, otherwise it is on no side
39 39 # - walk the graph in topological order with the help of a heap;
40 40 # - add unseen parents to side map
41 41 # - clear side of any parent that has children on different sides
42 42 # - track number of interesting revs that might still be on a side
43 43 # - track the lowest interesting rev seen
44 44 # - quit when interesting revs is zero
45 45
46 46 cl = repo.changelog
47 47 wdirparents = None
48 48 a = ctxa.rev()
49 49 b = ctxb.rev()
50 50 if a is None:
51 51 wdirparents = (ctxa.p1(), ctxa.p2())
52 52 a = node.wdirrev
53 53 if b is None:
54 54 assert not wdirparents
55 55 wdirparents = (ctxb.p1(), ctxb.p2())
56 56 b = node.wdirrev
57 57
58 58 side = {a: -1, b: 1}
59 59 visit = [-a, -b]
60 60 heapq.heapify(visit)
61 61 interesting = len(visit)
62 62 limit = node.wdirrev
63 63
64 64 while interesting:
65 65 r = -heapq.heappop(visit)
66 66 if r == node.wdirrev:
67 67 parents = [pctx.rev() for pctx in wdirparents]
68 68 else:
69 69 parents = cl.parentrevs(r)
70 70 if parents[1] == node.nullrev:
71 71 parents = parents[:1]
72 72 for p in parents:
73 73 if p not in side:
74 74 # first time we see p; add it to visit
75 75 side[p] = side[r]
76 76 if side[p]:
77 77 interesting += 1
78 78 heapq.heappush(visit, -p)
79 79 elif side[p] and side[p] != side[r]:
80 80 # p was interesting but now we know better
81 81 side[p] = 0
82 82 interesting -= 1
83 83 if side[r]:
84 84 limit = r # lowest rev visited
85 85 interesting -= 1
86 86
87 87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 88 # 1/ File 'a0' committed
89 89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 90 # 3/ Move back to first commit
91 91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 93 #
94 94 # During the amend in step five, we will be in this state:
95 95 #
96 96 # @ 3 temporary amend commit for a1-amend
97 97 # |
98 98 # o 2 a1-amend
99 99 # |
100 100 # | o 1 a1
101 101 # |/
102 102 # o 0 a0
103 103 #
104 104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 105 # yet the filelog has the copy information in rev 1 and we will not look
106 106 # back far enough unless we also look at a and b as candidates.
107 107 # This only occurs when a is a descendant of b or vice versa.
108 108 return min(limit, a, b)
109 109
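The side-marking walk outlined above can be sketched on a tiny, made-up revision graph (hypothetical revision numbers; the working-directory and nullrev handling of the real function is omitted):

import heapq

# toy DAG: 0 is the root, 1 and 2 are its children, 3 is a child of 2
parents = {0: (), 1: (0,), 2: (0,), 3: (2,)}

def findlimit(a, b):
    side = {a: -1, b: 1}           # mark a and b with different sides
    visit = [-a, -b]               # max-heap via negated revision numbers
    heapq.heapify(visit)
    interesting = len(visit)
    limit = max(parents) + 1       # stands in for node.wdirrev
    while interesting:
        r = -heapq.heappop(visit)
        for p in parents[r]:
            if p not in side:
                side[p] = side[r]            # first time we see p
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                side[p] = 0                  # p has children on both sides
                interesting -= 1
        if side[r]:
            limit = r                        # lowest one-sided rev seen so far
            interesting -= 1
    return min(limit, a, b)

print(findlimit(3, 1))   # 1: rev 1 is the earliest rev that is an ancestor of only one side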
110 110 def _chainandfilter(src, dst, a, b):
111 111 """chain two sets of copies 'a' and 'b' and filter result"""
112 112
113 113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
114 114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
115 115 # following table (not including trivial cases). For example, case 2 is
116 116 # where a file existed in 'src' and remained under that name in 'mid' and
117 117 # then was renamed between 'mid' and 'dst'.
118 118 #
119 119 # case src mid dst result
120 120 # 1 x y - -
121 121 # 2 x y y x->y
122 122 # 3 x y x -
123 123 # 4 x y z x->z
124 124 # 5 - x y -
125 125 # 6 x x y x->y
126 126 #
127 127 # _chain() takes care of chaining the copies in 'a' and 'b', but it
128 128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
129 129 # between 5 and 6, so it includes all cases in its result.
130 130 # Cases 1, 3, and 5 are then removed by _filter().
131 131
132 132 t = _chain(a, b)
133 133 _filter(src, dst, t)
134 134 return t
135 135
136 136 def _filter(src, dst, t):
137 137 """filters out invalid copies after chaining"""
138 138 for k, v in list(t.items()):
139 139 # remove copies from files that didn't exist
140 140 if v not in src:
141 141 del t[k]
142 142 # remove criss-crossed copies
143 143 elif k in src and v in dst:
144 144 del t[k]
145 145 # remove copies to files that were then removed
146 146 elif k not in dst:
147 147 del t[k]
148 148
149 149 def _chain(a, b):
150 150 """chain two sets of copies 'a' and 'b'"""
151 151 t = a.copy()
152 152 for k, v in b.iteritems():
153 153 if v in t:
154 154 t[k] = t[v]
155 155 else:
156 156 t[k] = v
157 157 return t
158 158
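To make the chaining and the later filtering concrete, here is a minimal sketch on plain dicts with hypothetical file names (copy dicts map destination name to source name, as in the case table above):

def chain(a, b):
    # same logic as _chain(): follow b's sources through a when possible
    t = dict(a)
    for k, v in b.items():
        t[k] = t[v] if v in t else v
    return t

srctomid = {'b': 'a'}               # src..mid: 'a' was renamed to 'b'
midtodst = {'c': 'b'}               # mid..dst: 'b' was renamed to 'c'
print(chain(srctomid, midtodst))    # {'b': 'a', 'c': 'a'}
# _filter() then drops 'b': 'a' because 'b' no longer exists in dst
# (the "copies to files that were then removed" case)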
159 159 def _tracefile(fctx, am, limit):
160 160 """return file context that is the ancestor of fctx present in ancestor
161 161 manifest am, stopping after the first ancestor lower than limit"""
162 162
163 163 for f in fctx.ancestors():
164 164 if am.get(f.path(), None) == f.filenode():
165 165 return f
166 166 if not f.isintroducedafter(limit):
167 167 return None
168 168
169 169 def _dirstatecopies(repo, match=None):
170 170 ds = repo.dirstate
171 171 c = ds.copies().copy()
172 172 for k in list(c):
173 173 if ds[k] not in 'anm' or (match and not match(k)):
174 174 del c[k]
175 175 return c
176 176
177 177 def _computeforwardmissing(a, b, match=None):
178 178 """Computes which files are in b but not a.
179 179 This is its own function so extensions can easily wrap this call to see what
180 180 files _forwardcopies is about to process.
181 181 """
182 182 ma = a.manifest()
183 183 mb = b.manifest()
184 184 return mb.filesnotin(ma, match=match)
185 185
186 186 def usechangesetcentricalgo(repo):
187 187 """Checks if we should use changeset-centric copy algorithms"""
188 188 return (repo.ui.config('experimental', 'copies.read-from') in
189 189 ('changeset-only', 'compatibility'))
190 190
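For reference, the changeset-centric algorithms are selected by the hgrc setting checked above; the two accepted values are the ones listed in the code, and the stanza below is only an illustration:

[experimental]
copies.read-from = changeset-only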
191 191 def _committedforwardcopies(a, b, match):
192 192 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
193 193 # files might have to be traced back to the fctx parent of the last
194 194 # one-side-only changeset, but not further back than that
195 195 repo = a._repo
196 196
197 197 if usechangesetcentricalgo(repo):
198 198 return _changesetforwardcopies(a, b, match)
199 199
200 200 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
201 201 dbg = repo.ui.debug
202 202 if debug:
203 203 dbg('debug.copies: looking into rename from %s to %s\n'
204 204 % (a, b))
205 205 limit = _findlimit(repo, a, b)
206 206 if debug:
207 207 dbg('debug.copies: search limit: %d\n' % limit)
208 208 am = a.manifest()
209 209
210 210 # find where new files came from
211 211 # we currently don't try to find where old files went, too expensive
212 212 # this means we can miss a case like 'hg rm b; hg cp a b'
213 213 cm = {}
214 214
215 215 # Computing the forward missing is quite expensive on large manifests, since
216 216 # it compares the entire manifests. We can optimize it in the common use
217 217 # case of computing what copies are in a commit versus its parent (like
218 218 # during a rebase or histedit). Note, we exclude merge commits from this
219 219 # optimization, since the ctx.files() for a merge commit is not correct for
220 220 # this comparison.
221 221 forwardmissingmatch = match
222 222 if b.p1() == a and b.p2().node() == node.nullid:
223 223 filesmatcher = matchmod.exact(b.files())
224 224 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
225 225 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
226 226
227 227 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
228 228
229 229 if debug:
230 230 dbg('debug.copies: missing files to search: %d\n' % len(missing))
231 231
232 232 for f in sorted(missing):
233 233 if debug:
234 234 dbg('debug.copies: tracing file: %s\n' % f)
235 235 fctx = b[f]
236 236 fctx._ancestrycontext = ancestrycontext
237 237
238 238 if debug:
239 239 start = util.timer()
240 240 ofctx = _tracefile(fctx, am, limit)
241 241 if ofctx:
242 242 if debug:
243 243 dbg('debug.copies: rename of: %s\n' % ofctx._path)
244 244 cm[f] = ofctx.path()
245 245 if debug:
246 246 dbg('debug.copies: time: %f seconds\n'
247 247 % (util.timer() - start))
248 248 return cm
249 249
250 250 def _changesetforwardcopies(a, b, match):
251 251 if a.rev() == node.nullrev:
252 252 return {}
253 253
254 254 repo = a.repo()
255 255 children = {}
256 256 cl = repo.changelog
257 257 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
258 258 for r in missingrevs:
259 259 for p in cl.parentrevs(r):
260 260 if p == node.nullrev:
261 261 continue
262 262 if p not in children:
263 263 children[p] = [r]
264 264 else:
265 265 children[p].append(r)
266 266
267 267 roots = set(children) - set(missingrevs)
268 268 # 'work' contains 3-tuples of a (revision number, parent number, copies).
269 269 # The parent number is only used for knowing which parent the copies dict
270 270 # came from.
271 271 work = [(r, 1, {}) for r in roots]
272 272 heapq.heapify(work)
273 273 alwaysmatch = match.always()
274 274 while work:
275 275 r, i1, copies1 = heapq.heappop(work)
276 276 if work and work[0][0] == r:
277 277 # We are tracing copies from both parents
278 278 r, i2, copies2 = heapq.heappop(work)
279 279 copies = {}
280 280 allcopies = set(copies1) | set(copies2)
281 281 # TODO: perhaps this filtering should be done as long as ctx
282 282 # is a merge, whether or not we're tracing from both parents.
283 283 for dst in allcopies:
284 284 if not alwaysmatch and not match(dst):
285 285 continue
286 286 # Unlike when copies are stored in the filelog, we consider
287 287 # it a copy even if the destination already existed on the
288 288 # other branch. It's simply too expensive to check if the
289 289 # file existed in the manifest.
290 290 if dst in copies1:
291 291 # If it was copied on the p1 side, mark it as copied from
292 292 # that side, even if it was also copied on the p2 side.
293 293 copies[dst] = copies1[dst]
294 294 else:
295 295 copies[dst] = copies2[dst]
296 296 else:
297 297 copies = copies1
298 298 if r == b.rev():
299 299 _filter(a, b, copies)
300 300 return copies
301 301 for i, c in enumerate(children[r]):
302 302 childctx = repo[c]
303 303 if r == childctx.p1().rev():
304 304 parent = 1
305 305 childcopies = childctx.p1copies()
306 306 else:
307 307 assert r == childctx.p2().rev()
308 308 parent = 2
309 309 childcopies = childctx.p2copies()
310 310 if not alwaysmatch:
311 311 childcopies = {dst: src for dst, src in childcopies.items()
312 312 if match(dst)}
313 313 # Copy the dict only if later iterations will also need it
314 314 if i != len(children[r]) - 1:
315 - copies = copies.copy()
316 - if childcopies:
317 - childcopies = _chain(copies, childcopies)
315 + newcopies = copies.copy()
318 316 else:
319 - childcopies = copies
317 + newcopies = copies
318 + if childcopies:
319 + newcopies = _chain(newcopies, childcopies)
320 320 for f in childctx.filesremoved():
321 - if f in childcopies:
322 - del childcopies[f]
323 - heapq.heappush(work, (c, parent, childcopies))
321 + if f in newcopies:
322 + del newcopies[f]
323 + heapq.heappush(work, (c, parent, newcopies))
324 324 assert False
325 325
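The reason each child needs its own 'newcopies' dict, which is the point of this change, can be sketched with plain dicts and made-up revision numbers: the last child may share the parent's dict, but earlier children must work on a copy so that their deletions cannot leak into their siblings' dicts.

# (rev, copies recorded in the child, files removed in the child) -- hypothetical data
childrendata = [(10, {}, ['new.txt']),
                (11, {}, [])]
parentcopies = {'new.txt': 'old.txt'}
work = []
for i, (rev, childcopies, removed) in enumerate(childrendata):
    newcopies = parentcopies
    if i != len(childrendata) - 1:
        newcopies = parentcopies.copy()   # later iterations still need the original
    # (chaining with childcopies is omitted here; _chain() already returns a fresh dict)
    for f in removed:
        if f in newcopies:
            del newcopies[f]
    work.append((rev, newcopies))
print(work[0][1])   # {} -- rev 10 removed 'new.txt', so its copy record is dropped
print(work[1][1])   # {'new.txt': 'old.txt'} -- rev 11 is unaffected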
326 326 def _forwardcopies(a, b, match=None):
327 327 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
328 328
329 329 match = a.repo().narrowmatch(match)
330 330 # check for working copy
331 331 if b.rev() is None:
332 332 if a == b.p1():
333 333 # short-circuit to avoid issues with merge states
334 334 return _dirstatecopies(b._repo, match)
335 335
336 336 cm = _committedforwardcopies(a, b.p1(), match)
337 337 # combine copies from dirstate if necessary
338 338 return _chainandfilter(a, b, cm, _dirstatecopies(b._repo, match))
339 339 return _committedforwardcopies(a, b, match)
340 340
341 341 def _backwardrenames(a, b, match):
342 342 if a._repo.ui.config('experimental', 'copytrace') == 'off':
343 343 return {}
344 344
345 345 # Even though we're not taking copies into account, 1:n rename situations
346 346 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
347 347 # arbitrarily pick one of the renames.
348 348 # We don't want to pass in "match" here, since that would filter
349 349 # the destination by it. Since we're reversing the copies, we want
350 350 # to filter the source instead.
351 351 f = _forwardcopies(b, a)
352 352 r = {}
353 353 for k, v in sorted(f.iteritems()):
354 354 if match and not match(v):
355 355 continue
356 356 # remove copies
357 357 if v in a:
358 358 continue
359 359 r[v] = k
360 360 return r
361 361
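The dict reversal above can be sketched with hypothetical names; in a 1:n situation ('hg cp a b; hg mv a c') both 'b' and 'c' map back to 'a', and iterating in sorted order means the later destination arbitrarily wins (match filtering and the copy-vs-rename check are left out of this sketch):

forward = {'b': 'a', 'c': 'a'}       # dst -> src, as returned by the forward pass
reversed_copies = {}
for k, v in sorted(forward.items()):
    reversed_copies[v] = k           # later keys overwrite earlier ones
print(reversed_copies)               # {'a': 'c'}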
362 362 def pathcopies(x, y, match=None):
363 363 """find {dst@y: src@x} copy mapping for directed compare"""
364 364 repo = x._repo
365 365 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
366 366 if debug:
367 367 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
368 368 % (x, y))
369 369 if x == y or not x or not y:
370 370 return {}
371 371 a = y.ancestor(x)
372 372 if a == x:
373 373 if debug:
374 374 repo.ui.debug('debug.copies: search mode: forward\n')
375 375 return _forwardcopies(x, y, match=match)
376 376 if a == y:
377 377 if debug:
378 378 repo.ui.debug('debug.copies: search mode: backward\n')
379 379 return _backwardrenames(x, y, match=match)
380 380 if debug:
381 381 repo.ui.debug('debug.copies: search mode: combined\n')
382 382 return _chainandfilter(x, y, _backwardrenames(x, a, match=match),
383 383 _forwardcopies(a, y, match=match))
384 384
385 385 def mergecopies(repo, c1, c2, base):
386 386 """
387 387 Finds moves and copies between context c1 and c2 that are relevant for
388 388 merging. 'base' will be used as the merge base.
389 389
390 390 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
391 391 files that were moved/copied in one merge parent and modified in another.
392 392 For example:
393 393
394 394 o ---> 4 another commit
395 395 |
396 396 | o ---> 3 commit that modifies a.txt
397 397 | /
398 398 o / ---> 2 commit that moves a.txt to b.txt
399 399 |/
400 400 o ---> 1 merge base
401 401
402 402 If we try to rebase revision 3 on revision 4, since there is no a.txt in
403 403 revision 4 and the user has copytrace disabled, we print the following
404 404 message:
405 405
406 406 ```other changed <file> which local deleted```
407 407
408 408 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
409 409 "dirmove".
410 410
411 411 "copy" is a mapping from destination name -> source name,
412 412 where source is in c1 and destination is in c2 or vice-versa.
413 413
414 414 "movewithdir" is a mapping from source name -> destination name,
415 415 where the file at source present in one context but not the other
416 416 needs to be moved to destination by the merge process, because the
417 417 other context moved the directory it is in.
418 418
419 419 "diverge" is a mapping of source name -> list of destination names
420 420 for divergent renames.
421 421
422 422 "renamedelete" is a mapping of source name -> list of destination
423 423 names for files deleted in c1 that were renamed in c2 or vice-versa.
424 424
425 425 "dirmove" is a mapping of detected source dir -> destination dir renames.
426 426 This is needed for handling changes to new files previously grafted into
427 427 renamed directories.
428 428
429 429 This function calls different copytracing algorithms based on config.
430 430 """
431 431 # avoid silly behavior for update from empty dir
432 432 if not c1 or not c2 or c1 == c2:
433 433 return {}, {}, {}, {}, {}
434 434
435 435 narrowmatch = c1.repo().narrowmatch()
436 436
437 437 # avoid silly behavior for parent -> working dir
438 438 if c2.node() is None and c1.node() == repo.dirstate.p1():
439 439 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
440 440
441 441 copytracing = repo.ui.config('experimental', 'copytrace')
442 442 if stringutil.parsebool(copytracing) is False:
443 443 # stringutil.parsebool() returns None when it is unable to parse the
444 444 # value, so copytracing stays enabled in such cases
445 445 return {}, {}, {}, {}, {}
446 446
447 447 if usechangesetcentricalgo(repo):
448 448 # The heuristics don't make sense when we need changeset-centric algos
449 449 return _fullcopytracing(repo, c1, c2, base)
450 450
451 451 # Copy trace disabling is explicitly below the node == p1 logic above
452 452 # because the logic above is required for a simple copy to be kept across a
453 453 # rebase.
454 454 if copytracing == 'heuristics':
455 455 # Do full copytracing if only non-public revisions are involved as
456 456 # that will be fast enough and will also cover the copies which could
457 457 # be missed by heuristics
458 458 if _isfullcopytraceable(repo, c1, base):
459 459 return _fullcopytracing(repo, c1, c2, base)
460 460 return _heuristicscopytracing(repo, c1, c2, base)
461 461 else:
462 462 return _fullcopytracing(repo, c1, c2, base)
463 463
464 464 def _isfullcopytraceable(repo, c1, base):
465 465 """ Checks whether base, source and destination are all non-public branches;
466 466 if so, use the full copytrace algorithm for increased capabilities
467 467 since it will be fast enough.
468 468
469 469 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
470 470 the number of changesets from c1 to base; if the number of changesets exceeds
471 471 the limit, the full copytracing algorithm won't be used.
472 472 """
473 473 if c1.rev() is None:
474 474 c1 = c1.p1()
475 475 if c1.mutable() and base.mutable():
476 476 sourcecommitlimit = repo.ui.configint('experimental',
477 477 'copytrace.sourcecommitlimit')
478 478 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
479 479 return commits < sourcecommitlimit
480 480 return False
481 481
482 482 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
483 483 copy, renamedelete):
484 484 if src not in m2:
485 485 # deleted on side 2
486 486 if src not in m1:
487 487 # renamed on side 1, deleted on side 2
488 488 renamedelete[src] = dsts1
489 489 elif m2[src] != mb[src]:
490 490 if not _related(c2[src], base[src]):
491 491 return
492 492 # modified on side 2
493 493 for dst in dsts1:
494 494 if dst not in m2:
495 495 # dst not added on side 2 (handle as regular
496 496 # "both created" case in manifestmerge otherwise)
497 497 copy[dst] = src
498 498
499 499 def _fullcopytracing(repo, c1, c2, base):
500 500 """ The full copytracing algorithm which finds all the new files that were
501 501 added from the merge base up to the top commit and, for each such file,
502 502 checks whether it was copied from another file.
503 503
504 504 This is pretty slow when a lot of changesets are involved but will track all
505 505 the copies.
506 506 """
507 507 m1 = c1.manifest()
508 508 m2 = c2.manifest()
509 509 mb = base.manifest()
510 510
511 511 copies1 = pathcopies(base, c1)
512 512 copies2 = pathcopies(base, c2)
513 513
514 514 inversecopies1 = {}
515 515 inversecopies2 = {}
516 516 for dst, src in copies1.items():
517 517 inversecopies1.setdefault(src, []).append(dst)
518 518 for dst, src in copies2.items():
519 519 inversecopies2.setdefault(src, []).append(dst)
520 520
521 521 copy = {}
522 522 diverge = {}
523 523 renamedelete = {}
524 524 allsources = set(inversecopies1) | set(inversecopies2)
525 525 for src in allsources:
526 526 dsts1 = inversecopies1.get(src)
527 527 dsts2 = inversecopies2.get(src)
528 528 if dsts1 and dsts2:
529 529 # copied/renamed on both sides
530 530 if src not in m1 and src not in m2:
531 531 # renamed on both sides
532 532 dsts1 = set(dsts1)
533 533 dsts2 = set(dsts2)
534 534 # If there's some overlap in the rename destinations, we
535 535 # consider it not divergent. For example, if side 1 copies 'a'
536 536 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
537 537 # and 'd' and deletes 'a', then only 'c' is recorded as a copy.
538 538 if dsts1 & dsts2:
539 539 for dst in (dsts1 & dsts2):
540 540 copy[dst] = src
541 541 else:
542 542 diverge[src] = sorted(dsts1 | dsts2)
543 543 elif src in m1 and src in m2:
544 544 # copied on both sides
545 545 dsts1 = set(dsts1)
546 546 dsts2 = set(dsts2)
547 547 for dst in (dsts1 & dsts2):
548 548 copy[dst] = src
549 549 # TODO: Handle cases where it was renamed on one side and copied
550 550 # on the other side
551 551 elif dsts1:
552 552 # copied/renamed only on side 1
553 553 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
554 554 copy, renamedelete)
555 555 elif dsts2:
556 556 # copied/renamed only on side 2
557 557 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
558 558 copy, renamedelete)
559 559
560 560 renamedeleteset = set()
561 561 divergeset = set()
562 562 for dsts in diverge.values():
563 563 divergeset.update(dsts)
564 564 for dsts in renamedelete.values():
565 565 renamedeleteset.update(dsts)
566 566
567 567 # find interesting file sets from manifests
568 568 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
569 569 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
570 570 u1 = sorted(addedinm1 - addedinm2)
571 571 u2 = sorted(addedinm2 - addedinm1)
572 572
573 573 header = " unmatched files in %s"
574 574 if u1:
575 575 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
576 576 if u2:
577 577 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
578 578
579 579 fullcopy = copies1.copy()
580 580 fullcopy.update(copies2)
581 581 if not fullcopy:
582 582 return copy, {}, diverge, renamedelete, {}
583 583
584 584 if repo.ui.debugflag:
585 585 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
586 586 "% = renamed and deleted):\n")
587 587 for f in sorted(fullcopy):
588 588 note = ""
589 589 if f in copy:
590 590 note += "*"
591 591 if f in divergeset:
592 592 note += "!"
593 593 if f in renamedeleteset:
594 594 note += "%"
595 595 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
596 596 note))
597 597 del divergeset
598 598
599 599 repo.ui.debug(" checking for directory renames\n")
600 600
601 601 # generate a directory move map
602 602 d1, d2 = c1.dirs(), c2.dirs()
603 603 invalid = set()
604 604 dirmove = {}
605 605
606 606 # examine each file copy for a potential directory move, which is
607 607 # when all the files in a directory are moved to a new directory
608 608 for dst, src in fullcopy.iteritems():
609 609 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
610 610 if dsrc in invalid:
611 611 # already seen to be uninteresting
612 612 continue
613 613 elif dsrc in d1 and ddst in d1:
614 614 # directory wasn't entirely moved locally
615 615 invalid.add(dsrc)
616 616 elif dsrc in d2 and ddst in d2:
617 617 # directory wasn't entirely moved remotely
618 618 invalid.add(dsrc)
619 619 elif dsrc in dirmove and dirmove[dsrc] != ddst:
620 620 # files from the same directory moved to two different places
621 621 invalid.add(dsrc)
622 622 else:
623 623 # looks good so far
624 624 dirmove[dsrc] = ddst
625 625
626 626 for i in invalid:
627 627 if i in dirmove:
628 628 del dirmove[i]
629 629 del d1, d2, invalid
630 630
631 631 if not dirmove:
632 632 return copy, {}, diverge, renamedelete, {}
633 633
634 634 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
635 635
636 636 for d in dirmove:
637 637 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
638 638 (d, dirmove[d]))
639 639
640 640 movewithdir = {}
641 641 # check unaccounted nonoverlapping files against directory moves
642 642 for f in u1 + u2:
643 643 if f not in fullcopy:
644 644 for d in dirmove:
645 645 if f.startswith(d):
646 646 # new file added in a directory that was moved, move it
647 647 df = dirmove[d] + f[len(d):]
648 648 if df not in copy:
649 649 movewithdir[f] = df
650 650 repo.ui.debug((" pending file src: '%s' -> "
651 651 "dst: '%s'\n") % (f, df))
652 652 break
653 653
654 654 return copy, movewithdir, diverge, renamedelete, dirmove
655 655
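The directory-move detection near the end of the function can be sketched in isolation with hypothetical paths (posixpath.dirname stands in for pathutil.dirname here):

import posixpath

fullcopy = {'newdir/a.txt': 'olddir/a.txt',   # per-file copies, dst -> src
            'newdir/b.txt': 'olddir/b.txt'}
d1 = {'newdir'}      # directories present in c1 (it completed the move)
d2 = {'olddir'}      # directories present in c2 (it still has the old layout)

dirmove, invalid = {}, set()
for dst, src in fullcopy.items():
    dsrc, ddst = posixpath.dirname(src), posixpath.dirname(dst)
    if dsrc in invalid:
        continue
    elif dsrc in d1 and ddst in d1:
        invalid.add(dsrc)                      # not entirely moved on side 1
    elif dsrc in d2 and ddst in d2:
        invalid.add(dsrc)                      # not entirely moved on side 2
    elif dsrc in dirmove and dirmove[dsrc] != ddst:
        invalid.add(dsrc)                      # moved to two different places
    else:
        dirmove[dsrc] = ddst
dirmove = {k + '/': v + '/' for k, v in dirmove.items() if k not in invalid}
print(dirmove)       # {'olddir/': 'newdir/'}
# a file like 'olddir/new.txt' added only on the other side would then be
# scheduled to move to 'newdir/new.txt' via the movewithdir map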
656 656 def _heuristicscopytracing(repo, c1, c2, base):
657 657 """ Fast copytracing using filename heuristics
658 658
659 659 Assumes that moves or renames are of following two types:
660 660
661 661 1) Inside a directory only (same directory name but different filenames)
662 662 2) Move from one directory to another
663 663 (same filenames but different directory names)
664 664
665 665 Works only when there are no merge commits in the "source branch".
666 666 The source branch is the commits from base up to c2, not including base.
667 667
668 668 If a merge is involved it falls back to _fullcopytracing().
669 669
670 670 Can be used by setting the following config:
671 671
672 672 [experimental]
673 673 copytrace = heuristics
674 674
675 675 In some cases the copy/move candidates found by heuristics can be very large
676 676 in number and that will make the algorithm slow. The number of possible
677 677 candidates to check can be limited by using the config
678 678 `experimental.copytrace.movecandidateslimit` which defaults to 100.
679 679 """
680 680
681 681 if c1.rev() is None:
682 682 c1 = c1.p1()
683 683 if c2.rev() is None:
684 684 c2 = c2.p1()
685 685
686 686 copies = {}
687 687
688 688 changedfiles = set()
689 689 m1 = c1.manifest()
690 690 if not repo.revs('%d::%d', base.rev(), c2.rev()):
691 691 # If base is not in c2 branch, we switch to fullcopytracing
692 692 repo.ui.debug("switching to full copytracing as base is not "
693 693 "an ancestor of c2\n")
694 694 return _fullcopytracing(repo, c1, c2, base)
695 695
696 696 ctx = c2
697 697 while ctx != base:
698 698 if len(ctx.parents()) == 2:
699 699 # To keep things simple let's not handle merges
700 700 repo.ui.debug("switching to full copytracing because of merges\n")
701 701 return _fullcopytracing(repo, c1, c2, base)
702 702 changedfiles.update(ctx.files())
703 703 ctx = ctx.p1()
704 704
705 705 cp = _forwardcopies(base, c2)
706 706 for dst, src in cp.iteritems():
707 707 if src in m1:
708 708 copies[dst] = src
709 709
710 710 # file is missing if it isn't present in the destination, but is present in
711 711 # the base and present in the source.
712 712 # Presence in the base is important to exclude added files, presence in the
713 713 # source is important to exclude removed files.
714 714 filt = lambda f: f not in m1 and f in base and f in c2
715 715 missingfiles = [f for f in changedfiles if filt(f)]
716 716
717 717 if missingfiles:
718 718 basenametofilename = collections.defaultdict(list)
719 719 dirnametofilename = collections.defaultdict(list)
720 720
721 721 for f in m1.filesnotin(base.manifest()):
722 722 basename = os.path.basename(f)
723 723 dirname = os.path.dirname(f)
724 724 basenametofilename[basename].append(f)
725 725 dirnametofilename[dirname].append(f)
726 726
727 727 for f in missingfiles:
728 728 basename = os.path.basename(f)
729 729 dirname = os.path.dirname(f)
730 730 samebasename = basenametofilename[basename]
731 731 samedirname = dirnametofilename[dirname]
732 732 movecandidates = samebasename + samedirname
733 733 # f is guaranteed to be present in c2, that's why
734 734 # c2.filectx(f) won't fail
735 735 f2 = c2.filectx(f)
736 736 # we can have a lot of candidates which can slow down the heuristics
737 737 # config value to limit the number of candidates moves to check
738 738 maxcandidates = repo.ui.configint('experimental',
739 739 'copytrace.movecandidateslimit')
740 740
741 741 if len(movecandidates) > maxcandidates:
742 742 repo.ui.status(_("skipping copytracing for '%s', more "
743 743 "candidates than the limit: %d\n")
744 744 % (f, len(movecandidates)))
745 745 continue
746 746
747 747 for candidate in movecandidates:
748 748 f1 = c1.filectx(candidate)
749 749 if _related(f1, f2):
750 750 # if there are a few related copies then we'll merge
751 751 # changes into all of them. This matches the behaviour
752 752 # of upstream copytracing
753 753 copies[candidate] = f
754 754
755 755 return copies, {}, {}, {}, {}
756 756
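The candidate collection in the loop above can be sketched with hypothetical paths: for a file that disappeared from c1 but changed on the c2 side, the candidates are the files new in c1 that share its basename or its directory, each then checked with _related():

import collections, os

newfilesinc1 = ['lib/util.py', 'docs/readme.txt']   # m1.filesnotin(base.manifest())
missing = 'src/util.py'                             # changed in c2, gone from c1

basenametofilename = collections.defaultdict(list)
dirnametofilename = collections.defaultdict(list)
for f in newfilesinc1:
    basenametofilename[os.path.basename(f)].append(f)
    dirnametofilename[os.path.dirname(f)].append(f)

movecandidates = (basenametofilename[os.path.basename(missing)] +
                  dirnametofilename[os.path.dirname(missing)])
print(movecandidates)   # ['lib/util.py'] -- only candidates related to f become copies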
757 757 def _related(f1, f2):
758 758 """return True if f1 and f2 filectx have a common ancestor
759 759
760 760 Walk back to common ancestor to see if the two files originate
761 761 from the same file. Since workingfilectx's rev() is None it messes
762 762 up the integer comparison logic, hence the pre-step check for
763 763 None (f1 and f2 can only be workingfilectx's initially).
764 764 """
765 765
766 766 if f1 == f2:
767 767 return True # a match
768 768
769 769 g1, g2 = f1.ancestors(), f2.ancestors()
770 770 try:
771 771 f1r, f2r = f1.linkrev(), f2.linkrev()
772 772
773 773 if f1r is None:
774 774 f1 = next(g1)
775 775 if f2r is None:
776 776 f2 = next(g2)
777 777
778 778 while True:
779 779 f1r, f2r = f1.linkrev(), f2.linkrev()
780 780 if f1r > f2r:
781 781 f1 = next(g1)
782 782 elif f2r > f1r:
783 783 f2 = next(g2)
784 784 else: # f1 and f2 point to files in the same linkrev
785 785 return f1 == f2 # true if they point to the same file
786 786 except StopIteration:
787 787 return False
788 788
789 789 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
790 790 """reproduce copies from fromrev to rev in the dirstate
791 791
792 792 If skiprev is specified, it's a revision that should be used to
793 793 filter copy records. Any copies that occur between fromrev and
794 794 skiprev will not be duplicated, even if they appear in the set of
795 795 copies between fromrev and rev.
796 796 """
797 797 exclude = {}
798 798 ctraceconfig = repo.ui.config('experimental', 'copytrace')
799 799 bctrace = stringutil.parsebool(ctraceconfig)
800 800 if (skiprev is not None and
801 801 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
802 802 # copytrace='off' skips this line, but not the entire function because
803 803 # the line below is O(size of the repo) during a rebase, while the rest
804 804 # of the function is much faster (and is required for carrying copy
805 805 # metadata across the rebase anyway).
806 806 exclude = pathcopies(repo[fromrev], repo[skiprev])
807 807 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
808 808 if dst in exclude:
809 809 continue
810 810 if dst in wctx:
811 811 wctx[dst].markcopied(src)