##// END OF EJS Templates
copies: replace .items() by .values() where appropriate...
Martin von Zweigbergk -
r42410:a20d7c6a default
parent child Browse files
Show More
@@ -1,783 +1,783 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 util,
21 21 )
22 22 from .utils import (
23 23 stringutil,
24 24 )
25 25
26 26 def _findlimit(repo, ctxa, ctxb):
27 27 """
28 28 Find the last revision that needs to be checked to ensure that a full
29 29 transitive closure for file copies can be properly calculated.
30 30 Generally, this means finding the earliest revision number that's an
31 31 ancestor of a or b but not both, except when a or b is a direct descendent
32 32 of the other, in which case we can return the minimum revnum of a and b.
33 33 """
34 34
35 35 # basic idea:
36 36 # - mark a and b with different sides
37 37 # - if a parent's children are all on the same side, the parent is
38 38 # on that side, otherwise it is on no side
39 39 # - walk the graph in topological order with the help of a heap;
40 40 # - add unseen parents to side map
41 41 # - clear side of any parent that has children on different sides
42 42 # - track number of interesting revs that might still be on a side
43 43 # - track the lowest interesting rev seen
44 44 # - quit when interesting revs is zero
45 45
46 46 cl = repo.changelog
47 47 wdirparents = None
48 48 a = ctxa.rev()
49 49 b = ctxb.rev()
50 50 if a is None:
51 51 wdirparents = (ctxa.p1(), ctxa.p2())
52 52 a = node.wdirrev
53 53 if b is None:
54 54 assert not wdirparents
55 55 wdirparents = (ctxb.p1(), ctxb.p2())
56 56 b = node.wdirrev
57 57
58 58 side = {a: -1, b: 1}
59 59 visit = [-a, -b]
60 60 heapq.heapify(visit)
61 61 interesting = len(visit)
62 62 limit = node.wdirrev
63 63
64 64 while interesting:
65 65 r = -heapq.heappop(visit)
66 66 if r == node.wdirrev:
67 67 parents = [pctx.rev() for pctx in wdirparents]
68 68 else:
69 69 parents = cl.parentrevs(r)
70 70 if parents[1] == node.nullrev:
71 71 parents = parents[:1]
72 72 for p in parents:
73 73 if p not in side:
74 74 # first time we see p; add it to visit
75 75 side[p] = side[r]
76 76 if side[p]:
77 77 interesting += 1
78 78 heapq.heappush(visit, -p)
79 79 elif side[p] and side[p] != side[r]:
80 80 # p was interesting but now we know better
81 81 side[p] = 0
82 82 interesting -= 1
83 83 if side[r]:
84 84 limit = r # lowest rev visited
85 85 interesting -= 1
86 86
87 87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 88 # 1/ File 'a0' committed
89 89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 90 # 3/ Move back to first commit
91 91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 93 #
94 94 # During the amend in step five, we will be in this state:
95 95 #
96 96 # @ 3 temporary amend commit for a1-amend
97 97 # |
98 98 # o 2 a1-amend
99 99 # |
100 100 # | o 1 a1
101 101 # |/
102 102 # o 0 a0
103 103 #
104 104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 105 # yet the filelog has the copy information in rev 1 and we will not look
106 106 # back far enough unless we also look at the a and b as candidates.
107 107 # This only occurs when a is a descendent of b or visa-versa.
108 108 return min(limit, a, b)
109 109
110 110 def _chain(src, dst, a, b):
111 111 """chain two sets of copies a->b"""
112 112 t = a.copy()
113 113 for k, v in b.iteritems():
114 114 if v in t:
115 115 # found a chain
116 116 if t[v] != k:
117 117 # file wasn't renamed back to itself
118 118 t[k] = t[v]
119 119 if v not in dst:
120 120 # chain was a rename, not a copy
121 121 del t[v]
122 122 if v in src:
123 123 # file is a copy of an existing file
124 124 t[k] = v
125 125
126 126 for k, v in list(t.items()):
127 127 # remove criss-crossed copies
128 128 if k in src and v in dst:
129 129 del t[k]
130 130 # remove copies to files that were then removed
131 131 elif k not in dst:
132 132 del t[k]
133 133
134 134 return t
135 135
136 136 def _tracefile(fctx, am, limit=node.nullrev):
137 137 """return file context that is the ancestor of fctx present in ancestor
138 138 manifest am, stopping after the first ancestor lower than limit"""
139 139
140 140 for f in fctx.ancestors():
141 141 if am.get(f.path(), None) == f.filenode():
142 142 return f
143 143 if limit >= 0 and not f.isintroducedafter(limit):
144 144 return None
145 145
146 146 def _dirstatecopies(repo, match=None):
147 147 ds = repo.dirstate
148 148 c = ds.copies().copy()
149 149 for k in list(c):
150 150 if ds[k] not in 'anm' or (match and not match(k)):
151 151 del c[k]
152 152 return c
153 153
154 154 def _computeforwardmissing(a, b, match=None):
155 155 """Computes which files are in b but not a.
156 156 This is its own function so extensions can easily wrap this call to see what
157 157 files _forwardcopies is about to process.
158 158 """
159 159 ma = a.manifest()
160 160 mb = b.manifest()
161 161 return mb.filesnotin(ma, match=match)
162 162
163 163 def usechangesetcentricalgo(repo):
164 164 """Checks if we should use changeset-centric copy algorithms"""
165 165 return (repo.ui.config('experimental', 'copies.read-from') in
166 166 ('changeset-only', 'compatibility'))
167 167
168 168 def _committedforwardcopies(a, b, match):
169 169 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
170 170 # files might have to be traced back to the fctx parent of the last
171 171 # one-side-only changeset, but not further back than that
172 172 repo = a._repo
173 173
174 174 if usechangesetcentricalgo(repo):
175 175 return _changesetforwardcopies(a, b, match)
176 176
177 177 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
178 178 dbg = repo.ui.debug
179 179 if debug:
180 180 dbg('debug.copies: looking into rename from %s to %s\n'
181 181 % (a, b))
182 182 limit = _findlimit(repo, a, b)
183 183 if debug:
184 184 dbg('debug.copies: search limit: %d\n' % limit)
185 185 am = a.manifest()
186 186
187 187 # find where new files came from
188 188 # we currently don't try to find where old files went, too expensive
189 189 # this means we can miss a case like 'hg rm b; hg cp a b'
190 190 cm = {}
191 191
192 192 # Computing the forward missing is quite expensive on large manifests, since
193 193 # it compares the entire manifests. We can optimize it in the common use
194 194 # case of computing what copies are in a commit versus its parent (like
195 195 # during a rebase or histedit). Note, we exclude merge commits from this
196 196 # optimization, since the ctx.files() for a merge commit is not correct for
197 197 # this comparison.
198 198 forwardmissingmatch = match
199 199 if b.p1() == a and b.p2().node() == node.nullid:
200 200 filesmatcher = matchmod.exact(b.files())
201 201 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
202 202 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
203 203
204 204 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
205 205
206 206 if debug:
207 207 dbg('debug.copies: missing files to search: %d\n' % len(missing))
208 208
209 209 for f in sorted(missing):
210 210 if debug:
211 211 dbg('debug.copies: tracing file: %s\n' % f)
212 212 fctx = b[f]
213 213 fctx._ancestrycontext = ancestrycontext
214 214
215 215 if debug:
216 216 start = util.timer()
217 217 ofctx = _tracefile(fctx, am, limit)
218 218 if ofctx:
219 219 if debug:
220 220 dbg('debug.copies: rename of: %s\n' % ofctx._path)
221 221 cm[f] = ofctx.path()
222 222 if debug:
223 223 dbg('debug.copies: time: %f seconds\n'
224 224 % (util.timer() - start))
225 225 return cm
226 226
227 227 def _changesetforwardcopies(a, b, match):
228 228 if a.rev() == node.nullrev:
229 229 return {}
230 230
231 231 repo = a.repo()
232 232 children = {}
233 233 cl = repo.changelog
234 234 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
235 235 for r in missingrevs:
236 236 for p in cl.parentrevs(r):
237 237 if p == node.nullrev:
238 238 continue
239 239 if p not in children:
240 240 children[p] = [r]
241 241 else:
242 242 children[p].append(r)
243 243
244 244 roots = set(children) - set(missingrevs)
245 245 # 'work' contains 3-tuples of a (revision number, parent number, copies).
246 246 # The parent number is only used for knowing which parent the copies dict
247 247 # came from.
248 248 work = [(r, 1, {}) for r in roots]
249 249 heapq.heapify(work)
250 250 while work:
251 251 r, i1, copies1 = heapq.heappop(work)
252 252 if work and work[0][0] == r:
253 253 # We are tracing copies from both parents
254 254 r, i2, copies2 = heapq.heappop(work)
255 255 copies = {}
256 256 ctx = repo[r]
257 257 p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
258 258 allcopies = set(copies1) | set(copies2)
259 259 # TODO: perhaps this filtering should be done as long as ctx
260 260 # is merge, whether or not we're tracing from both parent.
261 261 for dst in allcopies:
262 262 if not match(dst):
263 263 continue
264 264 if dst not in copies2:
265 265 # Copied on p1 side: mark as copy from p1 side if it didn't
266 266 # already exist on p2 side
267 267 if dst not in p2man:
268 268 copies[dst] = copies1[dst]
269 269 elif dst not in copies1:
270 270 # Copied on p2 side: mark as copy from p2 side if it didn't
271 271 # already exist on p1 side
272 272 if dst not in p1man:
273 273 copies[dst] = copies2[dst]
274 274 else:
275 275 # Copied on both sides: mark as copy from p1 side
276 276 copies[dst] = copies1[dst]
277 277 else:
278 278 copies = copies1
279 279 if r == b.rev():
280 280 return copies
281 281 for c in children[r]:
282 282 childctx = repo[c]
283 283 if r == childctx.p1().rev():
284 284 parent = 1
285 285 childcopies = childctx.p1copies()
286 286 else:
287 287 assert r == childctx.p2().rev()
288 288 parent = 2
289 289 childcopies = childctx.p2copies()
290 290 if not match.always():
291 291 childcopies = {dst: src for dst, src in childcopies.items()
292 292 if match(dst)}
293 293 childcopies = _chain(a, childctx, copies, childcopies)
294 294 heapq.heappush(work, (c, parent, childcopies))
295 295 assert False
296 296
297 297 def _forwardcopies(a, b, match=None):
298 298 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
299 299
300 300 match = a.repo().narrowmatch(match)
301 301 # check for working copy
302 302 if b.rev() is None:
303 303 if a == b.p1():
304 304 # short-circuit to avoid issues with merge states
305 305 return _dirstatecopies(b._repo, match)
306 306
307 307 cm = _committedforwardcopies(a, b.p1(), match)
308 308 # combine copies from dirstate if necessary
309 309 return _chain(a, b, cm, _dirstatecopies(b._repo, match))
310 310 return _committedforwardcopies(a, b, match)
311 311
312 312 def _backwardrenames(a, b, match):
313 313 if a._repo.ui.config('experimental', 'copytrace') == 'off':
314 314 return {}
315 315
316 316 # Even though we're not taking copies into account, 1:n rename situations
317 317 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
318 318 # arbitrarily pick one of the renames.
319 319 # We don't want to pass in "match" here, since that would filter
320 320 # the destination by it. Since we're reversing the copies, we want
321 321 # to filter the source instead.
322 322 f = _forwardcopies(b, a)
323 323 r = {}
324 324 for k, v in sorted(f.iteritems()):
325 325 if match and not match(v):
326 326 continue
327 327 # remove copies
328 328 if v in a:
329 329 continue
330 330 r[v] = k
331 331 return r
332 332
333 333 def pathcopies(x, y, match=None):
334 334 """find {dst@y: src@x} copy mapping for directed compare"""
335 335 repo = x._repo
336 336 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
337 337 if debug:
338 338 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
339 339 % (x, y))
340 340 if x == y or not x or not y:
341 341 return {}
342 342 a = y.ancestor(x)
343 343 if a == x:
344 344 if debug:
345 345 repo.ui.debug('debug.copies: search mode: forward\n')
346 346 return _forwardcopies(x, y, match=match)
347 347 if a == y:
348 348 if debug:
349 349 repo.ui.debug('debug.copies: search mode: backward\n')
350 350 return _backwardrenames(x, y, match=match)
351 351 if debug:
352 352 repo.ui.debug('debug.copies: search mode: combined\n')
353 353 return _chain(x, y, _backwardrenames(x, a, match=match),
354 354 _forwardcopies(a, y, match=match))
355 355
356 356 def mergecopies(repo, c1, c2, base):
357 357 """
358 358 Finds moves and copies between context c1 and c2 that are relevant for
359 359 merging. 'base' will be used as the merge base.
360 360
361 361 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
362 362 files that were moved/ copied in one merge parent and modified in another.
363 363 For example:
364 364
365 365 o ---> 4 another commit
366 366 |
367 367 | o ---> 3 commit that modifies a.txt
368 368 | /
369 369 o / ---> 2 commit that moves a.txt to b.txt
370 370 |/
371 371 o ---> 1 merge base
372 372
373 373 If we try to rebase revision 3 on revision 4, since there is no a.txt in
374 374 revision 4, and if user have copytrace disabled, we prints the following
375 375 message:
376 376
377 377 ```other changed <file> which local deleted```
378 378
379 379 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
380 380 "dirmove".
381 381
382 382 "copy" is a mapping from destination name -> source name,
383 383 where source is in c1 and destination is in c2 or vice-versa.
384 384
385 385 "movewithdir" is a mapping from source name -> destination name,
386 386 where the file at source present in one context but not the other
387 387 needs to be moved to destination by the merge process, because the
388 388 other context moved the directory it is in.
389 389
390 390 "diverge" is a mapping of source name -> list of destination names
391 391 for divergent renames.
392 392
393 393 "renamedelete" is a mapping of source name -> list of destination
394 394 names for files deleted in c1 that were renamed in c2 or vice-versa.
395 395
396 396 "dirmove" is a mapping of detected source dir -> destination dir renames.
397 397 This is needed for handling changes to new files previously grafted into
398 398 renamed directories.
399 399
400 400 This function calls different copytracing algorithms based on config.
401 401 """
402 402 # avoid silly behavior for update from empty dir
403 403 if not c1 or not c2 or c1 == c2:
404 404 return {}, {}, {}, {}, {}
405 405
406 406 narrowmatch = c1.repo().narrowmatch()
407 407
408 408 # avoid silly behavior for parent -> working dir
409 409 if c2.node() is None and c1.node() == repo.dirstate.p1():
410 410 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
411 411
412 412 copytracing = repo.ui.config('experimental', 'copytrace')
413 413 boolctrace = stringutil.parsebool(copytracing)
414 414
415 415 # Copy trace disabling is explicitly below the node == p1 logic above
416 416 # because the logic above is required for a simple copy to be kept across a
417 417 # rebase.
418 418 if copytracing == 'heuristics':
419 419 # Do full copytracing if only non-public revisions are involved as
420 420 # that will be fast enough and will also cover the copies which could
421 421 # be missed by heuristics
422 422 if _isfullcopytraceable(repo, c1, base):
423 423 return _fullcopytracing(repo, c1, c2, base)
424 424 return _heuristicscopytracing(repo, c1, c2, base)
425 425 elif boolctrace is False:
426 426 # stringutil.parsebool() returns None when it is unable to parse the
427 427 # value, so we should rely on making sure copytracing is on such cases
428 428 return {}, {}, {}, {}, {}
429 429 else:
430 430 return _fullcopytracing(repo, c1, c2, base)
431 431
432 432 def _isfullcopytraceable(repo, c1, base):
433 433 """ Checks that if base, source and destination are all no-public branches,
434 434 if yes let's use the full copytrace algorithm for increased capabilities
435 435 since it will be fast enough.
436 436
437 437 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
438 438 number of changesets from c1 to base such that if number of changesets are
439 439 more than the limit, full copytracing algorithm won't be used.
440 440 """
441 441 if c1.rev() is None:
442 442 c1 = c1.p1()
443 443 if c1.mutable() and base.mutable():
444 444 sourcecommitlimit = repo.ui.configint('experimental',
445 445 'copytrace.sourcecommitlimit')
446 446 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
447 447 return commits < sourcecommitlimit
448 448 return False
449 449
450 450 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
451 451 copy, renamedelete):
452 452 if src not in m2:
453 453 # deleted on side 2
454 454 if src not in m1:
455 455 # renamed on side 1, deleted on side 2
456 456 renamedelete[src] = dsts1
457 457 elif m2[src] != mb[src]:
458 458 if not _related(c2[src], base[src]):
459 459 return
460 460 # modified on side 2
461 461 for dst in dsts1:
462 462 if dst not in m2:
463 463 # dst not added on side 2 (handle as regular
464 464 # "both created" case in manifestmerge otherwise)
465 465 copy[dst] = src
466 466
467 467 def _fullcopytracing(repo, c1, c2, base):
468 468 """ The full copytracing algorithm which finds all the new files that were
469 469 added from merge base up to the top commit and for each file it checks if
470 470 this file was copied from another file.
471 471
472 472 This is pretty slow when a lot of changesets are involved but will track all
473 473 the copies.
474 474 """
475 475 m1 = c1.manifest()
476 476 m2 = c2.manifest()
477 477 mb = base.manifest()
478 478
479 479 copies1 = pathcopies(base, c1)
480 480 copies2 = pathcopies(base, c2)
481 481
482 482 inversecopies1 = {}
483 483 inversecopies2 = {}
484 484 for dst, src in copies1.items():
485 485 inversecopies1.setdefault(src, []).append(dst)
486 486 for dst, src in copies2.items():
487 487 inversecopies2.setdefault(src, []).append(dst)
488 488
489 489 copy = {}
490 490 diverge = {}
491 491 renamedelete = {}
492 492 allsources = set(inversecopies1) | set(inversecopies2)
493 493 for src in allsources:
494 494 dsts1 = inversecopies1.get(src)
495 495 dsts2 = inversecopies2.get(src)
496 496 if dsts1 and dsts2:
497 497 # copied/renamed on both sides
498 498 if src not in m1 and src not in m2:
499 499 # renamed on both sides
500 500 dsts1 = set(dsts1)
501 501 dsts2 = set(dsts2)
502 502 # If there's some overlap in the rename destinations, we
503 503 # consider it not divergent. For example, if side 1 copies 'a'
504 504 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
505 505 # and 'd' and deletes 'a'.
506 506 if dsts1 & dsts2:
507 507 for dst in (dsts1 & dsts2):
508 508 copy[dst] = src
509 509 else:
510 510 diverge[src] = sorted(dsts1 | dsts2)
511 511 elif src in m1 and src in m2:
512 512 # copied on both sides
513 513 dsts1 = set(dsts1)
514 514 dsts2 = set(dsts2)
515 515 for dst in (dsts1 & dsts2):
516 516 copy[dst] = src
517 517 # TODO: Handle cases where it was renamed on one side and copied
518 518 # on the other side
519 519 elif dsts1:
520 520 # copied/renamed only on side 1
521 521 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
522 522 copy, renamedelete)
523 523 elif dsts2:
524 524 # copied/renamed only on side 2
525 525 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
526 526 copy, renamedelete)
527 527
528 528 renamedeleteset = set()
529 529 divergeset = set()
530 for src, dsts in diverge.items():
530 for dsts in diverge.values():
531 531 divergeset.update(dsts)
532 for src, dsts in renamedelete.items():
532 for dsts in renamedelete.values():
533 533 renamedeleteset.update(dsts)
534 534
535 535 # find interesting file sets from manifests
536 536 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
537 537 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
538 538 u1 = sorted(addedinm1 - addedinm2)
539 539 u2 = sorted(addedinm2 - addedinm1)
540 540
541 541 header = " unmatched files in %s"
542 542 if u1:
543 543 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
544 544 if u2:
545 545 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
546 546
547 547 fullcopy = copies1.copy()
548 548 fullcopy.update(copies2)
549 549 if not fullcopy:
550 550 return copy, {}, diverge, renamedelete, {}
551 551
552 552 if repo.ui.debugflag:
553 553 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
554 554 "% = renamed and deleted):\n")
555 555 for f in sorted(fullcopy):
556 556 note = ""
557 557 if f in copy:
558 558 note += "*"
559 559 if f in divergeset:
560 560 note += "!"
561 561 if f in renamedeleteset:
562 562 note += "%"
563 563 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
564 564 note))
565 565 del divergeset
566 566
567 567 repo.ui.debug(" checking for directory renames\n")
568 568
569 569 # generate a directory move map
570 570 d1, d2 = c1.dirs(), c2.dirs()
571 571 # Hack for adding '', which is not otherwise added, to d1 and d2
572 572 d1.addpath('/')
573 573 d2.addpath('/')
574 574 invalid = set()
575 575 dirmove = {}
576 576
577 577 # examine each file copy for a potential directory move, which is
578 578 # when all the files in a directory are moved to a new directory
579 579 for dst, src in fullcopy.iteritems():
580 580 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
581 581 if dsrc in invalid:
582 582 # already seen to be uninteresting
583 583 continue
584 584 elif dsrc in d1 and ddst in d1:
585 585 # directory wasn't entirely moved locally
586 586 invalid.add(dsrc)
587 587 elif dsrc in d2 and ddst in d2:
588 588 # directory wasn't entirely moved remotely
589 589 invalid.add(dsrc)
590 590 elif dsrc in dirmove and dirmove[dsrc] != ddst:
591 591 # files from the same directory moved to two different places
592 592 invalid.add(dsrc)
593 593 else:
594 594 # looks good so far
595 595 dirmove[dsrc] = ddst
596 596
597 597 for i in invalid:
598 598 if i in dirmove:
599 599 del dirmove[i]
600 600 del d1, d2, invalid
601 601
602 602 if not dirmove:
603 603 return copy, {}, diverge, renamedelete, {}
604 604
605 605 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
606 606
607 607 for d in dirmove:
608 608 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
609 609 (d, dirmove[d]))
610 610
611 611 movewithdir = {}
612 612 # check unaccounted nonoverlapping files against directory moves
613 613 for f in u1 + u2:
614 614 if f not in fullcopy:
615 615 for d in dirmove:
616 616 if f.startswith(d):
617 617 # new file added in a directory that was moved, move it
618 618 df = dirmove[d] + f[len(d):]
619 619 if df not in copy:
620 620 movewithdir[f] = df
621 621 repo.ui.debug((" pending file src: '%s' -> "
622 622 "dst: '%s'\n") % (f, df))
623 623 break
624 624
625 625 return copy, movewithdir, diverge, renamedelete, dirmove
626 626
627 627 def _heuristicscopytracing(repo, c1, c2, base):
628 628 """ Fast copytracing using filename heuristics
629 629
630 630 Assumes that moves or renames are of following two types:
631 631
632 632 1) Inside a directory only (same directory name but different filenames)
633 633 2) Move from one directory to another
634 634 (same filenames but different directory names)
635 635
636 636 Works only when there are no merge commits in the "source branch".
637 637 Source branch is commits from base up to c2 not including base.
638 638
639 639 If merge is involved it fallbacks to _fullcopytracing().
640 640
641 641 Can be used by setting the following config:
642 642
643 643 [experimental]
644 644 copytrace = heuristics
645 645
646 646 In some cases the copy/move candidates found by heuristics can be very large
647 647 in number and that will make the algorithm slow. The number of possible
648 648 candidates to check can be limited by using the config
649 649 `experimental.copytrace.movecandidateslimit` which defaults to 100.
650 650 """
651 651
652 652 if c1.rev() is None:
653 653 c1 = c1.p1()
654 654 if c2.rev() is None:
655 655 c2 = c2.p1()
656 656
657 657 copies = {}
658 658
659 659 changedfiles = set()
660 660 m1 = c1.manifest()
661 661 if not repo.revs('%d::%d', base.rev(), c2.rev()):
662 662 # If base is not in c2 branch, we switch to fullcopytracing
663 663 repo.ui.debug("switching to full copytracing as base is not "
664 664 "an ancestor of c2\n")
665 665 return _fullcopytracing(repo, c1, c2, base)
666 666
667 667 ctx = c2
668 668 while ctx != base:
669 669 if len(ctx.parents()) == 2:
670 670 # To keep things simple let's not handle merges
671 671 repo.ui.debug("switching to full copytracing because of merges\n")
672 672 return _fullcopytracing(repo, c1, c2, base)
673 673 changedfiles.update(ctx.files())
674 674 ctx = ctx.p1()
675 675
676 676 cp = _forwardcopies(base, c2)
677 677 for dst, src in cp.iteritems():
678 678 if src in m1:
679 679 copies[dst] = src
680 680
681 681 # file is missing if it isn't present in the destination, but is present in
682 682 # the base and present in the source.
683 683 # Presence in the base is important to exclude added files, presence in the
684 684 # source is important to exclude removed files.
685 685 filt = lambda f: f not in m1 and f in base and f in c2
686 686 missingfiles = [f for f in changedfiles if filt(f)]
687 687
688 688 if missingfiles:
689 689 basenametofilename = collections.defaultdict(list)
690 690 dirnametofilename = collections.defaultdict(list)
691 691
692 692 for f in m1.filesnotin(base.manifest()):
693 693 basename = os.path.basename(f)
694 694 dirname = os.path.dirname(f)
695 695 basenametofilename[basename].append(f)
696 696 dirnametofilename[dirname].append(f)
697 697
698 698 for f in missingfiles:
699 699 basename = os.path.basename(f)
700 700 dirname = os.path.dirname(f)
701 701 samebasename = basenametofilename[basename]
702 702 samedirname = dirnametofilename[dirname]
703 703 movecandidates = samebasename + samedirname
704 704 # f is guaranteed to be present in c2, that's why
705 705 # c2.filectx(f) won't fail
706 706 f2 = c2.filectx(f)
707 707 # we can have a lot of candidates which can slow down the heuristics
708 708 # config value to limit the number of candidates moves to check
709 709 maxcandidates = repo.ui.configint('experimental',
710 710 'copytrace.movecandidateslimit')
711 711
712 712 if len(movecandidates) > maxcandidates:
713 713 repo.ui.status(_("skipping copytracing for '%s', more "
714 714 "candidates than the limit: %d\n")
715 715 % (f, len(movecandidates)))
716 716 continue
717 717
718 718 for candidate in movecandidates:
719 719 f1 = c1.filectx(candidate)
720 720 if _related(f1, f2):
721 721 # if there are a few related copies then we'll merge
722 722 # changes into all of them. This matches the behaviour
723 723 # of upstream copytracing
724 724 copies[candidate] = f
725 725
726 726 return copies, {}, {}, {}, {}
727 727
728 728 def _related(f1, f2):
729 729 """return True if f1 and f2 filectx have a common ancestor
730 730
731 731 Walk back to common ancestor to see if the two files originate
732 732 from the same file. Since workingfilectx's rev() is None it messes
733 733 up the integer comparison logic, hence the pre-step check for
734 734 None (f1 and f2 can only be workingfilectx's initially).
735 735 """
736 736
737 737 if f1 == f2:
738 738 return True # a match
739 739
740 740 g1, g2 = f1.ancestors(), f2.ancestors()
741 741 try:
742 742 f1r, f2r = f1.linkrev(), f2.linkrev()
743 743
744 744 if f1r is None:
745 745 f1 = next(g1)
746 746 if f2r is None:
747 747 f2 = next(g2)
748 748
749 749 while True:
750 750 f1r, f2r = f1.linkrev(), f2.linkrev()
751 751 if f1r > f2r:
752 752 f1 = next(g1)
753 753 elif f2r > f1r:
754 754 f2 = next(g2)
755 755 else: # f1 and f2 point to files in the same linkrev
756 756 return f1 == f2 # true if they point to the same file
757 757 except StopIteration:
758 758 return False
759 759
760 760 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
761 761 """reproduce copies from fromrev to rev in the dirstate
762 762
763 763 If skiprev is specified, it's a revision that should be used to
764 764 filter copy records. Any copies that occur between fromrev and
765 765 skiprev will not be duplicated, even if they appear in the set of
766 766 copies between fromrev and rev.
767 767 """
768 768 exclude = {}
769 769 ctraceconfig = repo.ui.config('experimental', 'copytrace')
770 770 bctrace = stringutil.parsebool(ctraceconfig)
771 771 if (skiprev is not None and
772 772 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
773 773 # copytrace='off' skips this line, but not the entire function because
774 774 # the line below is O(size of the repo) during a rebase, while the rest
775 775 # of the function is much faster (and is required for carrying copy
776 776 # metadata across the rebase anyway).
777 777 exclude = pathcopies(repo[fromrev], repo[skiprev])
778 778 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
779 779 # copies.pathcopies returns backward renames, so dst might not
780 780 # actually be in the dirstate
781 781 if dst in exclude:
782 782 continue
783 783 wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now