##// END OF EJS Templates
copies: document how 'copies' dict instances are reused...
Martin von Zweigbergk -
r42720:898b36f7 default
parent child Browse files
Show More
@@ -1,801 +1,805 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 util,
21 21 )
22 22 from .utils import (
23 23 stringutil,
24 24 )
25 25
def _findlimit(repo, ctxa, ctxb):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    wdirparents = None
    a = ctxa.rev()
    b = ctxb.rev()
    if a is None:
        # ctxa is the working directory: stand in the wdir pseudo-rev and
        # remember the real parent contexts for the walk below
        wdirparents = (ctxa.p1(), ctxa.p2())
        a = node.wdirrev
    if b is None:
        # at most one of the two contexts may be the working directory
        assert not wdirparents
        wdirparents = (ctxb.p1(), ctxb.p2())
        b = node.wdirrev

    side = {a: -1, b: 1}
    # store negated revs so the min-heap pops the highest rev first
    # (i.e. we walk the graph in reverse topological order)
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(visit)
        if r == node.wdirrev:
            # wdir has no changelog entry; use the parent contexts saved above
            parents = [pctx.rev() for pctx in wdirparents]
        else:
            parents = cl.parentrevs(r)
            if parents[1] == node.nullrev:
                parents = parents[:1]
        for p in parents:
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r  # lowest rev visited
            interesting -= 1

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
109 109
def _chainandfilter(src, dst, a, b):
    """chain two sets of copies 'a' and 'b' and filter the result"""
    # 'a' holds copies from 'src' to some intermediate commit 'mid'; 'b'
    # holds copies from 'mid' to 'dst'. Naively chaining the two with
    # _chain() produces some entries that are not real copies — e.g. the
    # source never existed in 'src', the copy is criss-crossed (both
    # endpoints exist on both sides), or the destination was removed
    # before 'dst'. _filter() prunes those in place afterwards.
    chained = _chain(a, b)
    _filter(src, dst, chained)
    return chained
135 135
136 136 def _filter(src, dst, t):
137 137 """filters out invalid copies after chaining"""
138 138 for k, v in list(t.items()):
139 139 # remove copies from files that didn't exist
140 140 if v not in src:
141 141 del t[k]
142 142 # remove criss-crossed copies
143 143 elif k in src and v in dst:
144 144 del t[k]
145 145 # remove copies to files that were then removed
146 146 elif k not in dst:
147 147 del t[k]
148 148
149 149 def _chain(a, b):
150 150 """chain two sets of copies 'a' and 'b'"""
151 151 t = a.copy()
152 152 for k, v in b.iteritems():
153 153 if v in t:
154 154 t[k] = t[v]
155 155 else:
156 156 t[k] = v
157 157 return t
158 158
def _tracefile(fctx, am, limit):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am, stopping after the first ancestor lower than limit

    Returns None (implicitly) when no ancestor is found in 'am' before the
    walk drops past 'limit'.
    """

    for f in fctx.ancestors():
        # this ancestor's exact node is recorded in the ancestor manifest:
        # it is the version the copy ultimately traces back to
        if am.get(f.path(), None) == f.filenode():
            return f
        # past the search limit computed by _findlimit(): stop looking
        if not f.isintroducedafter(limit):
            return None
168 168
169 169 def _dirstatecopies(repo, match=None):
170 170 ds = repo.dirstate
171 171 c = ds.copies().copy()
172 172 for k in list(c):
173 173 if ds[k] not in 'anm' or (match and not match(k)):
174 174 del c[k]
175 175 return c
176 176
177 177 def _computeforwardmissing(a, b, match=None):
178 178 """Computes which files are in b but not a.
179 179 This is its own function so extensions can easily wrap this call to see what
180 180 files _forwardcopies is about to process.
181 181 """
182 182 ma = a.manifest()
183 183 mb = b.manifest()
184 184 return mb.filesnotin(ma, match=match)
185 185
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # 'filelog-only' (and anything else) means the filelog-based algorithms
    readfrom = repo.ui.config('experimental', 'copies.read-from')
    return readfrom in ('changeset-only', 'compatibility')
190 190
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a, b)
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share a single ancestry context across all traced files below
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            cm[f] = ofctx.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm
249 249
def _changesetforwardcopies(a, b, match):
    """Find {dst@b: src@a} copies using per-changeset copy metadata
    (p1copies()/p2copies()) instead of the filelogs."""
    if a.rev() == node.nullrev:
        return {}

    repo = a.repo()
    children = {}
    cl = repo.changelog
    # revisions that are ancestors of b but not of a — the graph we must walk
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    # roots: parents of missing revs that are not themselves missing
    roots = set(children) - set(missingrevs)
    # 'work' contains 3-tuples of a (revision number, parent number, copies).
    # The parent number is only used for knowing which parent the copies dict
    # came from.
    # NOTE: To reduce costly copying the 'copies' dicts, we reuse the same
    # instance for *one* of the child nodes (the last one). Once an instance
    # has been put on the queue, it is thus no longer safe to modify it.
    # Conversely, it *is* safe to modify an instance popped off the queue.
    work = [(r, 1, {}) for r in roots]
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        r, i1, copies = heapq.heappop(work)
        if work and work[0][0] == r:
            # We are tracing copies from both parents
            r, i2, copies2 = heapq.heappop(work)
            for dst, src in copies2.items():
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                if dst not in copies:
                    # If it was copied on the p1 side, leave it as copied from
                    # that side, even if it was also copied on the p2 side.
                    copies[dst] = copies2[dst]
        if r == b.rev():
            _filter(a, b, copies)
            return copies
        for i, c in enumerate(children[r]):
            childctx = repo[c]
            if r == childctx.p1().rev():
                parent = 1
                childcopies = childctx.p1copies()
            else:
                assert r == childctx.p2().rev()
                parent = 2
                childcopies = childctx.p2copies()
            if not alwaysmatch:
                childcopies = {dst: src for dst, src in childcopies.items()
                               if match(dst)}
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            for f in childctx.filesremoved():
                if f in newcopies:
                    del newcopies[f]
            heapq.heappush(work, (c, parent, newcopies))
    # b.rev() is always reachable from the roots, so the loop must return
    assert False
315 319
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        # both endpoints are committed revisions
        return _committedforwardcopies(a, b, match)
    # 'b' is the working copy
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b._repo, match)
    # trace up to b's parent, then chain in the dirstate copies on top
    committed = _committedforwardcopies(a, b.p1(), match)
    return _chainandfilter(a, b, committed, _dirstatecopies(b._repo, match))
330 334
def _backwardrenames(a, b, match):
    """Return renames seen when walking from 'a' back to its ancestor 'b'.

    Computes the forward copies from 'b' to 'a' and inverts the mapping,
    skipping sources that still exist in 'a' (plain copies, not renames)
    and sources rejected by 'match'. Returns {} when copytracing is off.
    """
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    # sorted() makes the 1:n arbitrary pick deterministic
    for k, v in sorted(f.iteritems()):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
351 355
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Dispatches to the forward, backward or combined walk depending on how
    'x' and 'y' relate in the graph.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    # trivial cases: identical or empty endpoints
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        # 'x' is an ancestor of 'y': walk forward only
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if a == y:
        # 'y' is an ancestor of 'x': walk backward only
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y, match=match)
    if debug:
        repo.ui.debug('debug.copies: search mode: combined\n')
    # otherwise: back from 'x' to the common ancestor, then forward to 'y'
    return _chainandfilter(x, y, _backwardrenames(x, a, match=match),
                           _forwardcopies(a, y, match=match))
374 378
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
453 457
454 458 def _isfullcopytraceable(repo, c1, base):
455 459 """ Checks that if base, source and destination are all no-public branches,
456 460 if yes let's use the full copytrace algorithm for increased capabilities
457 461 since it will be fast enough.
458 462
459 463 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
460 464 number of changesets from c1 to base such that if number of changesets are
461 465 more than the limit, full copytracing algorithm won't be used.
462 466 """
463 467 if c1.rev() is None:
464 468 c1 = c1.p1()
465 469 if c1.mutable() and base.mutable():
466 470 sourcecommitlimit = repo.ui.configint('experimental',
467 471 'copytrace.sourcecommitlimit')
468 472 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
469 473 return commits < sourcecommitlimit
470 474 return False
471 475
def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
                           copy, renamedelete):
    """Handle a source copied/renamed on only one side (_fullcopytracing
    helper).

    'm1'/'m2' are the two sides' manifests and 'mb' the merge base's;
    'dsts1' are the copy destinations on side 1. Mutates the 'copy' and
    'renamedelete' out-parameters in place.
    """
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif m2[src] != mb[src]:
        # src changed on side 2; only record the copy if the two versions
        # actually share history
        if not _related(c2[src], base[src]):
            return
        # modified on side 2
        for dst in dsts1:
            if dst not in m2:
                # dst not added on side 2 (handle as regular
                # "both created" case in manifestmerge otherwise)
                copy[dst] = src
488 492
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # map each copy *source* to its (possibly multiple) destinations
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in (dsts1 & dsts2):
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in (dsts1 & dsts2):
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
                                   copy, renamedelete)
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
                                   copy, renamedelete)

    # flatten the destination lists for quick membership tests below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = "  unmatched files in %s"
    if u1:
        repo.ui.debug("%s:\n   %s\n" % (header % 'local', "\n   ".join(u1)))
    if u2:
        repo.ui.debug("%s:\n   %s\n" % (header % 'other', "\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                              note))
    del divergeset

    repo.ui.debug("  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix tests below are unambiguous
    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug("  discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(("   pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
645 649
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    # collect every file touched between base and c2 (linear walk)
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index files added since base by basename and by directory so
        # rename candidates can be looked up cheaply below
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
746 750
747 751 def _related(f1, f2):
748 752 """return True if f1 and f2 filectx have a common ancestor
749 753
750 754 Walk back to common ancestor to see if the two files originate
751 755 from the same file. Since workingfilectx's rev() is None it messes
752 756 up the integer comparison logic, hence the pre-step check for
753 757 None (f1 and f2 can only be workingfilectx's initially).
754 758 """
755 759
756 760 if f1 == f2:
757 761 return True # a match
758 762
759 763 g1, g2 = f1.ancestors(), f2.ancestors()
760 764 try:
761 765 f1r, f2r = f1.linkrev(), f2.linkrev()
762 766
763 767 if f1r is None:
764 768 f1 = next(g1)
765 769 if f2r is None:
766 770 f2 = next(g2)
767 771
768 772 while True:
769 773 f1r, f2r = f1.linkrev(), f2.linkrev()
770 774 if f1r > f2r:
771 775 f1 = next(g1)
772 776 elif f2r > f1r:
773 777 f2 = next(g2)
774 778 else: # f1 and f2 point to files in the same linkrev
775 779 return f1 == f2 # true if they point to the same file
776 780 except StopIteration:
777 781 return False
778 782
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if (skiprev is not None and
        (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        if dst in exclude:
            continue
        # only record the copy when the destination exists in the working
        # context
        if dst in wctx:
            wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now