##// END OF EJS Templates
copies: move short-circuiting of dirstate copies out of _forwardcopies()...
Martin von Zweigbergk -
r42794:bcb4b5c5 default
parent child Browse files
Show More
@@ -1,806 +1,805 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 util,
21 21 )
22 22 from .utils import (
23 23 stringutil,
24 24 )
25 25
26 26 def _findlimit(repo, ctxa, ctxb):
27 27 """
28 28 Find the last revision that needs to be checked to ensure that a full
29 29 transitive closure for file copies can be properly calculated.
30 30 Generally, this means finding the earliest revision number that's an
31 31 ancestor of a or b but not both, except when a or b is a direct descendent
32 32 of the other, in which case we can return the minimum revnum of a and b.
33 33 """
34 34
35 35 # basic idea:
36 36 # - mark a and b with different sides
37 37 # - if a parent's children are all on the same side, the parent is
38 38 # on that side, otherwise it is on no side
39 39 # - walk the graph in topological order with the help of a heap;
40 40 # - add unseen parents to side map
41 41 # - clear side of any parent that has children on different sides
42 42 # - track number of interesting revs that might still be on a side
43 43 # - track the lowest interesting rev seen
44 44 # - quit when interesting revs is zero
45 45
46 46 cl = repo.changelog
47 47 wdirparents = None
48 48 a = ctxa.rev()
49 49 b = ctxb.rev()
50 50 if a is None:
51 51 wdirparents = (ctxa.p1(), ctxa.p2())
52 52 a = node.wdirrev
53 53 if b is None:
54 54 assert not wdirparents
55 55 wdirparents = (ctxb.p1(), ctxb.p2())
56 56 b = node.wdirrev
57 57
58 58 side = {a: -1, b: 1}
59 59 visit = [-a, -b]
60 60 heapq.heapify(visit)
61 61 interesting = len(visit)
62 62 limit = node.wdirrev
63 63
64 64 while interesting:
65 65 r = -heapq.heappop(visit)
66 66 if r == node.wdirrev:
67 67 parents = [pctx.rev() for pctx in wdirparents]
68 68 else:
69 69 parents = cl.parentrevs(r)
70 70 if parents[1] == node.nullrev:
71 71 parents = parents[:1]
72 72 for p in parents:
73 73 if p not in side:
74 74 # first time we see p; add it to visit
75 75 side[p] = side[r]
76 76 if side[p]:
77 77 interesting += 1
78 78 heapq.heappush(visit, -p)
79 79 elif side[p] and side[p] != side[r]:
80 80 # p was interesting but now we know better
81 81 side[p] = 0
82 82 interesting -= 1
83 83 if side[r]:
84 84 limit = r # lowest rev visited
85 85 interesting -= 1
86 86
87 87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 88 # 1/ File 'a0' committed
89 89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 90 # 3/ Move back to first commit
91 91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 93 #
94 94 # During the amend in step five, we will be in this state:
95 95 #
96 96 # @ 3 temporary amend commit for a1-amend
97 97 # |
98 98 # o 2 a1-amend
99 99 # |
100 100 # | o 1 a1
101 101 # |/
102 102 # o 0 a0
103 103 #
104 104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 105 # yet the filelog has the copy information in rev 1 and we will not look
106 106 # back far enough unless we also look at the a and b as candidates.
107 107 # This only occurs when a is a descendent of b or visa-versa.
108 108 return min(limit, a, b)
109 109
110 110 def _chainandfilter(src, dst, a, b):
111 111 """chain two sets of copies 'a' and 'b' and filter result"""
112 112
113 113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
114 114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
115 115 # following table (not including trivial cases). For example, case 2 is
116 116 # where a file existed in 'src' and remained under that name in 'mid' and
117 117 # then was renamed between 'mid' and 'dst'.
118 118 #
119 119 # case src mid dst result
120 120 # 1 x y - -
121 121 # 2 x y y x->y
122 122 # 3 x y x -
123 123 # 4 x y z x->z
124 124 # 5 - x y -
125 125 # 6 x x y x->y
126 126 #
127 127 # _chain() takes care of chaining the copies in 'a' and 'b', but it
128 128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
129 129 # between 5 and 6, so it includes all cases in its result.
130 130 # Cases 1, 3, and 5 are then removed by _filter().
131 131
132 132 t = _chain(a, b)
133 133 _filter(src, dst, t)
134 134 return t
135 135
136 136 def _filter(src, dst, t):
137 137 """filters out invalid copies after chaining"""
138 138 for k, v in list(t.items()):
139 139 # remove copies from files that didn't exist
140 140 if v not in src:
141 141 del t[k]
142 142 # remove criss-crossed copies
143 143 elif k in src and v in dst:
144 144 del t[k]
145 145 # remove copies to files that were then removed
146 146 elif k not in dst:
147 147 del t[k]
148 148
149 149 def _chain(a, b):
150 150 """chain two sets of copies 'a' and 'b'"""
151 151 t = a.copy()
152 152 for k, v in b.iteritems():
153 153 if v in t:
154 154 t[k] = t[v]
155 155 else:
156 156 t[k] = v
157 157 return t
158 158
159 159 def _tracefile(fctx, am, limit):
160 160 """return file context that is the ancestor of fctx present in ancestor
161 161 manifest am, stopping after the first ancestor lower than limit"""
162 162
163 163 for f in fctx.ancestors():
164 164 path = f.path()
165 165 if am.get(path, None) == f.filenode():
166 166 return path
167 167 if not f.isintroducedafter(limit):
168 168 return None
169 169
170 170 def _dirstatecopies(repo, match=None):
171 171 ds = repo.dirstate
172 172 c = ds.copies().copy()
173 173 for k in list(c):
174 174 if ds[k] not in 'anm' or (match and not match(k)):
175 175 del c[k]
176 176 return c
177 177
178 178 def _computeforwardmissing(a, b, match=None):
179 179 """Computes which files are in b but not a.
180 180 This is its own function so extensions can easily wrap this call to see what
181 181 files _forwardcopies is about to process.
182 182 """
183 183 ma = a.manifest()
184 184 mb = b.manifest()
185 185 return mb.filesnotin(ma, match=match)
186 186
187 187 def usechangesetcentricalgo(repo):
188 188 """Checks if we should use changeset-centric copy algorithms"""
189 189 return (repo.ui.config('experimental', 'copies.read-from') in
190 190 ('changeset-only', 'compatibility'))
191 191
192 192 def _committedforwardcopies(a, b, match):
193 193 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
194 194 # files might have to be traced back to the fctx parent of the last
195 195 # one-side-only changeset, but not further back than that
196 196 repo = a._repo
197 197
198 198 if usechangesetcentricalgo(repo):
199 199 return _changesetforwardcopies(a, b, match)
200 200
201 201 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
202 202 dbg = repo.ui.debug
203 203 if debug:
204 204 dbg('debug.copies: looking into rename from %s to %s\n'
205 205 % (a, b))
206 206 limit = _findlimit(repo, a, b)
207 207 if debug:
208 208 dbg('debug.copies: search limit: %d\n' % limit)
209 209 am = a.manifest()
210 210
211 211 # find where new files came from
212 212 # we currently don't try to find where old files went, too expensive
213 213 # this means we can miss a case like 'hg rm b; hg cp a b'
214 214 cm = {}
215 215
216 216 # Computing the forward missing is quite expensive on large manifests, since
217 217 # it compares the entire manifests. We can optimize it in the common use
218 218 # case of computing what copies are in a commit versus its parent (like
219 219 # during a rebase or histedit). Note, we exclude merge commits from this
220 220 # optimization, since the ctx.files() for a merge commit is not correct for
221 221 # this comparison.
222 222 forwardmissingmatch = match
223 223 if b.p1() == a and b.p2().node() == node.nullid:
224 224 filesmatcher = matchmod.exact(b.files())
225 225 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
226 226 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
227 227
228 228 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
229 229
230 230 if debug:
231 231 dbg('debug.copies: missing files to search: %d\n' % len(missing))
232 232
233 233 for f in sorted(missing):
234 234 if debug:
235 235 dbg('debug.copies: tracing file: %s\n' % f)
236 236 fctx = b[f]
237 237 fctx._ancestrycontext = ancestrycontext
238 238
239 239 if debug:
240 240 start = util.timer()
241 241 opath = _tracefile(fctx, am, limit)
242 242 if opath:
243 243 if debug:
244 244 dbg('debug.copies: rename of: %s\n' % opath)
245 245 cm[f] = opath
246 246 if debug:
247 247 dbg('debug.copies: time: %f seconds\n'
248 248 % (util.timer() - start))
249 249 return cm
250 250
251 251 def _changesetforwardcopies(a, b, match):
252 252 if a.rev() == node.nullrev:
253 253 return {}
254 254
255 255 repo = a.repo()
256 256 children = {}
257 257 cl = repo.changelog
258 258 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
259 259 for r in missingrevs:
260 260 for p in cl.parentrevs(r):
261 261 if p == node.nullrev:
262 262 continue
263 263 if p not in children:
264 264 children[p] = [r]
265 265 else:
266 266 children[p].append(r)
267 267
268 268 roots = set(children) - set(missingrevs)
269 269 # 'work' contains 3-tuples of a (revision number, parent number, copies).
270 270 # The parent number is only used for knowing which parent the copies dict
271 271 # came from.
272 272 # NOTE: To reduce costly copying the 'copies' dicts, we reuse the same
273 273 # instance for *one* of the child nodes (the last one). Once an instance
274 274 # has been put on the queue, it is thus no longer safe to modify it.
275 275 # Conversely, it *is* safe to modify an instance popped off the queue.
276 276 work = [(r, 1, {}) for r in roots]
277 277 heapq.heapify(work)
278 278 alwaysmatch = match.always()
279 279 while work:
280 280 r, i1, copies = heapq.heappop(work)
281 281 if work and work[0][0] == r:
282 282 # We are tracing copies from both parents
283 283 r, i2, copies2 = heapq.heappop(work)
284 284 for dst, src in copies2.items():
285 285 # Unlike when copies are stored in the filelog, we consider
286 286 # it a copy even if the destination already existed on the
287 287 # other branch. It's simply too expensive to check if the
288 288 # file existed in the manifest.
289 289 if dst not in copies:
290 290 # If it was copied on the p1 side, leave it as copied from
291 291 # that side, even if it was also copied on the p2 side.
292 292 copies[dst] = copies2[dst]
293 293 if r == b.rev():
294 294 _filter(a, b, copies)
295 295 return copies
296 296 for i, c in enumerate(children[r]):
297 297 childctx = repo[c]
298 298 if r == childctx.p1().rev():
299 299 parent = 1
300 300 childcopies = childctx.p1copies()
301 301 else:
302 302 assert r == childctx.p2().rev()
303 303 parent = 2
304 304 childcopies = childctx.p2copies()
305 305 if not alwaysmatch:
306 306 childcopies = {dst: src for dst, src in childcopies.items()
307 307 if match(dst)}
308 308 # Copy the dict only if later iterations will also need it
309 309 if i != len(children[r]) - 1:
310 310 newcopies = copies.copy()
311 311 else:
312 312 newcopies = copies
313 313 if childcopies:
314 314 newcopies = _chain(newcopies, childcopies)
315 315 for f in childctx.filesremoved():
316 316 if f in newcopies:
317 317 del newcopies[f]
318 318 heapq.heappush(work, (c, parent, newcopies))
319 319 assert False
320 320
321 321 def _forwardcopies(a, b, match=None):
322 322 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
323 323
324 324 match = a.repo().narrowmatch(match)
325 325 # check for working copy
326 326 if b.rev() is None:
327 if a == b.p1():
328 # short-circuit to avoid issues with merge states
329 return _dirstatecopies(b._repo, match)
330
331 327 cm = _committedforwardcopies(a, b.p1(), match)
332 328 # combine copies from dirstate if necessary
333 329 return _chainandfilter(a, b, cm, _dirstatecopies(b._repo, match))
334 330 return _committedforwardcopies(a, b, match)
335 331
336 332 def _backwardrenames(a, b, match):
337 333 if a._repo.ui.config('experimental', 'copytrace') == 'off':
338 334 return {}
339 335
340 336 # Even though we're not taking copies into account, 1:n rename situations
341 337 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
342 338 # arbitrarily pick one of the renames.
343 339 # We don't want to pass in "match" here, since that would filter
344 340 # the destination by it. Since we're reversing the copies, we want
345 341 # to filter the source instead.
346 342 f = _forwardcopies(b, a)
347 343 r = {}
348 344 for k, v in sorted(f.iteritems()):
349 345 if match and not match(v):
350 346 continue
351 347 # remove copies
352 348 if v in a:
353 349 continue
354 350 r[v] = k
355 351 return r
356 352
357 353 def pathcopies(x, y, match=None):
358 354 """find {dst@y: src@x} copy mapping for directed compare"""
359 355 repo = x._repo
360 356 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
361 357 if debug:
362 358 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
363 359 % (x, y))
364 360 if x == y or not x or not y:
365 361 return {}
366 362 a = y.ancestor(x)
367 363 if a == x:
368 364 if debug:
369 365 repo.ui.debug('debug.copies: search mode: forward\n')
366 if y.rev() is None and x == y.p1():
367 # short-circuit to avoid issues with merge states
368 return _dirstatecopies(repo, match)
370 369 return _forwardcopies(x, y, match=match)
371 370 if a == y:
372 371 if debug:
373 372 repo.ui.debug('debug.copies: search mode: backward\n')
374 373 return _backwardrenames(x, y, match=match)
375 374 if debug:
376 375 repo.ui.debug('debug.copies: search mode: combined\n')
377 376 return _chainandfilter(x, y, _backwardrenames(x, a, match=match),
378 377 _forwardcopies(a, y, match=match))
379 378
380 379 def mergecopies(repo, c1, c2, base):
381 380 """
382 381 Finds moves and copies between context c1 and c2 that are relevant for
383 382 merging. 'base' will be used as the merge base.
384 383
385 384 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
386 385 files that were moved/ copied in one merge parent and modified in another.
387 386 For example:
388 387
389 388 o ---> 4 another commit
390 389 |
391 390 | o ---> 3 commit that modifies a.txt
392 391 | /
393 392 o / ---> 2 commit that moves a.txt to b.txt
394 393 |/
395 394 o ---> 1 merge base
396 395
397 396 If we try to rebase revision 3 on revision 4, since there is no a.txt in
398 397 revision 4, and if user have copytrace disabled, we prints the following
399 398 message:
400 399
401 400 ```other changed <file> which local deleted```
402 401
403 402 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
404 403 "dirmove".
405 404
406 405 "copy" is a mapping from destination name -> source name,
407 406 where source is in c1 and destination is in c2 or vice-versa.
408 407
409 408 "movewithdir" is a mapping from source name -> destination name,
410 409 where the file at source present in one context but not the other
411 410 needs to be moved to destination by the merge process, because the
412 411 other context moved the directory it is in.
413 412
414 413 "diverge" is a mapping of source name -> list of destination names
415 414 for divergent renames.
416 415
417 416 "renamedelete" is a mapping of source name -> list of destination
418 417 names for files deleted in c1 that were renamed in c2 or vice-versa.
419 418
420 419 "dirmove" is a mapping of detected source dir -> destination dir renames.
421 420 This is needed for handling changes to new files previously grafted into
422 421 renamed directories.
423 422
424 423 This function calls different copytracing algorithms based on config.
425 424 """
426 425 # avoid silly behavior for update from empty dir
427 426 if not c1 or not c2 or c1 == c2:
428 427 return {}, {}, {}, {}, {}
429 428
430 429 narrowmatch = c1.repo().narrowmatch()
431 430
432 431 # avoid silly behavior for parent -> working dir
433 432 if c2.node() is None and c1.node() == repo.dirstate.p1():
434 433 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
435 434
436 435 copytracing = repo.ui.config('experimental', 'copytrace')
437 436 if stringutil.parsebool(copytracing) is False:
438 437 # stringutil.parsebool() returns None when it is unable to parse the
439 438 # value, so we should rely on making sure copytracing is on such cases
440 439 return {}, {}, {}, {}, {}
441 440
442 441 if usechangesetcentricalgo(repo):
443 442 # The heuristics don't make sense when we need changeset-centric algos
444 443 return _fullcopytracing(repo, c1, c2, base)
445 444
446 445 # Copy trace disabling is explicitly below the node == p1 logic above
447 446 # because the logic above is required for a simple copy to be kept across a
448 447 # rebase.
449 448 if copytracing == 'heuristics':
450 449 # Do full copytracing if only non-public revisions are involved as
451 450 # that will be fast enough and will also cover the copies which could
452 451 # be missed by heuristics
453 452 if _isfullcopytraceable(repo, c1, base):
454 453 return _fullcopytracing(repo, c1, c2, base)
455 454 return _heuristicscopytracing(repo, c1, c2, base)
456 455 else:
457 456 return _fullcopytracing(repo, c1, c2, base)
458 457
459 458 def _isfullcopytraceable(repo, c1, base):
460 459 """ Checks that if base, source and destination are all no-public branches,
461 460 if yes let's use the full copytrace algorithm for increased capabilities
462 461 since it will be fast enough.
463 462
464 463 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
465 464 number of changesets from c1 to base such that if number of changesets are
466 465 more than the limit, full copytracing algorithm won't be used.
467 466 """
468 467 if c1.rev() is None:
469 468 c1 = c1.p1()
470 469 if c1.mutable() and base.mutable():
471 470 sourcecommitlimit = repo.ui.configint('experimental',
472 471 'copytrace.sourcecommitlimit')
473 472 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
474 473 return commits < sourcecommitlimit
475 474 return False
476 475
477 476 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
478 477 copy, renamedelete):
479 478 if src not in m2:
480 479 # deleted on side 2
481 480 if src not in m1:
482 481 # renamed on side 1, deleted on side 2
483 482 renamedelete[src] = dsts1
484 483 elif m2[src] != mb[src]:
485 484 if not _related(c2[src], base[src]):
486 485 return
487 486 # modified on side 2
488 487 for dst in dsts1:
489 488 if dst not in m2:
490 489 # dst not added on side 2 (handle as regular
491 490 # "both created" case in manifestmerge otherwise)
492 491 copy[dst] = src
493 492
494 493 def _fullcopytracing(repo, c1, c2, base):
495 494 """ The full copytracing algorithm which finds all the new files that were
496 495 added from merge base up to the top commit and for each file it checks if
497 496 this file was copied from another file.
498 497
499 498 This is pretty slow when a lot of changesets are involved but will track all
500 499 the copies.
501 500 """
502 501 m1 = c1.manifest()
503 502 m2 = c2.manifest()
504 503 mb = base.manifest()
505 504
506 505 copies1 = pathcopies(base, c1)
507 506 copies2 = pathcopies(base, c2)
508 507
509 508 inversecopies1 = {}
510 509 inversecopies2 = {}
511 510 for dst, src in copies1.items():
512 511 inversecopies1.setdefault(src, []).append(dst)
513 512 for dst, src in copies2.items():
514 513 inversecopies2.setdefault(src, []).append(dst)
515 514
516 515 copy = {}
517 516 diverge = {}
518 517 renamedelete = {}
519 518 allsources = set(inversecopies1) | set(inversecopies2)
520 519 for src in allsources:
521 520 dsts1 = inversecopies1.get(src)
522 521 dsts2 = inversecopies2.get(src)
523 522 if dsts1 and dsts2:
524 523 # copied/renamed on both sides
525 524 if src not in m1 and src not in m2:
526 525 # renamed on both sides
527 526 dsts1 = set(dsts1)
528 527 dsts2 = set(dsts2)
529 528 # If there's some overlap in the rename destinations, we
530 529 # consider it not divergent. For example, if side 1 copies 'a'
531 530 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
532 531 # and 'd' and deletes 'a'.
533 532 if dsts1 & dsts2:
534 533 for dst in (dsts1 & dsts2):
535 534 copy[dst] = src
536 535 else:
537 536 diverge[src] = sorted(dsts1 | dsts2)
538 537 elif src in m1 and src in m2:
539 538 # copied on both sides
540 539 dsts1 = set(dsts1)
541 540 dsts2 = set(dsts2)
542 541 for dst in (dsts1 & dsts2):
543 542 copy[dst] = src
544 543 # TODO: Handle cases where it was renamed on one side and copied
545 544 # on the other side
546 545 elif dsts1:
547 546 # copied/renamed only on side 1
548 547 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
549 548 copy, renamedelete)
550 549 elif dsts2:
551 550 # copied/renamed only on side 2
552 551 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
553 552 copy, renamedelete)
554 553
555 554 renamedeleteset = set()
556 555 divergeset = set()
557 556 for dsts in diverge.values():
558 557 divergeset.update(dsts)
559 558 for dsts in renamedelete.values():
560 559 renamedeleteset.update(dsts)
561 560
562 561 # find interesting file sets from manifests
563 562 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
564 563 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
565 564 u1 = sorted(addedinm1 - addedinm2)
566 565 u2 = sorted(addedinm2 - addedinm1)
567 566
568 567 header = " unmatched files in %s"
569 568 if u1:
570 569 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
571 570 if u2:
572 571 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
573 572
574 573 fullcopy = copies1.copy()
575 574 fullcopy.update(copies2)
576 575 if not fullcopy:
577 576 return copy, {}, diverge, renamedelete, {}
578 577
579 578 if repo.ui.debugflag:
580 579 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
581 580 "% = renamed and deleted):\n")
582 581 for f in sorted(fullcopy):
583 582 note = ""
584 583 if f in copy:
585 584 note += "*"
586 585 if f in divergeset:
587 586 note += "!"
588 587 if f in renamedeleteset:
589 588 note += "%"
590 589 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
591 590 note))
592 591 del divergeset
593 592
594 593 repo.ui.debug(" checking for directory renames\n")
595 594
596 595 # generate a directory move map
597 596 d1, d2 = c1.dirs(), c2.dirs()
598 597 invalid = set()
599 598 dirmove = {}
600 599
601 600 # examine each file copy for a potential directory move, which is
602 601 # when all the files in a directory are moved to a new directory
603 602 for dst, src in fullcopy.iteritems():
604 603 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
605 604 if dsrc in invalid:
606 605 # already seen to be uninteresting
607 606 continue
608 607 elif dsrc in d1 and ddst in d1:
609 608 # directory wasn't entirely moved locally
610 609 invalid.add(dsrc)
611 610 elif dsrc in d2 and ddst in d2:
612 611 # directory wasn't entirely moved remotely
613 612 invalid.add(dsrc)
614 613 elif dsrc in dirmove and dirmove[dsrc] != ddst:
615 614 # files from the same directory moved to two different places
616 615 invalid.add(dsrc)
617 616 else:
618 617 # looks good so far
619 618 dirmove[dsrc] = ddst
620 619
621 620 for i in invalid:
622 621 if i in dirmove:
623 622 del dirmove[i]
624 623 del d1, d2, invalid
625 624
626 625 if not dirmove:
627 626 return copy, {}, diverge, renamedelete, {}
628 627
629 628 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
630 629
631 630 for d in dirmove:
632 631 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
633 632 (d, dirmove[d]))
634 633
635 634 movewithdir = {}
636 635 # check unaccounted nonoverlapping files against directory moves
637 636 for f in u1 + u2:
638 637 if f not in fullcopy:
639 638 for d in dirmove:
640 639 if f.startswith(d):
641 640 # new file added in a directory that was moved, move it
642 641 df = dirmove[d] + f[len(d):]
643 642 if df not in copy:
644 643 movewithdir[f] = df
645 644 repo.ui.debug((" pending file src: '%s' -> "
646 645 "dst: '%s'\n") % (f, df))
647 646 break
648 647
649 648 return copy, movewithdir, diverge, renamedelete, dirmove
650 649
651 650 def _heuristicscopytracing(repo, c1, c2, base):
652 651 """ Fast copytracing using filename heuristics
653 652
654 653 Assumes that moves or renames are of following two types:
655 654
656 655 1) Inside a directory only (same directory name but different filenames)
657 656 2) Move from one directory to another
658 657 (same filenames but different directory names)
659 658
660 659 Works only when there are no merge commits in the "source branch".
661 660 Source branch is commits from base up to c2 not including base.
662 661
663 662 If merge is involved it fallbacks to _fullcopytracing().
664 663
665 664 Can be used by setting the following config:
666 665
667 666 [experimental]
668 667 copytrace = heuristics
669 668
670 669 In some cases the copy/move candidates found by heuristics can be very large
671 670 in number and that will make the algorithm slow. The number of possible
672 671 candidates to check can be limited by using the config
673 672 `experimental.copytrace.movecandidateslimit` which defaults to 100.
674 673 """
675 674
676 675 if c1.rev() is None:
677 676 c1 = c1.p1()
678 677 if c2.rev() is None:
679 678 c2 = c2.p1()
680 679
681 680 copies = {}
682 681
683 682 changedfiles = set()
684 683 m1 = c1.manifest()
685 684 if not repo.revs('%d::%d', base.rev(), c2.rev()):
686 685 # If base is not in c2 branch, we switch to fullcopytracing
687 686 repo.ui.debug("switching to full copytracing as base is not "
688 687 "an ancestor of c2\n")
689 688 return _fullcopytracing(repo, c1, c2, base)
690 689
691 690 ctx = c2
692 691 while ctx != base:
693 692 if len(ctx.parents()) == 2:
694 693 # To keep things simple let's not handle merges
695 694 repo.ui.debug("switching to full copytracing because of merges\n")
696 695 return _fullcopytracing(repo, c1, c2, base)
697 696 changedfiles.update(ctx.files())
698 697 ctx = ctx.p1()
699 698
700 699 cp = _forwardcopies(base, c2)
701 700 for dst, src in cp.iteritems():
702 701 if src in m1:
703 702 copies[dst] = src
704 703
705 704 # file is missing if it isn't present in the destination, but is present in
706 705 # the base and present in the source.
707 706 # Presence in the base is important to exclude added files, presence in the
708 707 # source is important to exclude removed files.
709 708 filt = lambda f: f not in m1 and f in base and f in c2
710 709 missingfiles = [f for f in changedfiles if filt(f)]
711 710
712 711 if missingfiles:
713 712 basenametofilename = collections.defaultdict(list)
714 713 dirnametofilename = collections.defaultdict(list)
715 714
716 715 for f in m1.filesnotin(base.manifest()):
717 716 basename = os.path.basename(f)
718 717 dirname = os.path.dirname(f)
719 718 basenametofilename[basename].append(f)
720 719 dirnametofilename[dirname].append(f)
721 720
722 721 for f in missingfiles:
723 722 basename = os.path.basename(f)
724 723 dirname = os.path.dirname(f)
725 724 samebasename = basenametofilename[basename]
726 725 samedirname = dirnametofilename[dirname]
727 726 movecandidates = samebasename + samedirname
728 727 # f is guaranteed to be present in c2, that's why
729 728 # c2.filectx(f) won't fail
730 729 f2 = c2.filectx(f)
731 730 # we can have a lot of candidates which can slow down the heuristics
732 731 # config value to limit the number of candidates moves to check
733 732 maxcandidates = repo.ui.configint('experimental',
734 733 'copytrace.movecandidateslimit')
735 734
736 735 if len(movecandidates) > maxcandidates:
737 736 repo.ui.status(_("skipping copytracing for '%s', more "
738 737 "candidates than the limit: %d\n")
739 738 % (f, len(movecandidates)))
740 739 continue
741 740
742 741 for candidate in movecandidates:
743 742 f1 = c1.filectx(candidate)
744 743 if _related(f1, f2):
745 744 # if there are a few related copies then we'll merge
746 745 # changes into all of them. This matches the behaviour
747 746 # of upstream copytracing
748 747 copies[candidate] = f
749 748
750 749 return copies, {}, {}, {}, {}
751 750
752 751 def _related(f1, f2):
753 752 """return True if f1 and f2 filectx have a common ancestor
754 753
755 754 Walk back to common ancestor to see if the two files originate
756 755 from the same file. Since workingfilectx's rev() is None it messes
757 756 up the integer comparison logic, hence the pre-step check for
758 757 None (f1 and f2 can only be workingfilectx's initially).
759 758 """
760 759
761 760 if f1 == f2:
762 761 return True # a match
763 762
764 763 g1, g2 = f1.ancestors(), f2.ancestors()
765 764 try:
766 765 f1r, f2r = f1.linkrev(), f2.linkrev()
767 766
768 767 if f1r is None:
769 768 f1 = next(g1)
770 769 if f2r is None:
771 770 f2 = next(g2)
772 771
773 772 while True:
774 773 f1r, f2r = f1.linkrev(), f2.linkrev()
775 774 if f1r > f2r:
776 775 f1 = next(g1)
777 776 elif f2r > f1r:
778 777 f2 = next(g2)
779 778 else: # f1 and f2 point to files in the same linkrev
780 779 return f1 == f2 # true if they point to the same file
781 780 except StopIteration:
782 781 return False
783 782
784 783 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
785 784 """reproduce copies from fromrev to rev in the dirstate
786 785
787 786 If skiprev is specified, it's a revision that should be used to
788 787 filter copy records. Any copies that occur between fromrev and
789 788 skiprev will not be duplicated, even if they appear in the set of
790 789 copies between fromrev and rev.
791 790 """
792 791 exclude = {}
793 792 ctraceconfig = repo.ui.config('experimental', 'copytrace')
794 793 bctrace = stringutil.parsebool(ctraceconfig)
795 794 if (skiprev is not None and
796 795 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
797 796 # copytrace='off' skips this line, but not the entire function because
798 797 # the line below is O(size of the repo) during a rebase, while the rest
799 798 # of the function is much faster (and is required for carrying copy
800 799 # metadata across the rebase anyway).
801 800 exclude = pathcopies(repo[fromrev], repo[skiprev])
802 801 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
803 802 if dst in exclude:
804 803 continue
805 804 if dst in wctx:
806 805 wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now