copies: extract method for getting non-wdir forward copies...
Martin von Zweigbergk
r35423:8801cdce default
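The diff below splits the old _forwardcopies() into a committed-only helper, _committedforwardcopies(), plus a thin wrapper that keeps the working-directory handling and the dirstate-copy chaining. As a rough illustration of that extract-method shape (not Mercurial's real API), here is a minimal, runnable sketch; FakeCtx and its fields are invented stand-ins for changectx objects.

class FakeCtx(object):
    """Stand-in for a changectx: rev() is None for the working directory."""
    def __init__(self, rev, copies, parent=None):
        self._rev = rev
        self._copies = copies      # {dst: src} copies recorded by this context
        self._parent = parent

    def rev(self):
        return self._rev

    def p1(self):
        return self._parent

def _committedforwardcopies(a, b):
    # Like _forwardcopies(), but b.rev() cannot be None (working directory).
    # The real helper traces filelogs; this stand-in just returns b's copies.
    assert b.rev() is not None
    return dict(b._copies)

def _forwardcopies(a, b):
    # Thin wrapper: peel off the working directory, delegate the committed
    # part to the extracted helper, then layer the working-dir copies on top.
    w = None
    if b.rev() is None:              # b is the working directory
        w = b
        b = w.p1()
        if a is b:
            return dict(w._copies)   # short-circuit, as in the real code
    cm = _committedforwardcopies(a, b)
    if w is not None:
        cm.update(w._copies)         # simplified stand-in for _chain()
    return cm

base = FakeCtx(0, {})
commit = FakeCtx(1, {'b.txt': 'a.txt'}, parent=base)
wdir = FakeCtx(None, {'c.txt': 'b.txt'}, parent=commit)
print(_forwardcopies(base, commit))  # {'b.txt': 'a.txt'}
print(_forwardcopies(base, wdir))    # committed copies plus working-dir copies

In the real module the wrapper combines _dirstatecopies() through _chain() rather than a plain dict update; the sketch only mirrors the split between the committed and working-directory cases.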
@@ -1,884 +1,889 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 scmutil,
21 21 util,
22 22 )
23 23
24 24 def _findlimit(repo, a, b):
25 25 """
26 26 Find the last revision that needs to be checked to ensure that a full
27 27 transitive closure for file copies can be properly calculated.
28 28 Generally, this means finding the earliest revision number that's an
29 29 # ancestor of a or b but not both, except when a or b is a direct descendant
30 30 of the other, in which case we can return the minimum revnum of a and b.
31 31 None if no such revision exists.
32 32 """
33 33
34 34 # basic idea:
35 35 # - mark a and b with different sides
36 36 # - if a parent's children are all on the same side, the parent is
37 37 # on that side, otherwise it is on no side
38 38 # - walk the graph in topological order with the help of a heap;
39 39 # - add unseen parents to side map
40 40 # - clear side of any parent that has children on different sides
41 41 # - track number of interesting revs that might still be on a side
42 42 # - track the lowest interesting rev seen
43 43 # - quit when interesting revs is zero
44 44
45 45 cl = repo.changelog
46 46 working = len(cl) # pseudo rev for the working directory
47 47 if a is None:
48 48 a = working
49 49 if b is None:
50 50 b = working
51 51
52 52 side = {a: -1, b: 1}
53 53 visit = [-a, -b]
54 54 heapq.heapify(visit)
55 55 interesting = len(visit)
56 56 hascommonancestor = False
57 57 limit = working
58 58
59 59 while interesting:
60 60 r = -heapq.heappop(visit)
61 61 if r == working:
62 62 parents = [cl.rev(p) for p in repo.dirstate.parents()]
63 63 else:
64 64 parents = cl.parentrevs(r)
65 65 for p in parents:
66 66 if p < 0:
67 67 continue
68 68 if p not in side:
69 69 # first time we see p; add it to visit
70 70 side[p] = side[r]
71 71 if side[p]:
72 72 interesting += 1
73 73 heapq.heappush(visit, -p)
74 74 elif side[p] and side[p] != side[r]:
75 75 # p was interesting but now we know better
76 76 side[p] = 0
77 77 interesting -= 1
78 78 hascommonancestor = True
79 79 if side[r]:
80 80 limit = r # lowest rev visited
81 81 interesting -= 1
82 82
83 83 if not hascommonancestor:
84 84 return None
85 85
86 86 # Consider the following flow (see test-commit-amend.t under issue4405):
87 87 # 1/ File 'a0' committed
88 88 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
89 89 # 3/ Move back to first commit
90 90 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
91 91 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
92 92 #
93 93 # During the amend in step five, we will be in this state:
94 94 #
95 95 # @ 3 temporary amend commit for a1-amend
96 96 # |
97 97 # o 2 a1-amend
98 98 # |
99 99 # | o 1 a1
100 100 # |/
101 101 # o 0 a0
102 102 #
103 103 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
104 104 # yet the filelog has the copy information in rev 1 and we will not look
105 105 # back far enough unless we also look at a and b as candidates.
106 106 # This only occurs when a is a descendant of b or vice-versa.
107 107 return min(limit, a, b)
108 108
109 109 def _chain(src, dst, a, b):
110 110 """chain two sets of copies a->b"""
111 111 t = a.copy()
112 112 for k, v in b.iteritems():
113 113 if v in t:
114 114 # found a chain
115 115 if t[v] != k:
116 116 # file wasn't renamed back to itself
117 117 t[k] = t[v]
118 118 if v not in dst:
119 119 # chain was a rename, not a copy
120 120 del t[v]
121 121 if v in src:
122 122 # file is a copy of an existing file
123 123 t[k] = v
124 124
125 125 # remove criss-crossed copies
126 126 for k, v in t.items():
127 127 if k in src and v in dst:
128 128 del t[k]
129 129
130 130 return t
131 131
132 132 def _tracefile(fctx, am, limit=-1):
133 133 """return file context that is the ancestor of fctx present in ancestor
134 134 manifest am, stopping after the first ancestor lower than limit"""
135 135
136 136 for f in fctx.ancestors():
137 137 if am.get(f.path(), None) == f.filenode():
138 138 return f
139 139 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
140 140 return None
141 141
142 142 def _dirstatecopies(d, match=None):
143 143 ds = d._repo.dirstate
144 144 c = ds.copies().copy()
145 145 for k in list(c):
146 146 if ds[k] not in 'anm' or (match and not match(k)):
147 147 del c[k]
148 148 return c
149 149
150 150 def _computeforwardmissing(a, b, match=None):
151 151 """Computes which files are in b but not a.
152 152 This is its own function so extensions can easily wrap this call to see what
153 153 files _forwardcopies is about to process.
154 154 """
155 155 ma = a.manifest()
156 156 mb = b.manifest()
157 157 return mb.filesnotin(ma, match=match)
158 158
159 def _forwardcopies(a, b, match=None):
160 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
161
162 # check for working copy
163 w = None
164 if b.rev() is None:
165 w = b
166 b = w.p1()
167 if a == b:
168 # short-circuit to avoid issues with merge states
169 return _dirstatecopies(w, match)
170
159 def _committedforwardcopies(a, b, match):
160 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
171 161 # files might have to be traced back to the fctx parent of the last
172 162 # one-side-only changeset, but not further back than that
173 163 limit = _findlimit(a._repo, a.rev(), b.rev())
174 164 if limit is None:
175 165 limit = -1
176 166 am = a.manifest()
177 167
178 168 # find where new files came from
179 169 # we currently don't try to find where old files went, too expensive
180 170 # this means we can miss a case like 'hg rm b; hg cp a b'
181 171 cm = {}
182 172
183 173 # Computing the forward missing is quite expensive on large manifests, since
184 174 # it compares the entire manifests. We can optimize it in the common use
185 175 # case of computing what copies are in a commit versus its parent (like
186 176 # during a rebase or histedit). Note, we exclude merge commits from this
187 177 # optimization, since the ctx.files() for a merge commit is not correct for
188 178 # this comparison.
189 179 forwardmissingmatch = match
190 180 if b.p1() == a and b.p2().node() == node.nullid:
191 181 filesmatcher = scmutil.matchfiles(a._repo, b.files())
192 182 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
193 183 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
194 184
195 185 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
196 186 for f in missing:
197 187 fctx = b[f]
198 188 fctx._ancestrycontext = ancestrycontext
199 189 ofctx = _tracefile(fctx, am, limit)
200 190 if ofctx:
201 191 cm[f] = ofctx.path()
192 return cm
193
194 def _forwardcopies(a, b, match=None):
195 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
196
197 # check for working copy
198 w = None
199 if b.rev() is None:
200 w = b
201 b = w.p1()
202 if a == b:
203 # short-circuit to avoid issues with merge states
204 return _dirstatecopies(w, match)
205
206 cm = _committedforwardcopies(a, b, match)
202 207
203 208 # combine copies from dirstate if necessary
204 209 if w is not None:
205 210 cm = _chain(a, w, cm, _dirstatecopies(w, match))
206 211
207 212 return cm
208 213
209 214 def _backwardrenames(a, b):
210 215 if a._repo.ui.config('experimental', 'copytrace') == 'off':
211 216 return {}
212 217
213 218 # Even though we're not taking copies into account, 1:n rename situations
214 219 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
215 220 # arbitrarily pick one of the renames.
216 221 f = _forwardcopies(b, a)
217 222 r = {}
218 223 for k, v in sorted(f.iteritems()):
219 224 # remove copies
220 225 if v in a:
221 226 continue
222 227 r[v] = k
223 228 return r
224 229
225 230 def pathcopies(x, y, match=None):
226 231 """find {dst@y: src@x} copy mapping for directed compare"""
227 232 if x == y or not x or not y:
228 233 return {}
229 234 a = y.ancestor(x)
230 235 if a == x:
231 236 return _forwardcopies(x, y, match=match)
232 237 if a == y:
233 238 return _backwardrenames(x, y)
234 239 return _chain(x, y, _backwardrenames(x, a),
235 240 _forwardcopies(a, y, match=match))
236 241
237 242 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
238 243 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
239 244 and c2. This is its own function so extensions can easily wrap this call
240 245 to see what files mergecopies is about to process.
241 246
242 247 Even though c1 and c2 are not used in this function, they are useful in
243 248 other extensions for being able to read the file nodes of the changed files.
244 249
245 250 "baselabel" can be passed to help distinguish the multiple computations
246 251 done in the graft case.
247 252 """
248 253 u1 = sorted(addedinm1 - addedinm2)
249 254 u2 = sorted(addedinm2 - addedinm1)
250 255
251 256 header = " unmatched files in %s"
252 257 if baselabel:
253 258 header += ' (from %s)' % baselabel
254 259 if u1:
255 260 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
256 261 if u2:
257 262 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
258 263 return u1, u2
259 264
260 265 def _makegetfctx(ctx):
261 266 """return a 'getfctx' function suitable for _checkcopies usage
262 267
263 268 We have to re-setup the function building 'filectx' for each
264 269 '_checkcopies' to ensure the linkrev adjustment is properly setup for
265 270 each. Linkrev adjustment is important to avoid bugs in rename
266 271 detection. Moreover, having a proper '_ancestrycontext' setup ensures
267 272 the performance impact of this adjustment is kept limited. Without it,
268 273 each file could do a full dag traversal making the time complexity of
269 274 the operation explode (see issue4537).
270 275
271 276 This function exists here mostly to limit the impact on stable. Feel
272 277 free to refactor on default.
273 278 """
274 279 rev = ctx.rev()
275 280 repo = ctx._repo
276 281 ac = getattr(ctx, '_ancestrycontext', None)
277 282 if ac is None:
278 283 revs = [rev]
279 284 if rev is None:
280 285 revs = [p.rev() for p in ctx.parents()]
281 286 ac = repo.changelog.ancestors(revs, inclusive=True)
282 287 ctx._ancestrycontext = ac
283 288 def makectx(f, n):
284 289 if n in node.wdirnodes: # in a working context?
285 290 if ctx.rev() is None:
286 291 return ctx.filectx(f)
287 292 return repo[None][f]
288 293 fctx = repo.filectx(f, fileid=n)
289 294 # setup only needed for filectx not created from a changectx
290 295 fctx._ancestrycontext = ac
291 296 fctx._descendantrev = rev
292 297 return fctx
293 298 return util.lrucachefunc(makectx)
294 299
295 300 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
296 301 """combine partial copy paths"""
297 302 remainder = {}
298 303 for f in copyfrom:
299 304 if f in copyto:
300 305 finalcopy[copyto[f]] = copyfrom[f]
301 306 del copyto[f]
302 307 for f in incompletediverge:
303 308 assert f not in diverge
304 309 ic = incompletediverge[f]
305 310 if ic[0] in copyto:
306 311 diverge[f] = [copyto[ic[0]], ic[1]]
307 312 else:
308 313 remainder[f] = ic
309 314 return remainder
310 315
311 316 def mergecopies(repo, c1, c2, base):
312 317 """
313 318 This function calls different copytracing algorithms, based on config, to
314 319 find moves and copies between contexts c1 and c2 that are relevant for
315 320 merging. 'base' will be used as the merge base.
316 321
317 322 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
318 323 files that were moved/copied in one merge parent and modified in another.
319 324 For example:
320 325
321 326 o ---> 4 another commit
322 327 |
323 328 | o ---> 3 commit that modifies a.txt
324 329 | /
325 330 o / ---> 2 commit that moves a.txt to b.txt
326 331 |/
327 332 o ---> 1 merge base
328 333
329 334 If we try to rebase revision 3 on revision 4, since there is no a.txt in
330 335 revision 4, and if the user has copytrace disabled, we print the following
331 336 message:
332 337
333 338 ```other changed <file> which local deleted```
334 339
335 340 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
336 341 "dirmove".
337 342
338 343 "copy" is a mapping from destination name -> source name,
339 344 where source is in c1 and destination is in c2 or vice-versa.
340 345
341 346 "movewithdir" is a mapping from source name -> destination name,
342 347 where the file at source present in one context but not the other
343 348 needs to be moved to destination by the merge process, because the
344 349 other context moved the directory it is in.
345 350
346 351 "diverge" is a mapping of source name -> list of destination names
347 352 for divergent renames.
348 353
349 354 "renamedelete" is a mapping of source name -> list of destination
350 355 names for files deleted in c1 that were renamed in c2 or vice-versa.
351 356
352 357 "dirmove" is a mapping of detected source dir -> destination dir renames.
353 358 This is needed for handling changes to new files previously grafted into
354 359 renamed directories.
355 360 """
356 361 # avoid silly behavior for update from empty dir
357 362 if not c1 or not c2 or c1 == c2:
358 363 return {}, {}, {}, {}, {}
359 364
360 365 # avoid silly behavior for parent -> working dir
361 366 if c2.node() is None and c1.node() == repo.dirstate.p1():
362 367 return repo.dirstate.copies(), {}, {}, {}, {}
363 368
364 369 copytracing = repo.ui.config('experimental', 'copytrace')
365 370
366 371 # Copy trace disabling is explicitly below the node == p1 logic above
367 372 # because the logic above is required for a simple copy to be kept across a
368 373 # rebase.
369 374 if copytracing == 'off':
370 375 return {}, {}, {}, {}, {}
371 376 elif copytracing == 'heuristics':
372 377 # Do full copytracing if only non-public revisions are involved as
373 378 # that will be fast enough and will also cover the copies which could
374 379 # be missed by heuristics
375 380 if _isfullcopytraceable(repo, c1, base):
376 381 return _fullcopytracing(repo, c1, c2, base)
377 382 return _heuristicscopytracing(repo, c1, c2, base)
378 383 else:
379 384 return _fullcopytracing(repo, c1, c2, base)
380 385
381 386 def _isfullcopytraceable(repo, c1, base):
382 387 """ Checks that if base, source and destination are all no-public branches,
383 388 if yes let's use the full copytrace algorithm for increased capabilities
384 389 since it will be fast enough.
385 390
386 391 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
387 392 the number of changesets from c1 to base; if the number of changesets is
388 393 more than the limit, the full copytracing algorithm won't be used.
389 394 """
390 395 if c1.rev() is None:
391 396 c1 = c1.p1()
392 397 if c1.mutable() and base.mutable():
393 398 sourcecommitlimit = repo.ui.configint('experimental',
394 399 'copytrace.sourcecommitlimit')
395 400 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
396 401 return commits < sourcecommitlimit
397 402 return False
398 403
399 404 def _fullcopytracing(repo, c1, c2, base):
400 405 """ The full copytracing algorithm which finds all the new files that were
401 406 added from merge base up to the top commit and for each file it checks if
402 407 this file was copied from another file.
403 408
404 409 This is pretty slow when a lot of changesets are involved but will track all
405 410 the copies.
406 411 """
407 412 # In certain scenarios (e.g. graft, update or rebase), base can be
408 413 # overridden. We still need to know a real common ancestor in this case. We
409 414 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
410 415 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
411 416 # caller may not know if the revision passed in lieu of the CA is a genuine
412 417 # common ancestor or not without explicitly checking it, it's better to
413 418 # determine that here.
414 419 #
415 420 # base.descendant(wc) and base.descendant(base) are False, work around that
416 421 _c1 = c1.p1() if c1.rev() is None else c1
417 422 _c2 = c2.p1() if c2.rev() is None else c2
418 423 # an endpoint is "dirty" if it isn't a descendant of the merge base
419 424 # if we have a dirty endpoint, we need to trigger graft logic, and also
420 425 # keep track of which endpoint is dirty
421 426 dirtyc1 = not (base == _c1 or base.descendant(_c1))
422 427 dirtyc2 = not (base == _c2 or base.descendant(_c2))
423 428 graft = dirtyc1 or dirtyc2
424 429 tca = base
425 430 if graft:
426 431 tca = _c1.ancestor(_c2)
427 432
428 433 limit = _findlimit(repo, c1.rev(), c2.rev())
429 434 if limit is None:
430 435 # no common ancestor, no copies
431 436 return {}, {}, {}, {}, {}
432 437 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
433 438
434 439 m1 = c1.manifest()
435 440 m2 = c2.manifest()
436 441 mb = base.manifest()
437 442
438 443 # gather data from _checkcopies:
439 444 # - diverge = record all diverges in this dict
440 445 # - copy = record all non-divergent copies in this dict
441 446 # - fullcopy = record all copies in this dict
442 447 # - incomplete = record non-divergent partial copies here
443 448 # - incompletediverge = record divergent partial copies here
444 449 diverge = {} # divergence data is shared
445 450 incompletediverge = {}
446 451 data1 = {'copy': {},
447 452 'fullcopy': {},
448 453 'incomplete': {},
449 454 'diverge': diverge,
450 455 'incompletediverge': incompletediverge,
451 456 }
452 457 data2 = {'copy': {},
453 458 'fullcopy': {},
454 459 'incomplete': {},
455 460 'diverge': diverge,
456 461 'incompletediverge': incompletediverge,
457 462 }
458 463
459 464 # find interesting file sets from manifests
460 465 addedinm1 = m1.filesnotin(mb)
461 466 addedinm2 = m2.filesnotin(mb)
462 467 bothnew = sorted(addedinm1 & addedinm2)
463 468 if tca == base:
464 469 # unmatched file from base
465 470 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
466 471 u1u, u2u = u1r, u2r
467 472 else:
468 473 # unmatched file from base (DAG rotation in the graft case)
469 474 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
470 475 baselabel='base')
471 476 # unmatched file from topological common ancestors (no DAG rotation)
472 477 # need to recompute this for directory move handling when grafting
473 478 mta = tca.manifest()
474 479 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
475 480 m2.filesnotin(mta),
476 481 baselabel='topological common ancestor')
477 482
478 483 for f in u1u:
479 484 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
480 485
481 486 for f in u2u:
482 487 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
483 488
484 489 copy = dict(data1['copy'])
485 490 copy.update(data2['copy'])
486 491 fullcopy = dict(data1['fullcopy'])
487 492 fullcopy.update(data2['fullcopy'])
488 493
489 494 if dirtyc1:
490 495 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
491 496 incompletediverge)
492 497 else:
493 498 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
494 499 incompletediverge)
495 500
496 501 renamedelete = {}
497 502 renamedeleteset = set()
498 503 divergeset = set()
499 504 for of, fl in list(diverge.items()):
500 505 if len(fl) == 1 or of in c1 or of in c2:
501 506 del diverge[of] # not actually divergent, or not a rename
502 507 if of not in c1 and of not in c2:
503 508 # renamed on one side, deleted on the other side, but filter
504 509 # out files that have been renamed and then deleted
505 510 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
506 511 renamedeleteset.update(fl) # reverse map for below
507 512 else:
508 513 divergeset.update(fl) # reverse map for below
509 514
510 515 if bothnew:
511 516 repo.ui.debug(" unmatched files new in both:\n %s\n"
512 517 % "\n ".join(bothnew))
513 518 bothdiverge = {}
514 519 bothincompletediverge = {}
515 520 remainder = {}
516 521 both1 = {'copy': {},
517 522 'fullcopy': {},
518 523 'incomplete': {},
519 524 'diverge': bothdiverge,
520 525 'incompletediverge': bothincompletediverge
521 526 }
522 527 both2 = {'copy': {},
523 528 'fullcopy': {},
524 529 'incomplete': {},
525 530 'diverge': bothdiverge,
526 531 'incompletediverge': bothincompletediverge
527 532 }
528 533 for f in bothnew:
529 534 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
530 535 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
531 536 if dirtyc1:
532 537 # incomplete copies may only be found on the "dirty" side for bothnew
533 538 assert not both2['incomplete']
534 539 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
535 540 bothincompletediverge)
536 541 elif dirtyc2:
537 542 assert not both1['incomplete']
538 543 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
539 544 bothincompletediverge)
540 545 else:
541 546 # incomplete copies and divergences can't happen outside grafts
542 547 assert not both1['incomplete']
543 548 assert not both2['incomplete']
544 549 assert not bothincompletediverge
545 550 for f in remainder:
546 551 assert f not in bothdiverge
547 552 ic = remainder[f]
548 553 if ic[0] in (m1 if dirtyc1 else m2):
549 554 # backed-out rename on one side, but watch out for deleted files
550 555 bothdiverge[f] = ic
551 556 for of, fl in bothdiverge.items():
552 557 if len(fl) == 2 and fl[0] == fl[1]:
553 558 copy[fl[0]] = of # not actually divergent, just matching renames
554 559
555 560 if fullcopy and repo.ui.debugflag:
556 561 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
557 562 "% = renamed and deleted):\n")
558 563 for f in sorted(fullcopy):
559 564 note = ""
560 565 if f in copy:
561 566 note += "*"
562 567 if f in divergeset:
563 568 note += "!"
564 569 if f in renamedeleteset:
565 570 note += "%"
566 571 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
567 572 note))
568 573 del divergeset
569 574
570 575 if not fullcopy:
571 576 return copy, {}, diverge, renamedelete, {}
572 577
573 578 repo.ui.debug(" checking for directory renames\n")
574 579
575 580 # generate a directory move map
576 581 d1, d2 = c1.dirs(), c2.dirs()
577 582 # Hack for adding '', which is not otherwise added, to d1 and d2
578 583 d1.addpath('/')
579 584 d2.addpath('/')
580 585 invalid = set()
581 586 dirmove = {}
582 587
583 588 # examine each file copy for a potential directory move, which is
584 589 # when all the files in a directory are moved to a new directory
585 590 for dst, src in fullcopy.iteritems():
586 591 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
587 592 if dsrc in invalid:
588 593 # already seen to be uninteresting
589 594 continue
590 595 elif dsrc in d1 and ddst in d1:
591 596 # directory wasn't entirely moved locally
592 597 invalid.add(dsrc + "/")
593 598 elif dsrc in d2 and ddst in d2:
594 599 # directory wasn't entirely moved remotely
595 600 invalid.add(dsrc + "/")
596 601 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
597 602 # files from the same directory moved to two different places
598 603 invalid.add(dsrc + "/")
599 604 else:
600 605 # looks good so far
601 606 dirmove[dsrc + "/"] = ddst + "/"
602 607
603 608 for i in invalid:
604 609 if i in dirmove:
605 610 del dirmove[i]
606 611 del d1, d2, invalid
607 612
608 613 if not dirmove:
609 614 return copy, {}, diverge, renamedelete, {}
610 615
611 616 for d in dirmove:
612 617 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
613 618 (d, dirmove[d]))
614 619
615 620 movewithdir = {}
616 621 # check unaccounted nonoverlapping files against directory moves
617 622 for f in u1r + u2r:
618 623 if f not in fullcopy:
619 624 for d in dirmove:
620 625 if f.startswith(d):
621 626 # new file added in a directory that was moved, move it
622 627 df = dirmove[d] + f[len(d):]
623 628 if df not in copy:
624 629 movewithdir[f] = df
625 630 repo.ui.debug((" pending file src: '%s' -> "
626 631 "dst: '%s'\n") % (f, df))
627 632 break
628 633
629 634 return copy, movewithdir, diverge, renamedelete, dirmove
630 635
631 636 def _heuristicscopytracing(repo, c1, c2, base):
632 637 """ Fast copytracing using filename heuristics
633 638
634 639 Assumes that moves or renames are of following two types:
635 640
636 641 1) Inside a directory only (same directory name but different filenames)
637 642 2) Move from one directory to another
638 643 (same filenames but different directory names)
639 644
640 645 Works only when there are no merge commits in the "source branch".
641 646 Source branch is commits from base up to c2 not including base.
642 647
643 648 If a merge is involved, it falls back to _fullcopytracing().
644 649
645 650 Can be used by setting the following config:
646 651
647 652 [experimental]
648 653 copytrace = heuristics
649 654
650 655 In some cases the copy/move candidates found by heuristics can be very large
651 656 in number and that will make the algorithm slow. The number of possible
652 657 candidates to check can be limited by using the config
653 658 `experimental.copytrace.movecandidateslimit` which defaults to 100.
654 659 """
655 660
656 661 if c1.rev() is None:
657 662 c1 = c1.p1()
658 663 if c2.rev() is None:
659 664 c2 = c2.p1()
660 665
661 666 copies = {}
662 667
663 668 changedfiles = set()
664 669 m1 = c1.manifest()
665 670 if not repo.revs('%d::%d', base.rev(), c2.rev()):
666 671 # If base is not in c2 branch, we switch to fullcopytracing
667 672 repo.ui.debug("switching to full copytracing as base is not "
668 673 "an ancestor of c2\n")
669 674 return _fullcopytracing(repo, c1, c2, base)
670 675
671 676 ctx = c2
672 677 while ctx != base:
673 678 if len(ctx.parents()) == 2:
674 679 # To keep things simple let's not handle merges
675 680 repo.ui.debug("switching to full copytracing because of merges\n")
676 681 return _fullcopytracing(repo, c1, c2, base)
677 682 changedfiles.update(ctx.files())
678 683 ctx = ctx.p1()
679 684
680 685 cp = _forwardcopies(base, c2)
681 686 for dst, src in cp.iteritems():
682 687 if src in m1:
683 688 copies[dst] = src
684 689
685 690 # file is missing if it isn't present in the destination, but is present in
686 691 # the base and present in the source.
687 692 # Presence in the base is important to exclude added files, presence in the
688 693 # source is important to exclude removed files.
689 694 missingfiles = filter(lambda f: f not in m1 and f in base and f in c2,
690 695 changedfiles)
691 696
692 697 if missingfiles:
693 698 basenametofilename = collections.defaultdict(list)
694 699 dirnametofilename = collections.defaultdict(list)
695 700
696 701 for f in m1.filesnotin(base.manifest()):
697 702 basename = os.path.basename(f)
698 703 dirname = os.path.dirname(f)
699 704 basenametofilename[basename].append(f)
700 705 dirnametofilename[dirname].append(f)
701 706
702 707 # in case of a rebase/graft, base may not be a common ancestor
703 708 anc = c1.ancestor(c2)
704 709
705 710 for f in missingfiles:
706 711 basename = os.path.basename(f)
707 712 dirname = os.path.dirname(f)
708 713 samebasename = basenametofilename[basename]
709 714 samedirname = dirnametofilename[dirname]
710 715 movecandidates = samebasename + samedirname
711 716 # f is guaranteed to be present in c2, that's why
712 717 # c2.filectx(f) won't fail
713 718 f2 = c2.filectx(f)
714 719 # we can have a lot of candidates which can slow down the heuristics
715 720 # config value to limit the number of candidate moves to check
716 721 maxcandidates = repo.ui.configint('experimental',
717 722 'copytrace.movecandidateslimit')
718 723
719 724 if len(movecandidates) > maxcandidates:
720 725 repo.ui.status(_("skipping copytracing for '%s', more "
721 726 "candidates than the limit: %d\n")
722 727 % (f, len(movecandidates)))
723 728 continue
724 729
725 730 for candidate in movecandidates:
726 731 f1 = c1.filectx(candidate)
727 732 if _related(f1, f2, anc.rev()):
728 733 # if there are a few related copies then we'll merge
729 734 # changes into all of them. This matches the behaviour
730 735 # of upstream copytracing
731 736 copies[candidate] = f
732 737
733 738 return copies, {}, {}, {}, {}
734 739
735 740 def _related(f1, f2, limit):
736 741 """return True if f1 and f2 filectx have a common ancestor
737 742
738 743 Walk back to common ancestor to see if the two files originate
739 744 from the same file. Since workingfilectx's rev() is None it messes
740 745 up the integer comparison logic, hence the pre-step check for
741 746 None (f1 and f2 can only be workingfilectx's initially).
742 747 """
743 748
744 749 if f1 == f2:
745 750 return f1 # a match
746 751
747 752 g1, g2 = f1.ancestors(), f2.ancestors()
748 753 try:
749 754 f1r, f2r = f1.linkrev(), f2.linkrev()
750 755
751 756 if f1r is None:
752 757 f1 = next(g1)
753 758 if f2r is None:
754 759 f2 = next(g2)
755 760
756 761 while True:
757 762 f1r, f2r = f1.linkrev(), f2.linkrev()
758 763 if f1r > f2r:
759 764 f1 = next(g1)
760 765 elif f2r > f1r:
761 766 f2 = next(g2)
762 767 elif f1 == f2:
763 768 return f1 # a match
764 769 elif f1r == f2r or f1r < limit or f2r < limit:
765 770 return False # copy no longer relevant
766 771 except StopIteration:
767 772 return False
768 773
769 774 def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
770 775 """
771 776 check possible copies of f from msrc to mdst
772 777
773 778 srcctx = starting context for f in msrc
774 779 dstctx = destination context for f in mdst
775 780 f = the filename to check (as in msrc)
776 781 base = the changectx used as a merge base
777 782 tca = topological common ancestor for graft-like scenarios
778 783 remotebase = True if base is outside tca::srcctx, False otherwise
779 784 limit = the rev number to not search beyond
780 785 data = dictionary of dictionary to store copy data. (see mergecopies)
781 786
782 787 note: limit is only an optimization, and provides no guarantee that
783 788 irrelevant revisions will not be visited.
784 789 There is no easy way to make this algorithm stop in a guaranteed way
785 790 once it "goes behind a certain revision".
786 791 """
787 792
788 793 msrc = srcctx.manifest()
789 794 mdst = dstctx.manifest()
790 795 mb = base.manifest()
791 796 mta = tca.manifest()
792 797 # Might be true if this call is about finding backward renames.
793 798 # This happens in the case of grafts because the DAG is then rotated.
794 799 # If the file exists in both the base and the source, we are not looking
795 800 # for a rename on the source side, but on the part of the DAG that is
796 801 # traversed backwards.
797 802 #
798 803 # In the case there is both backward and forward renames (before and after
799 804 # the base) this is more complicated as we must detect a divergence.
800 805 # We use 'backwards = False' in that case.
801 806 backwards = not remotebase and base != tca and f in mb
802 807 getsrcfctx = _makegetfctx(srcctx)
803 808 getdstfctx = _makegetfctx(dstctx)
804 809
805 810 if msrc[f] == mb.get(f) and not remotebase:
806 811 # Nothing to merge
807 812 return
808 813
809 814 of = None
810 815 seen = {f}
811 816 for oc in getsrcfctx(f, msrc[f]).ancestors():
812 817 ocr = oc.linkrev()
813 818 of = oc.path()
814 819 if of in seen:
815 820 # check limit late - grab last rename before
816 821 if ocr < limit:
817 822 break
818 823 continue
819 824 seen.add(of)
820 825
821 826 # remember for dir rename detection
822 827 if backwards:
823 828 data['fullcopy'][of] = f # grafting backwards through renames
824 829 else:
825 830 data['fullcopy'][f] = of
826 831 if of not in mdst:
827 832 continue # no match, keep looking
828 833 if mdst[of] == mb.get(of):
829 834 return # no merge needed, quit early
830 835 c2 = getdstfctx(of, mdst[of])
831 836 # c2 might be a plain new file added on the destination side that is
832 837 # unrelated to the droids we are looking for.
833 838 cr = _related(oc, c2, tca.rev())
834 839 if cr and (of == f or of == c2.path()): # non-divergent
835 840 if backwards:
836 841 data['copy'][of] = f
837 842 elif of in mb:
838 843 data['copy'][f] = of
839 844 elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
840 845 data['copy'][of] = f
841 846 del data['fullcopy'][f]
842 847 data['fullcopy'][of] = f
843 848 else: # divergence w.r.t. graft CA on one side of topological CA
844 849 for sf in seen:
845 850 if sf in mb:
846 851 assert sf not in data['diverge']
847 852 data['diverge'][sf] = [f, of]
848 853 break
849 854 return
850 855
851 856 if of in mta:
852 857 if backwards or remotebase:
853 858 data['incomplete'][of] = f
854 859 else:
855 860 for sf in seen:
856 861 if sf in mb:
857 862 if tca == base:
858 863 data['diverge'].setdefault(sf, []).append(f)
859 864 else:
860 865 data['incompletediverge'][sf] = [of, f]
861 866 return
862 867
863 868 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
864 869 """reproduce copies from fromrev to rev in the dirstate
865 870
866 871 If skiprev is specified, it's a revision that should be used to
867 872 filter copy records. Any copies that occur between fromrev and
868 873 skiprev will not be duplicated, even if they appear in the set of
869 874 copies between fromrev and rev.
870 875 """
871 876 exclude = {}
872 877 if (skiprev is not None and
873 878 repo.ui.config('experimental', 'copytrace') != 'off'):
874 879 # copytrace='off' skips this line, but not the entire function because
875 880 # the line below is O(size of the repo) during a rebase, while the rest
876 881 # of the function is much faster (and is required for carrying copy
877 882 # metadata across the rebase anyway).
878 883 exclude = pathcopies(repo[fromrev], repo[skiprev])
879 884 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
880 885 # copies.pathcopies returns backward renames, so dst might not
881 886 # actually be in the dirstate
882 887 if dst in exclude:
883 888 continue
884 889 wctx[dst].markcopied(src)
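The module's public entry point, pathcopies(), routes through _forwardcopies() and therefore through the newly extracted _committedforwardcopies() whenever one endpoint is the working directory. A hedged sketch of driving it from Python follows; the repository path is a placeholder, and the snippet assumes a Mercurial version contemporary with this changeset (around 4.5), so treat it as illustrative rather than a stable scripting API.

# Illustrative only: Mercurial's internal API is not stable across releases.
from mercurial import ui as uimod, hg, copies

repo = hg.repository(uimod.ui.load(), '/path/to/repo')  # placeholder path
old = repo['.']    # working directory parent (a committed changeset)
new = repo[None]   # the working directory itself (rev() is None)

# pathcopies() chooses _forwardcopies()/_backwardrenames() based on ancestry;
# with a working-directory endpoint, _forwardcopies() delegates the committed
# part to _committedforwardcopies() and chains the dirstate copies on top.
for dst, src in sorted(copies.pathcopies(old, new).items()):
    print('%s <- %s' % (dst, src))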