##// END OF EJS Templates
copies: add docs for config `experimental.copytrace.sourcecommitlimit`...
Pulkit Goyal -
r34517:e79b3611 default
parent child Browse files
Show More
@@ -1,863 +1,867 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from . import (
15 15 match as matchmod,
16 16 node,
17 17 pathutil,
18 18 scmutil,
19 19 util,
20 20 )
21 21
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.

    a and b are revision numbers; None stands for the working directory,
    which is represented internally by the pseudo rev len(changelog).
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    # side: rev -> -1 (a's side), 1 (b's side), 0 (both sides / neither)
    side = {a: -1, b: 1}
    # heapq is a min-heap; revisions are negated so we pop highest rev first,
    # which gives us a reverse topological order over the changelog
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            # the working directory's parents come from the dirstate, not
            # from the changelog
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                # nullrev parent; ignore
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @ 3 temporary amend commit for a1-amend
    # |
    # o 2 a1-amend
    # |
    # | o 1 a1
    # |/
    # o 0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
106 106
107 107 def _chain(src, dst, a, b):
108 108 '''chain two sets of copies a->b'''
109 109 t = a.copy()
110 110 for k, v in b.iteritems():
111 111 if v in t:
112 112 # found a chain
113 113 if t[v] != k:
114 114 # file wasn't renamed back to itself
115 115 t[k] = t[v]
116 116 if v not in dst:
117 117 # chain was a rename, not a copy
118 118 del t[v]
119 119 if v in src:
120 120 # file is a copy of an existing file
121 121 t[k] = v
122 122
123 123 # remove criss-crossed copies
124 124 for k, v in t.items():
125 125 if k in src and v in dst:
126 126 del t[k]
127 127
128 128 return t
129 129
130 130 def _tracefile(fctx, am, limit=-1):
131 131 '''return file context that is the ancestor of fctx present in ancestor
132 132 manifest am, stopping after the first ancestor lower than limit'''
133 133
134 134 for f in fctx.ancestors():
135 135 if am.get(f.path(), None) == f.filenode():
136 136 return f
137 137 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
138 138 return None
139 139
140 140 def _dirstatecopies(d):
141 141 ds = d._repo.dirstate
142 142 c = ds.copies().copy()
143 143 for k in list(c):
144 144 if ds[k] not in 'anm':
145 145 del c[k]
146 146 return c
147 147
148 148 def _computeforwardmissing(a, b, match=None):
149 149 """Computes which files are in b but not a.
150 150 This is its own function so extensions can easily wrap this call to see what
151 151 files _forwardcopies is about to process.
152 152 """
153 153 ma = a.manifest()
154 154 mb = b.manifest()
155 155 return mb.filesnotin(ma, match=match)
156 156
def _forwardcopies(a, b, match=None):
    '''find {dst@b: src@a} copy mapping where a is an ancestor of b

    'match' optionally restricts which files are considered.  If b is the
    working directory, filelog-based tracing is done against its first
    parent and dirstate copies are chained on top at the end.
    '''

    # check for working copy
    w = None
    if b.rev() is None:
        w = b
        b = w.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(w)

    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        # no common ancestor: disable the limit optimization (-1)
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files to keep linkrev
    # adjustment cheap (see _makegetfctx for why this matters)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()

    # combine copies from dirstate if necessary
    if w is not None:
        cm = _chain(a, w, cm, _dirstatecopies(w))

    return cm
206 206
def _backwardrenames(a, b):
    '''find {src: dst} rename mapping going backwards from a to b

    Returns an empty mapping immediately when copytracing is disabled.
    '''
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    forward = _forwardcopies(b, a)
    renames = {}
    for dest, source in sorted(forward.iteritems()):
        if source in a:
            # the source still exists in a: that is a copy, not a rename
            continue
        renames[source] = dest
    return renames
222 222
def pathcopies(x, y, match=None):
    '''find {dst@y: src@x} copy mapping for directed compare'''
    if not x or not y or x == y:
        # identical or empty endpoints: nothing can have been copied
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: pure forward trace
        return _forwardcopies(x, y, match=match)
    if anc == y:
        # y is an ancestor of x: pure backward trace
        return _backwardrenames(x, y)
    # otherwise walk back from x to the ancestor, then forward to y
    backward = _backwardrenames(x, anc)
    forward = _forwardcopies(anc, y, match=match)
    return _chain(x, y, backward, forward)
234 234
235 235 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
236 236 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
237 237 and c2. This is its own function so extensions can easily wrap this call
238 238 to see what files mergecopies is about to process.
239 239
240 240 Even though c1 and c2 are not used in this function, they are useful in
241 241 other extensions for being able to read the file nodes of the changed files.
242 242
243 243 "baselabel" can be passed to help distinguish the multiple computations
244 244 done in the graft case.
245 245 """
246 246 u1 = sorted(addedinm1 - addedinm2)
247 247 u2 = sorted(addedinm2 - addedinm1)
248 248
249 249 header = " unmatched files in %s"
250 250 if baselabel:
251 251 header += ' (from %s)' % baselabel
252 252 if u1:
253 253 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
254 254 if u2:
255 255 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
256 256 return u1, u2
257 257
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    # reuse the context's ancestry set if it already has one; otherwise
    # build it once and cache it on the context for later callers
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            # working directory: anchor the ancestry on its parents
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        # returns the filectx for file f at node n, sharing the cached
        # ancestry context so linkrev adjustment stays cheap
        if n in node.wdirnodes: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    return util.lrucachefunc(makectx)
292 292
293 293 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
294 294 """combine partial copy paths"""
295 295 remainder = {}
296 296 for f in copyfrom:
297 297 if f in copyto:
298 298 finalcopy[copyto[f]] = copyfrom[f]
299 299 del copyto[f]
300 300 for f in incompletediverge:
301 301 assert f not in diverge
302 302 ic = incompletediverge[f]
303 303 if ic[0] in copyto:
304 304 diverge[f] = [copyto[ic[0]], ic[1]]
305 305 else:
306 306 remainder[f] = ic
307 307 return remainder
308 308
def mergecopies(repo, c1, c2, base):
    """Dispatch to the configured copytracing algorithm and return the moves
    and copies between contexts c1 and c2 that are relevant for merging them,
    using 'base' as the merge base.

    Commands like rebase, merge and unshelve rely on this to merge files that
    were moved or copied in one merge parent and modified in the other.  For
    example:

    o ---> 4 another commit
    |
    | o ---> 3 commit that modifies a.txt
    | /
    o / ---> 2 commit that moves a.txt to b.txt
    |/
    o ---> 1 merge base

    Rebasing revision 3 onto revision 4 with copytracing disabled prints:

    ```other changed <file> which local deleted```

    Five dicts are returned:

    "copy": destination name -> source name, where source is in c1 and
    destination is in c2 or vice-versa.

    "movewithdir": source name -> destination name, for a file present in
    one context but not the other that must be moved by the merge because
    the other context moved the directory it lives in.

    "diverge": source name -> list of destination names for divergent
    renames.

    "renamedelete": source name -> list of destination names for files
    deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove": detected source dir -> destination dir renames; needed to
    handle changes to new files previously grafted into renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    mode = repo.ui.config('experimental', 'copytrace')

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if mode == 'off':
        return {}, {}, {}, {}, {}
    if mode == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    return _fullcopytracing(repo, c1, c2, base)
378 378
379 379 def _isfullcopytraceable(repo, c1, base):
380 380 """ Checks that if base, source and destination are all no-public branches,
381 381 if yes let's use the full copytrace algorithm for increased capabilities
382 382 since it will be fast enough.
383
384 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
385 number of changesets from c1 to base such that if number of changesets are
386 more than the limit, full copytracing algorithm won't be used.
383 387 """
384 388 if c1.rev() is None:
385 389 c1 = c1.p1()
386 390 if c1.mutable() and base.mutable():
387 391 sourcecommitlimit = repo.ui.configint('experimental',
388 392 'copytrace.sourcecommitlimit')
389 393 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
390 394 return commits < sourcecommitlimit
391 395 return False
392 396
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the same five dicts as mergecopies():
    (copy, movewithdir, diverge, renamedelete, dirmove).
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.descendant(wc) and base.descendant(base) are False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not (base == _c1 or base.descendant(_c1))
    dirtyc2 = not (base == _c2 or base.descendant(_c2))
    graft = dirtyc1 or dirtyc2
    # tca: topological common ancestor, which may differ from 'base' when
    # grafting (the DAG is "rotated" around base in that case)
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb)
    addedinm2 = m2.filesnotin(mb)
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
                                      m2.filesnotin(mta),
                                      baselabel='topological common ancestor')

    # trace every unmatched file on each side back through the DAG
    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    # stitch together the partial copies found on opposite sides; the first
    # argument must be the "dirty" (graft) side's partial results
    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    # files added on both sides may still be renames of a common original
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc + "/")
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc + "/")
        elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
            # files from the same directory moved to two different places
            invalid.add(dsrc + "/")
        else:
            # looks good so far
            dirmove[dsrc + "/"] = ddst + "/"

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
624 628
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    Returns the same five-tuple shape as mergecopies(); only the first
    ("copy") dict is ever populated by this algorithm.
    """

    # work on the first parents when either endpoint is the working directory
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2 (exclusive of base)
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies recorded between base and c2 whose source still exists in c1
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    # NOTE(review): filter() returns a list on Python 2 only; on Python 3 the
    # 'if missingfiles:' test below would always be true — this file targets
    # Python 2.
    missingfiles = filter(lambda f: f not in m1 and f in base and f in c2,
                          changedfiles)

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index the files new in c1 by basename and by directory so that
        # rename candidates can be looked up cheaply
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # in case of a rebase/graft, base may not be a common ancestor
        anc = c1.ancestor(c2)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2, anc.rev()):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
712 716
def _related(f1, f2, limit):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).

    On a match the matching filectx (truthy) is returned rather than the
    literal True; False is returned otherwise.
    """

    if f1 == f2:
        return f1 # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # step workingfilectx's down to their first committed ancestor so
        # the linkrev comparisons below operate on integers
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        # walk both ancestries in lockstep, always advancing the side with
        # the higher linkrev, until the walks meet or pass 'limit'
        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            elif f1 == f2:
                return f1 # a match
            elif f1r == f2r or f1r < limit or f2r < limit:
                return False # copy no longer relevant
    except StopIteration:
        # one ancestry was exhausted without meeting the other
        return False
746 750
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    # 'seen' holds every name f has had along the traced ancestry
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2, tca.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        if of in mta:
            if backwards or remotebase:
                # only half of the copy was found; record it as incomplete so
                # the caller can combine it with the other side's results
                data['incomplete'][of] = f
            else:
                for sf in seen:
                    if sf in mb:
                        if tca == base:
                            data['diverge'].setdefault(sf, []).append(f)
                        else:
                            data['incompletediverge'][sf] = [of, f]
                        return
840 844
def duplicatecopies(repo, rev, fromrev, skiprev=None):
    '''reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    '''
    exclude = {}
    if skiprev is not None:
        if repo.ui.config('experimental', 'copytrace') != 'off':
            # copytrace='off' skips this line, but not the entire function
            # because the line below is O(size of the repo) during a rebase,
            # while the rest of the function is much faster (and is required
            # for carrying copy metadata across the rebase anyway).
            exclude = pathcopies(repo[fromrev], repo[skiprev])
    allcopies = pathcopies(repo[fromrev], repo[rev])
    for dst, src in allcopies.iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        if repo.dirstate[dst] in "nma":
            repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now