##// END OF EJS Templates
copies: consider nullrev a common ancestor...
Martin von Zweigbergk -
r41437:b80af070 default
parent child Browse files
Show More
@@ -1,922 +1,911 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 scmutil,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    # None stands for the working directory; represent it with the wdir
    # pseudo-revision so the arithmetic below works uniformly
    if a is None:
        a = node.wdirrev
    if b is None:
        b = node.wdirrev

    side = {a: -1, b: 1}
    # negated revs turn heapq's min-heap into a max-heap: newest rev first
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(visit)
        if r == node.wdirrev:
            # working directory: take parents from the dirstate
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
            if parents[1] == node.nullrev:
                # drop the absent second parent, but keep walking through
                # nullrev itself: the null revision is considered a common
                # ancestor of any two revisions, so a limit always exists
                parents = parents[:1]
        for p in parents:
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
111 105 def _chain(src, dst, a, b):
112 106 """chain two sets of copies a->b"""
113 107 t = a.copy()
114 108 for k, v in b.iteritems():
115 109 if v in t:
116 110 # found a chain
117 111 if t[v] != k:
118 112 # file wasn't renamed back to itself
119 113 t[k] = t[v]
120 114 if v not in dst:
121 115 # chain was a rename, not a copy
122 116 del t[v]
123 117 if v in src:
124 118 # file is a copy of an existing file
125 119 t[k] = v
126 120
127 121 # remove criss-crossed copies
128 122 for k, v in list(t.items()):
129 123 if k in src and v in dst:
130 124 del t[k]
131 125
132 126 return t
133 127
def _tracefile(fctx, am, limit=node.nullrev):
    """Return the ancestor of fctx whose filenode is recorded in manifest am.

    Walks fctx's ancestor chain and returns the first ancestor that matches
    am; returns None once an ancestor is reached that was not introduced
    after 'limit' (when a non-negative limit is given).
    """
    for ancestor in fctx.ancestors():
        matched = am.get(ancestor.path(), None) == ancestor.filenode()
        if matched:
            return ancestor
        pastlimit = limit >= 0 and not ancestor.isintroducedafter(limit)
        if pastlimit:
            return None
143 137
144 138 def _dirstatecopies(d, match=None):
145 139 ds = d._repo.dirstate
146 140 c = ds.copies().copy()
147 141 for k in list(c):
148 142 if ds[k] not in 'anm' or (match and not match(k)):
149 143 del c[k]
150 144 return c
151 145
152 146 def _computeforwardmissing(a, b, match=None):
153 147 """Computes which files are in b but not a.
154 148 This is its own function so extensions can easily wrap this call to see what
155 149 files _forwardcopies is about to process.
156 150 """
157 151 ma = a.manifest()
158 152 mb = b.manifest()
159 153 return mb.filesnotin(ma, match=match)
160 154
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst@b: src@a} mapping for files added between a and b.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    # _findlimit always returns a revision (nullrev counts as a common
    # ancestor), so no None check is needed here
    limit = _findlimit(repo, a.rev(), b.rev())
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared ancestry context keeps the per-file linkrev adjustment cheap
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing file to search: %d\n' % len(missing))

    for f in missing:
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            cm[f] = ofctx.path()
            if debug:
                dbg('debug.copies: time: %f seconds\n'
                    % (util.timer() - start))
    return cm
217 209
def _forwardcopies(a, b, match=None):
    """Find the {dst@b: src@a} copy mapping, where a is an ancestor of b."""
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        # both endpoints are committed changesets
        return _committedforwardcopies(a, b, match)
    # b is the working copy
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b, match)
    committed = _committedforwardcopies(a, b.p1(), match)
    # layer the dirstate copies on top of the committed ones
    return _chain(a, b, committed, _dirstatecopies(b, match))
232 224
def _backwardrenames(a, b):
    """Find renames going backward from b to a.

    Computed by inverting the forward copy mapping from b to a; entries
    whose source still exists in a are copies rather than renames and are
    dropped.  Returns {} when copytracing is explicitly disabled.
    """
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    # items() instead of iteritems() keeps this working on Python 3; sorting
    # makes the arbitrary 1:n pick deterministic
    for k, v in sorted(f.items()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
248 240
def pathcopies(x, y, match=None):
    """Find the {dst@y: src@x} copy mapping for a directed comparison."""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: searching copies from %s to %s\n'
            % (x, y))
    if x == y or not x or not y:
        # identical or degenerate endpoints: nothing to trace
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: trace forward only
        if debug:
            dbg('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if anc == y:
        # y is an ancestor of x: trace backward only
        if debug:
            dbg('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y)
    # otherwise go back to the common ancestor, then forward to y
    if debug:
        dbg('debug.copies: search mode: combined\n')
    return _chain(x, y, _backwardrenames(x, anc),
                  _forwardcopies(anc, y, match=match))
271 263
272 264 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
273 265 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
274 266 and c2. This is its own function so extensions can easily wrap this call
275 267 to see what files mergecopies is about to process.
276 268
277 269 Even though c1 and c2 are not used in this function, they are useful in
278 270 other extensions for being able to read the file nodes of the changed files.
279 271
280 272 "baselabel" can be passed to help distinguish the multiple computations
281 273 done in the graft case.
282 274 """
283 275 u1 = sorted(addedinm1 - addedinm2)
284 276 u2 = sorted(addedinm2 - addedinm1)
285 277
286 278 header = " unmatched files in %s"
287 279 if baselabel:
288 280 header += ' (from %s)' % baselabel
289 281 if u1:
290 282 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
291 283 if u2:
292 284 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
293 285
294 286 return u1, u2
295 287
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # no cached ancestry context: compute one from ctx (or from its
        # parents when ctx is the working copy) and memoize it on ctx
        revs = [rev]
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        # build a filectx for (filename f, filenode n) in the right context
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    return util.lrucachefunc(makectx)
330 322
331 323 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
332 324 """combine partial copy paths"""
333 325 remainder = {}
334 326 for f in copyfrom:
335 327 if f in copyto:
336 328 finalcopy[copyto[f]] = copyfrom[f]
337 329 del copyto[f]
338 330 for f in incompletediverge:
339 331 assert f not in diverge
340 332 ic = incompletediverge[f]
341 333 if ic[0] in copyto:
342 334 diverge[f] = [copyto[ic[0]], ic[1]]
343 335 else:
344 336 remainder[f] = ic
345 337 return remainder
346 338
def mergecopies(repo, c1, c2, base):
    """Dispatch to a copytracing algorithm and return its results.

    Finds moves and copies between contexts c1 and c2 that are relevant
    for merging; 'base' is used as the merge base.  Copytracing is used by
    commands like rebase, merge and unshelve to merge files that were
    moved/copied in one merge parent and modified in another.  For example:

     o          ---> 4 another commit
     |
     | o        ---> 3 commit that modifies a.txt
     | /
     o /        ---> 2 commit that moves a.txt to b.txt
     |/
     o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete"
    and "dirmove".

    "copy" maps destination name -> source name, where source is in c1 and
    destination is in c2 or vice-versa.

    "movewithdir" maps source name -> destination name, where the file at
    source, present in one context but not the other, needs to be moved to
    destination by the merge process because the other context moved the
    directory it is in.

    "diverge" maps source name -> list of destination names for divergent
    renames.

    "renamedelete" maps source name -> list of destination names for files
    deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" maps detected source dir -> destination dir renames.  This is
    needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    tracemode = repo.ui.config('experimental', 'copytrace')
    parsedmode = stringutil.parsebool(tracemode)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if tracemode == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    if parsedmode is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value; copytracing therefore stays enabled for unrecognized settings
        return {}, {}, {}, {}, {}
    return _fullcopytracing(repo, c1, c2, base)
419 411
def _isfullcopytraceable(repo, c1, base):
    """Decide whether the full copytracing algorithm is cheap enough to use.

    Returns True when base and source are both mutable (non-public) and the
    number of changesets between base and c1 is below
    `experimental.copytrace.sourcecommitlimit`, since full copytracing will
    then be fast enough while covering copies heuristics could miss.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        return False
    sourcecommitlimit = repo.ui.configint('experimental',
                                          'copytrace.sourcecommitlimit')
    commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
    return commits < sourcecommitlimit
437 429
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the same five dicts as mergecopies():
    (copy, movewithdir, diverge, renamedelete, dirmove).
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    # _findlimit always returns a revision now that nullrev is considered a
    # common ancestor of any two revisions
    limit = _findlimit(repo, c1.rev(), c2.rev())
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2,
                                      m1.filesnotin(mta, repo.narrowmatch()),
                                      m2.filesnotin(mta, repo.narrowmatch()),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
672 661
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # work on committed revisions only: fall back to the parent for the
    # working copy
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # seed with exact copies recorded between base and c2 whose source
    # still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index the files added on the c1 side by basename and by dirname,
        # matching the two rename shapes this heuristic recognizes
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
773 762
774 763 def _related(f1, f2):
775 764 """return True if f1 and f2 filectx have a common ancestor
776 765
777 766 Walk back to common ancestor to see if the two files originate
778 767 from the same file. Since workingfilectx's rev() is None it messes
779 768 up the integer comparison logic, hence the pre-step check for
780 769 None (f1 and f2 can only be workingfilectx's initially).
781 770 """
782 771
783 772 if f1 == f2:
784 773 return f1 # a match
785 774
786 775 g1, g2 = f1.ancestors(), f2.ancestors()
787 776 try:
788 777 f1r, f2r = f1.linkrev(), f2.linkrev()
789 778
790 779 if f1r is None:
791 780 f1 = next(g1)
792 781 if f2r is None:
793 782 f2 = next(g2)
794 783
795 784 while True:
796 785 f1r, f2r = f1.linkrev(), f2.linkrev()
797 786 if f1r > f2r:
798 787 f1 = next(g1)
799 788 elif f2r > f1r:
800 789 f2 = next(g2)
801 790 else: # f1 and f2 point to files in the same linkrev
802 791 return f1 == f2 # true if they point to the same file
803 792 except StopIteration:
804 793 return False
805 794
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    seen = {f}
    # walk f's filelog ancestry, tracking each distinct path name it had
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    # the walk ended without a full match; record partial results so the
    # caller can combine them with the other side (see _combinecopies)
    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return
898 887
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if (skiprev is not None and
        (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    # items() instead of iteritems(): same iteration on Python 2, and keeps
    # this function working on Python 3
    for dst, src in pathcopies(repo[fromrev], repo[rev]).items():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now