##// END OF EJS Templates
copies: respect narrowmatcher in "parent -> working dir" case...
Martin von Zweigbergk -
r41902:fd973639 default draft
parent child Browse files
Show More
@@ -1,917 +1,919
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 scmutil,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
def _findlimit(repo, ctxa, ctxb):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    wdirparents = None
    a = ctxa.rev()
    b = ctxb.rev()
    if a is None:
        # ctxa is the working directory: stand in with the wdirrev
        # pseudo-revision and remember its real parents for the walk below
        wdirparents = (ctxa.p1(), ctxa.p2())
        a = node.wdirrev
    if b is None:
        # at most one of the two contexts may be the working directory
        assert not wdirparents
        wdirparents = (ctxb.p1(), ctxb.p2())
        b = node.wdirrev

    side = {a: -1, b: 1}
    # revs are negated so the min-heap pops the *highest* revision first,
    # i.e. we walk from newest to oldest (reverse topological order)
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(visit)
        if r == node.wdirrev:
            # the fake working-dir rev is not in the changelog; use the
            # parents captured above instead of cl.parentrevs()
            parents = [pctx.rev() for pctx in wdirparents]
        else:
            parents = cl.parentrevs(r)
        if parents[1] == node.nullrev:
            # drop the null second parent of non-merge revisions
            parents = parents[:1]
        for p in parents:
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
111 111 def _chain(src, dst, a, b):
112 112 """chain two sets of copies a->b"""
113 113 t = a.copy()
114 114 for k, v in b.iteritems():
115 115 if v in t:
116 116 # found a chain
117 117 if t[v] != k:
118 118 # file wasn't renamed back to itself
119 119 t[k] = t[v]
120 120 if v not in dst:
121 121 # chain was a rename, not a copy
122 122 del t[v]
123 123 if v in src:
124 124 # file is a copy of an existing file
125 125 t[k] = v
126 126
127 127 # remove criss-crossed copies
128 128 for k, v in list(t.items()):
129 129 if k in src and v in dst:
130 130 del t[k]
131 131
132 132 return t
133 133
def _tracefile(fctx, am, limit=node.nullrev):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am, stopping after the first ancestor lower than limit"""

    for ancestor in fctx.ancestors():
        # stop as soon as we find the version recorded in the ancestor
        # manifest: that is the file this one was copied/renamed from
        if am.get(ancestor.path(), None) == ancestor.filenode():
            return ancestor
        # past the search cutoff: walking further back cannot help
        if limit >= 0 and not ancestor.isintroducedafter(limit):
            return None
144 def _dirstatecopies(d, match=None):
145 ds = d._repo.dirstate
144 def _dirstatecopies(repo, match=None):
145 ds = repo.dirstate
146 146 c = ds.copies().copy()
147 147 for k in list(c):
148 148 if ds[k] not in 'anm' or (match and not match(k)):
149 149 del c[k]
150 150 return c
151 151
152 152 def _computeforwardmissing(a, b, match=None):
153 153 """Computes which files are in b but not a.
154 154 This is its own function so extensions can easily wrap this call to see what
155 155 files _forwardcopies is about to process.
156 156 """
157 157 ma = a.manifest()
158 158 mb = b.manifest()
159 159 return mb.filesnotin(ma, match=match)
160 160
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a, b)
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        # linear parent->child case: only b's own changed files can be new
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # one shared ancestry context keeps the per-file linkrev adjustment
    # cheap (see the _makegetfctx docstring for why this matters)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing file to search: %d\n' % len(missing))

    for f in missing:
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        # walk f's ancestors looking for a version present in a's manifest
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            cm[f] = ofctx.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    # apply the repo's narrow matcher on top of any caller-supplied matcher
    # so results never mention files outside the narrowspec
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        if a == b.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(b._repo, match)

        cm = _committedforwardcopies(a, b.p1(), match)
        # combine copies from dirstate if necessary
        return _chain(a, b, cm, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, match)
def _backwardrenames(a, b):
    """find renames (not copies) going backward, from b to a"""
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    forward = _forwardcopies(b, a)
    renames = {}
    for dest, source in sorted(forward.iteritems()):
        if source in a:
            # the source still exists in a, so this was a copy: skip it
            continue
        renames[source] = dest
    return renames
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    if x == y or not x or not y:
        # identical or empty endpoints: nothing can have been copied
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: a single forward walk suffices
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    elif anc == y:
        # y is an ancestor of x: a single backward walk suffices
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y)
    else:
        # unrelated endpoints: go back to the common ancestor, then forward
        if debug:
            repo.ui.debug('debug.copies: search mode: combined\n')
        return _chain(x, y, _backwardrenames(x, anc),
                      _forwardcopies(anc, y, match=match))
270 270 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
271 271 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
272 272 and c2. This is its own function so extensions can easily wrap this call
273 273 to see what files mergecopies is about to process.
274 274
275 275 Even though c1 and c2 are not used in this function, they are useful in
276 276 other extensions for being able to read the file nodes of the changed files.
277 277
278 278 "baselabel" can be passed to help distinguish the multiple computations
279 279 done in the graft case.
280 280 """
281 281 u1 = sorted(addedinm1 - addedinm2)
282 282 u2 = sorted(addedinm2 - addedinm1)
283 283
284 284 header = " unmatched files in %s"
285 285 if baselabel:
286 286 header += ' (from %s)' % baselabel
287 287 if u1:
288 288 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
289 289 if u2:
290 290 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
291 291
292 292 return u1, u2
293 293
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # build the ancestry set once and cache it on ctx; for the working
        # directory (rev is None) use its parents' revs instead
        revs = [rev]
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize: the same (f, n) pair is looked up repeatedly during checks
    return util.lrucachefunc(makectx)
329 329 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
330 330 """combine partial copy paths"""
331 331 remainder = {}
332 332 for f in copyfrom:
333 333 if f in copyto:
334 334 finalcopy[copyto[f]] = copyfrom[f]
335 335 del copyto[f]
336 336 for f in incompletediverge:
337 337 assert f not in diverge
338 338 ic = incompletediverge[f]
339 339 if ic[0] in copyto:
340 340 diverge[f] = [copyto[ic[0]], ic[1]]
341 341 else:
342 342 remainder[f] = ic
343 343 return remainder
344 344
def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        # filter dirstate copies through the narrow matcher so files
        # outside the narrowspec are not reported
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    boolctrace = stringutil.parsebool(copytracing)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    elif boolctrace is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}
    else:
        return _fullcopytracing(repo, c1, c2, base)
def _isfullcopytraceable(repo, c1, base):
    """ Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        # working directory: judge by its first parent instead
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        # a public endpoint can have arbitrarily many descendants; bail out
        return False
    sourcecommitlimit = repo.ui.configint('experimental',
                                          'copytrace.sourcecommitlimit')
    commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
    return commits < sourcecommitlimit
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the same five dicts as mergecopies():
    copy, movewithdir, diverge, renamedelete, dirmove.
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1, c2)
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2,
                                      m1.filesnotin(mta, repo.narrowmatch()),
                                      m2.filesnotin(mta, repo.narrowmatch()),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    # the incomplete halves found on each side complete each other; the
    # "dirty" side's data must be the 'copyto' argument (see _combinecopies)
    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(("   pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # for the working directory, reason about its first parent instead
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    # collect every file touched between base and c2 (linear walk)
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index c1's new files by basename and dirname so candidates for a
        # missing file can be found without scanning the whole manifest
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
769 771 def _related(f1, f2):
770 772 """return True if f1 and f2 filectx have a common ancestor
771 773
772 774 Walk back to common ancestor to see if the two files originate
773 775 from the same file. Since workingfilectx's rev() is None it messes
774 776 up the integer comparison logic, hence the pre-step check for
775 777 None (f1 and f2 can only be workingfilectx's initially).
776 778 """
777 779
778 780 if f1 == f2:
779 781 return True # a match
780 782
781 783 g1, g2 = f1.ancestors(), f2.ancestors()
782 784 try:
783 785 f1r, f2r = f1.linkrev(), f2.linkrev()
784 786
785 787 if f1r is None:
786 788 f1 = next(g1)
787 789 if f2r is None:
788 790 f2 = next(g2)
789 791
790 792 while True:
791 793 f1r, f2r = f1.linkrev(), f2.linkrev()
792 794 if f1r > f2r:
793 795 f1 = next(g1)
794 796 elif f2r > f1r:
795 797 f2 = next(g2)
796 798 else: # f1 and f2 point to files in the same linkrev
797 799 return f1 == f2 # true if they point to the same file
798 800 except StopIteration:
799 801 return False
800 802
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    # 'seen' tracks every name f has had along its ancestor chain
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file added on the destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        if of in mta:
            # the old name exists in the topological CA: this is only a
            # partial copy; record it so both sides can be combined later
            if backwards or remotebase:
                data['incomplete'][of] = f
            else:
                for sf in seen:
                    if sf in mb:
                        if tca == base:
                            data['diverge'].setdefault(sf, []).append(f)
                        else:
                            data['incompletediverge'][sf] = [of, f]
                        return
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # bctrace is None when the config value is unparsable; treat that the
    # same as copytrace being enabled
    if (skiprev is not None and
        (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now