##// END OF EJS Templates
copies: return True instead of filename as it is expected to return boolean...
Sushil khanchi -
r41873:35158796 default
parent child Browse files
Show More
@@ -1,917 +1,917 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 scmutil,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
def _findlimit(repo, ctxa, ctxb):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.

    ctxa/ctxb may be working-directory contexts (rev() is None); those are
    represented by node.wdirrev and walked through their real parents.
    Returns an integer revision number.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    wdirparents = None
    a = ctxa.rev()
    b = ctxb.rev()
    if a is None:
        wdirparents = (ctxa.p1(), ctxa.p2())
        a = node.wdirrev
    if b is None:
        # at most one of the two contexts may be the working directory
        assert not wdirparents
        wdirparents = (ctxb.p1(), ctxb.p2())
        b = node.wdirrev

    side = {a: -1, b: 1}
    # heap of negated revs, so popping yields the highest rev first
    # (reverse topological order)
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(visit)
        if r == node.wdirrev:
            parents = [pctx.rev() for pctx in wdirparents]
        else:
            parents = cl.parentrevs(r)
            if parents[1] == node.nullrev:
                parents = parents[:1]
        for p in parents:
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
110 110
111 111 def _chain(src, dst, a, b):
112 112 """chain two sets of copies a->b"""
113 113 t = a.copy()
114 114 for k, v in b.iteritems():
115 115 if v in t:
116 116 # found a chain
117 117 if t[v] != k:
118 118 # file wasn't renamed back to itself
119 119 t[k] = t[v]
120 120 if v not in dst:
121 121 # chain was a rename, not a copy
122 122 del t[v]
123 123 if v in src:
124 124 # file is a copy of an existing file
125 125 t[k] = v
126 126
127 127 # remove criss-crossed copies
128 128 for k, v in list(t.items()):
129 129 if k in src and v in dst:
130 130 del t[k]
131 131
132 132 return t
133 133
def _tracefile(fctx, am, limit=node.nullrev):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am, stopping after the first ancestor lower than limit

    Returns None (implicitly) when no matching ancestor is found within
    the limit.
    """

    for f in fctx.ancestors():
        # a match requires both the path and the exact filenode to be
        # present in the ancestor manifest
        if am.get(f.path(), None) == f.filenode():
            return f
        if limit >= 0 and not f.isintroducedafter(limit):
            return None
143 143
def _dirstatecopies(d, match=None):
    """return the {dst: src} copy map recorded in the dirstate of d's repo

    Entries whose destination is not in an added/normal/merged state
    ('a', 'n' or 'm'), or that are rejected by 'match', are dropped.
    """
    dirstate = d._repo.dirstate
    return {dst: src
            for dst, src in dirstate.copies().items()
            if dirstate[dst] in 'anm' and (not match or match(dst))}
151 151
def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    # the (potentially expensive) comparison is delegated to the manifest
    return mb.filesnotin(ma, match=match)
160 160
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a, b)
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files so each file's
    # linkrev adjustment does not redo the same changelog walk
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing file to search: %d\n' % len(missing))

    for f in missing:
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            cm[f] = ofctx.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm
215 215
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        if a == b.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(b, match)

        # trace copies up to b's parent, then layer on the dirstate copies
        cm = _committedforwardcopies(a, b.p1(), match)
        # combine copies from dirstate if necessary
        return _chain(a, b, cm, _dirstatecopies(b, match))
    return _committedforwardcopies(a, b, match)
230 230
def _backwardrenames(a, b):
    """find {src@b: dst@a} rename mapping where b is a descendant of a

    Computed by inverting _forwardcopies(b, a); plain copies (where the
    source still exists in a) are dropped, keeping only renames.
    """
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    # sorted() makes the arbitrary pick deterministic
    for k, v in sorted(f.iteritems()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
246 246
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: straightforward forward trace
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if a == y:
        # y is an ancestor of x: invert the renames
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y)
    if debug:
        repo.ui.debug('debug.copies: search mode: combined\n')
    # neither is an ancestor of the other: walk back to the common
    # ancestor, then forward, chaining the two partial mappings
    return _chain(x, y, _backwardrenames(x, a),
                  _forwardcopies(a, y, match=match))
269 269
def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
    """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
    and c2. This is its own function so extensions can easily wrap this call
    to see what files mergecopies is about to process.

    Even though c1 and c2 are not used in this function, they are useful in
    other extensions for being able to read the file nodes of the changed files.

    "baselabel" can be passed to help distinguish the multiple computations
    done in the graft case.
    """
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = " unmatched files in %s"
    if baselabel:
        header += ' (from %s)' % baselabel
    # emit one debug line per non-empty side
    for side, unmatched in (('local', u1), ('other', u2)):
        if unmatched:
            repo.ui.debug("%s:\n %s\n"
                          % (header % side, "\n ".join(unmatched)))

    return u1, u2
293 293
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            # working context has no rev: base ancestry on its parents
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        # cache the ancestry context on the changectx for reuse
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize: _checkcopies may ask about the same (file, node) repeatedly
    return util.lrucachefunc(makectx)
328 328
329 329 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
330 330 """combine partial copy paths"""
331 331 remainder = {}
332 332 for f in copyfrom:
333 333 if f in copyto:
334 334 finalcopy[copyto[f]] = copyfrom[f]
335 335 del copyto[f]
336 336 for f in incompletediverge:
337 337 assert f not in diverge
338 338 ic = incompletediverge[f]
339 339 if ic[0] in copyto:
340 340 diverge[f] = [copyto[ic[0]], ic[1]]
341 341 else:
342 342 remainder[f] = ic
343 343 return remainder
344 344
def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    boolctrace = stringutil.parsebool(copytracing)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    elif boolctrace is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}
    else:
        return _fullcopytracing(repo, c1, c2, base)
417 417
def _isfullcopytraceable(repo, c1, base):
    """ Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    # a working-directory context has no rev; use its first parent instead
    if c1.rev() is None:
        c1 = c1.p1()
    # anything involving an immutable (public) changeset is assumed to be
    # too expensive for full copytracing
    if not (c1.mutable() and base.mutable()):
        return False
    sourcecommitlimit = repo.ui.configint('experimental',
                                          'copytrace.sourcecommitlimit')
    return len(repo.revs('%d::%d', base.rev(), c1.rev())) < sourcecommitlimit
435 435
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the five-dict tuple described in mergecopies().
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1, c2)
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2,
                                      m1.filesnotin(mta, repo.narrowmatch()),
                                      m2.filesnotin(mta, repo.narrowmatch()),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    # complete the partial copies found on each side with information
    # from the other side; only the dirty side can hold them
    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    # files added on both sides may still be renames of a common origin
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so startswith() below matches
    # whole path components only
    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
667 667
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

    [experimental]
    copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the five-dict tuple described in mergecopies(); only the first
    dict ("copy") is ever populated by this algorithm.
    """

    # working-directory contexts have no rev; use their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep copies recorded between base and c2 whose source still
    # exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index files new in c1 (vs base) by basename and by directory,
        # so candidates for each missing file can be looked up cheaply
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
768 768
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # step past working file contexts: their linkrev is None and
        # cannot take part in the integer comparisons below
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        # walk both ancestor chains in lockstep, always advancing the
        # side at the higher linkrev, until both meet at the same rev
        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else: # f1 and f2 point to files in the same linkrev
                return f1 == f2 # true if they point to the same file
    except StopIteration:
        # one side ran out of ancestors first: the files are unrelated
        return False
800 800
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    seen = {f}  # paths already visited along f's ancestry
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        if of in mta:
            # the trace reached the topological CA without resolving:
            # record a partial ("incomplete") result for later combination
            if backwards or remotebase:
                data['incomplete'][of] = f
            else:
                for sf in seen:
                    if sf in mb:
                        if tca == base:
                            data['diverge'].setdefault(sf, []).append(f)
                        else:
                            data['incompletediverge'][sf] = [of, f]
                        return
893 893
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if (skiprev is not None and
        (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now