##// END OF EJS Templates
copies: use node.nullrev instead of literal -1...
Martin von Zweigbergk -
r41268:f3f5bfbf default
parent child Browse files
Show More
@@ -1,922 +1,922 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 scmutil,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendant
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.

    a and b are revision numbers; None stands for the working directory
    (node.wdirrev).
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    if a is None:
        a = node.wdirrev
    if b is None:
        b = node.wdirrev

    side = {a: -1, b: 1}
    # max-heap of revisions to visit, simulated by negating rev numbers
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(visit)
        if r == node.wdirrev:
            # working directory: its "parents" come from the dirstate
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                # nullrev parent, nothing to walk
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendant of b or visa-versa.
    return min(limit, a, b)
110 110
111 111 def _chain(src, dst, a, b):
112 112 """chain two sets of copies a->b"""
113 113 t = a.copy()
114 114 for k, v in b.iteritems():
115 115 if v in t:
116 116 # found a chain
117 117 if t[v] != k:
118 118 # file wasn't renamed back to itself
119 119 t[k] = t[v]
120 120 if v not in dst:
121 121 # chain was a rename, not a copy
122 122 del t[v]
123 123 if v in src:
124 124 # file is a copy of an existing file
125 125 t[k] = v
126 126
127 127 # remove criss-crossed copies
128 128 for k, v in list(t.items()):
129 129 if k in src and v in dst:
130 130 del t[k]
131 131
132 132 return t
133 133
def _tracefile(fctx, am, limit=node.nullrev):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am, stopping after the first ancestor lower than limit

    Returns None when no ancestor of fctx is recorded in am, or when the
    walk is cut off by the limit.
    """

    for f in fctx.ancestors():
        # the ancestor's filenode matches the one recorded in am: found it
        if am.get(f.path(), None) == f.filenode():
            return f
        # limit < 0 (node.nullrev) means "no limit"; otherwise stop once
        # the ancestor cannot have been introduced after the cutoff rev
        if limit >= 0 and not f.isintroducedafter(limit):
            return None
143 143
def _dirstatecopies(d, match=None):
    """return the {dst: src} copy map recorded in the dirstate of d's repo

    Entries are dropped when the destination is no longer tracked (dirstate
    status not one of 'a'dded/'n'ormal/'m'erged) or is rejected by the
    optional matcher.  d is presumably a working context — TODO confirm.
    """
    ds = d._repo.dirstate
    # copy the dict so deletions below don't mutate dirstate's own record
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in 'anm' or (match and not match(k)):
            del c[k]
    return c
151 151
152 152 def _computeforwardmissing(a, b, match=None):
153 153 """Computes which files are in b but not a.
154 154 This is its own function so extensions can easily wrap this call to see what
155 155 files _forwardcopies is about to process.
156 156 """
157 157 ma = a.manifest()
158 158 mb = b.manifest()
159 159 return mb.filesnotin(ma, match=match)
160 160
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst@b: src@a} mapping for files that appear in b but not a.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a.rev(), b.rev())
    if limit is None:
        # no common ancestor: nullrev (-1) disables the limit check in
        # _tracefile, which treats any negative limit as "no limit"
        limit = node.nullrev
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared ancestry context keeps linkrev adjustment cheap across files
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing file to search: %d\n' % len(missing))

    for f in missing:
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            cm[f] = ofctx.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm
217 217
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        # committed-to-committed comparison
        return _committedforwardcopies(a, b, match)
    # b is the working directory
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b, match)
    # trace copies up to b's parent, then layer dirstate copies on top
    committed = _committedforwardcopies(a, b.p1(), match)
    return _chain(a, b, committed, _dirstatecopies(b, match))
232 232
def _backwardrenames(a, b):
    """find {src@a: dst@b} rename mapping where b is a descendant of a

    Copies (where the source still exists in a) are dropped; only true
    renames are reported.
    """
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    forward = _forwardcopies(b, a)
    renames = {}
    for dstfile, srcfile in sorted(forward.iteritems()):
        if srcfile in a:
            # source still present in a: it was a copy, not a rename
            continue
        renames[srcfile] = dstfile
    return renames
248 248
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Dispatches to forward tracing (x is an ancestor of y), backward tracing
    (y is an ancestor of x), or a chained backward+forward walk through
    their common ancestor.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    if x == y or not x or not y:
        # identical or empty endpoints: nothing can have been copied
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if a == y:
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y)
    if debug:
        repo.ui.debug('debug.copies: search mode: combined\n')
    # neither is an ancestor of the other: walk back to 'a', then forward
    return _chain(x, y, _backwardrenames(x, a),
                  _forwardcopies(a, y, match=match))
271 271
272 272 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
273 273 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
274 274 and c2. This is its own function so extensions can easily wrap this call
275 275 to see what files mergecopies is about to process.
276 276
277 277 Even though c1 and c2 are not used in this function, they are useful in
278 278 other extensions for being able to read the file nodes of the changed files.
279 279
280 280 "baselabel" can be passed to help distinguish the multiple computations
281 281 done in the graft case.
282 282 """
283 283 u1 = sorted(addedinm1 - addedinm2)
284 284 u2 = sorted(addedinm2 - addedinm1)
285 285
286 286 header = " unmatched files in %s"
287 287 if baselabel:
288 288 header += ' (from %s)' % baselabel
289 289 if u1:
290 290 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
291 291 if u2:
292 292 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
293 293
294 294 return u1, u2
295 295
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    # reuse an existing ancestry context if the changectx carries one
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            # working context: anchor the ancestry on its parents instead
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        # cache it on the changectx for subsequent calls
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    return util.lrucachefunc(makectx)
330 330
331 331 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
332 332 """combine partial copy paths"""
333 333 remainder = {}
334 334 for f in copyfrom:
335 335 if f in copyto:
336 336 finalcopy[copyto[f]] = copyfrom[f]
337 337 del copyto[f]
338 338 for f in incompletediverge:
339 339 assert f not in diverge
340 340 ic = incompletediverge[f]
341 341 if ic[0] in copyto:
342 342 diverge[f] = [copyto[ic[0]], ic[1]]
343 343 else:
344 344 remainder[f] = ic
345 345 return remainder
346 346
def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    boolctrace = stringutil.parsebool(copytracing)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    elif boolctrace is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}
    else:
        return _fullcopytracing(repo, c1, c2, base)
419 419
420 420 def _isfullcopytraceable(repo, c1, base):
421 421 """ Checks that if base, source and destination are all no-public branches,
422 422 if yes let's use the full copytrace algorithm for increased capabilities
423 423 since it will be fast enough.
424 424
425 425 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
426 426 number of changesets from c1 to base such that if number of changesets are
427 427 more than the limit, full copytracing algorithm won't be used.
428 428 """
429 429 if c1.rev() is None:
430 430 c1 = c1.p1()
431 431 if c1.mutable() and base.mutable():
432 432 sourcecommitlimit = repo.ui.configint('experimental',
433 433 'copytrace.sourcecommitlimit')
434 434 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
435 435 return commits < sourcecommitlimit
436 436 return False
437 437
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the same five dicts as mergecopies():
    (copy, movewithdir, diverge, renamedelete, dirmove).
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2,
                                      m1.filesnotin(mta, repo.narrowmatch()),
                                      m2.filesnotin(mta, repo.narrowmatch()),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    # join the incomplete halves found on each side; which dict goes first
    # depends on which endpoint is the grafted ("dirty") one
    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
672 672
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
                    (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep already-known copies whose source survives in c1
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index new files on c1's side by basename and by dirname so we can
        # quickly find move candidates for each missing file
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
773 773
774 774 def _related(f1, f2):
775 775 """return True if f1 and f2 filectx have a common ancestor
776 776
777 777 Walk back to common ancestor to see if the two files originate
778 778 from the same file. Since workingfilectx's rev() is None it messes
779 779 up the integer comparison logic, hence the pre-step check for
780 780 None (f1 and f2 can only be workingfilectx's initially).
781 781 """
782 782
783 783 if f1 == f2:
784 784 return f1 # a match
785 785
786 786 g1, g2 = f1.ancestors(), f2.ancestors()
787 787 try:
788 788 f1r, f2r = f1.linkrev(), f2.linkrev()
789 789
790 790 if f1r is None:
791 791 f1 = next(g1)
792 792 if f2r is None:
793 793 f2 = next(g2)
794 794
795 795 while True:
796 796 f1r, f2r = f1.linkrev(), f2.linkrev()
797 797 if f1r > f2r:
798 798 f1 = next(g1)
799 799 elif f2r > f1r:
800 800 f2 = next(g2)
801 801 else: # f1 and f2 point to files in the same linkrev
802 802 return f1 == f2 # true if they point to the same file
803 803 except StopIteration:
804 804 return False
805 805
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    seen = {f}
    # walk the filelog ancestry of f, collecting every rename it went through
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        if of in mta:
            if backwards or remotebase:
                data['incomplete'][of] = f
            else:
                for sf in seen:
                    if sf in mb:
                        if tca == base:
                            data['diverge'].setdefault(sf, []).append(f)
                        else:
                            data['incompletediverge'][sf] = [of, f]
                        return
898 898
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.

    wctx is presumably the working context whose dirstate records the
    copies — TODO confirm against callers.
    """
    exclude = {}
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if (skiprev is not None and
        (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now