##// END OF EJS Templates
copies: use node.wdirrev instead of inventing another constant for it...
Martin von Zweigbergk -
r41267:e3e1b063 default
parent child Browse files
Show More
@@ -1,923 +1,922 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 scmutil,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendant
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.

    a and b are revision numbers; None means the working directory, which is
    represented internally by node.wdirrev.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    # on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    # - add unseen parents to side map
    # - clear side of any parent that has children on different sides
    # - track number of interesting revs that might still be on a side
    # - track the lowest interesting rev seen
    # - quit when interesting revs is zero

    cl = repo.changelog
    if a is None:
        a = node.wdirrev
    if b is None:
        b = node.wdirrev

    side = {a: -1, b: 1}
    # max-heap via negated revnums: pop highest rev first (reverse
    # topological order); NOTE(review): this presumes node.wdirrev compares
    # greater than any real revnum so the working directory is visited first
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(visit)
        if r == node.wdirrev:
            # working directory: its "parents" come from the dirstate
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @ 3 temporary amend commit for a1-amend
    # |
    # o 2 a1-amend
    # |
    # | o 1 a1
    # |/
    # o 0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendant of b or visa-versa.
    return min(limit, a, b)
111 110
112 111 def _chain(src, dst, a, b):
113 112 """chain two sets of copies a->b"""
114 113 t = a.copy()
115 114 for k, v in b.iteritems():
116 115 if v in t:
117 116 # found a chain
118 117 if t[v] != k:
119 118 # file wasn't renamed back to itself
120 119 t[k] = t[v]
121 120 if v not in dst:
122 121 # chain was a rename, not a copy
123 122 del t[v]
124 123 if v in src:
125 124 # file is a copy of an existing file
126 125 t[k] = v
127 126
128 127 # remove criss-crossed copies
129 128 for k, v in list(t.items()):
130 129 if k in src and v in dst:
131 130 del t[k]
132 131
133 132 return t
134 133
135 134 def _tracefile(fctx, am, limit=-1):
136 135 """return file context that is the ancestor of fctx present in ancestor
137 136 manifest am, stopping after the first ancestor lower than limit"""
138 137
139 138 for f in fctx.ancestors():
140 139 if am.get(f.path(), None) == f.filenode():
141 140 return f
142 141 if limit >= 0 and not f.isintroducedafter(limit):
143 142 return None
144 143
145 144 def _dirstatecopies(d, match=None):
146 145 ds = d._repo.dirstate
147 146 c = ds.copies().copy()
148 147 for k in list(c):
149 148 if ds[k] not in 'anm' or (match and not match(k)):
150 149 del c[k]
151 150 return c
152 151
153 152 def _computeforwardmissing(a, b, match=None):
154 153 """Computes which files are in b but not a.
155 154 This is its own function so extensions can easily wrap this call to see what
156 155 files _forwardcopies is about to process.
157 156 """
158 157 ma = a.manifest()
159 158 mb = b.manifest()
160 159 return mb.filesnotin(ma, match=match)
161 160
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst@b: src@a} mapping for files that are new in b.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a.rev(), b.rev())
    if limit is None:
        # no common ancestor found: disable the limit cutoff in _tracefile
        limit = -1
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        # linear parent->child case: restrict the diff to b's changed files
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files so linkrev
    # adjustment doesn't redo a full DAG walk per file (see _makegetfctx)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing file to search: %d\n' % len(missing))

    for f in missing:
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            cm[f] = ofctx.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm
218 217
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    match = a.repo().narrowmatch(match)
    # committed destination: the simple case
    if b.rev() is not None:
        return _committedforwardcopies(a, b, match)
    # b is the working copy
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b, match)
    # trace up to b's first parent, then layer dirstate copies on top
    committed = _committedforwardcopies(a, b.p1(), match)
    return _chain(a, b, committed, _dirstatecopies(b, match))
233 232
def _backwardrenames(a, b):
    """find renames from b back to its descendant a

    Computes _forwardcopies(b, a) and reverses it, keeping only genuine
    renames (entries whose source still exists in a are copies and are
    dropped). Returns {} when copytracing is turned off.
    """
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    renames = {}
    for dst, src in sorted(_forwardcopies(b, a).iteritems()):
        if src in a:
            # the source file still exists in a: a copy, not a rename
            continue
        renames[src] = dst
    return renames
249 248
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    if x == y or not x or not y:
        # identical or empty endpoints: nothing can have been copied
        return {}
    ancestor = y.ancestor(x)
    if ancestor == x:
        # x is an ancestor of y: plain forward search
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if ancestor == y:
        # y is an ancestor of x: walk renames backwards
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y)
    # unrelated endpoints: go back from x to the common ancestor, then
    # forward from there to y, and chain the two mappings
    if debug:
        repo.ui.debug('debug.copies: search mode: combined\n')
    return _chain(x, y, _backwardrenames(x, ancestor),
                  _forwardcopies(ancestor, y, match=match))
272 271
273 272 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
274 273 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
275 274 and c2. This is its own function so extensions can easily wrap this call
276 275 to see what files mergecopies is about to process.
277 276
278 277 Even though c1 and c2 are not used in this function, they are useful in
279 278 other extensions for being able to read the file nodes of the changed files.
280 279
281 280 "baselabel" can be passed to help distinguish the multiple computations
282 281 done in the graft case.
283 282 """
284 283 u1 = sorted(addedinm1 - addedinm2)
285 284 u2 = sorted(addedinm2 - addedinm1)
286 285
287 286 header = " unmatched files in %s"
288 287 if baselabel:
289 288 header += ' (from %s)' % baselabel
290 289 if u1:
291 290 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
292 291 if u2:
293 292 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
294 293
295 294 return u1, u2
296 295
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # no cached ancestry context yet: build one from ctx's revision, or
        # from its parents when ctx is the working directory (rev is None)
        revs = [rev]
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not created from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize: _checkcopies can request the same (path, node) repeatedly
    return util.lrucachefunc(makectx)
331 330
332 331 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
333 332 """combine partial copy paths"""
334 333 remainder = {}
335 334 for f in copyfrom:
336 335 if f in copyto:
337 336 finalcopy[copyto[f]] = copyfrom[f]
338 337 del copyto[f]
339 338 for f in incompletediverge:
340 339 assert f not in diverge
341 340 ic = incompletediverge[f]
342 341 if ic[0] in copyto:
343 342 diverge[f] = [copyto[ic[0]], ic[1]]
344 343 else:
345 344 remainder[f] = ic
346 345 return remainder
347 346
def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    boolctrace = stringutil.parsebool(copytracing)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    elif boolctrace is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}
    else:
        # default: any unrecognized or truthy setting means full copytracing
        return _fullcopytracing(repo, c1, c2, base)
420 419
421 420 def _isfullcopytraceable(repo, c1, base):
422 421 """ Checks that if base, source and destination are all no-public branches,
423 422 if yes let's use the full copytrace algorithm for increased capabilities
424 423 since it will be fast enough.
425 424
426 425 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
427 426 number of changesets from c1 to base such that if number of changesets are
428 427 more than the limit, full copytracing algorithm won't be used.
429 428 """
430 429 if c1.rev() is None:
431 430 c1 = c1.p1()
432 431 if c1.mutable() and base.mutable():
433 432 sourcecommitlimit = repo.ui.configint('experimental',
434 433 'copytrace.sourcecommitlimit')
435 434 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
436 435 return commits < sourcecommitlimit
437 436 return False
438 437
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the same five dicts as mergecopies():
    (copy, movewithdir, diverge, renamedelete, dirmove).
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        # topological common ancestor, distinct from the overridden base
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2,
                                      m1.filesnotin(mta, repo.narrowmatch()),
                                      m2.filesnotin(mta, repo.narrowmatch()),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    if dirtyc1:
        # combine the partial paths found on each side (order depends on
        # which endpoint is the grafted/dirty one)
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
673 672
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working-copy endpoints are represented by their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # start from the cheap dirstate/filelog-recorded copies base -> c2
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index the files new on the c1 side by basename and by dirname so
        # candidate moves can be looked up cheaply
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
774 773
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return f1 # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # step past working file contexts, whose linkrev is None
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        # walk back the side with the higher linkrev until both sides
        # reach the same linkrev
        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else: # f1 and f2 point to files in the same linkrev
                return f1 == f2 # true if they point to the same file
    except StopIteration:
        # one side ran out of ancestors: the files are unrelated
        return False
806 805
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    seen = {f}
    # walk f's filelog ancestry in srcctx, following renames
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file added on the destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    # the walk ended without a full match; 'of' is the last path reached
    if of in mta:
        if backwards or remotebase:
            # partial path: the other half may be found by the symmetric call
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return
899 898
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if (skiprev is not None and
        (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        # record the copy in the working context's dirstate
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now