##// END OF EJS Templates
copies: inline a variable that's used only once...
Martin von Zweigbergk -
r39981:85c8ff26 default
parent child Browse files
Show More
@@ -1,893 +1,892 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 scmutil,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.

    'a' and 'b' are revision numbers; None stands for the working directory.
    Returns an integer revnum (the limit), or None when a and b have no
    common ancestor at all.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    # on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    # - add unseen parents to side map
    # - clear side of any parent that has children on different sides
    # - track number of interesting revs that might still be on a side
    # - track the lowest interesting rev seen
    # - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    # heapq is a min-heap, so revs are negated to pop the highest rev first,
    # i.e. the walk proceeds from descendants towards ancestors
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            # the pseudo working rev has no changelog entry; use the
            # dirstate parents instead
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @ 3 temporary amend commit for a1-amend
    # |
    # o 2 a1-amend
    # |
    # | o 1 a1
    # |/
    # o 0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
111 111
112 112 def _chain(src, dst, a, b):
113 113 """chain two sets of copies a->b"""
114 114 t = a.copy()
115 115 for k, v in b.iteritems():
116 116 if v in t:
117 117 # found a chain
118 118 if t[v] != k:
119 119 # file wasn't renamed back to itself
120 120 t[k] = t[v]
121 121 if v not in dst:
122 122 # chain was a rename, not a copy
123 123 del t[v]
124 124 if v in src:
125 125 # file is a copy of an existing file
126 126 t[k] = v
127 127
128 128 # remove criss-crossed copies
129 129 for k, v in list(t.items()):
130 130 if k in src and v in dst:
131 131 del t[k]
132 132
133 133 return t
134 134
def _tracefile(fctx, am, limit=-1):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am, stopping after the first ancestor lower than limit

    Returns None (implicitly) when no ancestor of fctx is recorded in am,
    or when the search is abandoned because it dropped below 'limit'.
    """

    for f in fctx.ancestors():
        # an ancestor "is present" in am when the manifest records the same
        # filenode under the same path
        if am.get(f.path(), None) == f.filenode():
            return f
        # limit is only an optimization; both linkrev and rev must be below
        # it before giving up (linkrev alone may lag behind the real rev)
        if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
            return None
144 144
145 145 def _dirstatecopies(d, match=None):
146 146 ds = d._repo.dirstate
147 147 c = ds.copies().copy()
148 148 for k in list(c):
149 149 if ds[k] not in 'anm' or (match and not match(k)):
150 150 del c[k]
151 151 return c
152 152
153 153 def _computeforwardmissing(a, b, match=None):
154 154 """Computes which files are in b but not a.
155 155 This is its own function so extensions can easily wrap this call to see what
156 156 files _forwardcopies is about to process.
157 157 """
158 158 ma = a.manifest()
159 159 mb = b.manifest()
160 160 return mb.filesnotin(ma, match=match)
161 161
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst@b: src@a} dict of copies for files added between a and b.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        # no common ancestor: disable the limit optimization entirely
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share a single ancestry context across all traced files to keep the
    # linkrev-adjustment cost bounded (see _makegetfctx for background)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()
    return cm
196 196
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if b.rev() is not None:
        # both endpoints are committed: trace purely through the filelogs
        return _committedforwardcopies(a, b, match)

    # b is the working directory
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b, match)

    # trace up to b's first parent, then layer the dirstate copies on top
    committed = _committedforwardcopies(a, b.p1(), match)
    return _chain(a, b, committed, _dirstatecopies(b, match))
210 210
def _backwardrenames(a, b):
    """find renames going from b back to a, dropping plain copies"""
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    renames = {}
    for dst, src in sorted(_forwardcopies(b, a).iteritems()):
        if src in a:
            # the source still exists in a: a copy, not a rename
            continue
        renames[src] = dst
    return renames
226 226
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    if x == y or not x or not y:
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: pure forward trace
        return _forwardcopies(x, y, match=match)
    if anc == y:
        # y is an ancestor of x: pure backward trace
        return _backwardrenames(x, y)
    # otherwise walk back to the common ancestor, then forward to y
    return _chain(x, y, _backwardrenames(x, anc),
                  _forwardcopies(anc, y, match=match))
238 238
239 239 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
240 240 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
241 241 and c2. This is its own function so extensions can easily wrap this call
242 242 to see what files mergecopies is about to process.
243 243
244 244 Even though c1 and c2 are not used in this function, they are useful in
245 245 other extensions for being able to read the file nodes of the changed files.
246 246
247 247 "baselabel" can be passed to help distinguish the multiple computations
248 248 done in the graft case.
249 249 """
250 250 u1 = sorted(addedinm1 - addedinm2)
251 251 u2 = sorted(addedinm2 - addedinm1)
252 252
253 253 header = " unmatched files in %s"
254 254 if baselabel:
255 255 header += ' (from %s)' % baselabel
256 256 if u1:
257 257 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
258 258 if u2:
259 259 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
260 260
261 261 narrowmatch = repo.narrowmatch()
262 262 if not narrowmatch.always():
263 263 u1 = [f for f in u1 if narrowmatch(f)]
264 264 u2 = [f for f in u2 if narrowmatch(f)]
265 265 return u1, u2
266 266
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # compute (and cache on ctx) the ancestor set used for linkrev
        # adjustment; a working context (rev is None) starts from its parents
        revs = [rev]
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        # builds the filectx for file f at filenode n
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize: the same (f, n) pair may be requested repeatedly
    return util.lrucachefunc(makectx)
301 301
302 302 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
303 303 """combine partial copy paths"""
304 304 remainder = {}
305 305 for f in copyfrom:
306 306 if f in copyto:
307 307 finalcopy[copyto[f]] = copyfrom[f]
308 308 del copyto[f]
309 309 for f in incompletediverge:
310 310 assert f not in diverge
311 311 ic = incompletediverge[f]
312 312 if ic[0] in copyto:
313 313 diverge[f] = [copyto[ic[0]], ic[1]]
314 314 else:
315 315 remainder[f] = ic
316 316 return remainder
317 317
def mergecopies(repo, c1, c2, base):
    """Find moves and copies between context c1 and c2 relevant for merging,
    dispatching to a copytracing algorithm chosen via the
    'experimental.copytrace' config. 'base' will be used as the merge base.

    Copytracing lets commands such as rebase, merge and unshelve merge files
    that were moved/copied in one merge parent and modified in another.
    For example:

     o  ---> 4 another commit
     |
     |   o  ---> 3 commit that modifies a.txt
     |  /
     o /  ---> 2 commit that moves a.txt to b.txt
     |/
     o  ---> 1 merge base

    Rebasing revision 3 onto revision 4 with copytracing disabled cannot
    find a.txt in revision 4, and we prints the following message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" maps destination name -> source name, where source is in c1 and
    destination is in c2 or vice-versa.

    "movewithdir" maps source name -> destination name, where the file at
    source is present in one context but not the other and must be moved to
    destination by the merge process, because the other context moved the
    directory it is in.

    "diverge" maps source name -> list of destination names for divergent
    renames.

    "renamedelete" maps source name -> list of destination names for files
    deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" maps detected source dir -> destination dir renames. This is
    needed for handling changes to new files previously grafted into renamed
    directories.
    """
    noresult = ({}, {}, {}, {}, {})

    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return noresult

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)

    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return noresult

    return _fullcopytracing(repo, c1, c2, base)
390 390
391 391 def _isfullcopytraceable(repo, c1, base):
392 392 """ Checks that if base, source and destination are all no-public branches,
393 393 if yes let's use the full copytrace algorithm for increased capabilities
394 394 since it will be fast enough.
395 395
396 396 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
397 397 number of changesets from c1 to base such that if number of changesets are
398 398 more than the limit, full copytracing algorithm won't be used.
399 399 """
400 400 if c1.rev() is None:
401 401 c1 = c1.p1()
402 402 if c1.mutable() and base.mutable():
403 403 sourcecommitlimit = repo.ui.configint('experimental',
404 404 'copytrace.sourcecommitlimit')
405 405 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
406 406 return commits < sourcecommitlimit
407 407 return False
408 408
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the same five dicts as mergecopies(): copy, movewithdir, diverge,
    renamedelete and dirmove.
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb)
    addedinm2 = m2.filesnotin(mb)
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
                                      m2.filesnotin(mta),
                                      baselabel='topological common ancestor')

    # trace each one-side-only file back through renames/copies
    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    # resolve partial copy chains across the two sides; only the "dirty"
    # side of a graft can hold the incomplete half of a chain
    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    # files added on both sides may still be renames of a common original
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
642 642
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
                    (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same five-tuple as mergecopies(); only the first dict
    (copies) is ever populated here.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    # collect every file touched between base and c2 (exclusive of base)
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index files added on the c1 side by basename and by dirname, so
        # each missing file can be matched against both heuristics cheaply
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
743 743
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return f1 # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # step past working file contexts (linkrev None) before comparing
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        # walk both ancestor chains in lockstep, always advancing the side
        # with the higher linkrev, until both land in the same linkrev
        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else: # f1 and f2 point to files in the same linkrev
                return f1 == f2 # true if they point to the same file
    except StopIteration:
        # one ancestor chain ran out first: no common origin
        return False
775 775
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """
    # NOTE: the source text for this block contained both the pre- and
    # post-change lines of an unresolved diff ("ocr = oc.linkrev()" /
    # "if ocr < limit:" vs "if oc.linkrev() < limit:"); it is resolved here
    # to the committed version, which inlines the once-used variable.

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    seen = {f}
    # walk f's filelog ancestry, following renames via path changes
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        if of in mta:
            if backwards or remotebase:
                data['incomplete'][of] = f
            else:
                for sf in seen:
                    if sf in mb:
                        if tca == base:
                            data['diverge'].setdefault(sf, []).append(f)
                        else:
                            data['incompletediverge'][sf] = [of, f]
                        return
869 868
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    parsed = stringutil.parsebool(ctraceconfig)
    # copytrace counts as "off" only when the config parses to an explicit
    # False and isn't the special 'heuristics' value (unparseable -> None,
    # which we treat as on)
    copytraceoff = parsed is False and ctraceconfig != 'heuristics'
    if skiprev is not None and not copytraceoff:
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        if dst in exclude:
            # this copy also exists between fromrev and skiprev: filtered out
            continue
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now