##// END OF EJS Templates
py3: avoid changing a dictionary while iterating over it
Gregory Szorc -
r36134:c0277161 default
parent child Browse files
Show More
@@ -1,883 +1,883 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 scmutil,
21 21 util,
22 22 )
23 23
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.

    a and b are revision numbers, or None to mean the working directory.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    # revs are negated so the min-heap pops the highest revision first,
    # i.e. the graph is walked from the endpoints downward
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            # the working directory pseudo rev gets its parents from the
            # dirstate rather than the changelog
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @ 3 temporary amend commit for a1-amend
    # |
    # o 2 a1-amend
    # |
    # | o 1 a1
    # |/
    # o 0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
108 108
109 109 def _chain(src, dst, a, b):
110 110 """chain two sets of copies a->b"""
111 111 t = a.copy()
112 112 for k, v in b.iteritems():
113 113 if v in t:
114 114 # found a chain
115 115 if t[v] != k:
116 116 # file wasn't renamed back to itself
117 117 t[k] = t[v]
118 118 if v not in dst:
119 119 # chain was a rename, not a copy
120 120 del t[v]
121 121 if v in src:
122 122 # file is a copy of an existing file
123 123 t[k] = v
124 124
125 125 # remove criss-crossed copies
126 for k, v in t.items():
126 for k, v in list(t.items()):
127 127 if k in src and v in dst:
128 128 del t[k]
129 129
130 130 return t
131 131
def _tracefile(fctx, am, limit=-1):
    """Walk fctx's ancestry and return the ancestor recorded in manifest am.

    Returns None (implicitly or explicitly) when no such ancestor is found;
    gives up early once an ancestor drops below ``limit`` in both linkrev
    and rev (only when a non-negative limit is given).
    """
    for ancestor in fctx.ancestors():
        if am.get(ancestor.path(), None) == ancestor.filenode():
            return ancestor
        past_limit = (limit >= 0
                      and ancestor.linkrev() < limit
                      and ancestor.rev() < limit)
        if past_limit:
            return None
141 141
def _dirstatecopies(d, match=None):
    """Return the dirstate copy map of context ``d``.

    Entries whose destination is not tracked as added/normal/merged, or is
    rejected by ``match`` (when given), are dropped.
    """
    dirstate = d._repo.dirstate
    result = dirstate.copies().copy()
    for dst in list(result):
        if dirstate[dst] not in 'anm':
            # destination no longer tracked
            del result[dst]
        elif match and not match(dst):
            del result[dst]
    return result
149 149
def _computeforwardmissing(a, b, match=None):
    """Return the files present in context ``b`` but not in ``a``.

    Kept as a standalone helper so extensions can easily wrap it to observe
    which files _forwardcopies is about to process.
    """
    manifest_a = a.manifest()
    manifest_b = b.manifest()
    return manifest_b.filesnotin(manifest_a, match=match)
158 158
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst@b: src@a} mapping.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        # no common ancestor found; disable the limit optimization
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # one shared ancestry context keeps the per-file linkrev adjustment
    # cheap (see _makegetfctx for why this matters)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()
    return cm
193 193
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if b.rev() is not None:
        # both endpoints are committed changesets
        return _committedforwardcopies(a, b, match)

    # b is the working copy, so (part of) the answer lives in the dirstate
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b, match)

    # trace committed copies up to b's first parent, then splice the
    # dirstate copies on top
    committed = _committedforwardcopies(a, b.p1(), match)
    return _chain(a, b, committed, _dirstatecopies(b, match))
207 207
def _backwardrenames(a, b):
    """Trace renames backwards, from ``b`` to its ancestor ``a``.

    Builds on _forwardcopies(b, a) and inverts it, dropping entries whose
    source still exists in ``a`` (those are copies, not renames).
    """
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    # sorted() keeps the arbitrary pick deterministic; items() instead of
    # the Python 2-only iteritems() keeps this working under Python 3
    for k, v in sorted(f.items()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
223 223
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    # degenerate comparisons carry no copy information
    if x == y or not x or not y:
        return {}
    ancestor = y.ancestor(x)
    if ancestor == x:
        # x is an ancestor of y: a pure forward walk suffices
        return _forwardcopies(x, y, match=match)
    if ancestor == y:
        # y is an ancestor of x: a pure backward walk suffices
        return _backwardrenames(x, y)
    # otherwise: back from x to the common ancestor, then forward to y
    backward = _backwardrenames(x, ancestor)
    forward = _forwardcopies(ancestor, y, match=match)
    return _chain(x, y, backward, forward)
235 235
236 236 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
237 237 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
238 238 and c2. This is its own function so extensions can easily wrap this call
239 239 to see what files mergecopies is about to process.
240 240
241 241 Even though c1 and c2 are not used in this function, they are useful in
242 242 other extensions for being able to read the file nodes of the changed files.
243 243
244 244 "baselabel" can be passed to help distinguish the multiple computations
245 245 done in the graft case.
246 246 """
247 247 u1 = sorted(addedinm1 - addedinm2)
248 248 u2 = sorted(addedinm2 - addedinm1)
249 249
250 250 header = " unmatched files in %s"
251 251 if baselabel:
252 252 header += ' (from %s)' % baselabel
253 253 if u1:
254 254 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
255 255 if u2:
256 256 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
257 257 return u1, u2
258 258
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # build the ancestry context lazily and cache it on ctx; for the
        # working directory (rev is None) anchor it on the parents instead
        revs = [rev]
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        # f: path, n: file node
        if n in node.wdirnodes: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize: _checkcopies may request the same (path, node) repeatedly
    return util.lrucachefunc(makectx)
293 293
294 294 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
295 295 """combine partial copy paths"""
296 296 remainder = {}
297 297 for f in copyfrom:
298 298 if f in copyto:
299 299 finalcopy[copyto[f]] = copyfrom[f]
300 300 del copyto[f]
301 301 for f in incompletediverge:
302 302 assert f not in diverge
303 303 ic = incompletediverge[f]
304 304 if ic[0] in copyto:
305 305 diverge[f] = [copyto[ic[0]], ic[1]]
306 306 else:
307 307 remainder[f] = ic
308 308 return remainder
309 309
def mergecopies(repo, c1, c2, base):
    """Find moves and copies between contexts c1 and c2 relevant for merging,
    dispatching to a copytracing algorithm chosen by configuration.  'base'
    is used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to
    merge files that were moved/copied in one merge parent and modified in
    another.  For example, rebasing a commit that modifies a.txt onto a
    branch where a.txt was renamed to b.txt: without copytracing the user
    sees ```other changed <file> which local deleted```.

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete"
    and "dirmove".

    "copy" maps destination name -> source name, where source is in c1 and
    destination is in c2 or vice-versa.

    "movewithdir" maps source name -> destination name, where the file at
    source, present in one context but not the other, needs to be moved to
    destination because the other context moved the directory it is in.

    "diverge" maps source name -> list of destination names for divergent
    renames.

    "renamedelete" maps source name -> list of destination names for files
    deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" maps detected source dir -> destination dir renames.  This is
    needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')

    # This check deliberately sits below the node == p1 logic above, which
    # is required for a simple copy to be kept across a rebase.
    if copytracing == 'off':
        return {}, {}, {}, {}, {}
    if copytracing == 'heuristics':
        # Full copytracing is fast enough (and strictly more capable) when
        # only non-public revisions are involved, so prefer it then.
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    return _fullcopytracing(repo, c1, c2, base)
379 379
def _isfullcopytraceable(repo, c1, base):
    """Decide whether the full copytracing algorithm is affordable here.

    Returns True when both ``base`` and ``c1`` are mutable (non-public) and
    the changeset count between them stays below the
    `experimental.copytrace.sourcecommitlimit` config value.
    """
    if c1.rev() is None:
        # working directory: judge by its first parent
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        return False
    sourcecommitlimit = repo.ui.configint('experimental',
                                          'copytrace.sourcecommitlimit')
    commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
    return commits < sourcecommitlimit
397 397
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns the five dicts documented in mergecopies():
    (copy, movewithdir, diverge, renamedelete, dirmove).
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.descendant(wc) and base.descendant(base) are False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not (base == _c1 or base.descendant(_c1))
    dirtyc2 = not (base == _c2 or base.descendant(_c2))
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb)
    addedinm2 = m2.filesnotin(mb)
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
                                      m2.filesnotin(mta),
                                      baselabel='topological common ancestor')

    # trace each side's unmatched files back through renames
    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    # stitch together partial copy chains found on opposite sides
    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    # iterate over a snapshot since entries are deleted from diverge below
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc + "/")
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc + "/")
        elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
            # files from the same directory moved to two different places
            invalid.add(dsrc + "/")
        else:
            # looks good so far
            dirmove[dsrc + "/"] = ddst + "/"

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
629 629
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same five dicts as mergecopies(); only the "copy" dict is
    ever populated by this algorithm.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    # items() rather than the Python 2-only iteritems() for py3 compatibility
    for dst, src in cp.items():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    # NOTE: materialize a real list here. The previous filter(...) call
    # returned a lazy iterator on Python 3, which is always truthy, so the
    # "if missingfiles:" test below would misfire for an empty result.
    missingfiles = [f for f in changedfiles
                    if f not in m1 and f in base and f in c2]

    if missingfiles:
        # index the files added in c1 (relative to base) by basename and by
        # directory, to find rename candidates quickly
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # in case of a rebase/graft, base may not be a common ancestor
        anc = c1.ancestor(c2)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2, anc.rev()):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
733 733
734 734 def _related(f1, f2, limit):
735 735 """return True if f1 and f2 filectx have a common ancestor
736 736
737 737 Walk back to common ancestor to see if the two files originate
738 738 from the same file. Since workingfilectx's rev() is None it messes
739 739 up the integer comparison logic, hence the pre-step check for
740 740 None (f1 and f2 can only be workingfilectx's initially).
741 741 """
742 742
743 743 if f1 == f2:
744 744 return f1 # a match
745 745
746 746 g1, g2 = f1.ancestors(), f2.ancestors()
747 747 try:
748 748 f1r, f2r = f1.linkrev(), f2.linkrev()
749 749
750 750 if f1r is None:
751 751 f1 = next(g1)
752 752 if f2r is None:
753 753 f2 = next(g2)
754 754
755 755 while True:
756 756 f1r, f2r = f1.linkrev(), f2.linkrev()
757 757 if f1r > f2r:
758 758 f1 = next(g1)
759 759 elif f2r > f1r:
760 760 f2 = next(g2)
761 761 elif f1 == f2:
762 762 return f1 # a match
763 763 elif f1r == f2r or f1r < limit or f2r < limit:
764 764 return False # copy no longer relevant
765 765 except StopIteration:
766 766 return False
767 767
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    # 'seen' holds every path the file has had along the traced ancestry
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2, tca.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    # the loop ended without resolving a full copy; record what we know
    # about the partial chain (of is the oldest path seen, or None if the
    # ancestry was empty)
    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return
861 861
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    if (skiprev is not None and
        repo.ui.config('experimental', 'copytrace') != 'off'):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    # items() rather than the Python 2-only iteritems() keeps this working
    # under Python 3
    for dst, src in pathcopies(repo[fromrev], repo[rev]).items():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now