##// END OF EJS Templates
copytrace: move the default copytracing algorithm in a new function...
Pulkit Goyal -
r34080:b4b19609 default
parent child Browse files
Show More
@@ -1,743 +1,752 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11
12 12 from . import (
13 13 match as matchmod,
14 14 node,
15 15 pathutil,
16 16 scmutil,
17 17 util,
18 18 )
19 19
20 20 def _findlimit(repo, a, b):
21 21 """
22 22 Find the last revision that needs to be checked to ensure that a full
23 23 transitive closure for file copies can be properly calculated.
24 24 Generally, this means finding the earliest revision number that's an
25 25 ancestor of a or b but not both, except when a or b is a direct descendent
26 26 of the other, in which case we can return the minimum revnum of a and b.
27 27 None if no such revision exists.
28 28 """
29 29
30 30 # basic idea:
31 31 # - mark a and b with different sides
32 32 # - if a parent's children are all on the same side, the parent is
33 33 # on that side, otherwise it is on no side
34 34 # - walk the graph in topological order with the help of a heap;
35 35 # - add unseen parents to side map
36 36 # - clear side of any parent that has children on different sides
37 37 # - track number of interesting revs that might still be on a side
38 38 # - track the lowest interesting rev seen
39 39 # - quit when interesting revs is zero
40 40
41 41 cl = repo.changelog
42 42 working = len(cl) # pseudo rev for the working directory
43 43 if a is None:
44 44 a = working
45 45 if b is None:
46 46 b = working
47 47
48 48 side = {a: -1, b: 1}
49 49 visit = [-a, -b]
50 50 heapq.heapify(visit)
51 51 interesting = len(visit)
52 52 hascommonancestor = False
53 53 limit = working
54 54
55 55 while interesting:
56 56 r = -heapq.heappop(visit)
57 57 if r == working:
58 58 parents = [cl.rev(p) for p in repo.dirstate.parents()]
59 59 else:
60 60 parents = cl.parentrevs(r)
61 61 for p in parents:
62 62 if p < 0:
63 63 continue
64 64 if p not in side:
65 65 # first time we see p; add it to visit
66 66 side[p] = side[r]
67 67 if side[p]:
68 68 interesting += 1
69 69 heapq.heappush(visit, -p)
70 70 elif side[p] and side[p] != side[r]:
71 71 # p was interesting but now we know better
72 72 side[p] = 0
73 73 interesting -= 1
74 74 hascommonancestor = True
75 75 if side[r]:
76 76 limit = r # lowest rev visited
77 77 interesting -= 1
78 78
79 79 if not hascommonancestor:
80 80 return None
81 81
82 82 # Consider the following flow (see test-commit-amend.t under issue4405):
83 83 # 1/ File 'a0' committed
84 84 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
85 85 # 3/ Move back to first commit
86 86 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
87 87 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
88 88 #
89 89 # During the amend in step five, we will be in this state:
90 90 #
91 91 # @ 3 temporary amend commit for a1-amend
92 92 # |
93 93 # o 2 a1-amend
94 94 # |
95 95 # | o 1 a1
96 96 # |/
97 97 # o 0 a0
98 98 #
99 99 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
100 100 # yet the filelog has the copy information in rev 1 and we will not look
101 101 # back far enough unless we also look at the a and b as candidates.
102 102 # This only occurs when a is a descendent of b or visa-versa.
103 103 return min(limit, a, b)
104 104
105 105 def _chain(src, dst, a, b):
106 106 '''chain two sets of copies a->b'''
107 107 t = a.copy()
108 108 for k, v in b.iteritems():
109 109 if v in t:
110 110 # found a chain
111 111 if t[v] != k:
112 112 # file wasn't renamed back to itself
113 113 t[k] = t[v]
114 114 if v not in dst:
115 115 # chain was a rename, not a copy
116 116 del t[v]
117 117 if v in src:
118 118 # file is a copy of an existing file
119 119 t[k] = v
120 120
121 121 # remove criss-crossed copies
122 122 for k, v in t.items():
123 123 if k in src and v in dst:
124 124 del t[k]
125 125
126 126 return t
127 127
128 128 def _tracefile(fctx, am, limit=-1):
129 129 '''return file context that is the ancestor of fctx present in ancestor
130 130 manifest am, stopping after the first ancestor lower than limit'''
131 131
132 132 for f in fctx.ancestors():
133 133 if am.get(f.path(), None) == f.filenode():
134 134 return f
135 135 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
136 136 return None
137 137
138 138 def _dirstatecopies(d):
139 139 ds = d._repo.dirstate
140 140 c = ds.copies().copy()
141 141 for k in c.keys():
142 142 if ds[k] not in 'anm':
143 143 del c[k]
144 144 return c
145 145
146 146 def _computeforwardmissing(a, b, match=None):
147 147 """Computes which files are in b but not a.
148 148 This is its own function so extensions can easily wrap this call to see what
149 149 files _forwardcopies is about to process.
150 150 """
151 151 ma = a.manifest()
152 152 mb = b.manifest()
153 153 return mb.filesnotin(ma, match=match)
154 154
155 155 def _forwardcopies(a, b, match=None):
156 156 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
157 157
158 158 # check for working copy
159 159 w = None
160 160 if b.rev() is None:
161 161 w = b
162 162 b = w.p1()
163 163 if a == b:
164 164 # short-circuit to avoid issues with merge states
165 165 return _dirstatecopies(w)
166 166
167 167 # files might have to be traced back to the fctx parent of the last
168 168 # one-side-only changeset, but not further back than that
169 169 limit = _findlimit(a._repo, a.rev(), b.rev())
170 170 if limit is None:
171 171 limit = -1
172 172 am = a.manifest()
173 173
174 174 # find where new files came from
175 175 # we currently don't try to find where old files went, too expensive
176 176 # this means we can miss a case like 'hg rm b; hg cp a b'
177 177 cm = {}
178 178
179 179 # Computing the forward missing is quite expensive on large manifests, since
180 180 # it compares the entire manifests. We can optimize it in the common use
181 181 # case of computing what copies are in a commit versus its parent (like
182 182 # during a rebase or histedit). Note, we exclude merge commits from this
183 183 # optimization, since the ctx.files() for a merge commit is not correct for
184 184 # this comparison.
185 185 forwardmissingmatch = match
186 186 if b.p1() == a and b.p2().node() == node.nullid:
187 187 filesmatcher = scmutil.matchfiles(a._repo, b.files())
188 188 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
189 189 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
190 190
191 191 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
192 192 for f in missing:
193 193 fctx = b[f]
194 194 fctx._ancestrycontext = ancestrycontext
195 195 ofctx = _tracefile(fctx, am, limit)
196 196 if ofctx:
197 197 cm[f] = ofctx.path()
198 198
199 199 # combine copies from dirstate if necessary
200 200 if w is not None:
201 201 cm = _chain(a, w, cm, _dirstatecopies(w))
202 202
203 203 return cm
204 204
205 205 def _backwardrenames(a, b):
206 206 if a._repo.ui.config('experimental', 'copytrace') == 'off':
207 207 return {}
208 208
209 209 # Even though we're not taking copies into account, 1:n rename situations
210 210 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
211 211 # arbitrarily pick one of the renames.
212 212 f = _forwardcopies(b, a)
213 213 r = {}
214 214 for k, v in sorted(f.iteritems()):
215 215 # remove copies
216 216 if v in a:
217 217 continue
218 218 r[v] = k
219 219 return r
220 220
221 221 def pathcopies(x, y, match=None):
222 222 '''find {dst@y: src@x} copy mapping for directed compare'''
223 223 if x == y or not x or not y:
224 224 return {}
225 225 a = y.ancestor(x)
226 226 if a == x:
227 227 return _forwardcopies(x, y, match=match)
228 228 if a == y:
229 229 return _backwardrenames(x, y)
230 230 return _chain(x, y, _backwardrenames(x, a),
231 231 _forwardcopies(a, y, match=match))
232 232
233 233 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
234 234 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
235 235 and c2. This is its own function so extensions can easily wrap this call
236 236 to see what files mergecopies is about to process.
237 237
238 238 Even though c1 and c2 are not used in this function, they are useful in
239 239 other extensions for being able to read the file nodes of the changed files.
240 240
241 241 "baselabel" can be passed to help distinguish the multiple computations
242 242 done in the graft case.
243 243 """
244 244 u1 = sorted(addedinm1 - addedinm2)
245 245 u2 = sorted(addedinm2 - addedinm1)
246 246
247 247 header = " unmatched files in %s"
248 248 if baselabel:
249 249 header += ' (from %s)' % baselabel
250 250 if u1:
251 251 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
252 252 if u2:
253 253 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
254 254 return u1, u2
255 255
256 256 def _makegetfctx(ctx):
257 257 """return a 'getfctx' function suitable for _checkcopies usage
258 258
259 259 We have to re-setup the function building 'filectx' for each
260 260 '_checkcopies' to ensure the linkrev adjustment is properly setup for
261 261 each. Linkrev adjustment is important to avoid bug in rename
262 262 detection. Moreover, having a proper '_ancestrycontext' setup ensures
263 263 the performance impact of this adjustment is kept limited. Without it,
264 264 each file could do a full dag traversal making the time complexity of
265 265 the operation explode (see issue4537).
266 266
267 267 This function exists here mostly to limit the impact on stable. Feel
268 268 free to refactor on default.
269 269 """
270 270 rev = ctx.rev()
271 271 repo = ctx._repo
272 272 ac = getattr(ctx, '_ancestrycontext', None)
273 273 if ac is None:
274 274 revs = [rev]
275 275 if rev is None:
276 276 revs = [p.rev() for p in ctx.parents()]
277 277 ac = repo.changelog.ancestors(revs, inclusive=True)
278 278 ctx._ancestrycontext = ac
279 279 def makectx(f, n):
280 280 if n in node.wdirnodes: # in a working context?
281 281 if ctx.rev() is None:
282 282 return ctx.filectx(f)
283 283 return repo[None][f]
284 284 fctx = repo.filectx(f, fileid=n)
285 285 # setup only needed for filectx not create from a changectx
286 286 fctx._ancestrycontext = ac
287 287 fctx._descendantrev = rev
288 288 return fctx
289 289 return util.lrucachefunc(makectx)
290 290
291 291 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
292 292 """combine partial copy paths"""
293 293 remainder = {}
294 294 for f in copyfrom:
295 295 if f in copyto:
296 296 finalcopy[copyto[f]] = copyfrom[f]
297 297 del copyto[f]
298 298 for f in incompletediverge:
299 299 assert f not in diverge
300 300 ic = incompletediverge[f]
301 301 if ic[0] in copyto:
302 302 diverge[f] = [copyto[ic[0]], ic[1]]
303 303 else:
304 304 remainder[f] = ic
305 305 return remainder
306 306
307 307 def mergecopies(repo, c1, c2, base):
308 308 """
309 The basic algorithm for copytracing. Copytracing is used in commands like
310 rebase, merge, unshelve, etc to merge files that were moved/ copied in one
311 merge parent and modified in another. For example:
309 The function calling different copytracing algorithms on the basis of config
310 which find moves and copies between context c1 and c2 that are relevant for
311 merging. 'base' will be used as the merge base.
312
313 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
314 files that were moved/ copied in one merge parent and modified in another.
315 For example:
312 316
313 317 o ---> 4 another commit
314 318 |
315 319 | o ---> 3 commit that modifies a.txt
316 320 | /
317 321 o / ---> 2 commit that moves a.txt to b.txt
318 322 |/
319 323 o ---> 1 merge base
320 324
321 325 If we try to rebase revision 3 on revision 4, since there is no a.txt in
322 326 revision 4, and if user have copytrace disabled, we prints the following
323 327 message:
324 328
325 329 ```other changed <file> which local deleted```
326 330
327 If copytrace is enabled, this function finds all the new files that were
328 added from merge base up to the top commit (here 4), and for each file it
329 checks if this file was copied from another file (a.txt in the above case).
330
331 Find moves and copies between context c1 and c2 that are relevant
332 for merging. 'base' will be used as the merge base.
333
334 331 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
335 332 "dirmove".
336 333
337 334 "copy" is a mapping from destination name -> source name,
338 335 where source is in c1 and destination is in c2 or vice-versa.
339 336
340 337 "movewithdir" is a mapping from source name -> destination name,
341 338 where the file at source present in one context but not the other
342 339 needs to be moved to destination by the merge process, because the
343 340 other context moved the directory it is in.
344 341
345 342 "diverge" is a mapping of source name -> list of destination names
346 343 for divergent renames.
347 344
348 345 "renamedelete" is a mapping of source name -> list of destination
349 346 names for files deleted in c1 that were renamed in c2 or vice-versa.
350 347
351 348 "dirmove" is a mapping of detected source dir -> destination dir renames.
352 349 This is needed for handling changes to new files previously grafted into
353 350 renamed directories.
354 351 """
355 352 # avoid silly behavior for update from empty dir
356 353 if not c1 or not c2 or c1 == c2:
357 354 return {}, {}, {}, {}, {}
358 355
359 356 # avoid silly behavior for parent -> working dir
360 357 if c2.node() is None and c1.node() == repo.dirstate.p1():
361 358 return repo.dirstate.copies(), {}, {}, {}, {}
362 359
360 copytracing = repo.ui.config('experimental', 'copytrace')
361
363 362 # Copy trace disabling is explicitly below the node == p1 logic above
364 363 # because the logic above is required for a simple copy to be kept across a
365 364 # rebase.
366 if repo.ui.config('experimental', 'copytrace') == 'off':
365 if copytracing == 'off':
367 366 return {}, {}, {}, {}, {}
367 else:
368 return _fullcopytracing(repo, c1, c2, base)
368 369
370 def _fullcopytracing(repo, c1, c2, base):
371 """ The full copytracing algorithm which finds all the new files that were
372 added from merge base up to the top commit and for each file it checks if
373 this file was copied from another file.
374
375 This is pretty slow when a lot of changesets are involved but will track all
376 the copies.
377 """
369 378 # In certain scenarios (e.g. graft, update or rebase), base can be
370 379 # overridden We still need to know a real common ancestor in this case We
371 380 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
372 381 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
373 382 # caller may not know if the revision passed in lieu of the CA is a genuine
374 383 # common ancestor or not without explicitly checking it, it's better to
375 384 # determine that here.
376 385 #
377 386 # base.descendant(wc) and base.descendant(base) are False, work around that
378 387 _c1 = c1.p1() if c1.rev() is None else c1
379 388 _c2 = c2.p1() if c2.rev() is None else c2
380 389 # an endpoint is "dirty" if it isn't a descendant of the merge base
381 390 # if we have a dirty endpoint, we need to trigger graft logic, and also
382 391 # keep track of which endpoint is dirty
383 392 dirtyc1 = not (base == _c1 or base.descendant(_c1))
384 393 dirtyc2 = not (base == _c2 or base.descendant(_c2))
385 394 graft = dirtyc1 or dirtyc2
386 395 tca = base
387 396 if graft:
388 397 tca = _c1.ancestor(_c2)
389 398
390 399 limit = _findlimit(repo, c1.rev(), c2.rev())
391 400 if limit is None:
392 401 # no common ancestor, no copies
393 402 return {}, {}, {}, {}, {}
394 403 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
395 404
396 405 m1 = c1.manifest()
397 406 m2 = c2.manifest()
398 407 mb = base.manifest()
399 408
400 409 # gather data from _checkcopies:
401 410 # - diverge = record all diverges in this dict
402 411 # - copy = record all non-divergent copies in this dict
403 412 # - fullcopy = record all copies in this dict
404 413 # - incomplete = record non-divergent partial copies here
405 414 # - incompletediverge = record divergent partial copies here
406 415 diverge = {} # divergence data is shared
407 416 incompletediverge = {}
408 417 data1 = {'copy': {},
409 418 'fullcopy': {},
410 419 'incomplete': {},
411 420 'diverge': diverge,
412 421 'incompletediverge': incompletediverge,
413 422 }
414 423 data2 = {'copy': {},
415 424 'fullcopy': {},
416 425 'incomplete': {},
417 426 'diverge': diverge,
418 427 'incompletediverge': incompletediverge,
419 428 }
420 429
421 430 # find interesting file sets from manifests
422 431 addedinm1 = m1.filesnotin(mb)
423 432 addedinm2 = m2.filesnotin(mb)
424 433 bothnew = sorted(addedinm1 & addedinm2)
425 434 if tca == base:
426 435 # unmatched file from base
427 436 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
428 437 u1u, u2u = u1r, u2r
429 438 else:
430 439 # unmatched file from base (DAG rotation in the graft case)
431 440 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
432 441 baselabel='base')
433 442 # unmatched file from topological common ancestors (no DAG rotation)
434 443 # need to recompute this for directory move handling when grafting
435 444 mta = tca.manifest()
436 445 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
437 446 m2.filesnotin(mta),
438 447 baselabel='topological common ancestor')
439 448
440 449 for f in u1u:
441 450 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
442 451
443 452 for f in u2u:
444 453 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
445 454
446 455 copy = dict(data1['copy'])
447 456 copy.update(data2['copy'])
448 457 fullcopy = dict(data1['fullcopy'])
449 458 fullcopy.update(data2['fullcopy'])
450 459
451 460 if dirtyc1:
452 461 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
453 462 incompletediverge)
454 463 else:
455 464 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
456 465 incompletediverge)
457 466
458 467 renamedelete = {}
459 468 renamedeleteset = set()
460 469 divergeset = set()
461 470 for of, fl in diverge.items():
462 471 if len(fl) == 1 or of in c1 or of in c2:
463 472 del diverge[of] # not actually divergent, or not a rename
464 473 if of not in c1 and of not in c2:
465 474 # renamed on one side, deleted on the other side, but filter
466 475 # out files that have been renamed and then deleted
467 476 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
468 477 renamedeleteset.update(fl) # reverse map for below
469 478 else:
470 479 divergeset.update(fl) # reverse map for below
471 480
472 481 if bothnew:
473 482 repo.ui.debug(" unmatched files new in both:\n %s\n"
474 483 % "\n ".join(bothnew))
475 484 bothdiverge = {}
476 485 bothincompletediverge = {}
477 486 remainder = {}
478 487 both1 = {'copy': {},
479 488 'fullcopy': {},
480 489 'incomplete': {},
481 490 'diverge': bothdiverge,
482 491 'incompletediverge': bothincompletediverge
483 492 }
484 493 both2 = {'copy': {},
485 494 'fullcopy': {},
486 495 'incomplete': {},
487 496 'diverge': bothdiverge,
488 497 'incompletediverge': bothincompletediverge
489 498 }
490 499 for f in bothnew:
491 500 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
492 501 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
493 502 if dirtyc1:
494 503 # incomplete copies may only be found on the "dirty" side for bothnew
495 504 assert not both2['incomplete']
496 505 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
497 506 bothincompletediverge)
498 507 elif dirtyc2:
499 508 assert not both1['incomplete']
500 509 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
501 510 bothincompletediverge)
502 511 else:
503 512 # incomplete copies and divergences can't happen outside grafts
504 513 assert not both1['incomplete']
505 514 assert not both2['incomplete']
506 515 assert not bothincompletediverge
507 516 for f in remainder:
508 517 assert f not in bothdiverge
509 518 ic = remainder[f]
510 519 if ic[0] in (m1 if dirtyc1 else m2):
511 520 # backed-out rename on one side, but watch out for deleted files
512 521 bothdiverge[f] = ic
513 522 for of, fl in bothdiverge.items():
514 523 if len(fl) == 2 and fl[0] == fl[1]:
515 524 copy[fl[0]] = of # not actually divergent, just matching renames
516 525
517 526 if fullcopy and repo.ui.debugflag:
518 527 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
519 528 "% = renamed and deleted):\n")
520 529 for f in sorted(fullcopy):
521 530 note = ""
522 531 if f in copy:
523 532 note += "*"
524 533 if f in divergeset:
525 534 note += "!"
526 535 if f in renamedeleteset:
527 536 note += "%"
528 537 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
529 538 note))
530 539 del divergeset
531 540
532 541 if not fullcopy:
533 542 return copy, {}, diverge, renamedelete, {}
534 543
535 544 repo.ui.debug(" checking for directory renames\n")
536 545
537 546 # generate a directory move map
538 547 d1, d2 = c1.dirs(), c2.dirs()
539 548 # Hack for adding '', which is not otherwise added, to d1 and d2
540 549 d1.addpath('/')
541 550 d2.addpath('/')
542 551 invalid = set()
543 552 dirmove = {}
544 553
545 554 # examine each file copy for a potential directory move, which is
546 555 # when all the files in a directory are moved to a new directory
547 556 for dst, src in fullcopy.iteritems():
548 557 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
549 558 if dsrc in invalid:
550 559 # already seen to be uninteresting
551 560 continue
552 561 elif dsrc in d1 and ddst in d1:
553 562 # directory wasn't entirely moved locally
554 563 invalid.add(dsrc + "/")
555 564 elif dsrc in d2 and ddst in d2:
556 565 # directory wasn't entirely moved remotely
557 566 invalid.add(dsrc + "/")
558 567 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
559 568 # files from the same directory moved to two different places
560 569 invalid.add(dsrc + "/")
561 570 else:
562 571 # looks good so far
563 572 dirmove[dsrc + "/"] = ddst + "/"
564 573
565 574 for i in invalid:
566 575 if i in dirmove:
567 576 del dirmove[i]
568 577 del d1, d2, invalid
569 578
570 579 if not dirmove:
571 580 return copy, {}, diverge, renamedelete, {}
572 581
573 582 for d in dirmove:
574 583 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
575 584 (d, dirmove[d]))
576 585
577 586 movewithdir = {}
578 587 # check unaccounted nonoverlapping files against directory moves
579 588 for f in u1r + u2r:
580 589 if f not in fullcopy:
581 590 for d in dirmove:
582 591 if f.startswith(d):
583 592 # new file added in a directory that was moved, move it
584 593 df = dirmove[d] + f[len(d):]
585 594 if df not in copy:
586 595 movewithdir[f] = df
587 596 repo.ui.debug((" pending file src: '%s' -> "
588 597 "dst: '%s'\n") % (f, df))
589 598 break
590 599
591 600 return copy, movewithdir, diverge, renamedelete, dirmove
592 601
593 602 def _related(f1, f2, limit):
594 603 """return True if f1 and f2 filectx have a common ancestor
595 604
596 605 Walk back to common ancestor to see if the two files originate
597 606 from the same file. Since workingfilectx's rev() is None it messes
598 607 up the integer comparison logic, hence the pre-step check for
599 608 None (f1 and f2 can only be workingfilectx's initially).
600 609 """
601 610
602 611 if f1 == f2:
603 612 return f1 # a match
604 613
605 614 g1, g2 = f1.ancestors(), f2.ancestors()
606 615 try:
607 616 f1r, f2r = f1.linkrev(), f2.linkrev()
608 617
609 618 if f1r is None:
610 619 f1 = next(g1)
611 620 if f2r is None:
612 621 f2 = next(g2)
613 622
614 623 while True:
615 624 f1r, f2r = f1.linkrev(), f2.linkrev()
616 625 if f1r > f2r:
617 626 f1 = next(g1)
618 627 elif f2r > f1r:
619 628 f2 = next(g2)
620 629 elif f1 == f2:
621 630 return f1 # a match
622 631 elif f1r == f2r or f1r < limit or f2r < limit:
623 632 return False # copy no longer relevant
624 633 except StopIteration:
625 634 return False
626 635
627 636 def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
628 637 """
629 638 check possible copies of f from msrc to mdst
630 639
631 640 srcctx = starting context for f in msrc
632 641 dstctx = destination context for f in mdst
633 642 f = the filename to check (as in msrc)
634 643 base = the changectx used as a merge base
635 644 tca = topological common ancestor for graft-like scenarios
636 645 remotebase = True if base is outside tca::srcctx, False otherwise
637 646 limit = the rev number to not search beyond
638 647 data = dictionary of dictionary to store copy data. (see mergecopies)
639 648
640 649 note: limit is only an optimization, and provides no guarantee that
641 650 irrelevant revisions will not be visited
642 651 there is no easy way to make this algorithm stop in a guaranteed way
643 652 once it "goes behind a certain revision".
644 653 """
645 654
646 655 msrc = srcctx.manifest()
647 656 mdst = dstctx.manifest()
648 657 mb = base.manifest()
649 658 mta = tca.manifest()
650 659 # Might be true if this call is about finding backward renames,
651 660 # This happens in the case of grafts because the DAG is then rotated.
652 661 # If the file exists in both the base and the source, we are not looking
653 662 # for a rename on the source side, but on the part of the DAG that is
654 663 # traversed backwards.
655 664 #
656 665 # In the case there is both backward and forward renames (before and after
657 666 # the base) this is more complicated as we must detect a divergence.
658 667 # We use 'backwards = False' in that case.
659 668 backwards = not remotebase and base != tca and f in mb
660 669 getsrcfctx = _makegetfctx(srcctx)
661 670 getdstfctx = _makegetfctx(dstctx)
662 671
663 672 if msrc[f] == mb.get(f) and not remotebase:
664 673 # Nothing to merge
665 674 return
666 675
667 676 of = None
668 677 seen = {f}
669 678 for oc in getsrcfctx(f, msrc[f]).ancestors():
670 679 ocr = oc.linkrev()
671 680 of = oc.path()
672 681 if of in seen:
673 682 # check limit late - grab last rename before
674 683 if ocr < limit:
675 684 break
676 685 continue
677 686 seen.add(of)
678 687
679 688 # remember for dir rename detection
680 689 if backwards:
681 690 data['fullcopy'][of] = f # grafting backwards through renames
682 691 else:
683 692 data['fullcopy'][f] = of
684 693 if of not in mdst:
685 694 continue # no match, keep looking
686 695 if mdst[of] == mb.get(of):
687 696 return # no merge needed, quit early
688 697 c2 = getdstfctx(of, mdst[of])
689 698 # c2 might be a plain new file on added on destination side that is
690 699 # unrelated to the droids we are looking for.
691 700 cr = _related(oc, c2, tca.rev())
692 701 if cr and (of == f or of == c2.path()): # non-divergent
693 702 if backwards:
694 703 data['copy'][of] = f
695 704 elif of in mb:
696 705 data['copy'][f] = of
697 706 elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
698 707 data['copy'][of] = f
699 708 del data['fullcopy'][f]
700 709 data['fullcopy'][of] = f
701 710 else: # divergence w.r.t. graft CA on one side of topological CA
702 711 for sf in seen:
703 712 if sf in mb:
704 713 assert sf not in data['diverge']
705 714 data['diverge'][sf] = [f, of]
706 715 break
707 716 return
708 717
709 718 if of in mta:
710 719 if backwards or remotebase:
711 720 data['incomplete'][of] = f
712 721 else:
713 722 for sf in seen:
714 723 if sf in mb:
715 724 if tca == base:
716 725 data['diverge'].setdefault(sf, []).append(f)
717 726 else:
718 727 data['incompletediverge'][sf] = [of, f]
719 728 return
720 729
721 730 def duplicatecopies(repo, rev, fromrev, skiprev=None):
722 731 '''reproduce copies from fromrev to rev in the dirstate
723 732
724 733 If skiprev is specified, it's a revision that should be used to
725 734 filter copy records. Any copies that occur between fromrev and
726 735 skiprev will not be duplicated, even if they appear in the set of
727 736 copies between fromrev and rev.
728 737 '''
729 738 exclude = {}
730 739 if (skiprev is not None and
731 740 repo.ui.config('experimental', 'copytrace') != 'off'):
732 741 # copytrace='off' skips this line, but not the entire function because
733 742 # the line below is O(size of the repo) during a rebase, while the rest
734 743 # of the function is much faster (and is required for carrying copy
735 744 # metadata across the rebase anyway).
736 745 exclude = pathcopies(repo[fromrev], repo[skiprev])
737 746 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
738 747 # copies.pathcopies returns backward renames, so dst might not
739 748 # actually be in the dirstate
740 749 if dst in exclude:
741 750 continue
742 751 if repo.dirstate[dst] in "nma":
743 752 repo.dirstate.copy(src, dst)
@@ -1,168 +1,171 b''
1 Test for the full copytracing algorithm
2 =======================================
3
1 4 $ hg init t
2 5 $ cd t
3 6
4 7 $ echo 1 > a
5 8 $ hg ci -qAm "first"
6 9
7 10 $ hg cp a b
8 11 $ hg mv a c
9 12 $ echo 2 >> b
10 13 $ echo 2 >> c
11 14
12 15 $ hg ci -qAm "second"
13 16
14 17 $ hg co -C 0
15 18 1 files updated, 0 files merged, 2 files removed, 0 files unresolved
16 19
17 20 $ echo 0 > a
18 21 $ echo 1 >> a
19 22
20 23 $ hg ci -qAm "other"
21 24
22 25 $ hg merge --debug
23 26 searching for copies back to rev 1
24 27 unmatched files in other:
25 28 b
26 29 c
27 30 all copies found (* = to merge, ! = divergent, % = renamed and deleted):
28 31 src: 'a' -> dst: 'b' *
29 32 src: 'a' -> dst: 'c' *
30 33 checking for directory renames
31 34 resolving manifests
32 35 branchmerge: True, force: False, partial: False
33 36 ancestor: b8bf91eeebbc, local: add3f11052fa+, remote: 17c05bb7fcb6
34 37 preserving a for resolve of b
35 38 preserving a for resolve of c
36 39 removing a
37 40 starting 4 threads for background file closing (?)
38 41 b: remote moved from a -> m (premerge)
39 42 picked tool ':merge' for b (binary False symlink False changedelete False)
40 43 merging a and b to b
41 44 my b@add3f11052fa+ other b@17c05bb7fcb6 ancestor a@b8bf91eeebbc
42 45 premerge successful
43 46 c: remote moved from a -> m (premerge)
44 47 picked tool ':merge' for c (binary False symlink False changedelete False)
45 48 merging a and c to c
46 49 my c@add3f11052fa+ other c@17c05bb7fcb6 ancestor a@b8bf91eeebbc
47 50 premerge successful
48 51 0 files updated, 2 files merged, 0 files removed, 0 files unresolved
49 52 (branch merge, don't forget to commit)
50 53
51 54 file b
52 55 $ cat b
53 56 0
54 57 1
55 58 2
56 59
57 60 file c
58 61 $ cat c
59 62 0
60 63 1
61 64 2
62 65
63 66 Test disabling copy tracing
64 67
65 68 - first verify copy metadata was kept
66 69
67 70 $ hg up -qC 2
68 71 $ hg rebase --keep -d 1 -b 2 --config extensions.rebase=
69 72 rebasing 2:add3f11052fa "other" (tip)
70 73 merging b and a to b
71 74 merging c and a to c
72 75
73 76 $ cat b
74 77 0
75 78 1
76 79 2
77 80
78 81 - next verify copy metadata is lost when disabled
79 82
80 83 $ hg strip -r . --config extensions.strip=
81 84 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
82 85 saved backup bundle to $TESTTMP/t/.hg/strip-backup/550bd84c0cd3-fc575957-backup.hg (glob)
83 86 $ hg up -qC 2
84 87 $ hg rebase --keep -d 1 -b 2 --config extensions.rebase= --config experimental.copytrace=off --config ui.interactive=True << EOF
85 88 > c
86 89 > EOF
87 90 rebasing 2:add3f11052fa "other" (tip)
88 91 other [source] changed a which local [dest] deleted
89 92 use (c)hanged version, leave (d)eleted, or leave (u)nresolved? c
90 93
91 94 $ cat b
92 95 1
93 96 2
94 97
95 98 $ cd ..
96 99
97 100 Verify disabling copy tracing still keeps copies from rebase source
98 101
99 102 $ hg init copydisable
100 103 $ cd copydisable
101 104 $ touch a
102 105 $ hg ci -Aqm 'add a'
103 106 $ touch b
104 107 $ hg ci -Aqm 'add b, c'
105 108 $ hg cp b x
106 109 $ echo x >> x
107 110 $ hg ci -qm 'copy b->x'
108 111 $ hg up -q 1
109 112 $ touch z
110 113 $ hg ci -Aqm 'add z'
111 114 $ hg log -G -T '{rev} {desc}\n'
112 115 @ 3 add z
113 116 |
114 117 | o 2 copy b->x
115 118 |/
116 119 o 1 add b, c
117 120 |
118 121 o 0 add a
119 122
120 123 $ hg rebase -d . -b 2 --config extensions.rebase= --config experimental.copytrace=off
121 124 rebasing 2:6adcf8c12e7d "copy b->x"
122 125 saved backup bundle to $TESTTMP/copydisable/.hg/strip-backup/6adcf8c12e7d-ce4b3e75-rebase.hg (glob)
123 126 $ hg up -q 3
124 127 $ hg log -f x -T '{rev} {desc}\n'
125 128 3 copy b->x
126 129 1 add b, c
127 130
128 131 $ cd ../
129 132
130 133 Verify we duplicate existing copies, instead of detecting them
131 134
132 135 $ hg init copydisable3
133 136 $ cd copydisable3
134 137 $ touch a
135 138 $ hg ci -Aqm 'add a'
136 139 $ hg cp a b
137 140 $ hg ci -Aqm 'copy a->b'
138 141 $ hg mv b c
139 142 $ hg ci -Aqm 'move b->c'
140 143 $ hg up -q 0
141 144 $ hg cp a b
142 145 $ echo b >> b
143 146 $ hg ci -Aqm 'copy a->b (2)'
144 147 $ hg log -G -T '{rev} {desc}\n'
145 148 @ 3 copy a->b (2)
146 149 |
147 150 | o 2 move b->c
148 151 | |
149 152 | o 1 copy a->b
150 153 |/
151 154 o 0 add a
152 155
153 156 $ hg rebase -d 2 -s 3 --config extensions.rebase= --config experimental.copytrace=off
154 157 rebasing 3:47e1a9e6273b "copy a->b (2)" (tip)
155 158 saved backup bundle to $TESTTMP/copydisable3/.hg/strip-backup/47e1a9e6273b-2d099c59-rebase.hg (glob)
156 159
157 160 $ hg log -G -f b
158 161 @ changeset: 3:76024fb4b05b
159 162 : tag: tip
160 163 : user: test
161 164 : date: Thu Jan 01 00:00:00 1970 +0000
162 165 : summary: copy a->b (2)
163 166 :
164 167 o changeset: 0:ac82d8b1f7c4
165 168 user: test
166 169 date: Thu Jan 01 00:00:00 1970 +0000
167 170 summary: add a
168 171
General Comments 0
You need to be logged in to leave comments. Login now