narrow: fix copies._fullcopytracing() narrowspec filtering in graft case...
Martin von Zweigbergk
r40489:07a66c13 stable
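The hunk below updates _fullcopytracing() so that, in the graft case, the unmatched files against the topological common ancestor are also filtered by repo.narrowmatch(): m1.filesnotin(mta) and m2.filesnotin(mta) become m1.filesnotin(mta, repo.narrowmatch()) and m2.filesnotin(mta, repo.narrowmatch()), mirroring the filtering already applied when diffing against the merge base. A minimal sketch of what that match argument does, using toy dicts and a plain predicate in place of real manifests and the narrow matcher (this is not Mercurial's API):

# Toy stand-in for manifest.filesnotin(other, match=...): files present in
# m1 but not in m2, restricted to the narrow match when one is given.
def filesnotin(m1, m2, match=None):
    return {f for f in m1 if f not in m2 and (match is None or match(f))}

m1 = {'inside/f1': 'n1', 'inside/f2': 'n2', 'outside/f3': 'n3'}  # toy manifest
mta = {'inside/f1': 'n1'}                                        # toy ancestor manifest
narrowmatch = lambda f: f.startswith('inside/')                  # toy narrowspec

print(sorted(filesnotin(m1, mta)))               # ['inside/f2', 'outside/f3']
print(sorted(filesnotin(m1, mta, narrowmatch)))  # ['inside/f2']

Without the matcher, outside/f3 ends up among the files to copy-trace even though a narrow clone has no filelog for it, which is presumably what caused the 'no match found' abort removed from the test at the end of this change.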
@@ -1,922 +1,923
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import heapq
12 12 import os
13 13
14 14 from .i18n import _
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 scmutil,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
27 27 def _findlimit(repo, a, b):
28 28 """
29 29 Find the last revision that needs to be checked to ensure that a full
30 30 transitive closure for file copies can be properly calculated.
31 31 Generally, this means finding the earliest revision number that's an
32 32 ancestor of a or b but not both, except when a or b is a direct descendant
33 33 of the other, in which case we can return the minimum revnum of a and b.
34 34 None if no such revision exists.
35 35 """
36 36
37 37 # basic idea:
38 38 # - mark a and b with different sides
39 39 # - if a parent's children are all on the same side, the parent is
40 40 # on that side, otherwise it is on no side
41 41 # - walk the graph in topological order with the help of a heap;
42 42 # - add unseen parents to side map
43 43 # - clear side of any parent that has children on different sides
44 44 # - track number of interesting revs that might still be on a side
45 45 # - track the lowest interesting rev seen
46 46 # - quit when interesting revs is zero
47 47
48 48 cl = repo.changelog
49 49 working = len(cl) # pseudo rev for the working directory
50 50 if a is None:
51 51 a = working
52 52 if b is None:
53 53 b = working
54 54
55 55 side = {a: -1, b: 1}
56 56 visit = [-a, -b]
57 57 heapq.heapify(visit)
58 58 interesting = len(visit)
59 59 hascommonancestor = False
60 60 limit = working
61 61
62 62 while interesting:
63 63 r = -heapq.heappop(visit)
64 64 if r == working:
65 65 parents = [cl.rev(p) for p in repo.dirstate.parents()]
66 66 else:
67 67 parents = cl.parentrevs(r)
68 68 for p in parents:
69 69 if p < 0:
70 70 continue
71 71 if p not in side:
72 72 # first time we see p; add it to visit
73 73 side[p] = side[r]
74 74 if side[p]:
75 75 interesting += 1
76 76 heapq.heappush(visit, -p)
77 77 elif side[p] and side[p] != side[r]:
78 78 # p was interesting but now we know better
79 79 side[p] = 0
80 80 interesting -= 1
81 81 hascommonancestor = True
82 82 if side[r]:
83 83 limit = r # lowest rev visited
84 84 interesting -= 1
85 85
86 86 if not hascommonancestor:
87 87 return None
88 88
89 89 # Consider the following flow (see test-commit-amend.t under issue4405):
90 90 # 1/ File 'a0' committed
91 91 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
92 92 # 3/ Move back to first commit
93 93 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
94 94 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
95 95 #
96 96 # During the amend in step five, we will be in this state:
97 97 #
98 98 # @ 3 temporary amend commit for a1-amend
99 99 # |
100 100 # o 2 a1-amend
101 101 # |
102 102 # | o 1 a1
103 103 # |/
104 104 # o 0 a0
105 105 #
106 106 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
107 107 # yet the filelog has the copy information in rev 1 and we will not look
108 108 # back far enough unless we also look at the a and b as candidates.
109 109 # This only occurs when a is a descendant of b or vice-versa.
110 110 return min(limit, a, b)
111 111
112 112 def _chain(src, dst, a, b):
113 113 """chain two sets of copies a->b"""
114 114 t = a.copy()
115 115 for k, v in b.iteritems():
116 116 if v in t:
117 117 # found a chain
118 118 if t[v] != k:
119 119 # file wasn't renamed back to itself
120 120 t[k] = t[v]
121 121 if v not in dst:
122 122 # chain was a rename, not a copy
123 123 del t[v]
124 124 if v in src:
125 125 # file is a copy of an existing file
126 126 t[k] = v
127 127
128 128 # remove criss-crossed copies
129 129 for k, v in list(t.items()):
130 130 if k in src and v in dst:
131 131 del t[k]
132 132
133 133 return t
134 134
135 135 def _tracefile(fctx, am, limit=-1):
136 136 """return file context that is the ancestor of fctx present in ancestor
137 137 manifest am, stopping after the first ancestor lower than limit"""
138 138
139 139 for f in fctx.ancestors():
140 140 if am.get(f.path(), None) == f.filenode():
141 141 return f
142 142 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
143 143 return None
144 144
145 145 def _dirstatecopies(d, match=None):
146 146 ds = d._repo.dirstate
147 147 c = ds.copies().copy()
148 148 for k in list(c):
149 149 if ds[k] not in 'anm' or (match and not match(k)):
150 150 del c[k]
151 151 return c
152 152
153 153 def _computeforwardmissing(a, b, match=None):
154 154 """Computes which files are in b but not a.
155 155 This is its own function so extensions can easily wrap this call to see what
156 156 files _forwardcopies is about to process.
157 157 """
158 158 ma = a.manifest()
159 159 mb = b.manifest()
160 160 return mb.filesnotin(ma, match=match)
161 161
162 162 def _committedforwardcopies(a, b, match):
163 163 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
164 164 # files might have to be traced back to the fctx parent of the last
165 165 # one-side-only changeset, but not further back than that
166 166 repo = a._repo
167 167 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
168 168 dbg = repo.ui.debug
169 169 if debug:
170 170 dbg('debug.copies: looking into rename from %s to %s\n'
171 171 % (a, b))
172 172 limit = _findlimit(repo, a.rev(), b.rev())
173 173 if limit is None:
174 174 limit = -1
175 175 if debug:
176 176 dbg('debug.copies: search limit: %d\n' % limit)
177 177 am = a.manifest()
178 178
179 179 # find where new files came from
180 180 # we currently don't try to find where old files went, too expensive
181 181 # this means we can miss a case like 'hg rm b; hg cp a b'
182 182 cm = {}
183 183
184 184 # Computing the forward missing is quite expensive on large manifests, since
185 185 # it compares the entire manifests. We can optimize it in the common use
186 186 # case of computing what copies are in a commit versus its parent (like
187 187 # during a rebase or histedit). Note, we exclude merge commits from this
188 188 # optimization, since the ctx.files() for a merge commit is not correct for
189 189 # this comparison.
190 190 forwardmissingmatch = match
191 191 if b.p1() == a and b.p2().node() == node.nullid:
192 192 filesmatcher = scmutil.matchfiles(a._repo, b.files())
193 193 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
194 194 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
195 195
196 196 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
197 197
198 198 if debug:
199 199 dbg('debug.copies: missing file to search: %d\n' % len(missing))
200 200
201 201 for f in missing:
202 202 if debug:
203 203 dbg('debug.copies: tracing file: %s\n' % f)
204 204 fctx = b[f]
205 205 fctx._ancestrycontext = ancestrycontext
206 206
207 207 if debug:
208 208 start = util.timer()
209 209 ofctx = _tracefile(fctx, am, limit)
210 210 if ofctx:
211 211 if debug:
212 212 dbg('debug.copies: rename of: %s\n' % ofctx._path)
213 213 cm[f] = ofctx.path()
214 214 if debug:
215 215 dbg('debug.copies: time: %f seconds\n'
216 216 % (util.timer() - start))
217 217 return cm
218 218
219 219 def _forwardcopies(a, b, match=None):
220 220 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
221 221
222 222 match = a.repo().narrowmatch(match)
223 223 # check for working copy
224 224 if b.rev() is None:
225 225 if a == b.p1():
226 226 # short-circuit to avoid issues with merge states
227 227 return _dirstatecopies(b, match)
228 228
229 229 cm = _committedforwardcopies(a, b.p1(), match)
230 230 # combine copies from dirstate if necessary
231 231 return _chain(a, b, cm, _dirstatecopies(b, match))
232 232 return _committedforwardcopies(a, b, match)
233 233
234 234 def _backwardrenames(a, b):
235 235 if a._repo.ui.config('experimental', 'copytrace') == 'off':
236 236 return {}
237 237
238 238 # Even though we're not taking copies into account, 1:n rename situations
239 239 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
240 240 # arbitrarily pick one of the renames.
241 241 f = _forwardcopies(b, a)
242 242 r = {}
243 243 for k, v in sorted(f.iteritems()):
244 244 # remove copies
245 245 if v in a:
246 246 continue
247 247 r[v] = k
248 248 return r
249 249
250 250 def pathcopies(x, y, match=None):
251 251 """find {dst@y: src@x} copy mapping for directed compare"""
252 252 repo = x._repo
253 253 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
254 254 if debug:
255 255 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
256 256 % (x, y))
257 257 if x == y or not x or not y:
258 258 return {}
259 259 a = y.ancestor(x)
260 260 if a == x:
261 261 if debug:
262 262 repo.ui.debug('debug.copies: search mode: forward\n')
263 263 return _forwardcopies(x, y, match=match)
264 264 if a == y:
265 265 if debug:
266 266 repo.ui.debug('debug.copies: search mode: backward\n')
267 267 return _backwardrenames(x, y)
268 268 if debug:
269 269 repo.ui.debug('debug.copies: search mode: combined\n')
270 270 return _chain(x, y, _backwardrenames(x, a),
271 271 _forwardcopies(a, y, match=match))
272 272
273 273 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
274 274 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
275 275 and c2. This is its own function so extensions can easily wrap this call
276 276 to see what files mergecopies is about to process.
277 277
278 278 Even though c1 and c2 are not used in this function, they are useful in
279 279 other extensions for being able to read the file nodes of the changed files.
280 280
281 281 "baselabel" can be passed to help distinguish the multiple computations
282 282 done in the graft case.
283 283 """
284 284 u1 = sorted(addedinm1 - addedinm2)
285 285 u2 = sorted(addedinm2 - addedinm1)
286 286
287 287 header = " unmatched files in %s"
288 288 if baselabel:
289 289 header += ' (from %s)' % baselabel
290 290 if u1:
291 291 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
292 292 if u2:
293 293 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
294 294
295 295 return u1, u2
296 296
297 297 def _makegetfctx(ctx):
298 298 """return a 'getfctx' function suitable for _checkcopies usage
299 299
300 300 We have to re-setup the function building 'filectx' for each
301 301 '_checkcopies' to ensure the linkrev adjustment is properly setup for
302 302 each. Linkrev adjustment is important to avoid bugs in rename
303 303 detection. Moreover, having a proper '_ancestrycontext' setup ensures
304 304 the performance impact of this adjustment is kept limited. Without it,
305 305 each file could do a full dag traversal making the time complexity of
306 306 the operation explode (see issue4537).
307 307
308 308 This function exists here mostly to limit the impact on stable. Feel
309 309 free to refactor on default.
310 310 """
311 311 rev = ctx.rev()
312 312 repo = ctx._repo
313 313 ac = getattr(ctx, '_ancestrycontext', None)
314 314 if ac is None:
315 315 revs = [rev]
316 316 if rev is None:
317 317 revs = [p.rev() for p in ctx.parents()]
318 318 ac = repo.changelog.ancestors(revs, inclusive=True)
319 319 ctx._ancestrycontext = ac
320 320 def makectx(f, n):
321 321 if n in node.wdirfilenodeids: # in a working context?
322 322 if ctx.rev() is None:
323 323 return ctx.filectx(f)
324 324 return repo[None][f]
325 325 fctx = repo.filectx(f, fileid=n)
326 326 # setup only needed for filectx not created from a changectx
327 327 fctx._ancestrycontext = ac
328 328 fctx._descendantrev = rev
329 329 return fctx
330 330 return util.lrucachefunc(makectx)
331 331
332 332 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
333 333 """combine partial copy paths"""
334 334 remainder = {}
335 335 for f in copyfrom:
336 336 if f in copyto:
337 337 finalcopy[copyto[f]] = copyfrom[f]
338 338 del copyto[f]
339 339 for f in incompletediverge:
340 340 assert f not in diverge
341 341 ic = incompletediverge[f]
342 342 if ic[0] in copyto:
343 343 diverge[f] = [copyto[ic[0]], ic[1]]
344 344 else:
345 345 remainder[f] = ic
346 346 return remainder
347 347
348 348 def mergecopies(repo, c1, c2, base):
349 349 """
350 350 Calls different copytracing algorithms, based on config, to find moves and
351 351 copies between contexts c1 and c2 that are relevant for
352 352 merging. 'base' will be used as the merge base.
353 353
354 354 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
355 355 files that were moved/copied in one merge parent and modified in another.
356 356 For example:
357 357
358 358 o ---> 4 another commit
359 359 |
360 360 | o ---> 3 commit that modifies a.txt
361 361 | /
362 362 o / ---> 2 commit that moves a.txt to b.txt
363 363 |/
364 364 o ---> 1 merge base
365 365
366 366 If we try to rebase revision 3 onto revision 4, since there is no a.txt in
367 367 revision 4, and the user has copytrace disabled, we print the following
368 368 message:
369 369
370 370 ```other changed <file> which local deleted```
371 371
372 372 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
373 373 "dirmove".
374 374
375 375 "copy" is a mapping from destination name -> source name,
376 376 where source is in c1 and destination is in c2 or vice-versa.
377 377
378 378 "movewithdir" is a mapping from source name -> destination name,
379 379 where the file at source, present in one context but not the other,
380 380 needs to be moved to destination by the merge process, because the
381 381 other context moved the directory it is in.
382 382
383 383 "diverge" is a mapping of source name -> list of destination names
384 384 for divergent renames.
385 385
386 386 "renamedelete" is a mapping of source name -> list of destination
387 387 names for files deleted in c1 that were renamed in c2 or vice-versa.
388 388
389 389 "dirmove" is a mapping of detected source dir -> destination dir renames.
390 390 This is needed for handling changes to new files previously grafted into
391 391 renamed directories.
392 392 """
393 393 # avoid silly behavior for update from empty dir
394 394 if not c1 or not c2 or c1 == c2:
395 395 return {}, {}, {}, {}, {}
396 396
397 397 # avoid silly behavior for parent -> working dir
398 398 if c2.node() is None and c1.node() == repo.dirstate.p1():
399 399 return repo.dirstate.copies(), {}, {}, {}, {}
400 400
401 401 copytracing = repo.ui.config('experimental', 'copytrace')
402 402 boolctrace = stringutil.parsebool(copytracing)
403 403
404 404 # Copy trace disabling is explicitly below the node == p1 logic above
405 405 # because the logic above is required for a simple copy to be kept across a
406 406 # rebase.
407 407 if copytracing == 'heuristics':
408 408 # Do full copytracing if only non-public revisions are involved as
409 409 # that will be fast enough and will also cover the copies which could
410 410 # be missed by heuristics
411 411 if _isfullcopytraceable(repo, c1, base):
412 412 return _fullcopytracing(repo, c1, c2, base)
413 413 return _heuristicscopytracing(repo, c1, c2, base)
414 414 elif boolctrace is False:
415 415 # stringutil.parsebool() returns None when it is unable to parse the
416 416 # value, so we should make sure copytracing stays on in such cases
417 417 return {}, {}, {}, {}, {}
418 418 else:
419 419 return _fullcopytracing(repo, c1, c2, base)
420 420
421 421 def _isfullcopytraceable(repo, c1, base):
422 422 """ Checks whether base, source and destination are all non-public branches;
423 423 if so, use the full copytrace algorithm for increased capabilities
424 424 since it will be fast enough.
425 425
426 426 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
427 427 the number of changesets from c1 to base; if the number of changesets exceeds
428 428 the limit, the full copytracing algorithm won't be used.
429 429 """
430 430 if c1.rev() is None:
431 431 c1 = c1.p1()
432 432 if c1.mutable() and base.mutable():
433 433 sourcecommitlimit = repo.ui.configint('experimental',
434 434 'copytrace.sourcecommitlimit')
435 435 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
436 436 return commits < sourcecommitlimit
437 437 return False
438 438
439 439 def _fullcopytracing(repo, c1, c2, base):
440 440 """ The full copytracing algorithm which finds all the new files that were
441 441 added from the merge base up to the top commit and, for each file, checks
442 442 whether it was copied from another file.
443 443
444 444 This is pretty slow when a lot of changesets are involved but will track all
445 445 the copies.
446 446 """
447 447 # In certain scenarios (e.g. graft, update or rebase), base can be
448 448 # overridden. We still need to know a real common ancestor in this case. We
449 449 # can't just compute _c1.ancestor(_c2) and compare it to base, because there
450 450 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
451 451 # caller may not know if the revision passed in lieu of the CA is a genuine
452 452 # common ancestor or not without explicitly checking it, it's better to
453 453 # determine that here.
454 454 #
455 455 # base.isancestorof(wc) is False, work around that
456 456 _c1 = c1.p1() if c1.rev() is None else c1
457 457 _c2 = c2.p1() if c2.rev() is None else c2
458 458 # an endpoint is "dirty" if it isn't a descendant of the merge base
459 459 # if we have a dirty endpoint, we need to trigger graft logic, and also
460 460 # keep track of which endpoint is dirty
461 461 dirtyc1 = not base.isancestorof(_c1)
462 462 dirtyc2 = not base.isancestorof(_c2)
463 463 graft = dirtyc1 or dirtyc2
464 464 tca = base
465 465 if graft:
466 466 tca = _c1.ancestor(_c2)
467 467
468 468 limit = _findlimit(repo, c1.rev(), c2.rev())
469 469 if limit is None:
470 470 # no common ancestor, no copies
471 471 return {}, {}, {}, {}, {}
472 472 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
473 473
474 474 m1 = c1.manifest()
475 475 m2 = c2.manifest()
476 476 mb = base.manifest()
477 477
478 478 # gather data from _checkcopies:
479 479 # - diverge = record all diverges in this dict
480 480 # - copy = record all non-divergent copies in this dict
481 481 # - fullcopy = record all copies in this dict
482 482 # - incomplete = record non-divergent partial copies here
483 483 # - incompletediverge = record divergent partial copies here
484 484 diverge = {} # divergence data is shared
485 485 incompletediverge = {}
486 486 data1 = {'copy': {},
487 487 'fullcopy': {},
488 488 'incomplete': {},
489 489 'diverge': diverge,
490 490 'incompletediverge': incompletediverge,
491 491 }
492 492 data2 = {'copy': {},
493 493 'fullcopy': {},
494 494 'incomplete': {},
495 495 'diverge': diverge,
496 496 'incompletediverge': incompletediverge,
497 497 }
498 498
499 499 # find interesting file sets from manifests
500 500 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
501 501 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
502 502 bothnew = sorted(addedinm1 & addedinm2)
503 503 if tca == base:
504 504 # unmatched file from base
505 505 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
506 506 u1u, u2u = u1r, u2r
507 507 else:
508 508 # unmatched file from base (DAG rotation in the graft case)
509 509 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
510 510 baselabel='base')
511 511 # unmatched file from topological common ancestors (no DAG rotation)
512 512 # need to recompute this for directory move handling when grafting
513 513 mta = tca.manifest()
514 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
515 m2.filesnotin(mta),
514 u1u, u2u = _computenonoverlap(repo, c1, c2,
515 m1.filesnotin(mta, repo.narrowmatch()),
516 m2.filesnotin(mta, repo.narrowmatch()),
516 517 baselabel='topological common ancestor')
517 518
518 519 for f in u1u:
519 520 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
520 521
521 522 for f in u2u:
522 523 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
523 524
524 525 copy = dict(data1['copy'])
525 526 copy.update(data2['copy'])
526 527 fullcopy = dict(data1['fullcopy'])
527 528 fullcopy.update(data2['fullcopy'])
528 529
529 530 if dirtyc1:
530 531 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
531 532 incompletediverge)
532 533 else:
533 534 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
534 535 incompletediverge)
535 536
536 537 renamedelete = {}
537 538 renamedeleteset = set()
538 539 divergeset = set()
539 540 for of, fl in list(diverge.items()):
540 541 if len(fl) == 1 or of in c1 or of in c2:
541 542 del diverge[of] # not actually divergent, or not a rename
542 543 if of not in c1 and of not in c2:
543 544 # renamed on one side, deleted on the other side, but filter
544 545 # out files that have been renamed and then deleted
545 546 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
546 547 renamedeleteset.update(fl) # reverse map for below
547 548 else:
548 549 divergeset.update(fl) # reverse map for below
549 550
550 551 if bothnew:
551 552 repo.ui.debug(" unmatched files new in both:\n %s\n"
552 553 % "\n ".join(bothnew))
553 554 bothdiverge = {}
554 555 bothincompletediverge = {}
555 556 remainder = {}
556 557 both1 = {'copy': {},
557 558 'fullcopy': {},
558 559 'incomplete': {},
559 560 'diverge': bothdiverge,
560 561 'incompletediverge': bothincompletediverge
561 562 }
562 563 both2 = {'copy': {},
563 564 'fullcopy': {},
564 565 'incomplete': {},
565 566 'diverge': bothdiverge,
566 567 'incompletediverge': bothincompletediverge
567 568 }
568 569 for f in bothnew:
569 570 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
570 571 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
571 572 if dirtyc1:
572 573 # incomplete copies may only be found on the "dirty" side for bothnew
573 574 assert not both2['incomplete']
574 575 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
575 576 bothincompletediverge)
576 577 elif dirtyc2:
577 578 assert not both1['incomplete']
578 579 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
579 580 bothincompletediverge)
580 581 else:
581 582 # incomplete copies and divergences can't happen outside grafts
582 583 assert not both1['incomplete']
583 584 assert not both2['incomplete']
584 585 assert not bothincompletediverge
585 586 for f in remainder:
586 587 assert f not in bothdiverge
587 588 ic = remainder[f]
588 589 if ic[0] in (m1 if dirtyc1 else m2):
589 590 # backed-out rename on one side, but watch out for deleted files
590 591 bothdiverge[f] = ic
591 592 for of, fl in bothdiverge.items():
592 593 if len(fl) == 2 and fl[0] == fl[1]:
593 594 copy[fl[0]] = of # not actually divergent, just matching renames
594 595
595 596 if fullcopy and repo.ui.debugflag:
596 597 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
597 598 "% = renamed and deleted):\n")
598 599 for f in sorted(fullcopy):
599 600 note = ""
600 601 if f in copy:
601 602 note += "*"
602 603 if f in divergeset:
603 604 note += "!"
604 605 if f in renamedeleteset:
605 606 note += "%"
606 607 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
607 608 note))
608 609 del divergeset
609 610
610 611 if not fullcopy:
611 612 return copy, {}, diverge, renamedelete, {}
612 613
613 614 repo.ui.debug(" checking for directory renames\n")
614 615
615 616 # generate a directory move map
616 617 d1, d2 = c1.dirs(), c2.dirs()
617 618 # Hack for adding '', which is not otherwise added, to d1 and d2
618 619 d1.addpath('/')
619 620 d2.addpath('/')
620 621 invalid = set()
621 622 dirmove = {}
622 623
623 624 # examine each file copy for a potential directory move, which is
624 625 # when all the files in a directory are moved to a new directory
625 626 for dst, src in fullcopy.iteritems():
626 627 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
627 628 if dsrc in invalid:
628 629 # already seen to be uninteresting
629 630 continue
630 631 elif dsrc in d1 and ddst in d1:
631 632 # directory wasn't entirely moved locally
632 633 invalid.add(dsrc)
633 634 elif dsrc in d2 and ddst in d2:
634 635 # directory wasn't entirely moved remotely
635 636 invalid.add(dsrc)
636 637 elif dsrc in dirmove and dirmove[dsrc] != ddst:
637 638 # files from the same directory moved to two different places
638 639 invalid.add(dsrc)
639 640 else:
640 641 # looks good so far
641 642 dirmove[dsrc] = ddst
642 643
643 644 for i in invalid:
644 645 if i in dirmove:
645 646 del dirmove[i]
646 647 del d1, d2, invalid
647 648
648 649 if not dirmove:
649 650 return copy, {}, diverge, renamedelete, {}
650 651
651 652 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
652 653
653 654 for d in dirmove:
654 655 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
655 656 (d, dirmove[d]))
656 657
657 658 movewithdir = {}
658 659 # check unaccounted nonoverlapping files against directory moves
659 660 for f in u1r + u2r:
660 661 if f not in fullcopy:
661 662 for d in dirmove:
662 663 if f.startswith(d):
663 664 # new file added in a directory that was moved, move it
664 665 df = dirmove[d] + f[len(d):]
665 666 if df not in copy:
666 667 movewithdir[f] = df
667 668 repo.ui.debug((" pending file src: '%s' -> "
668 669 "dst: '%s'\n") % (f, df))
669 670 break
670 671
671 672 return copy, movewithdir, diverge, renamedelete, dirmove
672 673
673 674 def _heuristicscopytracing(repo, c1, c2, base):
674 675 """ Fast copytracing using filename heuristics
675 676
676 677 Assumes that moves or renames are of following two types:
677 678
678 679 1) Inside a directory only (same directory name but different filenames)
679 680 2) Move from one directory to another
680 681 (same filenames but different directory names)
681 682
682 683 Works only when there are no merge commits in the "source branch".
683 684 The source branch is the commits from base up to c2, not including base.
684 685
685 686 If a merge is involved it falls back to _fullcopytracing().
686 687
687 688 Can be used by setting the following config:
688 689
689 690 [experimental]
690 691 copytrace = heuristics
691 692
692 693 In some cases the copy/move candidates found by heuristics can be very large
693 694 in number and that will make the algorithm slow. The number of possible
694 695 candidates to check can be limited by using the config
695 696 `experimental.copytrace.movecandidateslimit` which defaults to 100.
696 697 """
697 698
698 699 if c1.rev() is None:
699 700 c1 = c1.p1()
700 701 if c2.rev() is None:
701 702 c2 = c2.p1()
702 703
703 704 copies = {}
704 705
705 706 changedfiles = set()
706 707 m1 = c1.manifest()
707 708 if not repo.revs('%d::%d', base.rev(), c2.rev()):
708 709 # If base is not in c2 branch, we switch to fullcopytracing
709 710 repo.ui.debug("switching to full copytracing as base is not "
710 711 "an ancestor of c2\n")
711 712 return _fullcopytracing(repo, c1, c2, base)
712 713
713 714 ctx = c2
714 715 while ctx != base:
715 716 if len(ctx.parents()) == 2:
716 717 # To keep things simple let's not handle merges
717 718 repo.ui.debug("switching to full copytracing because of merges\n")
718 719 return _fullcopytracing(repo, c1, c2, base)
719 720 changedfiles.update(ctx.files())
720 721 ctx = ctx.p1()
721 722
722 723 cp = _forwardcopies(base, c2)
723 724 for dst, src in cp.iteritems():
724 725 if src in m1:
725 726 copies[dst] = src
726 727
727 728 # file is missing if it isn't present in the destination, but is present in
728 729 # the base and present in the source.
729 730 # Presence in the base is important to exclude added files, presence in the
730 731 # source is important to exclude removed files.
731 732 filt = lambda f: f not in m1 and f in base and f in c2
732 733 missingfiles = [f for f in changedfiles if filt(f)]
733 734
734 735 if missingfiles:
735 736 basenametofilename = collections.defaultdict(list)
736 737 dirnametofilename = collections.defaultdict(list)
737 738
738 739 for f in m1.filesnotin(base.manifest()):
739 740 basename = os.path.basename(f)
740 741 dirname = os.path.dirname(f)
741 742 basenametofilename[basename].append(f)
742 743 dirnametofilename[dirname].append(f)
743 744
744 745 for f in missingfiles:
745 746 basename = os.path.basename(f)
746 747 dirname = os.path.dirname(f)
747 748 samebasename = basenametofilename[basename]
748 749 samedirname = dirnametofilename[dirname]
749 750 movecandidates = samebasename + samedirname
750 751 # f is guaranteed to be present in c2, that's why
751 752 # c2.filectx(f) won't fail
752 753 f2 = c2.filectx(f)
753 754 # we can have a lot of candidates which can slow down the heuristics
754 755 # config value to limit the number of candidates moves to check
755 756 maxcandidates = repo.ui.configint('experimental',
756 757 'copytrace.movecandidateslimit')
757 758
758 759 if len(movecandidates) > maxcandidates:
759 760 repo.ui.status(_("skipping copytracing for '%s', more "
760 761 "candidates than the limit: %d\n")
761 762 % (f, len(movecandidates)))
762 763 continue
763 764
764 765 for candidate in movecandidates:
765 766 f1 = c1.filectx(candidate)
766 767 if _related(f1, f2):
767 768 # if there are a few related copies then we'll merge
768 769 # changes into all of them. This matches the behaviour
769 770 # of upstream copytracing
770 771 copies[candidate] = f
771 772
772 773 return copies, {}, {}, {}, {}
773 774
774 775 def _related(f1, f2):
775 776 """return True if f1 and f2 filectx have a common ancestor
776 777
777 778 Walk back to common ancestor to see if the two files originate
778 779 from the same file. Since workingfilectx's rev() is None it messes
779 780 up the integer comparison logic, hence the pre-step check for
780 781 None (f1 and f2 can only be workingfilectx's initially).
781 782 """
782 783
783 784 if f1 == f2:
784 785 return f1 # a match
785 786
786 787 g1, g2 = f1.ancestors(), f2.ancestors()
787 788 try:
788 789 f1r, f2r = f1.linkrev(), f2.linkrev()
789 790
790 791 if f1r is None:
791 792 f1 = next(g1)
792 793 if f2r is None:
793 794 f2 = next(g2)
794 795
795 796 while True:
796 797 f1r, f2r = f1.linkrev(), f2.linkrev()
797 798 if f1r > f2r:
798 799 f1 = next(g1)
799 800 elif f2r > f1r:
800 801 f2 = next(g2)
801 802 else: # f1 and f2 point to files in the same linkrev
802 803 return f1 == f2 # true if they point to the same file
803 804 except StopIteration:
804 805 return False
805 806
806 807 def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
807 808 """
808 809 check possible copies of f from msrc to mdst
809 810
810 811 srcctx = starting context for f in msrc
811 812 dstctx = destination context for f in mdst
812 813 f = the filename to check (as in msrc)
813 814 base = the changectx used as a merge base
814 815 tca = topological common ancestor for graft-like scenarios
815 816 remotebase = True if base is outside tca::srcctx, False otherwise
816 817 limit = the rev number to not search beyond
817 818 data = dictionary of dictionary to store copy data. (see mergecopies)
818 819
819 820 note: limit is only an optimization, and provides no guarantee that
820 821 irrelevant revisions will not be visited
821 822 there is no easy way to make this algorithm stop in a guaranteed way
822 823 once it "goes behind a certain revision".
823 824 """
824 825
825 826 msrc = srcctx.manifest()
826 827 mdst = dstctx.manifest()
827 828 mb = base.manifest()
828 829 mta = tca.manifest()
829 830 # Might be true if this call is about finding backward renames;
830 831 # This happens in the case of grafts because the DAG is then rotated.
831 832 # If the file exists in both the base and the source, we are not looking
832 833 # for a rename on the source side, but on the part of the DAG that is
833 834 # traversed backwards.
834 835 #
835 836 # In the case there is both backward and forward renames (before and after
836 837 # the base) this is more complicated as we must detect a divergence.
837 838 # We use 'backwards = False' in that case.
838 839 backwards = not remotebase and base != tca and f in mb
839 840 getsrcfctx = _makegetfctx(srcctx)
840 841 getdstfctx = _makegetfctx(dstctx)
841 842
842 843 if msrc[f] == mb.get(f) and not remotebase:
843 844 # Nothing to merge
844 845 return
845 846
846 847 of = None
847 848 seen = {f}
848 849 for oc in getsrcfctx(f, msrc[f]).ancestors():
849 850 of = oc.path()
850 851 if of in seen:
851 852 # check limit late - grab last rename before
852 853 if oc.linkrev() < limit:
853 854 break
854 855 continue
855 856 seen.add(of)
856 857
857 858 # remember for dir rename detection
858 859 if backwards:
859 860 data['fullcopy'][of] = f # grafting backwards through renames
860 861 else:
861 862 data['fullcopy'][f] = of
862 863 if of not in mdst:
863 864 continue # no match, keep looking
864 865 if mdst[of] == mb.get(of):
865 866 return # no merge needed, quit early
866 867 c2 = getdstfctx(of, mdst[of])
867 868 # c2 might be a plain new file added on the destination side that is
868 869 # unrelated to the droids we are looking for.
869 870 cr = _related(oc, c2)
870 871 if cr and (of == f or of == c2.path()): # non-divergent
871 872 if backwards:
872 873 data['copy'][of] = f
873 874 elif of in mb:
874 875 data['copy'][f] = of
875 876 elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
876 877 data['copy'][of] = f
877 878 del data['fullcopy'][f]
878 879 data['fullcopy'][of] = f
879 880 else: # divergence w.r.t. graft CA on one side of topological CA
880 881 for sf in seen:
881 882 if sf in mb:
882 883 assert sf not in data['diverge']
883 884 data['diverge'][sf] = [f, of]
884 885 break
885 886 return
886 887
887 888 if of in mta:
888 889 if backwards or remotebase:
889 890 data['incomplete'][of] = f
890 891 else:
891 892 for sf in seen:
892 893 if sf in mb:
893 894 if tca == base:
894 895 data['diverge'].setdefault(sf, []).append(f)
895 896 else:
896 897 data['incompletediverge'][sf] = [of, f]
897 898 return
898 899
899 900 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
900 901 """reproduce copies from fromrev to rev in the dirstate
901 902
902 903 If skiprev is specified, it's a revision that should be used to
903 904 filter copy records. Any copies that occur between fromrev and
904 905 skiprev will not be duplicated, even if they appear in the set of
905 906 copies between fromrev and rev.
906 907 """
907 908 exclude = {}
908 909 ctraceconfig = repo.ui.config('experimental', 'copytrace')
909 910 bctrace = stringutil.parsebool(ctraceconfig)
910 911 if (skiprev is not None and
911 912 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
912 913 # copytrace='off' skips this line, but not the entire function because
913 914 # the line below is O(size of the repo) during a rebase, while the rest
914 915 # of the function is much faster (and is required for carrying copy
915 916 # metadata across the rebase anyway).
916 917 exclude = pathcopies(repo[fromrev], repo[skiprev])
917 918 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
918 919 # copies.pathcopies returns backward renames, so dst might not
919 920 # actually be in the dirstate
920 921 if dst in exclude:
921 922 continue
922 923 wctx[dst].markcopied(src)
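As background on how _forwardcopies() above combines committed copies with dirstate copies, the following is a self-contained rendering of the _chain() logic from this file, adapted to Python 3 (items() instead of iteritems()) and with plain sets standing in for the changectx membership tests; it is an illustration, not the code Mercurial runs:

def chain(src, dst, a, b):
    '''Chain copies a ({dst@mid: src@start}) and b ({dst@end: src@mid})
    into {dst@end: src@start}; src/dst are the file sets at the endpoints.'''
    t = dict(a)
    for k, v in b.items():
        if v in t:
            # found a chain: k came from v, which itself came from t[v]
            if t[v] != k:
                # file wasn't renamed back to itself
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            # file is a copy of a file that already existed at the start
            t[k] = v
    # remove criss-crossed copies
    for k, v in list(t.items()):
        if k in src and v in dst:
            del t[k]
    return t

# start had only 'a'; the middle revision renamed a->b; the end renamed b->c:
print(chain({'a'}, {'c'}, {'b': 'a'}, {'c': 'b'}))            # {'c': 'a'}
# a->b followed by b->a cancels out, so no copy record survives:
print(chain({'a', 'b'}, {'a', 'b'}, {'b': 'a'}, {'a': 'b'}))  # {}

The narrow-clone test below exercises the graft-case path that the hunk above fixes.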
@@ -1,75 +1,74
1 1
2 2 $ . "$TESTDIR/narrow-library.sh"
3 3
4 4 create full repo
5 5
6 6 $ hg init master
7 7 $ cd master
8 8
9 9 $ mkdir inside
10 10 $ echo inside > inside/f1
11 11 $ mkdir outside
12 12 $ echo outside > outside/f2
13 13 $ hg ci -Aqm 'initial'
14 14
15 15 $ hg mv outside/f2 inside/f2
16 16 $ hg ci -qm 'move f2 from outside'
17 17
18 18 $ echo modified > inside/f2
19 19 $ hg ci -qm 'modify inside/f2'
20 20
21 21 $ mkdir outside
22 22 $ echo new > outside/f3
23 23 $ hg ci -Aqm 'add outside/f3'
24 24 $ cd ..
25 25
26 26 $ hg clone --narrow ssh://user@dummy/master narrow --include inside -r 2
27 27 adding changesets
28 28 adding manifests
29 29 adding file changes
30 30 added 3 changesets with 3 changes to 2 files
31 31 new changesets *:* (glob)
32 32 updating to branch default
33 33 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
34 34 $ cd narrow
35 35
36 36 $ hg co 'desc("move f2")'
37 37 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
38 38 $ hg status
39 39 $ hg diff
40 40 $ hg diff --change . --git
41 41 diff --git a/inside/f2 b/inside/f2
42 42 new file mode 100644
43 43 --- /dev/null
44 44 +++ b/inside/f2
45 45 @@ -0,0 +1,1 @@
46 46 +outside
47 47
48 48 $ hg log --follow inside/f2 -r tip
49 49 changeset: 2:bcfb756e0ca9
50 50 tag: tip
51 51 user: test
52 52 date: Thu Jan 01 00:00:00 1970 +0000
53 53 summary: modify inside/f2
54 54
55 55 changeset: 1:5a016133b2bb
56 56 user: test
57 57 date: Thu Jan 01 00:00:00 1970 +0000
58 58 summary: move f2 from outside
59 59
60 60 $ echo new > inside/f4
61 61 $ hg ci -Aqm 'add inside/f4'
62 62 $ hg pull -q
63 63 $ hg --config extensions.rebase= rebase -d tip
64 64 rebasing 3:4f84b666728c "add inside/f4"
65 65 saved backup bundle to $TESTTMP/narrow/.hg/strip-backup/4f84b666728c-4269b76e-rebase.hg
66 66
67 67 $ hg co -q 0
68 68 $ echo modified > inside/f1
69 69 $ hg ci -qm 'modify inside/f1'
70 70 $ echo new > inside/f5
71 71 $ hg ci -Aqm 'add inside/f5'
72 72 $ hg --config extensions.rebase= rebase -d 'public()' -r .
73 73 rebasing 6:610b60178c28 "add inside/f5" (tip)
74 abort: data/outside/f3.i@54e53435331b: no match found!
75 [255]
74 saved backup bundle to $TESTTMP/narrow/.hg/strip-backup/610b60178c28-65716a78-rebase.hg
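For completeness: commands like the rebase above carry copy records into the dirstate via duplicatecopies() (end of copies.py above), which skips any copy already introduced between fromrev and skiprev. A small sketch of that exclusion step with plain dicts standing in for the pathcopies() results (the second file name is made up for illustration):

# {dst: src} copies fromrev->rev and fromrev->skiprev; in copies.py both
# come from pathcopies() on real contexts.
copies_to_rev = {'inside/f2': 'outside/f2', 'inside/f6': 'inside/f1'}
exclude = {'inside/f2': 'outside/f2'}  # already present between fromrev and skiprev

for dst, src in copies_to_rev.items():
    if dst in exclude:
        continue  # don't duplicate a copy that skiprev already carries
    print('markcopied: %s <- %s' % (dst, src))  # wctx[dst].markcopied(src) in the real code
# only inside/f6 <- inside/f1 gets recorded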