##// END OF EJS Templates
copies: compute a suitable TCA if base turns out to be unsuitable...
Gábor Stefanik -
r30194:8c69c52c default
parent child Browse files
Show More
@@ -1,592 +1,596 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11
12 12 from . import (
13 13 node,
14 14 pathutil,
15 15 scmutil,
16 16 util,
17 17 )
18 18
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.

    repo - the repository (its changelog and dirstate drive the walk)
    a, b - revision numbers; None stands for the working directory
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    # revs are negated so the min-heap pops the highest revision first,
    # yielding a reverse topological walk
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            # the working directory's parents come from the dirstate,
            # not from the changelog
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                # nullrev parent, skip
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
103 103
def _chain(src, dst, a, b):
    '''chain two sets of copies a->b

    src - starting context; 'v in src' tests whether a file exists there
    dst - ending context, tested the same way
    a   - copy map {dst: src} for the first leg
    b   - copy map {dst: src} for the second leg
    Returns the composed copy map describing src -> dst.
    '''
    t = a.copy()
    for k, v in b.iteritems():
        if v in t:
            # found a chain
            if t[v] != k:
                # file wasn't renamed back to itself
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            # file is a copy of an existing file
            t[k] = v

    # remove criss-crossed copies
    # (Python 2's items() returns a list snapshot, so deleting entries
    # while iterating is safe here)
    for k, v in t.items():
        if k in src and v in dst:
            del t[k]

    return t
126 126
127 127 def _tracefile(fctx, am, limit=-1):
128 128 '''return file context that is the ancestor of fctx present in ancestor
129 129 manifest am, stopping after the first ancestor lower than limit'''
130 130
131 131 for f in fctx.ancestors():
132 132 if am.get(f.path(), None) == f.filenode():
133 133 return f
134 134 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
135 135 return None
136 136
137 137 def _dirstatecopies(d):
138 138 ds = d._repo.dirstate
139 139 c = ds.copies().copy()
140 140 for k in c.keys():
141 141 if ds[k] not in 'anm':
142 142 del c[k]
143 143 return c
144 144
145 145 def _computeforwardmissing(a, b, match=None):
146 146 """Computes which files are in b but not a.
147 147 This is its own function so extensions can easily wrap this call to see what
148 148 files _forwardcopies is about to process.
149 149 """
150 150 ma = a.manifest()
151 151 mb = b.manifest()
152 152 if match:
153 153 ma = ma.matches(match)
154 154 mb = mb.matches(match)
155 155 return mb.filesnotin(ma)
156 156
def _forwardcopies(a, b, match=None):
    '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

    # check for working copy
    w = None
    if b.rev() is None:
        # b is the working directory: trace against its first parent and
        # fold the dirstate copies back in at the end
        w = b
        b = w.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(w)

    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        # no common ancestor found; disable the limit optimization
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if not match and b.p1() == a and b.p2().node() == node.nullid:
        forwardmissingmatch = scmutil.matchfiles(a._repo, b.files())
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files so linkrev
    # adjustment stays cheap (see _makegetfctx for the rationale)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()

    # combine copies from dirstate if necessary
    if w is not None:
        cm = _chain(a, w, cm, _dirstatecopies(w))

    return cm
205 205
def _backwardrenames(a, b):
    """find renames from b back to a by inverting _forwardcopies(b, a)

    Plain copies are dropped: if the source still exists in ``a`` the
    file was copied, not renamed, so there is nothing to invert.
    """
    if a._repo.ui.configbool('experimental', 'disablecopytrace'):
        # copy tracing explicitly disabled by configuration
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    # sorted() makes the arbitrary 1:n pick deterministic
    for k, v in sorted(f.iteritems()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
221 221
def pathcopies(x, y, match=None):
    '''find {dst@y: src@x} copy mapping for directed compare'''
    # trivial cases: identical or empty endpoints carry no copies
    if not x or not y or x == y:
        return {}
    ancestor = y.ancestor(x)
    if ancestor == x:
        # x is an ancestor of y: a forward walk suffices
        return _forwardcopies(x, y, match=match)
    if ancestor == y:
        # y is an ancestor of x: a backward walk suffices
        return _backwardrenames(x, y)
    # otherwise go back to the common ancestor, then forward to y
    backward = _backwardrenames(x, ancestor)
    forward = _forwardcopies(ancestor, y, match=match)
    return _chain(x, y, backward, forward)
233 233
234 234 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
235 235 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
236 236 and c2. This is its own function so extensions can easily wrap this call
237 237 to see what files mergecopies is about to process.
238 238
239 239 Even though c1 and c2 are not used in this function, they are useful in
240 240 other extensions for being able to read the file nodes of the changed files.
241 241 """
242 242 u1 = sorted(addedinm1 - addedinm2)
243 243 u2 = sorted(addedinm2 - addedinm1)
244 244
245 245 if u1:
246 246 repo.ui.debug(" unmatched files in local:\n %s\n"
247 247 % "\n ".join(u1))
248 248 if u2:
249 249 repo.ui.debug(" unmatched files in other:\n %s\n"
250 250 % "\n ".join(u2))
251 251 return u1, u2
252 252
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # build (and cache on ctx) the ancestor set used for linkrev
        # adjustment; the working directory uses its parents instead
        revs = [rev]
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if len(n) != 20: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize: _checkcopies requests the same (f, n) pairs repeatedly
    return util.lrucachefunc(makectx)
287 287
def mergecopies(repo, c1, c2, base):
    """
    Find moves and copies between context c1 and c2 that are relevant
    for merging. 'base' will be used as the merge base.

    Returns four dicts: "copy", "movewithdir", "diverge", and
    "renamedelete".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if repo.ui.configbool('experimental', 'disablecopytrace'):
        return {}, {}, {}, {}

    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.descendant(wc) and base.descendant(base) are False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not (base == _c1 or base.descendant(_c1))
    # fixed spacing: 'base== _c2' -> 'base == _c2' to match the line above
    dirtyc2 = not (base == _c2 or base.descendant(_c2))
    graft = dirtyc1 or dirtyc2
    # tca is base itself when base is a genuine common ancestor of both
    # sides, otherwise the real topological common ancestor of the
    # (possibly parent-substituted) endpoints.
    # NOTE(review): tca is computed but not yet consumed below --
    # presumably groundwork for graft-aware copytracing; confirm.
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    diverge = {} # divergence data is shared
    data1 = {'copy': {},
             'fullcopy': {},
             'diverge': diverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'diverge': diverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb)
    addedinm2 = m2.filesnotin(mb)
    u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
    u1u, u2u = u1r, u2r
    bothnew = sorted(addedinm1 & addedinm2)

    for f in u1u:
        _checkcopies(c1, f, m1, m2, base, limit, data1)

    for f in u2u:
        _checkcopies(c2, f, m2, m1, base, limit, data2)

    # Python 2 items() returns lists, so '+' concatenates them
    copy = dict(data1['copy'].items() + data2['copy'].items())
    fullcopy = dict(data1['fullcopy'].items() + data2['fullcopy'].items())

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in diverge.items():
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothdata = {'copy': {},
                'fullcopy': {},
                'diverge': bothdiverge,
               }
    # files added on both sides: trace each against the other side to
    # catch matching renames of the same source
    for f in bothnew:
        _checkcopies(c1, f, m1, m2, base, limit, bothdata)
        _checkcopies(c2, f, m2, m1, base, limit, bothdata)
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc + "/")
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc + "/")
        elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
            # files from the same directory moved to two different places
            invalid.add(dsrc + "/")
        else:
            # looks good so far
            dirmove[dsrc + "/"] = ddst + "/"

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                        break

    return copy, movewithdir, diverge, renamedelete
485 489
def _related(f1, f2, limit):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return f1 # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # workingfilectx: step once to the first real ancestor so the
        # linkrev comparisons below operate on integers
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        # walk the two ancestor chains in lockstep, always advancing the
        # side with the higher linkrev, until they meet or pass limit
        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            elif f1 == f2:
                return f1 # a match
            elif f1r == f2r or f1r < limit or f2r < limit:
                return False # copy no longer relevant
    except StopIteration:
        # one side ran out of ancestors before the chains met
        return False
519 523
def _checkcopies(ctx, f, m1, m2, base, limit, data):
    """
    check possible copies of f from m1 to m2

    ctx = starting context for f in m1
    f = the filename to check (as in m1)
    m1 = the source manifest
    m2 = the destination manifest
    base = the changectx used as a merge base
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and there is no guarantee that
    irrelevant revisions will not be limited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    mb = base.manifest()
    getfctx = _makegetfctx(ctx)

    of = None
    seen = set([f])
    # walk f's filelog ancestry, following renames, looking for a
    # candidate source path ('of') that also exists in m2
    for oc in getfctx(f, m1[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        data['fullcopy'][f] = of # remember for dir rename detection
        if of not in m2:
            continue # no match, keep looking
        if m2[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getfctx(of, m2[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2, base.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            if of in mb:
                data['copy'][f] = of
            return

    if of in mb:
        # no non-divergent match, but the last candidate source exists in
        # the merge base: record it as a divergence
        data['diverge'].setdefault(of, []).append(f)
569 573
def duplicatecopies(repo, rev, fromrev, skiprev=None):
    '''reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    '''
    exclude = {}
    if (skiprev is not None and
        not repo.ui.configbool('experimental', 'disablecopytrace')):
        # disablecopytrace skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            # this copy already happened between fromrev and skiprev
            continue
        if repo.dirstate[dst] in "nma":
            repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now