mergecopies: rename 'ca' to 'base'...
Pierre-Yves David
r30186:f7ed5af3 default
@@ -1,574 +1,574 @@
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11
12 12 from . import (
13 13 node,
14 14 pathutil,
15 15 scmutil,
16 16 util,
17 17 )
18 18
19 19 def _findlimit(repo, a, b):
20 20 """
21 21 Find the last revision that needs to be checked to ensure that a full
22 22 transitive closure for file copies can be properly calculated.
23 23 Generally, this means finding the earliest revision number that's an
24 24 ancestor of a or b but not both, except when a or b is a direct descendant
25 25 of the other, in which case we can return the minimum revnum of a and b.
26 26 None if no such revision exists.
27 27 """
28 28
29 29 # basic idea:
30 30 # - mark a and b with different sides
31 31 # - if a parent's children are all on the same side, the parent is
32 32 # on that side, otherwise it is on no side
33 33 # - walk the graph in topological order with the help of a heap;
34 34 # - add unseen parents to side map
35 35 # - clear side of any parent that has children on different sides
36 36 # - track number of interesting revs that might still be on a side
37 37 # - track the lowest interesting rev seen
38 38 # - quit when interesting revs is zero
39 39
40 40 cl = repo.changelog
41 41 working = len(cl) # pseudo rev for the working directory
42 42 if a is None:
43 43 a = working
44 44 if b is None:
45 45 b = working
46 46
47 47 side = {a: -1, b: 1}
48 48 visit = [-a, -b]
49 49 heapq.heapify(visit)
50 50 interesting = len(visit)
51 51 hascommonancestor = False
52 52 limit = working
53 53
54 54 while interesting:
55 55 r = -heapq.heappop(visit)
56 56 if r == working:
57 57 parents = [cl.rev(p) for p in repo.dirstate.parents()]
58 58 else:
59 59 parents = cl.parentrevs(r)
60 60 for p in parents:
61 61 if p < 0:
62 62 continue
63 63 if p not in side:
64 64 # first time we see p; add it to visit
65 65 side[p] = side[r]
66 66 if side[p]:
67 67 interesting += 1
68 68 heapq.heappush(visit, -p)
69 69 elif side[p] and side[p] != side[r]:
70 70 # p was interesting but now we know better
71 71 side[p] = 0
72 72 interesting -= 1
73 73 hascommonancestor = True
74 74 if side[r]:
75 75 limit = r # lowest rev visited
76 76 interesting -= 1
77 77
78 78 if not hascommonancestor:
79 79 return None
80 80
81 81 # Consider the following flow (see test-commit-amend.t under issue4405):
82 82 # 1/ File 'a0' committed
83 83 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
84 84 # 3/ Move back to first commit
85 85 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
86 86 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
87 87 #
88 88 # During the amend in step five, we will be in this state:
89 89 #
90 90 # @ 3 temporary amend commit for a1-amend
91 91 # |
92 92 # o 2 a1-amend
93 93 # |
94 94 # | o 1 a1
95 95 # |/
96 96 # o 0 a0
97 97 #
98 98 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
99 99 # yet the filelog has the copy information in rev 1 and we will not look
100 100 # back far enough unless we also look at the a and b as candidates.
101 101 # This only occurs when a is a descendant of b or vice-versa.
102 102 return min(limit, a, b)
103 103
104 104 def _chain(src, dst, a, b):
105 105 '''chain two sets of copies a->b'''
106 106 t = a.copy()
107 107 for k, v in b.iteritems():
108 108 if v in t:
109 109 # found a chain
110 110 if t[v] != k:
111 111 # file wasn't renamed back to itself
112 112 t[k] = t[v]
113 113 if v not in dst:
114 114 # chain was a rename, not a copy
115 115 del t[v]
116 116 if v in src:
117 117 # file is a copy of an existing file
118 118 t[k] = v
119 119
120 120 # remove criss-crossed copies
121 121 for k, v in t.items():
122 122 if k in src and v in dst:
123 123 del t[k]
124 124
125 125 return t
126 126
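
For illustration (not part of this changeset), here is a minimal standalone sketch of the chaining logic above. Copy maps are composed through an intermediate revision; plain sets stand in for the 'src' and 'dst' contexts since only membership tests are needed, and the helper name is hypothetical.

    def chain_copies(src, dst, a, b):
        # mirror _chain(): compose a (base -> mid) and b (mid -> head)
        t = dict(a)
        for k, v in b.items():
            if v in t:
                if t[v] != k:         # not renamed back to itself
                    t[k] = t[v]       # extend the chain
                if v not in dst:      # rename, not a copy: drop old entry
                    del t[v]
            if v in src:              # copy of a file still present in src
                t[k] = v
        for k, v in list(t.items()):  # remove criss-crossed copies
            if k in src and v in dst:
                del t[k]
        return t

    # base has 'a'; a commit renames it to 'b'; the working copy then
    # renames 'b' to 'c' -- chaining the two maps yields {'c': 'a'}
    committed = {'b': 'a'}
    workingdir = {'c': 'b'}
    print(chain_copies({'a'}, {'c'}, committed, workingdir))
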
127 127 def _tracefile(fctx, am, limit=-1):
128 128 '''return file context that is the ancestor of fctx present in ancestor
129 129 manifest am, stopping after the first ancestor lower than limit'''
130 130
131 131 for f in fctx.ancestors():
132 132 if am.get(f.path(), None) == f.filenode():
133 133 return f
134 134 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
135 135 return None
136 136
137 137 def _dirstatecopies(d):
138 138 ds = d._repo.dirstate
139 139 c = ds.copies().copy()
140 140 for k in c.keys():
141 141 if ds[k] not in 'anm':
142 142 del c[k]
143 143 return c
144 144
145 145 def _computeforwardmissing(a, b, match=None):
146 146 """Computes which files are in b but not a.
147 147 This is its own function so extensions can easily wrap this call to see what
148 148 files _forwardcopies is about to process.
149 149 """
150 150 ma = a.manifest()
151 151 mb = b.manifest()
152 152 if match:
153 153 ma = ma.matches(match)
154 154 mb = mb.matches(match)
155 155 return mb.filesnotin(ma)
156 156
157 157 def _forwardcopies(a, b, match=None):
158 158 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
159 159
160 160 # check for working copy
161 161 w = None
162 162 if b.rev() is None:
163 163 w = b
164 164 b = w.p1()
165 165 if a == b:
166 166 # short-circuit to avoid issues with merge states
167 167 return _dirstatecopies(w)
168 168
169 169 # files might have to be traced back to the fctx parent of the last
170 170 # one-side-only changeset, but not further back than that
171 171 limit = _findlimit(a._repo, a.rev(), b.rev())
172 172 if limit is None:
173 173 limit = -1
174 174 am = a.manifest()
175 175
176 176 # find where new files came from
177 177 # we currently don't try to find where old files went, too expensive
178 178 # this means we can miss a case like 'hg rm b; hg cp a b'
179 179 cm = {}
180 180
181 181 # Computing the forward missing is quite expensive on large manifests, since
182 182 # it compares the entire manifests. We can optimize it in the common use
183 183 # case of computing what copies are in a commit versus its parent (like
184 184 # during a rebase or histedit). Note, we exclude merge commits from this
185 185 # optimization, since the ctx.files() for a merge commit is not correct for
186 186 # this comparison.
187 187 forwardmissingmatch = match
188 188 if not match and b.p1() == a and b.p2().node() == node.nullid:
189 189 forwardmissingmatch = scmutil.matchfiles(a._repo, b.files())
190 190 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
191 191
192 192 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
193 193 for f in missing:
194 194 fctx = b[f]
195 195 fctx._ancestrycontext = ancestrycontext
196 196 ofctx = _tracefile(fctx, am, limit)
197 197 if ofctx:
198 198 cm[f] = ofctx.path()
199 199
200 200 # combine copies from dirstate if necessary
201 201 if w is not None:
202 202 cm = _chain(a, w, cm, _dirstatecopies(w))
203 203
204 204 return cm
205 205
206 206 def _backwardrenames(a, b):
207 207 if a._repo.ui.configbool('experimental', 'disablecopytrace'):
208 208 return {}
209 209
210 210 # Even though we're not taking copies into account, 1:n rename situations
211 211 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
212 212 # arbitrarily pick one of the renames.
213 213 f = _forwardcopies(b, a)
214 214 r = {}
215 215 for k, v in sorted(f.iteritems()):
216 216 # remove copies
217 217 if v in a:
218 218 continue
219 219 r[v] = k
220 220 return r
221 221
222 222 def pathcopies(x, y, match=None):
223 223 '''find {dst@y: src@x} copy mapping for directed compare'''
224 224 if x == y or not x or not y:
225 225 return {}
226 226 a = y.ancestor(x)
227 227 if a == x:
228 228 return _forwardcopies(x, y, match=match)
229 229 if a == y:
230 230 return _backwardrenames(x, y)
231 231 return _chain(x, y, _backwardrenames(x, a),
232 232 _forwardcopies(a, y, match=match))
233 233
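
As a usage sketch (assuming a repository opened through the Python API of this Mercurial era; the ui/repo setup below is an assumption, not taken from this file), pathcopies() can be queried directly for the {dst: src} mapping between two changesets:

    from mercurial import ui as uimod, hg, copies

    # open the repository in the current directory (hypothetical setup)
    repo = hg.repository(uimod.ui(), '.')

    # copies and renames introduced between the first parent and the
    # working directory; keys are destination paths, values are sources
    renames = copies.pathcopies(repo['.'], repo[None])
    for dst, src in sorted(renames.items()):
        print('%s copied/renamed from %s' % (dst, src))
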
234 234 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
235 235 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
236 236 and c2. This is its own function so extensions can easily wrap this call
237 237 to see what files mergecopies is about to process.
238 238
239 239 Even though c1 and c2 are not used in this function, they are useful in
240 240 other extensions for being able to read the file nodes of the changed files.
241 241 """
242 242 u1 = sorted(addedinm1 - addedinm2)
243 243 u2 = sorted(addedinm2 - addedinm1)
244 244
245 245 if u1:
246 246 repo.ui.debug(" unmatched files in local:\n %s\n"
247 247 % "\n ".join(u1))
248 248 if u2:
249 249 repo.ui.debug(" unmatched files in other:\n %s\n"
250 250 % "\n ".join(u2))
251 251 return u1, u2
252 252
253 253 def _makegetfctx(ctx):
254 254 """return a 'getfctx' function suitable for _checkcopies usage
255 255
256 256 We have to re-setup the function building 'filectx' for each
257 257 '_checkcopies' to ensure the linkrev adjustment is properly setup for
258 258 each. Linkrev adjustment is important to avoid bugs in rename
259 259 detection. Moreover, having a proper '_ancestrycontext' setup ensures
260 260 the performance impact of this adjustment is kept limited. Without it,
261 261 each file could do a full dag traversal making the time complexity of
262 262 the operation explode (see issue4537).
263 263
264 264 This function exists here mostly to limit the impact on stable. Feel
265 265 free to refactor on default.
266 266 """
267 267 rev = ctx.rev()
268 268 repo = ctx._repo
269 269 ac = getattr(ctx, '_ancestrycontext', None)
270 270 if ac is None:
271 271 revs = [rev]
272 272 if rev is None:
273 273 revs = [p.rev() for p in ctx.parents()]
274 274 ac = repo.changelog.ancestors(revs, inclusive=True)
275 275 ctx._ancestrycontext = ac
276 276 def makectx(f, n):
277 277 if len(n) != 20: # in a working context?
278 278 if ctx.rev() is None:
279 279 return ctx.filectx(f)
280 280 return repo[None][f]
281 281 fctx = repo.filectx(f, fileid=n)
282 282 # setup only needed for filectx not created from a changectx
283 283 fctx._ancestrycontext = ac
284 284 fctx._descendantrev = rev
285 285 return fctx
286 286 return util.lrucachefunc(makectx)
287 287
288 def mergecopies(repo, c1, c2, ca):
288 def mergecopies(repo, c1, c2, base):
289 289 """
290 290 Find moves and copies between context c1 and c2 that are relevant
291 for merging.
291 for merging. 'base' will be used as the merge base.
292 292
293 293 Returns four dicts: "copy", "movewithdir", "diverge", and
294 294 "renamedelete".
295 295
296 296 "copy" is a mapping from destination name -> source name,
297 297 where source is in c1 and destination is in c2 or vice-versa.
298 298
299 299 "movewithdir" is a mapping from source name -> destination name,
300 300 where the file at source present in one context but not the other
301 301 needs to be moved to destination by the merge process, because the
302 302 other context moved the directory it is in.
303 303
304 304 "diverge" is a mapping of source name -> list of destination names
305 305 for divergent renames.
306 306
307 307 "renamedelete" is a mapping of source name -> list of destination
308 308 names for files deleted in c1 that were renamed in c2 or vice-versa.
309 309 """
310 310 # avoid silly behavior for update from empty dir
311 311 if not c1 or not c2 or c1 == c2:
312 312 return {}, {}, {}, {}
313 313
314 314 # avoid silly behavior for parent -> working dir
315 315 if c2.node() is None and c1.node() == repo.dirstate.p1():
316 316 return repo.dirstate.copies(), {}, {}, {}
317 317
318 318 # Copy trace disabling is explicitly below the node == p1 logic above
319 319 # because the logic above is required for a simple copy to be kept across a
320 320 # rebase.
321 321 if repo.ui.configbool('experimental', 'disablecopytrace'):
322 322 return {}, {}, {}, {}
323 323
324 324 limit = _findlimit(repo, c1.rev(), c2.rev())
325 325 if limit is None:
326 326 # no common ancestor, no copies
327 327 return {}, {}, {}, {}
328 328 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
329 329
330 330 m1 = c1.manifest()
331 331 m2 = c2.manifest()
332 ma = ca.manifest()
332 mb = base.manifest()
333 333
334 334 # gather data from _checkcopies:
335 335 # - diverge = record all diverges in this dict
336 336 # - copy = record all non-divergent copies in this dict
337 337 # - fullcopy = record all copies in this dict
338 338 diverge = {} # divergence data is shared
339 339 data1 = {'copy': {},
340 340 'fullcopy': {},
341 341 'diverge': diverge,
342 342 }
343 343 data2 = {'copy': {},
344 344 'fullcopy': {},
345 345 'diverge': diverge,
346 346 }
347 347
348 348 # find interesting file sets from manifests
349 addedinm1 = m1.filesnotin(ma)
350 addedinm2 = m2.filesnotin(ma)
349 addedinm1 = m1.filesnotin(mb)
350 addedinm2 = m2.filesnotin(mb)
351 351 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
352 352 u1u, u2u = u1r, u2r
353 353 bothnew = sorted(addedinm1 & addedinm2)
354 354
355 355 for f in u1u:
356 _checkcopies(c1, f, m1, m2, ca, limit, data1)
356 _checkcopies(c1, f, m1, m2, base, limit, data1)
357 357
358 358 for f in u2u:
359 _checkcopies(c2, f, m2, m1, ca, limit, data2)
359 _checkcopies(c2, f, m2, m1, base, limit, data2)
360 360
361 361 copy = dict(data1['copy'].items() + data2['copy'].items())
362 362 fullcopy = dict(data1['fullcopy'].items() + data2['fullcopy'].items())
363 363
364 364 renamedelete = {}
365 365 renamedeleteset = set()
366 366 divergeset = set()
367 367 for of, fl in diverge.items():
368 368 if len(fl) == 1 or of in c1 or of in c2:
369 369 del diverge[of] # not actually divergent, or not a rename
370 370 if of not in c1 and of not in c2:
371 371 # renamed on one side, deleted on the other side, but filter
372 372 # out files that have been renamed and then deleted
373 373 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
374 374 renamedeleteset.update(fl) # reverse map for below
375 375 else:
376 376 divergeset.update(fl) # reverse map for below
377 377
378 378 if bothnew:
379 379 repo.ui.debug(" unmatched files new in both:\n %s\n"
380 380 % "\n ".join(bothnew))
381 381 bothdiverge = {}
382 382 bothdata = {'copy': {},
383 383 'fullcopy': {},
384 384 'diverge': bothdiverge,
385 385 }
386 386 for f in bothnew:
387 _checkcopies(c1, f, m1, m2, ca, limit, bothdata)
388 _checkcopies(c2, f, m2, m1, ca, limit, bothdata)
387 _checkcopies(c1, f, m1, m2, base, limit, bothdata)
388 _checkcopies(c2, f, m2, m1, base, limit, bothdata)
389 389 for of, fl in bothdiverge.items():
390 390 if len(fl) == 2 and fl[0] == fl[1]:
391 391 copy[fl[0]] = of # not actually divergent, just matching renames
392 392
393 393 if fullcopy and repo.ui.debugflag:
394 394 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
395 395 "% = renamed and deleted):\n")
396 396 for f in sorted(fullcopy):
397 397 note = ""
398 398 if f in copy:
399 399 note += "*"
400 400 if f in divergeset:
401 401 note += "!"
402 402 if f in renamedeleteset:
403 403 note += "%"
404 404 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
405 405 note))
406 406 del divergeset
407 407
408 408 if not fullcopy:
409 409 return copy, {}, diverge, renamedelete
410 410
411 411 repo.ui.debug(" checking for directory renames\n")
412 412
413 413 # generate a directory move map
414 414 d1, d2 = c1.dirs(), c2.dirs()
415 415 # Hack for adding '', which is not otherwise added, to d1 and d2
416 416 d1.addpath('/')
417 417 d2.addpath('/')
418 418 invalid = set()
419 419 dirmove = {}
420 420
421 421 # examine each file copy for a potential directory move, which is
422 422 # when all the files in a directory are moved to a new directory
423 423 for dst, src in fullcopy.iteritems():
424 424 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
425 425 if dsrc in invalid:
426 426 # already seen to be uninteresting
427 427 continue
428 428 elif dsrc in d1 and ddst in d1:
429 429 # directory wasn't entirely moved locally
430 430 invalid.add(dsrc + "/")
431 431 elif dsrc in d2 and ddst in d2:
432 432 # directory wasn't entirely moved remotely
433 433 invalid.add(dsrc + "/")
434 434 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
435 435 # files from the same directory moved to two different places
436 436 invalid.add(dsrc + "/")
437 437 else:
438 438 # looks good so far
439 439 dirmove[dsrc + "/"] = ddst + "/"
440 440
441 441 for i in invalid:
442 442 if i in dirmove:
443 443 del dirmove[i]
444 444 del d1, d2, invalid
445 445
446 446 if not dirmove:
447 447 return copy, {}, diverge, renamedelete
448 448
449 449 for d in dirmove:
450 450 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
451 451 (d, dirmove[d]))
452 452
453 453 movewithdir = {}
454 454 # check unaccounted nonoverlapping files against directory moves
455 455 for f in u1r + u2r:
456 456 if f not in fullcopy:
457 457 for d in dirmove:
458 458 if f.startswith(d):
459 459 # new file added in a directory that was moved, move it
460 460 df = dirmove[d] + f[len(d):]
461 461 if df not in copy:
462 462 movewithdir[f] = df
463 463 repo.ui.debug((" pending file src: '%s' -> "
464 464 "dst: '%s'\n") % (f, df))
465 465 break
466 466
467 467 return copy, movewithdir, diverge, renamedelete
468 468
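
To make the four return values concrete, here is a hand-written illustration (not output from running the function) of what they could look like for a merge where c1 copied 'x' to 'y', the two sides renamed 'a' to different names, c2 added a file inside a directory that c1 moved, and one side deleted a file the other renamed:

    # copy: destination -> source, copies/renames that need merging
    copy = {'y': 'x'}                 # c1 did 'hg cp x y', c2 touched x

    # movewithdir: source -> destination, files to follow a directory move
    movewithdir = {'d/new': 'e/new'}  # c2 added d/new, c1 moved d/ -> e/

    # diverge: source -> list of destinations, divergent renames
    diverge = {'a': ['b', 'c']}       # c1: a -> b, c2: a -> c

    # renamedelete: source -> list of destinations, renamed vs. deleted
    renamedelete = {'f': ['g']}       # c2: f -> g, c1 deleted f
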
469 469 def _related(f1, f2, limit):
470 470 """return True if f1 and f2 filectx have a common ancestor
471 471
472 472 Walk back to common ancestor to see if the two files originate
473 473 from the same file. Since workingfilectx's rev() is None it messes
474 474 up the integer comparison logic, hence the pre-step check for
475 475 None (f1 and f2 can only be workingfilectx's initially).
476 476 """
477 477
478 478 if f1 == f2:
479 479 return f1 # a match
480 480
481 481 g1, g2 = f1.ancestors(), f2.ancestors()
482 482 try:
483 483 f1r, f2r = f1.linkrev(), f2.linkrev()
484 484
485 485 if f1r is None:
486 486 f1 = next(g1)
487 487 if f2r is None:
488 488 f2 = next(g2)
489 489
490 490 while True:
491 491 f1r, f2r = f1.linkrev(), f2.linkrev()
492 492 if f1r > f2r:
493 493 f1 = next(g1)
494 494 elif f2r > f1r:
495 495 f2 = next(g2)
496 496 elif f1 == f2:
497 497 return f1 # a match
498 498 elif f1r == f2r or f1r < limit or f2r < limit:
499 499 return False # copy no longer relevant
500 500 except StopIteration:
501 501 return False
502 502
503 503 def _checkcopies(ctx, f, m1, m2, base, limit, data):
504 504 """
505 505 check possible copies of f from m1 to m2
506 506
507 507 ctx = starting context for f in m1
508 508 f = the filename to check (as in m1)
509 509 m1 = the source manifest
510 510 m2 = the destination manifest
511 511 base = the changectx used as a merge base
512 512 limit = the rev number to not search beyond
513 513 data = dictionary of dictionary to store copy data. (see mergecopies)
514 514
515 515 note: limit is only an optimization, and there is no guarantee that
516 516 irrelevant revisions will not be limited
517 517 there is no easy way to make this algorithm stop in a guaranteed way
518 518 once it "goes behind a certain revision".
519 519 """
520 520
521 521 mb = base.manifest()
522 522 getfctx = _makegetfctx(ctx)
523 523
524 524 of = None
525 525 seen = set([f])
526 526 for oc in getfctx(f, m1[f]).ancestors():
527 527 ocr = oc.linkrev()
528 528 of = oc.path()
529 529 if of in seen:
530 530 # check limit late - grab last rename before
531 531 if ocr < limit:
532 532 break
533 533 continue
534 534 seen.add(of)
535 535
536 536 data['fullcopy'][f] = of # remember for dir rename detection
537 537 if of not in m2:
538 538 continue # no match, keep looking
539 539 if m2[of] == mb.get(of):
540 540 return # no merge needed, quit early
541 541 c2 = getfctx(of, m2[of])
542 542 # c2 might be a plain new file added on the destination side that is
543 543 # unrelated to the droids we are looking for.
544 544 cr = _related(oc, c2, base.rev())
545 545 if cr and (of == f or of == c2.path()): # non-divergent
546 546 data['copy'][f] = of
547 547 return
548 548
549 549 if of in mb:
550 550 data['diverge'].setdefault(of, []).append(f)
551 551
552 552 def duplicatecopies(repo, rev, fromrev, skiprev=None):
553 553 '''reproduce copies from fromrev to rev in the dirstate
554 554
555 555 If skiprev is specified, it's a revision that should be used to
556 556 filter copy records. Any copies that occur between fromrev and
557 557 skiprev will not be duplicated, even if they appear in the set of
558 558 copies between fromrev and rev.
559 559 '''
560 560 exclude = {}
561 561 if (skiprev is not None and
562 562 not repo.ui.configbool('experimental', 'disablecopytrace')):
563 563 # disablecopytrace skips this line, but not the entire function because
564 564 # the line below is O(size of the repo) during a rebase, while the rest
565 565 # of the function is much faster (and is required for carrying copy
566 566 # metadata across the rebase anyway).
567 567 exclude = pathcopies(repo[fromrev], repo[skiprev])
568 568 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
569 569 # copies.pathcopies returns backward renames, so dst might not
570 570 # actually be in the dirstate
571 571 if dst in exclude:
572 572 continue
573 573 if repo.dirstate[dst] in "nma":
574 574 repo.dirstate.copy(src, dst)
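
Finally, a hedged sketch of how a history-rewriting caller might use duplicatecopies() to carry copy metadata into a rewritten commit; the wrapper and variable names are illustrative, not taken from any Mercurial extension:

    from mercurial import copies

    def carrycopies(repo, rev, origparent):
        # after the working directory has been prepared with rev's changes
        # on top of the new parent, re-record rev's copies/renames in the
        # dirstate so the rewritten commit keeps them
        copies.duplicatecopies(repo, rev, origparent)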