copies.py
883 lines
| 32.5 KiB
| text/x-python
|
PythonLexer
/ mercurial / copies.py
Matt Mackall
|
r6274 | # copies.py - copy detection for Mercurial | ||
# | ||||
# Copyright 2008 Matt Mackall <mpm@selenic.com> | ||||
# | ||||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Matt Mackall
|
r6274 | |||
Gregory Szorc
|
r25924 | from __future__ import absolute_import | ||
Pulkit Goyal
|
r34180 | import collections | ||
Simon Heimberg
|
r8312 | import heapq | ||
Pulkit Goyal
|
r34180 | import os | ||
Matt Mackall
|
r6274 | |||
Pulkit Goyal
|
r34847 | from .i18n import _ | ||
Gregory Szorc
|
r25924 | from . import ( | ||
Yuya Nishihara
|
r33869 | match as matchmod, | ||
Durham Goode
|
r28000 | node, | ||
Gregory Szorc
|
r25924 | pathutil, | ||
Durham Goode
|
r28000 | scmutil, | ||
Gregory Szorc
|
r25924 | util, | ||
) | ||||
Matt Mackall
|
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendant
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.

    repo is the repository whose changelog (and, for the working directory,
    dirstate parents) is walked.  a and b are revision numbers; None stands
    for the working directory and is mapped to the pseudo revision
    len(repo.changelog).
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    # side[rev] is -1 (reached only from a), 1 (reached only from b) or
    # 0 (reached from both)
    side = {a: -1, b: 1}
    # revisions are stored negated so that the min-heap pops the highest
    # revision number first
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            # the working directory has no changelog entry; use the
            # dirstate parents instead
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                # negative parent revisions are skipped
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendant of b or vice versa.
    return min(limit, a, b)
Matt Mackall
|
r6429 | |||
Matt Mackall
|
r15775 | def _chain(src, dst, a, b): | ||
Martin von Zweigbergk
|
r35422 | """chain two sets of copies a->b""" | ||
Matt Mackall
|
r15775 | t = a.copy() | ||
for k, v in b.iteritems(): | ||||
if v in t: | ||||
# found a chain | ||||
if t[v] != k: | ||||
# file wasn't renamed back to itself | ||||
t[k] = t[v] | ||||
if v not in dst: | ||||
# chain was a rename, not a copy | ||||
del t[v] | ||||
if v in src: | ||||
# file is a copy of an existing file | ||||
t[k] = v | ||||
Matt Mackall
|
r15976 | |||
# remove criss-crossed copies | ||||
for k, v in t.items(): | ||||
if k in src and v in dst: | ||||
del t[k] | ||||
Matt Mackall
|
r15775 | return t | ||
Mads Kiilerich
|
r20294 | def _tracefile(fctx, am, limit=-1): | ||
Martin von Zweigbergk
|
r35422 | """return file context that is the ancestor of fctx present in ancestor | ||
manifest am, stopping after the first ancestor lower than limit""" | ||||
Matt Mackall
|
r15775 | |||
for f in fctx.ancestors(): | ||||
if am.get(f.path(), None) == f.filenode(): | ||||
return f | ||||
Matt Mackall
|
r23982 | if limit >= 0 and f.linkrev() < limit and f.rev() < limit: | ||
Matt Mackall
|
r15775 | return None | ||
Martin von Zweigbergk
|
r35421 | def _dirstatecopies(d, match=None): | ||
Matt Mackall
|
r15775 | ds = d._repo.dirstate | ||
c = ds.copies().copy() | ||||
Pulkit Goyal
|
r34350 | for k in list(c): | ||
Martin von Zweigbergk
|
r35421 | if ds[k] not in 'anm' or (match and not match(k)): | ||
Matt Mackall
|
r15775 | del c[k] | ||
return c | ||||
Durham Goode
|
r24782 | def _computeforwardmissing(a, b, match=None): | ||
Durham Goode
|
r24011 | """Computes which files are in b but not a. | ||
This is its own function so extensions can easily wrap this call to see what | ||||
files _forwardcopies is about to process. | ||||
""" | ||||
Durham Goode
|
r24782 | ma = a.manifest() | ||
mb = b.manifest() | ||||
Durham Goode
|
r31256 | return mb.filesnotin(ma, match=match) | ||
Durham Goode
|
r24011 | |||
Martin von Zweigbergk
|
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    repo = a._repo

    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    basemanifest = a.manifest()

    # Comparing two whole manifests is expensive when they are large.  When
    # b is a non-merge child of a (the usual rebase/histedit situation) the
    # comparison is narrowed to the files b itself reports as touched.
    # Merge commits are left out because ctx.files() of a merge is not
    # correct for this comparison.
    matcher = match
    if b.p1() == a and b.p2().node() == node.nullid:
        matcher = matchmod.intersectmatchers(
            match, scmutil.matchfiles(repo, b.files()))
    candidates = _computeforwardmissing(a, b, match=matcher)

    ancestry = repo.changelog.ancestors([b.rev()], inclusive=True)

    # only the origin of new files is looked up; finding where old files
    # went is considered too expensive, so a case like
    # 'hg rm b; hg cp a b' can be missed
    copies = {}
    for dst in candidates:
        dstfctx = b[dst]
        dstfctx._ancestrycontext = ancestry
        origin = _tracefile(dstfctx, basemanifest, limit)
        if origin:
            copies[dst] = origin.path()
    return copies
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if b.rev() is not None:
        # b is a committed revision, nothing to read from the dirstate
        return _committedforwardcopies(a, b, match)

    # from here on b is the working copy
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b, match)

    committed = _committedforwardcopies(a, b.p1(), match)
    # combine copies from dirstate if necessary
    pending = _dirstatecopies(b, match)
    return _chain(a, b, committed, pending)
Matt Mackall
|
r15775 | |||
Siddharth Agarwal
|
r18136 | def _backwardrenames(a, b): | ||
Pulkit Goyal
|
r34079 | if a._repo.ui.config('experimental', 'copytrace') == 'off': | ||
Durham Goode
|
r26013 | return {} | ||
Siddharth Agarwal
|
r18136 | # Even though we're not taking copies into account, 1:n rename situations | ||
# can still exist (e.g. hg cp a b; hg mv a c). In those cases we | ||||
# arbitrarily pick one of the renames. | ||||
Matt Mackall
|
r15775 | f = _forwardcopies(b, a) | ||
r = {} | ||||
Mads Kiilerich
|
r18355 | for k, v in sorted(f.iteritems()): | ||
Siddharth Agarwal
|
r18136 | # remove copies | ||
if v in a: | ||||
continue | ||||
Matt Mackall
|
r15775 | r[v] = k | ||
return r | ||||
Durham Goode
|
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    if x == y or not (x and y):
        return {}

    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: plain forward walk
        return _forwardcopies(x, y, match=match)
    if anc == y:
        # y is an ancestor of x: invert the renames
        return _backwardrenames(x, y)

    # unrelated endpoints: go back to the ancestor, then forward to y
    backward = _backwardrenames(x, anc)
    forward = _forwardcopies(anc, y, match=match)
    return _chain(x, y, backward, forward)
Matt Mackall
|
r15774 | |||
Pierre-Yves David
|
r30196 | def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''): | ||
Durham Goode
|
r24625 | """Computes, based on addedinm1 and addedinm2, the files exclusive to c1 | ||
and c2. This is its own function so extensions can easily wrap this call | ||||
Martin von Zweigbergk
|
r24187 | to see what files mergecopies is about to process. | ||
Durham Goode
|
r24273 | |||
Durham Goode
|
r24625 | Even though c1 and c2 are not used in this function, they are useful in | ||
Durham Goode
|
r24273 | other extensions for being able to read the file nodes of the changed files. | ||
Pierre-Yves David
|
r30196 | |||
"baselabel" can be passed to help distinguish the multiple computations | ||||
done in the graft case. | ||||
Durham Goode
|
r24010 | """ | ||
Martin von Zweigbergk
|
r24185 | u1 = sorted(addedinm1 - addedinm2) | ||
u2 = sorted(addedinm2 - addedinm1) | ||||
Durham Goode
|
r24010 | |||
Pierre-Yves David
|
r30196 | header = " unmatched files in %s" | ||
if baselabel: | ||||
header += ' (from %s)' % baselabel | ||||
Durham Goode
|
r24010 | if u1: | ||
Pierre-Yves David
|
r30196 | repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1))) | ||
Durham Goode
|
r24010 | if u2: | ||
Pierre-Yves David
|
r30196 | repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2))) | ||
Durham Goode
|
r24010 | return u1, u2 | ||
Matt Mackall
|
def _makegetfctx(ctx):
    """build the 'getfctx' function that _checkcopies expects

    The function has to be rebuilt for every '_checkcopies' run so that the
    linkrev adjustment is configured correctly each time.  Linkrev adjustment
    matters to avoid bugs in rename detection, and a proper
    '_ancestrycontext' keeps its cost bounded: without it every file could
    trigger a full dag traversal and make the time complexity of the
    operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    ctxrev = ctx.rev()
    repo = ctx._repo
    ancestry = getattr(ctx, '_ancestrycontext', None)
    if ancestry is None:
        if ctxrev is None:
            # no revision of its own: start from the parents instead
            startrevs = [p.rev() for p in ctx.parents()]
        else:
            startrevs = [ctxrev]
        ancestry = repo.changelog.ancestors(startrevs, inclusive=True)
        ctx._ancestrycontext = ancestry

    def buildfctx(path, filenode):
        if filenode not in node.wdirnodes:
            fctx = repo.filectx(path, fileid=filenode)
            # setup only needed for filectx not created from a changectx
            fctx._ancestrycontext = ancestry
            fctx._descendantrev = ctxrev
            return fctx
        # in a working context
        if ctx.rev() is None:
            return ctx.filectx(path)
        return repo[None][path]

    return util.lrucachefunc(buildfctx)
Gábor Stefanik
|
r30202 | def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge): | ||
"""combine partial copy paths""" | ||||
remainder = {} | ||||
for f in copyfrom: | ||||
if f in copyto: | ||||
finalcopy[copyto[f]] = copyfrom[f] | ||||
del copyto[f] | ||||
for f in incompletediverge: | ||||
assert f not in diverge | ||||
ic = incompletediverge[f] | ||||
if ic[0] in copyto: | ||||
diverge[f] = [copyto[ic[0]], ic[1]] | ||||
else: | ||||
remainder[f] = ic | ||||
return remainder | ||||
Pierre-Yves David
|
def mergecopies(repo, c1, c2, base):
    """
    Pick a copytracing algorithm according to the configuration and use it
    to find the moves and copies between contexts c1 and c2 that matter for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the
    following message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not (c1 and c2) or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    # Disabling copy tracing is deliberately checked after the node == p1
    # case above, because that case is required for a simple copy to be kept
    # across a rebase.
    mode = repo.ui.config('experimental', 'copytrace')
    if mode == 'off':
        return {}, {}, {}, {}, {}

    # In heuristics mode, still do full copytracing if only non-public
    # revisions are involved: that will be fast enough and will also cover
    # the copies which could be missed by heuristics.
    useheuristics = (mode == 'heuristics'
                     and not _isfullcopytraceable(repo, c1, base))
    if useheuristics:
        return _heuristicscopytracing(repo, c1, c2, base)
    return _fullcopytracing(repo, c1, c2, base)
Durham Goode
|
r26013 | |||
Pulkit Goyal
|
r34312 | def _isfullcopytraceable(repo, c1, base): | ||
Yuya Nishihara
|
r34365 | """ Checks that if base, source and destination are all no-public branches, | ||
if yes let's use the full copytrace algorithm for increased capabilities | ||||
since it will be fast enough. | ||||
Pulkit Goyal
|
r34517 | |||
`experimental.copytrace.sourcecommitlimit` can be used to set a limit for | ||||
number of changesets from c1 to base such that if number of changesets are | ||||
more than the limit, full copytracing algorithm won't be used. | ||||
Pulkit Goyal
|
r34289 | """ | ||
Pulkit Goyal
|
r34312 | if c1.rev() is None: | ||
c1 = c1.p1() | ||||
Yuya Nishihara
|
r34365 | if c1.mutable() and base.mutable(): | ||
Pulkit Goyal
|
r34312 | sourcecommitlimit = repo.ui.configint('experimental', | ||
'copytrace.sourcecommitlimit') | ||||
commits = len(repo.revs('%d::%d', base.rev(), c1.rev())) | ||||
return commits < sourcecommitlimit | ||||
Pulkit Goyal
|
r34289 | return False | ||
Pulkit Goyal
|
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    repo is the repository, c1 and c2 are the two contexts being merged and
    base is the context used as merge base.

    Returns the five dicts described in mergecopies(): "copy",
    "movewithdir", "diverge", "renamedelete" and "dirmove".
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden. We still need to know a real common ancestor in this case.
    # We can't just compute _c1.ancestor(_c2) and compare it to ca, because
    # there can be multiple common ancestors, e.g. in case of bidmerge.
    # Because our caller may not know if the revision passed in lieu of the CA
    # is a genuine common ancestor or not without explicitly checking it, it's
    # better to determine that here.
    #
    # base.descendant(wc) and base.descendant(base) are False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not (base == _c1 or base.descendant(_c1))
    dirtyc2 = not (base == _c2 or base.descendant(_c2))
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb)
    addedinm2 = m2.filesnotin(mb)
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
                                      m2.filesnotin(mta),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    # merge the per-side results; entries from side 2 win on key clashes
    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    # iterate over a snapshot because entries are deleted from diverge
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.items():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        # 'invalid' and 'dirmove' are keyed by the directory name with a
        # trailing slash, so the lookups below must use that form as well
        srckey, dstkey = dsrc + "/", ddst + "/"
        if srckey in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(srckey)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(srckey)
        elif srckey in dirmove and dirmove[srckey] != dstkey:
            # files from the same directory moved to two different places
            invalid.add(srckey)
        else:
            # looks good so far
            dirmove[srckey] = dstkey

    # a directory may have been recorded before it turned out to be invalid
    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
Durham Goode
|
r19178 | |||
Pulkit Goyal
|
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
                    (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same five-tuple shape as _fullcopytracing(); when the
    heuristics are used, only the first dict ("copy") is ever filled in.
    """
    # working contexts are replaced by their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base (exclusive) and c2 (inclusive)
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in cp.items():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    # A real list is built (rather than using filter()) so that the emptiness
    # test below is meaningful on Python 3, where filter() returns an
    # always-truthy iterator.
    missingfiles = [f for f in changedfiles
                    if f not in m1 and f in base and f in c2]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # in case of a rebase/graft, base may not be a common ancestor
        anc = c1.ancestor(c2)
        ancrev = anc.rev()

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        maxcandidates = repo.ui.configint('experimental',
                                          'copytrace.movecandidateslimit')

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2, ancrev):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
Pierre-Yves David
|
def _related(f1, f2, limit):
    """return a true value if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).

    On a match the matching filectx itself is returned (not the literal
    True); otherwise the result is False.  limit is a revision number:
    once the walk reaches linkrevs below it the search is abandoned.
    """

    if f1 == f2:
        return f1 # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # step past an endpoint whose linkrev is None so that only integers
        # are compared in the loop below
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            # always advance the side with the higher linkrev
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            elif f1 == f2:
                return f1 # a match
            # this branch is only reached with equal linkrevs, so its
            # condition always holds and the walk ends here
            elif f1r == f2r or f1r < limit or f2r < limit:
                return False # copy no longer relevant
    except StopIteration:
        # one side ran out of ancestors without a match
        return False
Stanislau Hlebik
|
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    look for copies of f between the source and destination manifests

    srcctx = context in which f is looked up first (source side)
    dstctx = context whose manifest is searched for matching paths
    f = the filename to check, as named in the source manifest
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionaries receiving the results; the keys
           written here are 'fullcopy', 'copy', 'diverge', 'incomplete'
           and 'incompletediverge'

    note: limit is only an optimization; it is tested only when an
    ancestor's path has already been seen, so revisions below it may
    still be visited.
    """

    srcmanifest = srcctx.manifest()
    dstmanifest = dstctx.manifest()
    basemanifest = base.manifest()
    tcamanifest = tca.manifest()

    # True when this call may be chasing renames backwards, which the
    # original comments attribute to grafts (the DAG is then rotated):
    # f exists in the base, the base is not the topological ancestor and
    # the base is not remote.
    backwards = not remotebase and base != tca and f in basemanifest

    srcfilectx = _makegetfctx(srcctx)
    dstfilectx = _makegetfctx(dstctx)

    if srcmanifest[f] == basemanifest.get(f) and not remotebase:
        # source side did not change f relative to base: nothing to merge
        return

    oldpath = None
    seen = {f}
    for ancestor in srcfilectx(f, srcmanifest[f]).ancestors():
        anclinkrev = ancestor.linkrev()
        oldpath = ancestor.path()
        if oldpath in seen:
            # the limit is checked late so that the last rename before
            # it is still picked up
            if anclinkrev < limit:
                break
            continue
        seen.add(oldpath)

        # record the pair for directory rename detection
        if backwards:
            # grafting backwards through renames
            data['fullcopy'][oldpath] = f
        else:
            data['fullcopy'][f] = oldpath

        if oldpath not in dstmanifest:
            # no match on the destination side, keep looking
            continue
        if dstmanifest[oldpath] == basemanifest.get(oldpath):
            # destination did not touch it: no merge needed, quit early
            return

        dstfile = dstfilectx(oldpath, dstmanifest[oldpath])
        # dstfile might be a plain new file added on the destination side
        # that has nothing to do with the file being traced
        related = _related(ancestor, dstfile, tca.rev())
        if not related:
            continue
        if oldpath != f and oldpath != dstfile.path():
            continue

        # non-divergent match
        if backwards:
            data['copy'][oldpath] = f
        elif oldpath in basemanifest:
            data['copy'][f] = oldpath
        elif remotebase:
            # special case: a <- b <- a -> b "ping-pong" rename
            data['copy'][oldpath] = f
            del data['fullcopy'][f]
            data['fullcopy'][oldpath] = f
        else:
            # divergence w.r.t. graft CA on one side of topological CA
            inbase = next((p for p in seen if p in basemanifest), None)
            if inbase is not None:
                assert inbase not in data['diverge']
                data['diverge'][inbase] = [f, oldpath]
        return

    # no match was settled in the loop; classify the last path examined
    if oldpath not in tcamanifest:
        return
    if backwards or remotebase:
        data['incomplete'][oldpath] = f
        return

    inbase = next((p for p in seen if p in basemanifest), None)
    if inbase is None:
        return
    if tca == base:
        data['diverge'].setdefault(inbase, []).append(f)
    else:
        data['incompletediverge'][inbase] = [oldpath, f]
    return
Matt Mackall
|
r22901 | |||
Phil Cohen
|
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """mark on wctx the copies found between fromrev and rev

    For every (destination, source) pair reported by pathcopies() between
    fromrev and rev, the destination file of wctx is marked as copied
    from the source.

    If skiprev is given, copies that pathcopies() reports between fromrev
    and skiprev are left out, even when they are also part of the copies
    between fromrev and rev.
    """
    skipped = {}
    if skiprev is not None:
        if repo.ui.config('experimental', 'copytrace') != 'off':
            # Only this lookup is bypassed when copytrace is 'off'; the
            # original notes describe it as O(size of the repo) during a
            # rebase, whereas the loop below is much cheaper and is still
            # needed to carry copy metadata across the rebase.
            skipped = pathcopies(repo[fromrev], repo[skiprev])

    copied = pathcopies(repo[fromrev], repo[rev])
    for target, origin in copied.iteritems():
        # pathcopies may report backward renames as well; anything that
        # also shows up in the skipped set is not recorded
        if target not in skipped:
            wctx[target].markcopied(origin)