##// END OF EJS Templates
subrepo: add tests for svn rogue ssh urls (SEC)...
subrepo: add tests for svn rogue ssh urls (SEC) 'ssh://' has an exploit that will pass the url blindly to the ssh command, allowing a malicious person to have a subrepo with '-oProxyCommand' which could run arbitrary code on a user's machine. In addition, at least on Windows, a pipe '|' is able to execute arbitrary commands. When this happens, let's throw a big abort into the user's face so that they can inspect what's going on.

File last commit:

r32640:aeac3cbc default
r33730:60ee7af2 stable
Show More
copies.py
719 lines | 25.9 KiB | text/x-python | PythonLexer
Matt Mackall
copies: move findcopies code to its own module...
r6274 # copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Matt Mackall
copies: move findcopies code to its own module...
r6274
Gregory Szorc
copies: use absolute_import
r25924 from __future__ import absolute_import
Simon Heimberg
separate import lines from mercurial and general python modules
r8312 import heapq
Matt Mackall
copies: move findcopies code to its own module...
r6274
Gregory Szorc
copies: use absolute_import
r25924 from . import (
Durham Goode
copies: optimize forward copy detection logic for rebases...
r28000 node,
Gregory Szorc
copies: use absolute_import
r25924 pathutil,
Durham Goode
copies: optimize forward copy detection logic for rebases...
r28000 scmutil,
Gregory Szorc
copies: use absolute_import
r25924 util,
)
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 def _findlimit(repo, a, b):
Ryan McElroy
amend: fix amending rename commit with diverged topologies (issue4405)...
r23071 """
Find the last revision that needs to be checked to ensure that a full
transitive closure for file copies can be properly calculated.
Generally, this means finding the earliest revision number that's an
ancestor of a or b but not both, except when a or b is a direct descendent
of the other, in which case we can return the minimum revnum of a and b.
Patrick Mezard
copies: don't report copies with unrelated branch
r10179 None if no such revision exists.
"""
Ryan McElroy
amend: fix amending rename commit with diverged topologies (issue4405)...
r23071
Matt Mackall
symmetricdifference: move back to copies...
r6429 # basic idea:
# - mark a and b with different sides
# - if a parent's children are all on the same side, the parent is
# on that side, otherwise it is on no side
# - walk the graph in topological order with the help of a heap;
# - add unseen parents to side map
# - clear side of any parent that has children on different sides
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 # - track number of interesting revs that might still be on a side
# - track the lowest interesting rev seen
# - quit when interesting revs is zero
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430
cl = repo.changelog
Matt Mackall
add __len__ and __iter__ methods to repo and revlog
r6750 working = len(cl) # pseudo rev for the working directory
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if a is None:
a = working
if b is None:
b = working
Matt Mackall
symmetricdifference: move back to copies...
r6429
side = {a: -1, b: 1}
visit = [-a, -b]
heapq.heapify(visit)
interesting = len(visit)
Patrick Mezard
copies: don't report copies with unrelated branch
r10179 hascommonancestor = False
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 limit = working
Matt Mackall
symmetricdifference: move back to copies...
r6429
while interesting:
r = -heapq.heappop(visit)
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if r == working:
parents = [cl.rev(p) for p in repo.dirstate.parents()]
else:
parents = cl.parentrevs(r)
for p in parents:
Patrick Mezard
copies: don't report copies with unrelated branch
r10179 if p < 0:
continue
Matt Mackall
symmetricdifference: move back to copies...
r6429 if p not in side:
# first time we see p; add it to visit
side[p] = side[r]
if side[p]:
interesting += 1
heapq.heappush(visit, -p)
elif side[p] and side[p] != side[r]:
# p was interesting but now we know better
side[p] = 0
interesting -= 1
Patrick Mezard
copies: don't report copies with unrelated branch
r10179 hascommonancestor = True
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if side[r]:
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 limit = r # lowest rev visited
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 interesting -= 1
Patrick Mezard
copies: don't report copies with unrelated branch
r10179
if not hascommonancestor:
return None
Ryan McElroy
amend: fix amending rename commit with diverged topologies (issue4405)...
r23071
# Consider the following flow (see test-commit-amend.t under issue4405):
# 1/ File 'a0' committed
# 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
# 3/ Move back to first commit
# 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
# 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
#
# During the amend in step five, we will be in this state:
#
# @ 3 temporary amend commit for a1-amend
# |
# o 2 a1-amend
# |
# | o 1 a1
# |/
# o 0 a0
#
Mads Kiilerich
spelling: fixes from proofreading of spell checker issues
r23139 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
Ryan McElroy
amend: fix amending rename commit with diverged topologies (issue4405)...
r23071 # yet the filelog has the copy information in rev 1 and we will not look
# back far enough unless we also look at the a and b as candidates.
# This only occurs when a is a descendent of b or visa-versa.
return min(limit, a, b)
Matt Mackall
symmetricdifference: move back to copies...
r6429
Matt Mackall
copies: rewrite copy detection for non-merge users...
r15775 def _chain(src, dst, a, b):
'''chain two sets of copies a->b'''
t = a.copy()
for k, v in b.iteritems():
if v in t:
# found a chain
if t[v] != k:
# file wasn't renamed back to itself
t[k] = t[v]
if v not in dst:
# chain was a rename, not a copy
del t[v]
if v in src:
# file is a copy of an existing file
t[k] = v
Matt Mackall
copies: eliminate criss-crosses when chaining...
r15976
# remove criss-crossed copies
for k, v in t.items():
if k in src and v in dst:
del t[k]
Matt Mackall
copies: rewrite copy detection for non-merge users...
r15775 return t
Mads Kiilerich
diff: search beyond ancestor when detecting renames...
r20294 def _tracefile(fctx, am, limit=-1):
'''return file context that is the ancestor of fctx present in ancestor
manifest am, stopping after the first ancestor lower than limit'''
Matt Mackall
copies: rewrite copy detection for non-merge users...
r15775
for f in fctx.ancestors():
if am.get(f.path(), None) == f.filenode():
return f
Matt Mackall
copies: use linkrev for file tracing limit...
r23982 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
Matt Mackall
copies: rewrite copy detection for non-merge users...
r15775 return None
def _dirstatecopies(d):
ds = d._repo.dirstate
c = ds.copies().copy()
for k in c.keys():
if ds[k] not in 'anm':
del c[k]
return c
Durham Goode
copies: add matcher parameter to copy logic...
r24782 def _computeforwardmissing(a, b, match=None):
Durham Goode
copy: move _forwardcopies file logic to a function...
r24011 """Computes which files are in b but not a.
This is its own function so extensions can easily wrap this call to see what
files _forwardcopies is about to process.
"""
Durham Goode
copies: add matcher parameter to copy logic...
r24782 ma = a.manifest()
mb = b.manifest()
Durham Goode
copies: remove use of manifest.matches...
r31256 return mb.filesnotin(ma, match=match)
Durham Goode
copy: move _forwardcopies file logic to a function...
r24011
Durham Goode
copies: add matcher parameter to copy logic...
def _forwardcopies(a, b, match=None):
    """Find the {dst@b: src@a} copy mapping where a is an ancestor of b.

    When b has no revision number it is handled as the working copy: copies
    are traced up to its first parent and then chained with the copies
    recorded in the dirstate.
    """
    # check for working copy
    wctx = None
    if b.rev() is None:
        wctx, b = b, b.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(wctx)

    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    basemanifest = a.manifest()

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. We can optimize it in the
    # common use case of computing what copies are in a commit versus its
    # parent (like during a rebase or histedit). Note, we exclude merge
    # commits from this optimization, since the ctx.files() for a merge
    # commit is not correct for this comparison.
    if not match and b.p1() == a and b.p2().node() == node.nullid:
        missingmatch = scmutil.matchfiles(a._repo, b.files())
    else:
        missingmatch = match
    missing = _computeforwardmissing(a, b, match=missingmatch)

    # the same ancestry set is attached to every file context traced below
    ancestry = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    copies = {}
    for fname in missing:
        fctx = b[fname]
        fctx._ancestrycontext = ancestry
        origin = _tracefile(fctx, basemanifest, limit)
        if origin:
            copies[fname] = origin.path()

    # combine copies from dirstate if necessary
    if wctx is not None:
        copies = _chain(a, wctx, copies, _dirstatecopies(wctx))

    return copies
Siddharth Agarwal
copies: do not track backward copies, only renames (issue3739)...
r18136 def _backwardrenames(a, b):
Durham Goode
copy: add flag for disabling copy tracing...
r26013 if a._repo.ui.configbool('experimental', 'disablecopytrace'):
return {}
Siddharth Agarwal
copies: do not track backward copies, only renames (issue3739)...
r18136 # Even though we're not taking copies into account, 1:n rename situations
# can still exist (e.g. hg cp a b; hg mv a c). In those cases we
# arbitrarily pick one of the renames.
Matt Mackall
copies: rewrite copy detection for non-merge users...
r15775 f = _forwardcopies(b, a)
r = {}
Mads Kiilerich
copies: make the loss in _backwardcopies more stable...
r18355 for k, v in sorted(f.iteritems()):
Siddharth Agarwal
copies: do not track backward copies, only renames (issue3739)...
r18136 # remove copies
if v in a:
continue
Matt Mackall
copies: rewrite copy detection for non-merge users...
r15775 r[v] = k
return r
Durham Goode
copies: add matcher parameter to copy logic...
def pathcopies(x, y, match=None):
    """Find the {dst@y: src@x} copy mapping for a directed compare.

    Returns an empty dict when x and y are equal or when either is falsy.
    Otherwise the result depends on how x and y relate to their ancestor
    y.ancestor(x):
    - ancestor is x: forward copies from x to y
    - ancestor is y: backward renames from x to y
    - anything else: backward renames from x to the ancestor, chained with
      forward copies from the ancestor to y

    match is only passed on to the forward copy computation.
    """
    if x == y:
        return {}
    if not x or not y:
        return {}
    anc = y.ancestor(x)
    if anc == x:
        return _forwardcopies(x, y, match=match)
    if anc == y:
        return _backwardrenames(x, y)
    backward = _backwardrenames(x, anc)
    forward = _forwardcopies(anc, y, match=match)
    return _chain(x, y, backward, forward)
Matt Mackall
copies: split the copies api for "normal" and merge cases (API)
r15774
Pierre-Yves David
copies: make it possible to distinguish betwen _computenonoverlap invocations...
r30196 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
Durham Goode
copies: pass changectx instead of manifest to _computenonoverlap...
r24625 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
and c2. This is its own function so extensions can easily wrap this call
Martin von Zweigbergk
copies: only calculate 'addedinm[12]' sets once...
r24187 to see what files mergecopies is about to process.
Durham Goode
copies: added manifests to computenonoverlap...
r24273
Durham Goode
copies: pass changectx instead of manifest to _computenonoverlap...
r24625 Even though c1 and c2 are not used in this function, they are useful in
Durham Goode
copies: added manifests to computenonoverlap...
r24273 other extensions for being able to read the file nodes of the changed files.
Pierre-Yves David
copies: make it possible to distinguish betwen _computenonoverlap invocations...
r30196
"baselabel" can be passed to help distinguish the multiple computations
done in the graft case.
Durham Goode
copy: move mergecopies file logic to a function...
r24010 """
Martin von Zweigbergk
copies: replace _nonoverlap() by calls to manifestdict.filesnotin()...
r24185 u1 = sorted(addedinm1 - addedinm2)
u2 = sorted(addedinm2 - addedinm1)
Durham Goode
copy: move mergecopies file logic to a function...
r24010
Pierre-Yves David
copies: make it possible to distinguish betwen _computenonoverlap invocations...
r30196 header = " unmatched files in %s"
if baselabel:
header += ' (from %s)' % baselabel
Durham Goode
copy: move mergecopies file logic to a function...
r24010 if u1:
Pierre-Yves David
copies: make it possible to distinguish betwen _computenonoverlap invocations...
r30196 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
Durham Goode
copy: move mergecopies file logic to a function...
r24010 if u2:
Pierre-Yves David
copies: make it possible to distinguish betwen _computenonoverlap invocations...
r30196 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
Durham Goode
copy: move mergecopies file logic to a function...
r24010 return u1, u2
Matt Mackall
copies: factor out setupctx into _makegetfctx...
def _makegetfctx(ctx):
    """Return a 'getfctx' function suitable for _checkcopies usage.

    The function building 'filectx' has to be set up again for each
    '_checkcopies' call so that the linkrev adjustment is properly set up
    for each of them. Linkrev adjustment is important to avoid bugs in
    rename detection. Moreover, having a proper '_ancestrycontext' set up
    keeps the performance impact of this adjustment limited. Without it,
    each file could do a full dag traversal, making the time complexity of
    the operation explode (see issue4537).

    The returned callable takes (filename, filenode) and is wrapped in
    util.lrucachefunc.

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ancestry = getattr(ctx, '_ancestrycontext', None)
    if ancestry is None:
        # a context without a revision number is anchored on its parents
        if rev is None:
            startrevs = [parent.rev() for parent in ctx.parents()]
        else:
            startrevs = [rev]
        ancestry = repo.changelog.ancestors(startrevs, inclusive=True)
        # stored on the context so that later calls reuse it
        ctx._ancestrycontext = ancestry

    def makectx(f, n):
        if n in node.wdirnodes:  # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for a filectx not created from a changectx
        fctx._ancestrycontext = ancestry
        fctx._descendantrev = rev
        return fctx

    return util.lrucachefunc(makectx)
Gábor Stefanik
mergecopies: add logic to process incomplete data...
r30202 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
"""combine partial copy paths"""
remainder = {}
for f in copyfrom:
if f in copyto:
finalcopy[copyto[f]] = copyfrom[f]
del copyto[f]
for f in incompletediverge:
assert f not in diverge
ic = incompletediverge[f]
if ic[0] in copyto:
diverge[f] = [copyto[ic[0]], ic[1]]
else:
remainder[f] = ic
return remainder
Pierre-Yves David
mergecopies: rename 'ca' to 'base'...
r30186 def mergecopies(repo, c1, c2, base):
Matt Mackall
copies: move findcopies code to its own module...
r6274 """
Matt Mackall
copies: add docstring for mergecopies
r16168 Find moves and copies between context c1 and c2 that are relevant
Pierre-Yves David
mergecopies: rename 'ca' to 'base'...
r30186 for merging. 'base' will be used as the merge base.
Matt Mackall
copies: add docstring for mergecopies
r16168
Gábor Stefanik
graft: support grafting changes to new file in renamed directory (issue5436)
r30581 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
"dirmove".
Matt Mackall
copies: add docstring for mergecopies
r16168
Matt Mackall
copies: fix mergecopies doc mapping direction
r16177 "copy" is a mapping from destination name -> source name,
Matt Mackall
copies: add docstring for mergecopies
r16168 where source is in c1 and destination is in c2 or vice-versa.
Siddharth Agarwal
copies: separate moves via directory renames from explicit copies...
r18134 "movewithdir" is a mapping from source name -> destination name,
where the file at source present in one context but not the other
needs to be moved to destination by the merge process, because the
other context moved the directory it is in.
Matt Mackall
copies: add docstring for mergecopies
r16168 "diverge" is a mapping of source name -> list of destination names
for divergent renames.
Thomas Arendsen Hein
merge: warn about file deleted in one branch and renamed in other (issue3074)...
r16794
"renamedelete" is a mapping of source name -> list of destination
names for files deleted in c1 that were renamed in c2 or vice-versa.
Gábor Stefanik
graft: support grafting changes to new file in renamed directory (issue5436)
r30581
"dirmove" is a mapping of detected source dir -> destination dir renames.
This is needed for handling changes to new files previously grafted into
renamed directories.
Matt Mackall
copies: move findcopies code to its own module...
r6274 """
# avoid silly behavior for update from empty dir
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if not c1 or not c2 or c1 == c2:
Gábor Stefanik
graft: support grafting changes to new file in renamed directory (issue5436)
r30581 return {}, {}, {}, {}, {}
Matt Mackall
copies: move findcopies code to its own module...
r6274
Matt Mackall
copies: teach copies about dirstate.copies...
r6646 # avoid silly behavior for parent -> working dir
Matt Mackall
misc: replace .parents()[0] with p1()
r13878 if c2.node() is None and c1.node() == repo.dirstate.p1():
Gábor Stefanik
graft: support grafting changes to new file in renamed directory (issue5436)
r30581 return repo.dirstate.copies(), {}, {}, {}, {}
Matt Mackall
copies: teach copies about dirstate.copies...
r6646
Durham Goode
copy: add flag for disabling copy tracing...
r26013 # Copy trace disabling is explicitly below the node == p1 logic above
# because the logic above is required for a simple copy to be kept across a
# rebase.
if repo.ui.configbool('experimental', 'disablecopytrace'):
Gábor Stefanik
graft: support grafting changes to new file in renamed directory (issue5436)
r30581 return {}, {}, {}, {}, {}
Durham Goode
copy: add flag for disabling copy tracing...
r26013
Gábor Stefanik
copies: detect graft-like merges...
r30193 # In certain scenarios (e.g. graft, update or rebase), base can be
# overridden. We still need to know a real common ancestor in this case. We
# can't just compute _c1.ancestor(_c2) and compare it to base, because there
# can be multiple common ancestors, e.g. in case of bidmerge. Because our
# caller may not know if the revision passed in lieu of the CA is a genuine
# common ancestor or not without explicitly checking it, it's better to
# determine that here.
#
# base.descendant(wc) and base.descendant(base) are False, work around that
_c1 = c1.p1() if c1.rev() is None else c1
_c2 = c2.p1() if c2.rev() is None else c2
# an endpoint is "dirty" if it isn't a descendant of the merge base
# if we have a dirty endpoint, we need to trigger graft logic, and also
# keep track of which endpoint is dirty
dirtyc1 = not (base == _c1 or base.descendant(_c1))
dirtyc2 = not (base== _c2 or base.descendant(_c2))
graft = dirtyc1 or dirtyc2
Gábor Stefanik
copies: compute a suitable TCA if base turns out to be unsuitable...
r30194 tca = base
if graft:
tca = _c1.ancestor(_c2)
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 limit = _findlimit(repo, c1.rev(), c2.rev())
Patrick Mezard
copies: don't report copies with unrelated branch
r10179 if limit is None:
# no common ancestor, no copies
Gábor Stefanik
graft: support grafting changes to new file in renamed directory (issue5436)
r30581 return {}, {}, {}, {}, {}
Matt Mackall
copies: move debug statement to appropriate place
r26319 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
Matt Mackall
copies: move findcopies code to its own module...
r6274 m1 = c1.manifest()
m2 = c2.manifest()
Pierre-Yves David
mergecopies: rename 'ca' to 'base'...
r30186 mb = base.manifest()
Matt Mackall
copies: move findcopies code to its own module...
r6274
Pierre-Yves David
copies: move variable document from checkcopies to mergecopies...
r30185 # gather data from _checkcopies:
# - diverge = record all diverges in this dict
# - copy = record all non-divergent copies in this dict
# - fullcopy = record all copies in this dict
Gábor Stefanik
mergecopies: add logic to process incomplete data...
r30202 # - incomplete = record non-divergent partial copies here
# - incompletediverge = record divergent partial copies here
Pierre-Yves David
checkcopies: pass data as a dictionary of dictionaries...
r30184 diverge = {} # divergence data is shared
Gábor Stefanik
mergecopies: add logic to process incomplete data...
r30202 incompletediverge = {}
Pierre-Yves David
checkcopies: pass data as a dictionary of dictionaries...
r30184 data1 = {'copy': {},
'fullcopy': {},
Gábor Stefanik
mergecopies: add logic to process incomplete data...
r30202 'incomplete': {},
Pierre-Yves David
checkcopies: pass data as a dictionary of dictionaries...
r30184 'diverge': diverge,
Gábor Stefanik
mergecopies: add logic to process incomplete data...
r30202 'incompletediverge': incompletediverge,
Pierre-Yves David
checkcopies: pass data as a dictionary of dictionaries...
r30184 }
data2 = {'copy': {},
'fullcopy': {},
Gábor Stefanik
mergecopies: add logic to process incomplete data...
r30202 'incomplete': {},
Pierre-Yves David
checkcopies: pass data as a dictionary of dictionaries...
r30184 'diverge': diverge,
Gábor Stefanik
mergecopies: add logic to process incomplete data...
r30202 'incompletediverge': incompletediverge,
Pierre-Yves David
checkcopies: pass data as a dictionary of dictionaries...
r30184 }
Matt Mackall
copies: move findcopies code to its own module...
r6274
Matt Mackall
copies: group bothnew with other sets
r26659 # find interesting file sets from manifests
Pierre-Yves David
mergecopies: rename 'ca' to 'base'...
r30186 addedinm1 = m1.filesnotin(mb)
addedinm2 = m2.filesnotin(mb)
Matt Mackall
copies: group bothnew with other sets
r26659 bothnew = sorted(addedinm1 & addedinm2)
Gábor Stefanik
mergecopies: invoke _computenonoverlap for both base and tca during merges...
r30197 if tca == base:
# unmatched file from base
u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
u1u, u2u = u1r, u2r
else:
# unmatched file from base (DAG rotation in the graft case)
u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
baselabel='base')
# unmatched file from topological common ancestors (no DAG rotation)
# need to recompute this for directory move handling when grafting
mta = tca.manifest()
u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
m2.filesnotin(mta),
baselabel='topological common ancestor')
Matt Mackall
copies: move findcopies code to its own module...
r6274
Gábor Stefanik
copies: split u1/u2 to u1u/u2u and u1r/u2r...
r30047 for f in u1u:
Stanislau Hlebik
copies: remove msrc and mdst parameters...
r32563 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
Mads Kiilerich
copies: remove _checkcopies wrapper - it does no good...
r20989
Gábor Stefanik
copies: split u1/u2 to u1u/u2u and u1r/u2r...
r30047 for f in u2u:
Stanislau Hlebik
copies: remove msrc and mdst parameters...
r32563 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
Matt Mackall
copies: begin separating mergecopies sides
r26316
Pulkit Goyal
py3: use dict.update() instead of constructing lists and adding them...
r32640 copy = dict(data1['copy'])
copy.update(data2['copy'])
fullcopy = dict(data1['fullcopy'])
fullcopy.update(data2['fullcopy'])
Matt Mackall
copies: move findcopies code to its own module...
r6274
Gábor Stefanik
mergecopies: add logic to process incomplete data...
r30202 if dirtyc1:
_combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
incompletediverge)
else:
_combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
incompletediverge)
Thomas Arendsen Hein
merge: warn about file deleted in one branch and renamed in other (issue3074)...
r16794 renamedelete = {}
Matt Mackall
copies: rename renamedelete to renamedeleteset for clarity
r26658 renamedeleteset = set()
Matt Mackall
copies: rename diverge2 to divergeset for clarity
r26317 divergeset = set()
Matt Mackall
copies: move findcopies code to its own module...
r6274 for of, fl in diverge.items():
Thomas Arendsen Hein
merge: do not warn about copy and rename in the same transaction (issue2113)
r16792 if len(fl) == 1 or of in c1 or of in c2:
Dan Villiom Podlaski Christiansen
copies: don't detect copies as "divergent renames"...
r12683 del diverge[of] # not actually divergent, or not a rename
Thomas Arendsen Hein
merge: warn about file deleted in one branch and renamed in other (issue3074)...
r16794 if of not in c1 and of not in c2:
# renamed on one side, deleted on the other side, but filter
# out files that have been renamed and then deleted
renamedelete[of] = [f for f in fl if f in c1 or f in c2]
Matt Mackall
copies: rename renamedelete to renamedeleteset for clarity
r26658 renamedeleteset.update(fl) # reverse map for below
Matt Mackall
copies: move findcopies code to its own module...
r6274 else:
Matt Mackall
copies: rename diverge2 to divergeset for clarity
r26317 divergeset.update(fl) # reverse map for below
Matt Mackall
copies: move findcopies code to its own module...
r6274
Mads Kiilerich
copies: when both sides made the same copy, report it as a copy...
r20641 if bothnew:
repo.ui.debug(" unmatched files new in both:\n %s\n"
% "\n ".join(bothnew))
Pierre-Yves David
checkcopies: pass data as a dictionary of dictionaries...
r30184 bothdiverge = {}
Gábor Stefanik
mergecopies: add logic to process incomplete data...
r30202 bothincompletediverge = {}
Gábor Stefanik
copies: improve assertions during copy recombination...
r30208 remainder = {}
Gábor Stefanik
mergecopies: add logic to process incomplete data...
r30202 both1 = {'copy': {},
'fullcopy': {},
'incomplete': {},
'diverge': bothdiverge,
'incompletediverge': bothincompletediverge
}
both2 = {'copy': {},
'fullcopy': {},
'incomplete': {},
'diverge': bothdiverge,
'incompletediverge': bothincompletediverge
}
Mads Kiilerich
copies: when both sides made the same copy, report it as a copy...
r20641 for f in bothnew:
Stanislau Hlebik
copies: remove msrc and mdst parameters...
r32563 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
Gábor Stefanik
mergecopies: add logic to process incomplete data...
r30202 if dirtyc1:
Gábor Stefanik
copies: improve assertions during copy recombination...
r30208 # incomplete copies may only be found on the "dirty" side for bothnew
assert not both2['incomplete']
Gábor Stefanik
mergecopies: add logic to process incomplete data...
r30202 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
bothincompletediverge)
Gábor Stefanik
copies: improve assertions during copy recombination...
r30208 elif dirtyc2:
assert not both1['incomplete']
remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
bothincompletediverge)
Gábor Stefanik
mergecopies: add logic to process incomplete data...
r30202 else:
Gábor Stefanik
copies: improve assertions during copy recombination...
r30208 # incomplete copies and divergences can't happen outside grafts
assert not both1['incomplete']
assert not both2['incomplete']
assert not bothincompletediverge
Gábor Stefanik
mergecopies: add logic to process incomplete data...
r30202 for f in remainder:
assert f not in bothdiverge
ic = remainder[f]
if ic[0] in (m1 if dirtyc1 else m2):
# backed-out rename on one side, but watch out for deleted files
bothdiverge[f] = ic
Mads Kiilerich
copies: when both sides made the same copy, report it as a copy...
r20641 for of, fl in bothdiverge.items():
if len(fl) == 2 and fl[0] == fl[1]:
copy[fl[0]] = of # not actually divergent, just matching renames
Mads Kiilerich
copies: guard debug section with ui.debugflag
r20990 if fullcopy and repo.ui.debugflag:
Thomas Arendsen Hein
merge: show renamed on one and deleted on the other side in debug output
r16795 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
"% = renamed and deleted):\n")
Mads Kiilerich
copies: report found copies sorted
r18362 for f in sorted(fullcopy):
Matt Mackall
copies: move findcopies code to its own module...
r6274 note = ""
Matt Mackall
many, many trivial check-code fixups
r10282 if f in copy:
note += "*"
Matt Mackall
copies: rename diverge2 to divergeset for clarity
r26317 if f in divergeset:
Matt Mackall
many, many trivial check-code fixups
r10282 note += "!"
Matt Mackall
copies: rename renamedelete to renamedeleteset for clarity
r26658 if f in renamedeleteset:
Thomas Arendsen Hein
merge: show renamed on one and deleted on the other side in debug output
r16795 note += "%"
Siddharth Agarwal
copies: make debug messages more sensible...
r18135 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
note))
Matt Mackall
copies: rename diverge2 to divergeset for clarity
r26317 del divergeset
Matt Mackall
copies: move findcopies code to its own module...
r6274
Matt Mackall
copies: remove checkdirs options...
r16169 if not fullcopy:
Gábor Stefanik
graft: support grafting changes to new file in renamed directory (issue5436)
r30581 return copy, {}, diverge, renamedelete, {}
Matt Mackall
copies: move findcopies code to its own module...
r6274
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" checking for directory renames\n")
Matt Mackall
copies: move findcopies code to its own module...
r6274
# generate a directory move map
Matt Mackall
copies: use ctx.dirs() for directory rename detection
r16178 d1, d2 = c1.dirs(), c2.dirs()
Martin von Zweigbergk
copies: document hack for adding '' to set of dirs...
r25288 # Hack for adding '', which is not otherwise added, to d1 and d2
Bryan O'Sullivan
scmutil: use new dirs class in dirstate and context...
r18899 d1.addpath('/')
d2.addpath('/')
Matt Mackall
copies: re-include root directory in directory rename detection (issue3511)
r17055 invalid = set()
Matt Mackall
copies: move findcopies code to its own module...
r6274 dirmove = {}
# examine each file copy for a potential directory move, which is
# when all the files in a directory are moved to a new directory
Dirkjan Ochtman
use dict.iteritems() rather than dict.items()...
r7622 for dst, src in fullcopy.iteritems():
Durham Goode
copies: switch to using pathutil.dirname...
r25282 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
Matt Mackall
copies: move findcopies code to its own module...
r6274 if dsrc in invalid:
# already seen to be uninteresting
continue
elif dsrc in d1 and ddst in d1:
# directory wasn't entirely moved locally
Matt Mackall
copies: fix detection of divergent directory renames...
r27876 invalid.add(dsrc + "/")
Matt Mackall
copies: move findcopies code to its own module...
r6274 elif dsrc in d2 and ddst in d2:
# directory wasn't entirely moved remotely
Matt Mackall
copies: fix detection of divergent directory renames...
r27876 invalid.add(dsrc + "/")
elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
Matt Mackall
copies: move findcopies code to its own module...
r6274 # files from the same directory moved to two different places
Matt Mackall
copies: fix detection of divergent directory renames...
r27876 invalid.add(dsrc + "/")
Matt Mackall
copies: move findcopies code to its own module...
r6274 else:
# looks good so far
dirmove[dsrc + "/"] = ddst + "/"
for i in invalid:
if i in dirmove:
del dirmove[i]
del d1, d2, invalid
if not dirmove:
Gábor Stefanik
graft: support grafting changes to new file in renamed directory (issue5436)
r30581 return copy, {}, diverge, renamedelete, {}
Matt Mackall
copies: move findcopies code to its own module...
r6274
for d in dirmove:
Siddharth Agarwal
copies: make debug messages more sensible...
r18135 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
(d, dirmove[d]))
Matt Mackall
copies: move findcopies code to its own module...
r6274
Pierre-Yves David
checkcopies: move 'movewithdir' initialisation right before its usage...
r30183 movewithdir = {}
Matt Mackall
copies: move findcopies code to its own module...
r6274 # check unaccounted nonoverlapping files against directory moves
Gábor Stefanik
copies: split u1/u2 to u1u/u2u and u1r/u2r...
r30047 for f in u1r + u2r:
Matt Mackall
copies: move findcopies code to its own module...
r6274 if f not in fullcopy:
for d in dirmove:
if f.startswith(d):
# new file added in a directory that was moved, move it
Matt Mackall
copies: skip directory rename checks when not merging...
r6425 df = dirmove[d] + f[len(d):]
Matt Mackall
copies: don't double-detect items in the directory copy check
r6426 if df not in copy:
Siddharth Agarwal
copies: separate moves via directory renames from explicit copies...
r18134 movewithdir[f] = df
Siddharth Agarwal
copies: make debug messages more sensible...
r18135 repo.ui.debug((" pending file src: '%s' -> "
"dst: '%s'\n") % (f, df))
Matt Mackall
copies: move findcopies code to its own module...
r6274 break
Gábor Stefanik
graft: support grafting changes to new file in renamed directory (issue5436)
r30581 return copy, movewithdir, diverge, renamedelete, dirmove
Durham Goode
copies: refactor checkcopies() into a top level method...
r19178
Pierre-Yves David
checkcopies: extract the '_related' closure...
def _related(f1, f2, limit):
    """return True if f1 and f2 filectx have a common ancestor, else False

    Walk back to common ancestor to see if the two files originate
    from the same file. The two ancestor chains are walked in lockstep:
    whichever side currently has the higher linkrev is advanced, until
    both sides sit on the same linkrev. At that point the files are
    related only if both sides are the very same filectx.

    Since workingfilectx's rev() is None it messes up the integer
    comparison logic, hence the pre-step check for None (f1 and f2 can
    only be workingfilectx's initially).

    f1, f2 = the filectx objects to compare
    limit = kept so existing callers keep working; it is not consulted.
            It used to appear only in a clause that was reached when both
            linkrevs were already equal, so it never changed the outcome.

    Always returns a real boolean, so callers never depend on the
    truthiness of a filectx object.
    """
    if f1 == f2:
        return True # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        # step off any side whose linkrev is None before comparing ints
        if f1.linkrev() is None:
            f1 = next(g1)
        if f2.linkrev() is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:
                # same linkrev on both sides: either this is the shared
                # ancestor or the copy is no longer relevant
                return f1 == f2
    except StopIteration:
        # one chain ran out before the two sides met
        return False
Stanislau Hlebik
copies: remove msrc and mdst parameters...
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    trace f back through its ancestors on the source side and record
    what is learned about copies into data

    srcctx = context in which the walk for f starts
    dstctx = context on the destination side that f is matched against
    f = name of the file to trace (as found in srcctx's manifest)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number the walk does not search beyond
    data = dictionary of dictionaries that receives the copy data (see
           mergecopies); the keys written here are 'copy', 'fullcopy',
           'diverge', 'incomplete' and 'incompletediverge'

    note: limit is only an optimization. There is no guarantee that
    irrelevant revisions get cut off, because there is no easy way to
    make this algorithm stop reliably once it "goes behind a certain
    revision".
    """
    srcmf = srcctx.manifest()
    dstmf = dstctx.manifest()
    basemf = base.manifest()
    tcamf = tca.manifest()
    # 'backwards' may be true when this call is looking for backward
    # renames, which happens for grafts because the DAG is then rotated.
    # When the file exists in both the base and the source, the rename
    # being looked for is not on the source side but on the part of the
    # DAG that is traversed backwards.
    #
    # When there are renames both before and after the base, things are
    # more complicated because a divergence has to be detected; that case
    # is handled with 'backwards = False'.
    backwards = not remotebase and base != tca and f in basemf
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if srcmf[f] == basemf.get(f) and not remotebase:
        # source and base agree on f: nothing to merge
        return

    oldname = None
    seen = {f}
    for ancfctx in getsrcfctx(f, srcmf[f]).ancestors():
        ancrev = ancfctx.linkrev()
        oldname = ancfctx.path()
        if oldname in seen:
            # the limit is checked late so that the last rename before it
            # is still picked up
            if ancrev < limit:
                break
            continue
        seen.add(oldname)

        # remember the pair for directory rename detection
        if backwards:
            # grafting backwards through renames
            data['fullcopy'][oldname] = f
        else:
            data['fullcopy'][f] = oldname

        if oldname not in dstmf:
            # no match on the destination side, keep looking
            continue
        if dstmf[oldname] == basemf.get(oldname):
            # no merge needed, quit early
            return

        dstfctx = getdstfctx(oldname, dstmf[oldname])
        # dstfctx might be a plain new file added on the destination side
        # that has nothing to do with the file being traced
        related = _related(ancfctx, dstfctx, tca.rev())
        if not (related and (oldname == f or oldname == dstfctx.path())):
            continue

        # non-divergent
        if backwards:
            data['copy'][oldname] = f
        elif oldname in basemf:
            data['copy'][f] = oldname
        elif remotebase:
            # special case: a <- b <- a -> b "ping-pong" rename
            data['copy'][oldname] = f
            del data['fullcopy'][f]
            data['fullcopy'][oldname] = f
        else:
            # divergence w.r.t. graft CA on one side of topological CA
            for seenname in seen:
                if seenname in basemf:
                    assert seenname not in data['diverge']
                    data['diverge'][seenname] = [f, oldname]
                    break
        return

    if oldname not in tcamf:
        return
    if backwards or remotebase:
        data['incomplete'][oldname] = f
        return
    for seenname in seen:
        if seenname in basemf:
            if tca == base:
                data['diverge'].setdefault(seenname, []).append(f)
            else:
                data['incompletediverge'][seenname] = [oldname, f]
            return
Matt Mackall
duplicatecopies: move from cmdutil to copies...
r22901
def duplicatecopies(repo, rev, fromrev, skiprev=None):
    '''record in the dirstate the copies found between fromrev and rev

    skiprev, when given, names a revision used to filter the copy
    records: a copy that also shows up between fromrev and skiprev is
    left out, even though it is part of the copies between fromrev and
    rev.
    '''
    # With disablecopytrace set, only the exclusion lookup is skipped and
    # not the whole function: that lookup is O(size of the repo) during a
    # rebase, while the rest is much faster (and is required for carrying
    # copy metadata across the rebase anyway).
    wantexclude = skiprev is not None and not repo.ui.configbool(
        'experimental', 'disablecopytrace')
    if wantexclude:
        skipped = pathcopies(repo[fromrev], repo[skiprev])
    else:
        skipped = {}

    found = pathcopies(repo[fromrev], repo[rev])
    for dst, src in found.iteritems():
        # pathcopies also returns backward renames, so dst might not
        # actually be in the dirstate; only destinations whose dirstate
        # entry is found in "nma" get the copy recorded
        if dst not in skipped and repo.dirstate[dst] in "nma":
            repo.dirstate.copy(src, dst)