copies.py
486 lines
| 16.1 KiB
| text/x-python
|
PythonLexer
/ mercurial / copies.py
Matt Mackall
|
r6274 | # copies.py - copy detection for Mercurial | ||
# | ||||
# Copyright 2008 Matt Mackall <mpm@selenic.com> | ||||
# | ||||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Matt Mackall
|
r6274 | |||
Simon Heimberg
|
r8312 | import util | ||
import heapq | ||||
Matt Mackall
|
r6274 | |||
def _dirname(f):
    """Return everything before the last "/" in f, or "" if f has no "/"."""
    pieces = f.rsplit("/", 1)
    if len(pieces) == 2:
        return pieces[0]
    return ""
Matt Mackall
|
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendant
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.

    a and b are revision numbers; None stands for the working directory and
    is replaced by the pseudo revision len(repo.changelog).
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    # side[rev] is -1 (only reachable from a), 1 (only reachable from b) or
    # 0 (reachable from both)
    side = {a: -1, b: 1}
    # heapq is a min-heap; revisions are stored negated so that the highest
    # revision is popped first
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            # the working directory is not in the changelog; its parents
            # come from the dirstate
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                # negative parent revisions are skipped
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendant of b or vice versa.
    return min(limit, a, b)
Matt Mackall
|
r6429 | |||
Matt Mackall
|
def _chain(src, dst, a, b):
    '''chain two sets of copies a->b

    a and b are copy mappings; a value of b that is a key of a links the
    two into a single entry of the result.  src and dst are only used for
    membership tests ("name in src", "name in dst").  a and b are left
    unmodified; a new mapping is returned.
    '''
    t = a.copy()
    for k, v in b.items():
        if v in t:
            # found a chain
            if t[v] != k:
                # file wasn't renamed back to itself
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            # file is a copy of an existing file
            t[k] = v

    # remove criss-crossed copies
    # (entries are deleted inside the loop, so walk an explicit snapshot
    # rather than relying on items() happening to return a list)
    for k, v in list(t.items()):
        if k in src and v in dst:
            del t[k]

    return t
Mads Kiilerich
|
def _tracefile(fctx, am, limit=-1):
    '''walk the ancestors of fctx and return the first one whose path and
    filenode are recorded in ancestor manifest am

    When limit is non-negative the walk gives up (returning None) at the
    first ancestor whose linkrev() and rev() are both below limit.  None is
    also returned when the ancestors run out without a match.'''
    bounded = limit >= 0
    for anc in fctx.ancestors():
        if am.get(anc.path(), None) == anc.filenode():
            return anc
        if not bounded:
            continue
        if anc.linkrev() < limit and anc.rev() < limit:
            return None
    return None
def _dirstatecopies(d):
    '''return a copy of the dirstate copy map of d's repository, keeping
    only the entries whose key has a dirstate state found in 'anm'

    The dirstate's own copy map is not modified.
    '''
    ds = d._repo.dirstate
    c = ds.copies().copy()
    # entries are deleted inside the loop, so walk an explicit snapshot of
    # the keys rather than relying on keys() happening to return a list
    for k in list(c):
        # NOTE(review): 'a', 'n', 'm' are presumably the added / normal /
        # merged dirstate states - confirm against the dirstate module
        if ds[k] not in 'anm':
            del c[k]
    return c
Durham Goode
|
def _computeforwardmissing(a, b):
    """Return the files recorded in b's manifest but not in a's manifest.

    Kept as a separate function so that extensions can wrap it and observe
    which files _forwardcopies is about to process.
    """
    newer = b.manifest()
    older = a.manifest()
    return newer.filesnotin(older)
Durham Goode
|
r24011 | |||
Matt Mackall
|
def _forwardcopies(a, b):
    '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
    # a context whose rev() is None is the working copy: trace from its
    # first parent instead and fold the dirstate copies in at the end
    wctx = None
    if b.rev() is None:
        wctx = b
        b = wctx.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(wctx)

    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    amanifest = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    copies = {}
    missing = _computeforwardmissing(a, b)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for dst in missing:
        fctx = b[dst]
        fctx._ancestrycontext = ancestrycontext
        origin = _tracefile(fctx, amanifest, limit)
        if origin:
            copies[dst] = origin.path()

    if wctx is None:
        return copies
    # combine copies from dirstate
    return _chain(a, wctx, copies, _dirstatecopies(wctx))
Siddharth Agarwal
|
def _backwardrenames(a, b):
    '''invert the forward copies from b to a, keeping only renames

    An entry whose source is still present in a is a copy, not a rename,
    and is dropped.
    '''
    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames: entries are visited in sorted
    # order and a later one overwrites an earlier one with the same source.
    forward = _forwardcopies(b, a)
    renames = {}
    for dst, src in sorted(forward.iteritems()):
        if src not in a:
            renames[src] = dst
    return renames
def pathcopies(x, y):
    '''find {dst@y: src@x} copy mapping for directed compare'''
    if x == y:
        return {}
    if not x or not y:
        return {}

    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: plain forward trace
        return _forwardcopies(x, y)
    if anc == y:
        # y is an ancestor of x: walk backwards
        return _backwardrenames(x, y)

    # otherwise go back from x to the common ancestor, then forward to y
    backward = _backwardrenames(x, anc)
    forward = _forwardcopies(anc, y)
    return _chain(x, y, backward, forward)
Matt Mackall
|
r15774 | |||
Durham Goode
|
def _computenonoverlap(repo, m1, m2, addedinm1, addedinm2):
    """Return (u1, u2): the sorted files found only in addedinm1 and only in
    addedinm2 respectively, reporting each non-empty list through
    repo.ui.debug.

    This is its own function so extensions can easily wrap this call to see
    what files mergecopies is about to process.

    Even though m1 and m2 are not used in this function, they are useful in
    other extensions for being able to read the file nodes of the changed
    files.
    """
    onlyin1 = sorted(addedinm1 - addedinm2)
    onlyin2 = sorted(addedinm2 - addedinm1)

    reports = [
        (" unmatched files in local:\n %s\n", onlyin1),
        (" unmatched files in other:\n %s\n", onlyin2),
    ]
    for template, files in reports:
        if files:
            repo.ui.debug(template % "\n ".join(files))

    return onlyin1, onlyin2
Matt Mackall
|
def mergecopies(repo, c1, c2, ca):
    """
    Find moves and copies between context c1 and c2 that are relevant
    for merging.

    repo is the repository, c1 and c2 are the two contexts being merged
    (reported as "local" and "other" in the debug output) and ca is the
    context of their common ancestor, whose manifest is the base both
    sides are compared against.

    Returns four dicts: "copy", "movewithdir", "diverge", and
    "renamedelete".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}
    m1 = c1.manifest()
    m2 = c2.manifest()
    ma = ca.manifest()

    def makectx(f, n):
        if len(n) != 20: # in a working context?
            if c1.rev() is None:
                return c1.filectx(f)
            return c2.filectx(f)
        return repo.filectx(f, fileid=n)

    ctx = util.lrucachefunc(makectx)
    copy = {}
    movewithdir = {}
    fullcopy = {}
    diverge = {}

    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    addedinm1 = m1.filesnotin(ma)
    addedinm2 = m2.filesnotin(ma)
    u1, u2 = _computenonoverlap(repo, m1, m2, addedinm1, addedinm2)

    for f in u1:
        checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)

    for f in u2:
        checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)

    renamedelete = {}
    renamedelete2 = set()
    diverge2 = set()
    # entries are deleted from diverge inside the loop, so walk a snapshot
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedelete2.update(fl) # reverse map for below
        else:
            diverge2.update(fl) # reverse map for below

    bothnew = sorted(addedinm1 & addedinm2)
    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge, _copy, _fullcopy = {}, {}, {}
    for f in bothnew:
        checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
        checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in diverge2:
                note += "!"
            if f in renamedelete2:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del diverge2

    if not fullcopy:
        return copy, movewithdir, diverge, renamedelete

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    d1.addpath('/')
    d2.addpath('/')
    # invalid holds directory names as _dirname() returns them (no trailing
    # "/"); dirmove is keyed by the same names with a trailing "/" so that
    # the startswith() test further down only matches whole path components
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = _dirname(src), _dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
            # files from the same directory moved to two different places
            # (the lookup has to use the slash-terminated key and value,
            # otherwise this branch can never be taken)
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc + "/"] = ddst + "/"

    # drop every move recorded for a directory that later turned out to be
    # invalid; again the dirmove key carries the trailing "/"
    for i in invalid:
        if i + "/" in dirmove:
            del dirmove[i + "/"]
    del d1, d2, invalid

    if not dirmove:
        return copy, movewithdir, diverge, renamedelete

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete
Durham Goode
|
r19178 | |||
def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
    """
    check possible copies of f from m1 to m2

    ctx = function accepting (filename, node) that returns a filectx.
    f = the filename to check
    m1 = the source manifest
    m2 = the destination manifest
    ca = the changectx of the common ancestor
    limit = the rev number to not search beyond
    diverge = record all diverges in this dict
    copy = record all non-divergent copies in this dict
    fullcopy = record all copies in this dict

    Nothing is returned; the results are written into diverge, copy and
    fullcopy.
    """

    ma = ca.manifest()

    def _related(f1, f2, limit):
        # Walk back to common ancestor to see if the two files originate
        # from the same file. Since workingfilectx's rev() is None it messes
        # up the integer comparison logic, hence the pre-step check for
        # None (f1 and f2 can only be workingfilectx's initially).
        #
        # Returns the shared file context on a match and False otherwise.

        if f1 == f2:
            return f1 # a match

        g1, g2 = f1.ancestors(), f2.ancestors()
        try:
            f1r, f2r = f1.rev(), f2.rev()

            if f1r is None:
                f1 = g1.next()
            if f2r is None:
                f2 = g2.next()

            # step whichever side currently has the higher rev until the
            # two sides meet or the walk stops being relevant
            while True:
                f1r, f2r = f1.rev(), f2.rev()
                if f1r > f2r:
                    f1 = g1.next()
                elif f2r > f1r:
                    f2 = g2.next()
                elif f1 == f2:
                    return f1 # a match
                elif f1r == f2r or f1r < limit or f2r < limit:
                    return False # copy no longer relevant
        except StopIteration:
            # one side ran out of ancestors without meeting the other
            return False

    of = None
    # paths already visited while walking f's ancestors, starting with f
    seen = set([f])
    for oc in ctx(f, m1[f]).ancestors():
        ocr = oc.rev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        fullcopy[f] = of # remember for dir rename detection
        if of not in m2:
            continue # no match, keep looking
        if m2[of] == ma.get(of):
            break # no merge needed, quit early
        c2 = ctx(of, m2[of])
        # note that the ancestor's rev, not the limit argument, bounds the
        # relatedness walk
        cr = _related(oc, c2, ca.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            copy[f] = of
            # clear of so the divergence check after the loop is skipped
            of = None
            break

    # of is the last ancestor path looked at, or None when the loop found a
    # non-divergent copy or f had no ancestors
    if of in ma:
        diverge.setdefault(of, []).append(f)
Matt Mackall
|
r22901 | |||
def duplicatecopies(repo, rev, fromrev, skiprev=None):
    '''reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    '''
    if skiprev is None:
        exclude = {}
    else:
        exclude = pathcopies(repo[fromrev], repo[skiprev])

    found = pathcopies(repo[fromrev], repo[rev])
    for dst, src in found.iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst not in exclude and repo.dirstate[dst] in "nma":
            repo.dirstate.copy(src, dst)