copies.py
351 lines
| 10.3 KiB
| text/x-python
|
PythonLexer
/ mercurial / copies.py
Matt Mackall
|
r6274 | # copies.py - copy detection for Mercurial | ||
# | ||||
# Copyright 2008 Matt Mackall <mpm@selenic.com> | ||||
# | ||||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Matt Mackall
|
r6274 | |||
Simon Heimberg
|
r8312 | import util | ||
import heapq | ||||
Matt Mackall
|
r6274 | |||
def _nonoverlap(d1, d2, d3): | ||||
"Return list of elements in d1 not in d2 or d3" | ||||
Matt Mackall
|
r8209 | return sorted([d for d in d1 if d not in d3 and d not in d2]) | ||
Matt Mackall
|
r6274 | |||
def _dirname(f): | ||||
s = f.rfind("/") | ||||
if s == -1: | ||||
return "" | ||||
return f[:s] | ||||
def _dirs(files):
    '''return the set of every directory prefix of the given file names

    Each name is reduced repeatedly with _dirname(); the walk up stops at
    the first prefix already recorded.  The empty string (what _dirname()
    returns for a name without "/") is recorded as well.
    '''
    seen = set()
    for path in files:
        while True:
            path = _dirname(path)
            if path in seen:
                # this prefix and all of its parents are already known
                break
            seen.add(path)
    return seen
Matt Mackall
|
r6431 | def _findlimit(repo, a, b): | ||
Patrick Mezard
|
r10179 | """Find the earliest revision that's an ancestor of a or b but not both, | ||
None if no such revision exists. | ||||
""" | ||||
Matt Mackall
|
r6429 | # basic idea: | ||
# - mark a and b with different sides | ||||
# - if a parent's children are all on the same side, the parent is | ||||
# on that side, otherwise it is on no side | ||||
# - walk the graph in topological order with the help of a heap; | ||||
# - add unseen parents to side map | ||||
# - clear side of any parent that has children on different sides | ||||
Matt Mackall
|
r6431 | # - track number of interesting revs that might still be on a side | ||
# - track the lowest interesting rev seen | ||||
# - quit when interesting revs is zero | ||||
Matt Mackall
|
r6430 | |||
cl = repo.changelog | ||||
Matt Mackall
|
r6750 | working = len(cl) # pseudo rev for the working directory | ||
Matt Mackall
|
r6430 | if a is None: | ||
a = working | ||||
if b is None: | ||||
b = working | ||||
Matt Mackall
|
r6429 | |||
side = {a: -1, b: 1} | ||||
visit = [-a, -b] | ||||
heapq.heapify(visit) | ||||
interesting = len(visit) | ||||
Patrick Mezard
|
r10179 | hascommonancestor = False | ||
Matt Mackall
|
r6431 | limit = working | ||
Matt Mackall
|
r6429 | |||
while interesting: | ||||
r = -heapq.heappop(visit) | ||||
Matt Mackall
|
r6430 | if r == working: | ||
parents = [cl.rev(p) for p in repo.dirstate.parents()] | ||||
else: | ||||
parents = cl.parentrevs(r) | ||||
for p in parents: | ||||
Patrick Mezard
|
r10179 | if p < 0: | ||
continue | ||||
Matt Mackall
|
r6429 | if p not in side: | ||
# first time we see p; add it to visit | ||||
side[p] = side[r] | ||||
if side[p]: | ||||
interesting += 1 | ||||
heapq.heappush(visit, -p) | ||||
elif side[p] and side[p] != side[r]: | ||||
# p was interesting but now we know better | ||||
side[p] = 0 | ||||
interesting -= 1 | ||||
Patrick Mezard
|
r10179 | hascommonancestor = True | ||
Matt Mackall
|
r6430 | if side[r]: | ||
Matt Mackall
|
r6431 | limit = r # lowest rev visited | ||
Matt Mackall
|
r6430 | interesting -= 1 | ||
Patrick Mezard
|
r10179 | |||
if not hascommonancestor: | ||||
return None | ||||
Matt Mackall
|
r6431 | return limit | ||
Matt Mackall
|
r6429 | |||
Matt Mackall
|
r15775 | def _chain(src, dst, a, b): | ||
'''chain two sets of copies a->b''' | ||||
t = a.copy() | ||||
for k, v in b.iteritems(): | ||||
if v in t: | ||||
# found a chain | ||||
if t[v] != k: | ||||
# file wasn't renamed back to itself | ||||
t[k] = t[v] | ||||
if v not in dst: | ||||
# chain was a rename, not a copy | ||||
del t[v] | ||||
if v in src: | ||||
# file is a copy of an existing file | ||||
t[k] = v | ||||
return t | ||||
def _tracefile(fctx, actx): | ||||
'''return file context that is the ancestor of fctx present in actx''' | ||||
stop = actx.rev() | ||||
am = actx.manifest() | ||||
for f in fctx.ancestors(): | ||||
if am.get(f.path(), None) == f.filenode(): | ||||
return f | ||||
if f.rev() < stop: | ||||
return None | ||||
def _dirstatecopies(d): | ||||
ds = d._repo.dirstate | ||||
c = ds.copies().copy() | ||||
for k in c.keys(): | ||||
if ds[k] not in 'anm': | ||||
del c[k] | ||||
return c | ||||
def _forwardcopies(a, b): | ||||
'''find {dst@b: src@a} copy mapping where a is an ancestor of b''' | ||||
# check for working copy | ||||
w = None | ||||
if b.rev() is None: | ||||
w = b | ||||
b = w.p1() | ||||
if a == b: | ||||
# short-circuit to avoid issues with merge states | ||||
return _dirstatecopies(w) | ||||
# find where new files came from | ||||
# we currently don't try to find where old files went, too expensive | ||||
# this means we can miss a case like 'hg rm b; hg cp a b' | ||||
cm = {} | ||||
for f in b: | ||||
if f not in a: | ||||
ofctx = _tracefile(b[f], a) | ||||
if ofctx: | ||||
cm[f] = ofctx.path() | ||||
# combine copies from dirstate if necessary | ||||
if w is not None: | ||||
cm = _chain(a, w, cm, _dirstatecopies(w)) | ||||
return cm | ||||
def _backwardcopies(a, b):
    '''invert the forward copy mapping from b to a

    Returns {src: dst} built from _forwardcopies(b, a), which is
    {dst: src}.
    '''
    # because the forward mapping is 1:n, we can lose renames here
    # in particular, we find renames better than copies
    f = _forwardcopies(b, a)
    r = {}
    # walk in sorted order so that, when several destinations share one
    # source, the surviving entry is always the same one (the greatest
    # destination name) instead of depending on dict ordering
    for k, v in sorted(f.items()):
        r[v] = k
    return r
def pathcopies(x, y):
    '''find {dst@y: src@x} copy mapping for directed compare

    Returns an empty dict when x equals y or either is false.  Otherwise
    the ancestor y.ancestor(x) selects a forward trace, a backward trace,
    or a backward trace chained with a forward one.
    '''
    if x == y or not x or not y:
        return {}
    base = y.ancestor(x)
    if base == x:
        # x is an ancestor of y
        copies = _forwardcopies(x, y)
    elif base == y:
        # y is an ancestor of x
        copies = _backwardcopies(x, y)
    else:
        # go back from x to the ancestor, then forward to y
        back = _backwardcopies(x, base)
        forward = _forwardcopies(base, y)
        copies = _chain(x, y, back, forward)
    return copies
Matt Mackall
|
r15774 | |||
def mergecopies(repo, c1, c2, ca, checkdirs=True):
    """
    Find moves and copies between context c1 and c2

    ca is the context used as the merge ancestor.  Returns a tuple
    (copy, diverge):

    - copy maps a file name to the name it has to be merged with: either
      the origin found by walking its history, or (when checkdirs is
      true) its location after a detected directory move
    - diverge maps an origin that exists in ca to the list of files that
      were found to descend from it under different names

    Both are empty when either context is false, when c1 == c2, or when
    the two revisions have no common ancestor.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}
    m1 = c1.manifest()
    m2 = c2.manifest()
    ma = ca.manifest()

    def makectx(f, n):
        if len(n) != 20: # in a working context?
            if c1.rev() is None:
                return c1.filectx(f)
            return c2.filectx(f)
        return repo.filectx(f, fileid=n)

    ctx = util.lrucachefunc(makectx)
    copy = {}
    fullcopy = {}
    diverge = {}

    def related(f1, f2, limit):
        # Walk back to common ancestor to see if the two files originate
        # from the same file. Since workingfilectx's rev() is None it messes
        # up the integer comparison logic, hence the pre-step check for
        # None (f1 and f2 can only be workingfilectx's initially).
        #
        # Returns f1 on a match, False otherwise.

        if f1 == f2:
            return f1 # a match

        g1, g2 = f1.ancestors(), f2.ancestors()
        try:
            f1r, f2r = f1.rev(), f2.rev()

            if f1r is None:
                f1 = g1.next()
            if f2r is None:
                f2 = g2.next()

            while True:
                f1r, f2r = f1.rev(), f2.rev()
                # always step back on the side with the higher revision
                if f1r > f2r:
                    f1 = g1.next()
                elif f2r > f1r:
                    f2 = g2.next()
                elif f1 == f2:
                    return f1 # a match
                elif f1r == f2r or f1r < limit or f2r < limit:
                    return False # copy no longer relevant
        except StopIteration:
            # one history ran out before a match was found
            return False

    def checkcopies(f, m1, m2):
        '''check possible copies of f from m1 to m2'''
        of = None
        seen = set([f])
        for oc in ctx(f, m1[f]).ancestors():
            ocr = oc.rev()
            of = oc.path()
            if of in seen:
                # check limit late - grab last rename before
                if ocr < limit:
                    break
                continue
            seen.add(of)

            fullcopy[f] = of # remember for dir rename detection
            if of not in m2:
                continue # no match, keep looking
            if m2[of] == ma.get(of):
                break # no merge needed, quit early
            oc2 = ctx(of, m2[of])
            cr = related(oc, oc2, ca.rev())
            if cr and (of == f or of == oc2.path()): # non-divergent
                copy[f] = of
                of = None
                break

        # of is the last origin examined (None if a merge partner was
        # found or f has no ancestors)
        if of in ma:
            diverge.setdefault(of, []).append(f)

    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    u1 = _nonoverlap(m1, m2, ma)
    u2 = _nonoverlap(m2, m1, ma)

    if u1:
        repo.ui.debug(" unmatched files in local:\n %s\n"
                      % "\n ".join(u1))
    if u2:
        repo.ui.debug(" unmatched files in other:\n %s\n"
                      % "\n ".join(u2))

    for f in u1:
        checkcopies(f, m1, m2)
    for f in u2:
        checkcopies(f, m2, m1)

    diverge2 = set()
    # iterate over a snapshot: entries are deleted inside the loop
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
        else:
            diverge2.update(fl) # reverse map for below

    if fullcopy:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent):\n")
        for f in fullcopy:
            note = ""
            if f in copy:
                note += "*"
            if f in diverge2:
                note += "!"
            repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))
    del diverge2

    if not fullcopy or not checkdirs:
        return copy, diverge

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = _dirs(m1), _dirs(m2)
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.items():
        dsrc, ddst = _dirname(src), _dirname(dst)
        # dirmove and invalid are both keyed by the directory name with a
        # trailing "/" so that their membership tests agree; d1 and d2
        # hold the bare directory names
        ksrc, kdst = dsrc + "/", ddst + "/"
        if ksrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(ksrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(ksrc)
        elif ksrc in dirmove and dirmove[ksrc] != kdst:
            # files from the same directory moved to two different places
            invalid.add(ksrc)
        else:
            # looks good so far
            dirmove[ksrc] = kdst

    # drop moves recorded before their source directory was invalidated
    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, diverge

    for d in dirmove:
        repo.ui.debug(" dir %s -> %s\n" % (d, dirmove[d]))

    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        copy[f] = df
                        repo.ui.debug(" file %s -> %s\n" % (f, copy[f]))
                    break

    return copy, diverge