copies.py
233 lines
| 7.3 KiB
| text/x-python
|
PythonLexer
/ mercurial / copies.py
Matt Mackall
|
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.
Matt Mackall
|
r6274 | |||
from i18n import _ | ||||
Simon Heimberg
|
r8312 | import util | ||
import heapq | ||||
Matt Mackall
|
r6274 | |||
def _nonoverlap(d1, d2, d3):
    """Return a sorted list of the elements of d1 found in neither d2 nor d3.

    d1 is iterated; d2 and d3 only need to support the ``in`` operator.
    """
    return sorted(d for d in d1 if d not in d3 and d not in d2)
Matt Mackall
|
r6274 | |||
def _dirname(f):
    """Return the directory part of the "/"-separated path f.

    Everything before the last "/" is returned.  A path that contains no
    "/" at all yields the empty string.
    """
    s = f.rfind("/")
    if s == -1:
        return ""
    return f[:s]
def _dirs(files):
    """Return the set of every directory that is an ancestor of a file.

    For each path in files, its directory and all enclosing directories are
    added.  The walk up stops at the first directory that is already in the
    set, so each directory is visited once.  Because _dirname("") is "",
    the empty string (top level) is part of the result whenever files is
    non-empty.
    """
    d = set()
    for f in files:
        f = _dirname(f)
        while f not in d:
            d.add(f)
            f = _dirname(f)
    return d
def _findoldnames(fctx, limit):
    """Find files that fctx's path was copied from, back to linkrev limit.

    The ancestry of fctx is walked through parents().  Every ancestor whose
    path() differs from the path of fctx contributes that path as an old
    name.  Ancestors whose rev() is below limit are still recorded, but
    their own parents are not explored.

    Returns the list of old names ordered by the depth at which each was
    first encountered, deepest first (ties broken by name).
    """
    old = {}
    seen = set()
    orig = fctx.path()
    visit = [(fctx, 0)]
    while visit:
        fc, depth = visit.pop()
        # file contexts are de-duplicated by their string form
        s = str(fc)
        if s in seen:
            continue
        seen.add(s)
        path = fc.path()
        if path != orig and path not in old:
            old[path] = (depth, path) # remember depth
        rev = fc.rev()
        # Test for None before comparing: a rev() of None (presumably a
        # working-directory file context) must never be cut off by limit,
        # and None cannot be ordered against an int on every Python version.
        if rev is not None and rev < limit:
            continue
        visit += [(p, depth - 1) for p in fc.parents()]

    # return old names sorted by depth
    return [o[1] for o in sorted(old.values())]
Matt Mackall
|
r6274 | |||
Matt Mackall
|
def _findlimit(repo, a, b):
    """Find the earliest revision that's an ancestor of a or b but not both.

    a and b are revision numbers; None stands for the working directory,
    which is given the pseudo revision len(repo.changelog) and whose
    parents are taken from repo.dirstate.parents().

    Returns the lowest revision number visited that was still on exactly
    one side when it was popped.
    """
    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    # revisions are stored negated so the min-heap pops the highest first
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1
    return limit
Matt Mackall
|
r6429 | |||
Matt Mackall
|
def copies(repo, c1, c2, ca, checkdirs=False):
    """
    Find moves and copies between context c1 and c2

    ca is the context whose manifest is used as the common base.  When
    checkdirs is true, whole-directory renames are detected as well and
    files added under a renamed directory are mapped to the new location.

    Returns a pair (copy, diverge):
    - copy maps a file name to the name it has to be merged with
    - diverge maps an old name to the list of two or more new names it was
      copied or renamed to
    """
    # NOTE(review): runs of whitespace inside the debug messages below may
    # have been collapsed when this text was extracted; confirm against the
    # upstream file before relying on the exact message layout.

    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.parents()[0]:
        return repo.dirstate.copies(), {}

    limit = _findlimit(repo, c1.rev(), c2.rev())
    m1 = c1.manifest()
    m2 = c2.manifest()
    ma = ca.manifest()

    def makectx(f, n):
        if len(n) != 20: # in a working context?
            if c1.rev() is None:
                return c1.filectx(f)
            return c2.filectx(f)
        return repo.filectx(f, fileid=n)
    ctx = util.cachefunc(makectx)

    copy = {}
    fullcopy = {}
    diverge = {}

    def checkcopies(f, m1, m2):
        '''check possible copies of f from m1 to m2'''
        fc1 = ctx(f, m1[f])
        for of in _findoldnames(fc1, limit):
            fullcopy[f] = of # remember for dir rename detection
            if of in m2: # original file not in other manifest?
                # if the original file is unchanged on the other branch,
                # no merge needed
                if m2[of] != ma.get(of):
                    fc2 = ctx(of, m2[of])
                    fca = fc1.ancestor(fc2)
                    # related and named changed on only one side?
                    if fca and (fca.path() == f or fca.path() == fc2.path()):
                        if fc1 != fca or fc2 != fca: # merge needed?
                            copy[f] = of
            elif of in ma:
                diverge.setdefault(of, []).append(f)

    repo.ui.debug(_(" searching for copies back to rev %d\n") % limit)

    u1 = _nonoverlap(m1, m2, ma)
    u2 = _nonoverlap(m2, m1, ma)

    if u1:
        repo.ui.debug(_(" unmatched files in local:\n %s\n")
                      % "\n ".join(u1))
    if u2:
        repo.ui.debug(_(" unmatched files in other:\n %s\n")
                      % "\n ".join(u2))

    for f in u1:
        checkcopies(f, m1, m2)
    for f in u2:
        checkcopies(f, m2, m1)

    diverge2 = set()
    # iterate over a snapshot: entries are deleted from diverge in the loop
    for of, fl in list(diverge.items()):
        if len(fl) == 1:
            del diverge[of] # not actually divergent
        else:
            diverge2.update(fl) # reverse map for below

    if fullcopy:
        repo.ui.debug(_(" all copies found (* = to merge, ! = divergent):\n"))
        for f in fullcopy:
            note = ""
            if f in copy:
                note += "*"
            if f in diverge2:
                note += "!"
            repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))
    del diverge2

    if not fullcopy or not checkdirs:
        return copy, diverge

    repo.ui.debug(_(" checking for directory renames\n"))

    # generate a directory move map
    d1, d2 = _dirs(m1), _dirs(m2)
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.items():
        dsrc, ddst = _dirname(src), _dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far; keyed by the bare directory name so that
            # the conflict check above and the pruning below can find it
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, diverge

    # add the trailing slashes only now, so that the prefix test below
    # matches whole path components
    dirmove = dict((k + "/", v + "/") for k, v in dirmove.items())

    for d in dirmove:
        repo.ui.debug(_(" dir %s -> %s\n") % (d, dirmove[d]))

    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        copy[f] = df
                        repo.ui.debug(_(" file %s -> %s\n") % (f, copy[f]))
                    break

    return copy, diverge