##// END OF EJS Templates
avoid .split() in for loops and use tuples instead...
avoid .split() in for loops and use tuples instead split can be more readable for longer lists like the list in dirstate.invalidate. As dirstate.invalidate is used in wlock() and therefoe used heavily, I think it's worth avoiding a split there too.

File last commit:

r12683:ada47c38 default
r13200:6f011cf5 default
Show More
copies.py
267 lines | 8.1 KiB | text/x-python | PythonLexer
Matt Mackall
copies: move findcopies code to its own module...
r6274 # copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Matt Mackall
copies: move findcopies code to its own module...
r6274
Simon Heimberg
separate import lines from mercurial and general python modules
r8312 import util
import heapq
Matt Mackall
copies: move findcopies code to its own module...
r6274
def _nonoverlap(d1, d2, d3):
"Return list of elements in d1 not in d2 or d3"
Matt Mackall
replace util.sort with sorted built-in...
r8209 return sorted([d for d in d1 if d not in d3 and d not in d2])
Matt Mackall
copies: move findcopies code to its own module...
r6274
def _dirname(f):
s = f.rfind("/")
if s == -1:
return ""
return f[:s]
def _dirs(files):
Benoit Boissinot
copies: use set instead of dict
r8468 d = set()
Matt Mackall
copies: move findcopies code to its own module...
r6274 for f in files:
f = _dirname(f)
while f not in d:
Benoit Boissinot
copies: use set instead of dict
r8468 d.add(f)
Matt Mackall
copies: move findcopies code to its own module...
r6274 f = _dirname(f)
return d
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 def _findlimit(repo, a, b):
Patrick Mezard
copies: don't report copies with unrelated branch
r10179 """Find the earliest revision that's an ancestor of a or b but not both,
None if no such revision exists.
"""
Matt Mackall
symmetricdifference: move back to copies...
r6429 # basic idea:
# - mark a and b with different sides
# - if a parent's children are all on the same side, the parent is
# on that side, otherwise it is on no side
# - walk the graph in topological order with the help of a heap;
# - add unseen parents to side map
# - clear side of any parent that has children on different sides
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 # - track number of interesting revs that might still be on a side
# - track the lowest interesting rev seen
# - quit when interesting revs is zero
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430
cl = repo.changelog
Matt Mackall
add __len__ and __iter__ methods to repo and revlog
r6750 working = len(cl) # pseudo rev for the working directory
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if a is None:
a = working
if b is None:
b = working
Matt Mackall
symmetricdifference: move back to copies...
r6429
side = {a: -1, b: 1}
visit = [-a, -b]
heapq.heapify(visit)
interesting = len(visit)
Patrick Mezard
copies: don't report copies with unrelated branch
r10179 hascommonancestor = False
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 limit = working
Matt Mackall
symmetricdifference: move back to copies...
r6429
while interesting:
r = -heapq.heappop(visit)
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if r == working:
parents = [cl.rev(p) for p in repo.dirstate.parents()]
else:
parents = cl.parentrevs(r)
for p in parents:
Patrick Mezard
copies: don't report copies with unrelated branch
r10179 if p < 0:
continue
Matt Mackall
symmetricdifference: move back to copies...
r6429 if p not in side:
# first time we see p; add it to visit
side[p] = side[r]
if side[p]:
interesting += 1
heapq.heappush(visit, -p)
elif side[p] and side[p] != side[r]:
# p was interesting but now we know better
side[p] = 0
interesting -= 1
Patrick Mezard
copies: don't report copies with unrelated branch
r10179 hascommonancestor = True
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if side[r]:
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 limit = r # lowest rev visited
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 interesting -= 1
Patrick Mezard
copies: don't report copies with unrelated branch
r10179
if not hascommonancestor:
return None
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 return limit
Matt Mackall
symmetricdifference: move back to copies...
r6429
Matt Mackall
copies: skip directory rename checks when not merging...
r6425 def copies(repo, c1, c2, ca, checkdirs=False):
Matt Mackall
copies: move findcopies code to its own module...
r6274 """
Find moves and copies between context c1 and c2
"""
# avoid silly behavior for update from empty dir
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if not c1 or not c2 or c1 == c2:
Matt Mackall
copies: move findcopies code to its own module...
r6274 return {}, {}
Matt Mackall
copies: teach copies about dirstate.copies...
r6646 # avoid silly behavior for parent -> working dir
Martin Geisler
use 'x is None' instead of 'x == None'...
r8527 if c2.node() is None and c1.node() == repo.dirstate.parents()[0]:
Matt Mackall
copies: teach copies about dirstate.copies...
r6646 return repo.dirstate.copies(), {}
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 limit = _findlimit(repo, c1.rev(), c2.rev())
Patrick Mezard
copies: don't report copies with unrelated branch
r10179 if limit is None:
# no common ancestor, no copies
return {}, {}
Matt Mackall
copies: move findcopies code to its own module...
r6274 m1 = c1.manifest()
m2 = c2.manifest()
ma = ca.manifest()
def makectx(f, n):
if len(n) != 20: # in a working context?
if c1.rev() is None:
return c1.filectx(f)
return c2.filectx(f)
return repo.filectx(f, fileid=n)
Matt Mackall
fix memory usage of revlog caches by limiting cache size [issue1639]
r9097 ctx = util.lrucachefunc(makectx)
Matt Mackall
copies: move findcopies code to its own module...
r6274 copy = {}
fullcopy = {}
diverge = {}
Matt Mackall
copies: speed up copy detection...
r10262 def related(f1, f2, limit):
Henrik Stuart
copies: properly visit file context ancestors on working file contexts
r10874 # Walk back to common ancestor to see if the two files originate
# from the same file. Since workingfilectx's rev() is None it messes
# up the integer comparison logic, hence the pre-step check for
# None (f1 and f2 can only be workingfilectx's initially).
if f1 == f2:
return f1 # a match
Matt Mackall
copies: speed up copy detection...
r10262 g1, g2 = f1.ancestors(), f2.ancestors()
try:
Henrik Stuart
copies: properly visit file context ancestors on working file contexts
r10874 f1r, f2r = f1.rev(), f2.rev()
if f1r is None:
f1 = g1.next()
if f2r is None:
f2 = g2.next()
Matt Mackall
copies: speed up copy detection...
r10262 while 1:
f1r, f2r = f1.rev(), f2.rev()
if f1r > f2r:
f1 = g1.next()
elif f2r > f1r:
f2 = g2.next()
elif f1 == f2:
return f1 # a match
elif f1r == f2r or f1r < limit or f2r < limit:
return False # copy no longer relevant
except StopIteration:
return False
Matt Mackall
copies: move findcopies code to its own module...
r6274 def checkcopies(f, m1, m2):
'''check possible copies of f from m1 to m2'''
Matt Mackall
copies: speed up copy detection...
r10262 of = None
seen = set([f])
for oc in ctx(f, m1[f]).ancestors():
ocr = oc.rev()
of = oc.path()
if of in seen:
# check limit late - grab last rename before
if ocr < limit:
break
continue
seen.add(of)
Matt Mackall
copies: move findcopies code to its own module...
r6274 fullcopy[f] = of # remember for dir rename detection
Matt Mackall
copies: speed up copy detection...
r10262 if of not in m2:
continue # no match, keep looking
if m2[of] == ma.get(of):
break # no merge needed, quit early
c2 = ctx(of, m2[of])
cr = related(oc, c2, ca.rev())
Benoit Boissinot
copies: check if revisions are related (bug found with pylint)
r10313 if cr and (of == f or of == c2.path()): # non-divergent
Matt Mackall
copies: speed up copy detection...
r10262 copy[f] = of
of = None
break
if of in ma:
diverge.setdefault(of, []).append(f)
Matt Mackall
copies: move findcopies code to its own module...
r6274
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
Matt Mackall
copies: move findcopies code to its own module...
r6274
u1 = _nonoverlap(m1, m2, ma)
u2 = _nonoverlap(m2, m1, ma)
if u1:
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" unmatched files in local:\n %s\n"
Matt Mackall
copies: move findcopies code to its own module...
r6274 % "\n ".join(u1))
if u2:
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" unmatched files in other:\n %s\n"
Matt Mackall
copies: move findcopies code to its own module...
r6274 % "\n ".join(u2))
for f in u1:
checkcopies(f, m1, m2)
for f in u2:
checkcopies(f, m2, m1)
Martin Geisler
replace set-like dictionaries with real sets...
r8152 diverge2 = set()
Matt Mackall
copies: move findcopies code to its own module...
r6274 for of, fl in diverge.items():
Dan Villiom Podlaski Christiansen
copies: don't detect copies as "divergent renames"...
r12683 if len(fl) == 1 or of in c2:
del diverge[of] # not actually divergent, or not a rename
Matt Mackall
copies: move findcopies code to its own module...
r6274 else:
Martin Geisler
replace set-like dictionaries with real sets...
r8152 diverge2.update(fl) # reverse map for below
Matt Mackall
copies: move findcopies code to its own module...
r6274
if fullcopy:
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" all copies found (* = to merge, ! = divergent):\n")
Matt Mackall
copies: move findcopies code to its own module...
r6274 for f in fullcopy:
note = ""
Matt Mackall
many, many trivial check-code fixups
r10282 if f in copy:
note += "*"
if f in diverge2:
note += "!"
Martin Geisler
copies: don't translate untranslatable string
r8337 repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))
Matt Mackall
copies: move findcopies code to its own module...
r6274 del diverge2
Matt Mackall
copies: skip directory rename checks when not merging...
r6425 if not fullcopy or not checkdirs:
Matt Mackall
copies: move findcopies code to its own module...
r6274 return copy, diverge
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" checking for directory renames\n")
Matt Mackall
copies: move findcopies code to its own module...
r6274
# generate a directory move map
d1, d2 = _dirs(m1), _dirs(m2)
Benoit Boissinot
copies: use set instead of dict
r8468 invalid = set()
Matt Mackall
copies: move findcopies code to its own module...
r6274 dirmove = {}
# examine each file copy for a potential directory move, which is
# when all the files in a directory are moved to a new directory
Dirkjan Ochtman
use dict.iteritems() rather than dict.items()...
r7622 for dst, src in fullcopy.iteritems():
Matt Mackall
copies: move findcopies code to its own module...
r6274 dsrc, ddst = _dirname(src), _dirname(dst)
if dsrc in invalid:
# already seen to be uninteresting
continue
elif dsrc in d1 and ddst in d1:
# directory wasn't entirely moved locally
Benoit Boissinot
copies: use set instead of dict
r8468 invalid.add(dsrc)
Matt Mackall
copies: move findcopies code to its own module...
r6274 elif dsrc in d2 and ddst in d2:
# directory wasn't entirely moved remotely
Benoit Boissinot
copies: use set instead of dict
r8468 invalid.add(dsrc)
Matt Mackall
copies: move findcopies code to its own module...
r6274 elif dsrc in dirmove and dirmove[dsrc] != ddst:
# files from the same directory moved to two different places
Benoit Boissinot
copies: use set instead of dict
r8468 invalid.add(dsrc)
Matt Mackall
copies: move findcopies code to its own module...
r6274 else:
# looks good so far
dirmove[dsrc + "/"] = ddst + "/"
for i in invalid:
if i in dirmove:
del dirmove[i]
del d1, d2, invalid
if not dirmove:
return copy, diverge
for d in dirmove:
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" dir %s -> %s\n" % (d, dirmove[d]))
Matt Mackall
copies: move findcopies code to its own module...
r6274
# check unaccounted nonoverlapping files against directory moves
for f in u1 + u2:
if f not in fullcopy:
for d in dirmove:
if f.startswith(d):
# new file added in a directory that was moved, move it
Matt Mackall
copies: skip directory rename checks when not merging...
r6425 df = dirmove[d] + f[len(d):]
Matt Mackall
copies: don't double-detect items in the directory copy check
r6426 if df not in copy:
copy[f] = df
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" file %s -> %s\n" % (f, copy[f]))
Matt Mackall
copies: move findcopies code to its own module...
r6274 break
return copy, diverge