##// END OF EJS Templates
patch: support diff data loss detection and upgrade...
patch: support diff data loss detection and upgrade In worst case, generating diff in upgrade mode can be two times more expensive than generating it in git mode directly: we may have to regenerate the whole diff again whenever a git feature is detected. Also, the first diff attempt is completely buffered instead of being streamed. That said, even without having profiled it yet, I am convinced we can fast-path the upgrade mode if necessary were it to be used in regular diff commands, and not only in mq where avoiding data loss is worth the price.

File last commit:

r10179:83cfa1ba stable
r10189:e451e599 default
Show More
copies.py
245 lines | 7.6 KiB | text/x-python | PythonLexer
Matt Mackall
copies: move findcopies code to its own module...
r6274 # copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.
Matt Mackall
copies: move findcopies code to its own module...
r6274
from i18n import _
Simon Heimberg
separate import lines from mercurial and general python modules
r8312 import util
import heapq
Matt Mackall
copies: move findcopies code to its own module...
r6274
def _nonoverlap(d1, d2, d3):
"Return list of elements in d1 not in d2 or d3"
Matt Mackall
replace util.sort with sorted built-in...
r8209 return sorted([d for d in d1 if d not in d3 and d not in d2])
Matt Mackall
copies: move findcopies code to its own module...
r6274
def _dirname(f):
s = f.rfind("/")
if s == -1:
return ""
return f[:s]
def _dirs(files):
Benoit Boissinot
copies: use set instead of dict
r8468 d = set()
Matt Mackall
copies: move findcopies code to its own module...
r6274 for f in files:
f = _dirname(f)
while f not in d:
Benoit Boissinot
copies: use set instead of dict
r8468 d.add(f)
Matt Mackall
copies: move findcopies code to its own module...
r6274 f = _dirname(f)
return d
def _findoldnames(fctx, limit):
"find files that path was copied from, back to linkrev limit"
old = {}
Benoit Boissinot
copies: use set instead of dict
r8468 seen = set()
Matt Mackall
copies: move findcopies code to its own module...
r6274 orig = fctx.path()
Matt Mackall
copies: sort old names by depth
r6424 visit = [(fctx, 0)]
Matt Mackall
copies: move findcopies code to its own module...
r6274 while visit:
Matt Mackall
copies: sort old names by depth
r6424 fc, depth = visit.pop()
Matt Mackall
copies: move findcopies code to its own module...
r6274 s = str(fc)
if s in seen:
continue
Benoit Boissinot
copies: use set instead of dict
r8468 seen.add(s)
Matt Mackall
copies: move findcopies code to its own module...
r6274 if fc.path() != orig and fc.path() not in old:
Matt Mackall
copies: sort old names by depth
r6424 old[fc.path()] = (depth, fc.path()) # remember depth
Alejandro Santos
compat: can't compare two values of unequal datatypes
r9030 if fc.rev() is not None and fc.rev() < limit:
Matt Mackall
copies: move findcopies code to its own module...
r6274 continue
Matt Mackall
copies: sort old names by depth
r6424 visit += [(p, depth - 1) for p in fc.parents()]
Matt Mackall
copies: move findcopies code to its own module...
r6274
Matt Mackall
copies: sort old names by depth
r6424 # return old names sorted by depth
Matt Mackall
replace util.sort with sorted built-in...
r8209 return [o[1] for o in sorted(old.values())]
Matt Mackall
copies: move findcopies code to its own module...
r6274
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 def _findlimit(repo, a, b):
Patrick Mezard
copies: don't report copies with unrelated branch
r10179 """Find the earliest revision that's an ancestor of a or b but not both,
None if no such revision exists.
"""
Matt Mackall
symmetricdifference: move back to copies...
r6429 # basic idea:
# - mark a and b with different sides
# - if a parent's children are all on the same side, the parent is
# on that side, otherwise it is on no side
# - walk the graph in topological order with the help of a heap;
# - add unseen parents to side map
# - clear side of any parent that has children on different sides
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 # - track number of interesting revs that might still be on a side
# - track the lowest interesting rev seen
# - quit when interesting revs is zero
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430
cl = repo.changelog
Matt Mackall
add __len__ and __iter__ methods to repo and revlog
r6750 working = len(cl) # pseudo rev for the working directory
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if a is None:
a = working
if b is None:
b = working
Matt Mackall
symmetricdifference: move back to copies...
r6429
side = {a: -1, b: 1}
visit = [-a, -b]
heapq.heapify(visit)
interesting = len(visit)
Patrick Mezard
copies: don't report copies with unrelated branch
r10179 hascommonancestor = False
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 limit = working
Matt Mackall
symmetricdifference: move back to copies...
r6429
while interesting:
r = -heapq.heappop(visit)
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if r == working:
parents = [cl.rev(p) for p in repo.dirstate.parents()]
else:
parents = cl.parentrevs(r)
for p in parents:
Patrick Mezard
copies: don't report copies with unrelated branch
r10179 if p < 0:
continue
Matt Mackall
symmetricdifference: move back to copies...
r6429 if p not in side:
# first time we see p; add it to visit
side[p] = side[r]
if side[p]:
interesting += 1
heapq.heappush(visit, -p)
elif side[p] and side[p] != side[r]:
# p was interesting but now we know better
side[p] = 0
interesting -= 1
Patrick Mezard
copies: don't report copies with unrelated branch
r10179 hascommonancestor = True
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if side[r]:
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 limit = r # lowest rev visited
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 interesting -= 1
Patrick Mezard
copies: don't report copies with unrelated branch
r10179
if not hascommonancestor:
return None
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 return limit
Matt Mackall
symmetricdifference: move back to copies...
r6429
Matt Mackall
copies: skip directory rename checks when not merging...
r6425 def copies(repo, c1, c2, ca, checkdirs=False):
Matt Mackall
copies: move findcopies code to its own module...
r6274 """
Find moves and copies between context c1 and c2
"""
# avoid silly behavior for update from empty dir
Matt Mackall
copies: teach symmetric difference about working revisions...
r6430 if not c1 or not c2 or c1 == c2:
Matt Mackall
copies: move findcopies code to its own module...
r6274 return {}, {}
Matt Mackall
copies: teach copies about dirstate.copies...
r6646 # avoid silly behavior for parent -> working dir
Martin Geisler
use 'x is None' instead of 'x == None'...
r8527 if c2.node() is None and c1.node() == repo.dirstate.parents()[0]:
Matt Mackall
copies: teach copies about dirstate.copies...
r6646 return repo.dirstate.copies(), {}
Matt Mackall
copies: refactor symmetricdifference as _findlimit...
r6431 limit = _findlimit(repo, c1.rev(), c2.rev())
Patrick Mezard
copies: don't report copies with unrelated branch
r10179 if limit is None:
# no common ancestor, no copies
return {}, {}
Matt Mackall
copies: move findcopies code to its own module...
r6274 m1 = c1.manifest()
m2 = c2.manifest()
ma = ca.manifest()
def makectx(f, n):
if len(n) != 20: # in a working context?
if c1.rev() is None:
return c1.filectx(f)
return c2.filectx(f)
return repo.filectx(f, fileid=n)
Matt Mackall
fix memory usage of revlog caches by limiting cache size [issue1639]
r9097 ctx = util.lrucachefunc(makectx)
Matt Mackall
copies: move findcopies code to its own module...
r6274 copy = {}
fullcopy = {}
diverge = {}
def checkcopies(f, m1, m2):
'''check possible copies of f from m1 to m2'''
c1 = ctx(f, m1[f])
for of in _findoldnames(c1, limit):
fullcopy[f] = of # remember for dir rename detection
if of in m2: # original file not in other manifest?
# if the original file is unchanged on the other branch,
# no merge needed
if m2[of] != ma.get(of):
c2 = ctx(of, m2[of])
ca = c1.ancestor(c2)
# related and named changed on only one side?
Matt Mackall
copies: fix silly precedence bug
r6422 if ca and (ca.path() == f or ca.path() == c2.path()):
Matt Mackall
copies: move findcopies code to its own module...
r6274 if c1 != ca or c2 != ca: # merge needed?
copy[f] = of
elif of in ma:
diverge.setdefault(of, []).append(f)
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
Matt Mackall
copies: move findcopies code to its own module...
r6274
u1 = _nonoverlap(m1, m2, ma)
u2 = _nonoverlap(m2, m1, ma)
if u1:
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" unmatched files in local:\n %s\n"
Matt Mackall
copies: move findcopies code to its own module...
r6274 % "\n ".join(u1))
if u2:
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" unmatched files in other:\n %s\n"
Matt Mackall
copies: move findcopies code to its own module...
r6274 % "\n ".join(u2))
for f in u1:
checkcopies(f, m1, m2)
for f in u2:
checkcopies(f, m2, m1)
Martin Geisler
replace set-like dictionaries with real sets...
r8152 diverge2 = set()
Matt Mackall
copies: move findcopies code to its own module...
r6274 for of, fl in diverge.items():
if len(fl) == 1:
del diverge[of] # not actually divergent
else:
Martin Geisler
replace set-like dictionaries with real sets...
r8152 diverge2.update(fl) # reverse map for below
Matt Mackall
copies: move findcopies code to its own module...
r6274
if fullcopy:
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" all copies found (* = to merge, ! = divergent):\n")
Matt Mackall
copies: move findcopies code to its own module...
r6274 for f in fullcopy:
note = ""
if f in copy: note += "*"
if f in diverge2: note += "!"
Martin Geisler
copies: don't translate untranslatable string
r8337 repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))
Matt Mackall
copies: move findcopies code to its own module...
r6274 del diverge2
Matt Mackall
copies: skip directory rename checks when not merging...
r6425 if not fullcopy or not checkdirs:
Matt Mackall
copies: move findcopies code to its own module...
r6274 return copy, diverge
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" checking for directory renames\n")
Matt Mackall
copies: move findcopies code to its own module...
r6274
# generate a directory move map
d1, d2 = _dirs(m1), _dirs(m2)
Benoit Boissinot
copies: use set instead of dict
r8468 invalid = set()
Matt Mackall
copies: move findcopies code to its own module...
r6274 dirmove = {}
# examine each file copy for a potential directory move, which is
# when all the files in a directory are moved to a new directory
Dirkjan Ochtman
use dict.iteritems() rather than dict.items()...
r7622 for dst, src in fullcopy.iteritems():
Matt Mackall
copies: move findcopies code to its own module...
r6274 dsrc, ddst = _dirname(src), _dirname(dst)
if dsrc in invalid:
# already seen to be uninteresting
continue
elif dsrc in d1 and ddst in d1:
# directory wasn't entirely moved locally
Benoit Boissinot
copies: use set instead of dict
r8468 invalid.add(dsrc)
Matt Mackall
copies: move findcopies code to its own module...
r6274 elif dsrc in d2 and ddst in d2:
# directory wasn't entirely moved remotely
Benoit Boissinot
copies: use set instead of dict
r8468 invalid.add(dsrc)
Matt Mackall
copies: move findcopies code to its own module...
r6274 elif dsrc in dirmove and dirmove[dsrc] != ddst:
# files from the same directory moved to two different places
Benoit Boissinot
copies: use set instead of dict
r8468 invalid.add(dsrc)
Matt Mackall
copies: move findcopies code to its own module...
r6274 else:
# looks good so far
dirmove[dsrc + "/"] = ddst + "/"
for i in invalid:
if i in dirmove:
del dirmove[i]
del d1, d2, invalid
if not dirmove:
return copy, diverge
for d in dirmove:
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" dir %s -> %s\n" % (d, dirmove[d]))
Matt Mackall
copies: move findcopies code to its own module...
r6274
# check unaccounted nonoverlapping files against directory moves
for f in u1 + u2:
if f not in fullcopy:
for d in dirmove:
if f.startswith(d):
# new file added in a directory that was moved, move it
Matt Mackall
copies: skip directory rename checks when not merging...
r6425 df = dirmove[d] + f[len(d):]
Matt Mackall
copies: don't double-detect items in the directory copy check
r6426 if df not in copy:
copy[f] = df
Martin Geisler
do not attempt to translate ui.debug output
r9467 repo.ui.debug(" file %s -> %s\n" % (f, copy[f]))
Matt Mackall
copies: move findcopies code to its own module...
r6274 break
return copy, diverge