diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py --- a/mercurial/cmdutil.py +++ b/mercurial/cmdutil.py @@ -10,6 +10,7 @@ from i18n import _ import os, sys, errno, re, glob, tempfile import mdiff, bdiff, util, templater, patch, error, encoding, templatekw import match as _match +import similar revrangesep = ':' @@ -286,52 +287,6 @@ def matchall(repo): def matchfiles(repo, files): return _match.exact(repo.root, repo.getcwd(), files) -def findrenames(repo, added, removed, threshold): - '''find renamed files -- yields (before, after, score) tuples''' - copies = {} - ctx = repo['.'] - for i, r in enumerate(removed): - repo.ui.progress(_('searching'), i, total=len(removed)) - if r not in ctx: - continue - fctx = ctx.filectx(r) - - # lazily load text - @util.cachefunc - def data(): - orig = fctx.data() - return orig, mdiff.splitnewlines(orig) - - def score(text): - if not len(text): - return 0.0 - if not fctx.cmp(text): - return 1.0 - if threshold == 1.0: - return 0.0 - orig, lines = data() - # bdiff.blocks() returns blocks of matching lines - # count the number of bytes in each - equal = 0 - matches = bdiff.blocks(text, orig) - for x1, x2, y1, y2 in matches: - for line in lines[y1:y2]: - equal += len(line) - - lengths = len(text) + len(orig) - return equal * 2.0 / lengths - - for a in added: - bestscore = copies.get(a, (None, threshold))[1] - myscore = score(repo.wread(a)) - if myscore >= bestscore: - copies[a] = (r, myscore) - repo.ui.progress(_('searching'), None) - - for dest, v in copies.iteritems(): - source, score = v - yield source, dest, score - def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None): if dry_run is None: dry_run = opts.get('dry_run') @@ -366,8 +321,8 @@ def addremove(repo, pats=[], opts={}, dr added.append(abs) copies = {} if similarity > 0: - for old, new, score in findrenames(repo, added + unknown, - removed + deleted, similarity): + for old, new, score in similar.findrenames(repo, + added + unknown, removed + deleted, similarity): if repo.ui.verbose or not m.exact(old) or not m.exact(new): repo.ui.status(_('recording removal of %s as rename to %s ' '(%d%% similar)\n') % diff --git a/mercurial/similar.py b/mercurial/similar.py new file mode 100644 --- /dev/null +++ b/mercurial/similar.py @@ -0,0 +1,59 @@ +# similar.py - mechanisms for finding similar files +# +# Copyright 2005-2007 Matt Mackall +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +from i18n import _ +import util +import mdiff +import bdiff + +def findrenames(repo, added, removed, threshold): + '''find renamed files -- yields (before, after, score) tuples''' + copies = {} + ctx = repo['.'] + for i, r in enumerate(removed): + repo.ui.progress(_('searching'), i, total=len(removed)) + if r not in ctx: + continue + fctx = ctx.filectx(r) + + # lazily load text + @util.cachefunc + def data(): + orig = fctx.data() + return orig, mdiff.splitnewlines(orig) + + def score(text): + if not len(text): + return 0.0 + if not fctx.cmp(text): + return 1.0 + if threshold == 1.0: + return 0.0 + orig, lines = data() + # bdiff.blocks() returns blocks of matching lines + # count the number of bytes in each + equal = 0 + matches = bdiff.blocks(text, orig) + for x1, x2, y1, y2 in matches: + for line in lines[y1:y2]: + equal += len(line) + + lengths = len(text) + len(orig) + return equal * 2.0 / lengths + + for a in added: + bestscore = copies.get(a, (None, threshold))[1] + myscore = score(repo.wread(a)) + if myscore >= bestscore: + copies[a] = (r, myscore) + repo.ui.progress(_('searching'), None) + + for dest, v in copies.iteritems(): + source, score = v + yield source, dest, score + +