diff --git a/mercurial/similar.py b/mercurial/similar.py --- a/mercurial/similar.py +++ b/mercurial/similar.py @@ -43,6 +43,27 @@ def _findexactmatches(repo, added, remov # Done repo.ui.progress(_('searching for exact renames'), None) +@util.cachefunc +def _ctxdata(fctx): + # lazily load text + orig = fctx.data() + return orig, mdiff.splitnewlines(orig) + +@util.cachefunc +def score(fctx1, fctx2): + text = fctx1.data() + orig, lines = _ctxdata(fctx2) + # bdiff.blocks() returns blocks of matching lines + # count the number of bytes in each + equal = 0 + matches = bdiff.blocks(text, orig) + for x1, x2, y1, y2 in matches: + for line in lines[y1:y2]: + equal += len(line) + + lengths = len(text) + len(orig) + return equal * 2.0 / lengths + def _findsimilarmatches(repo, added, removed, threshold): '''find potentially renamed files based on similar file content @@ -54,28 +75,9 @@ def _findsimilarmatches(repo, added, rem repo.ui.progress(_('searching for similar files'), i, total=len(removed), unit=_('files')) - # lazily load text - @util.cachefunc - def data(): - orig = r.data() - return orig, mdiff.splitnewlines(orig) - - def score(text): - orig, lines = data() - # bdiff.blocks() returns blocks of matching lines - # count the number of bytes in each - equal = 0 - matches = bdiff.blocks(text, orig) - for x1, x2, y1, y2 in matches: - for line in lines[y1:y2]: - equal += len(line) - - lengths = len(text) + len(orig) - return equal * 2.0 / lengths - for a in added: bestscore = copies.get(a, (None, threshold))[1] - myscore = score(a.data()) + myscore = score(a, r) if myscore >= bestscore: copies[a] = (r, myscore) repo.ui.progress(_('searching'), None)