diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
--- a/mercurial/cmdutil.py
+++ b/mercurial/cmdutil.py
@@ -7,7 +7,7 @@
 
 from node import *
 from i18n import _
-import os, sys, mdiff, util, templater, patch
+import os, sys, mdiff, bdiff, util, templater, patch
 
 revrangesep = ':'
 
@@ -146,20 +146,43 @@ def walk(repo, pats=[], opts={}, node=No
         yield src, fn, util.pathto(repo.getcwd(), fn), fn in exact
 
 def findrenames(repo, added=None, removed=None, threshold=0.5):
+    '''find renamed files -- yields (before, after, score) tuples
+
+    When either added or removed is None, both are taken from
+    repo.status().  For each added file, the best-scoring removed file
+    is yielded, provided that score is at least threshold.  The score is
+    twice the number of bytes in matching lines divided by the
+    combined size of both files.
+    '''
     if added is None or removed is None:
         added, removed = repo.status()[1:3]
     ctx = repo.changectx()
     for a in added:
         aa = repo.wread(a)
-        bestscore, bestname = None, None
+        # the added file's lines are the same for every candidate
+        alines = mdiff.splitnewlines(aa)
+        bestname, bestscore = None, threshold
         for r in removed:
             rr = ctx.filectx(r).data()
-            delta = mdiff.textdiff(aa, rr)
-            if len(delta) < len(aa):
-                myscore = 1.0 - (float(len(delta)) / len(aa))
-                if bestscore is None or myscore > bestscore:
-                    bestscore, bestname = myscore, r
-        if bestname and bestscore >= threshold:
+
+            # two empty files share no content to score, and would
+            # make the division below fail
+            lengths = len(aa) + len(rr)
+            if not lengths:
+                continue
+
+            # bdiff.blocks() returns blocks of matching lines
+            # count the number of bytes in each
+            equal = 0
+            matches = bdiff.blocks(aa, rr)
+            for x1,x2,y1,y2 in matches:
+                for line in alines[x1:x2]:
+                    equal += len(line)
+
+            myscore = equal*2.0 / lengths
+            if myscore >= bestscore:
+                bestname, bestscore = r, myscore
+        if bestname:
             yield bestname, a, bestscore
 
 def addremove(repo, pats=[], opts={}, wlock=None, dry_run=None,
diff --git a/tests/test-addremove-similar b/tests/test-addremove-similar
new file mode 100755
--- /dev/null
+++ b/tests/test-addremove-similar
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+hg init rep; cd rep
+
+touch empty-file
+python -c 'for x in range(10000): print x' > large-file
+
+hg addremove
+
+hg commit -m A
+
+rm large-file empty-file
+python -c 'for x in range(10,10000): print x' > another-file
+
+hg addremove -s50
+
+hg commit -m B
+
+cd ..
+
+hg init rep2; cd rep2
+
+python -c 'for x in range(10000): print x' > large-file
+python -c 'for x in range(50): print x' > tiny-file
+
+hg addremove
+
+hg commit -m A
+
+python -c 'for x in range(70): print x' > small-file
+rm tiny-file
+rm large-file
+
+hg addremove -s50
+
+hg commit -m B
+
diff --git a/tests/test-addremove-similar.out b/tests/test-addremove-similar.out
new file mode 100644
--- /dev/null
+++ b/tests/test-addremove-similar.out
@@ -0,0 +1,12 @@
+adding empty-file
+adding large-file
+adding another-file
+removing empty-file
+removing large-file
+recording removal of large-file as rename to another-file (99% similar)
+adding large-file
+adding tiny-file
+adding small-file
+removing large-file
+removing tiny-file
+recording removal of tiny-file as rename to small-file (82% similar)