##// END OF EJS Templates
patch: fix patch hunk/metdata synchronization (issue3384)...
patch: fix patch hunk/metdata synchronization (issue3384) Git patches are parsed in two phases: 1) extract metadata, 2) parse actual deltas and merge them with the previous metadata. We do this to avoid dependency issues like "modify a; copy a to b", where "b" must be copied from the unmodified "a". Issue3384 is caused by flaky code I wrote to synchronize the patch metadata with the emitted hunk: if (gitpatches and (gitpatches[-1][0] == afile or gitpatches[-1][1] == bfile)): gp = gitpatches.pop()[2] With a patch like: diff --git a/a b/c copy from a copy to c --- a/a +++ b/c @@ -1,1 +1,2 @@ a +a @@ -2,1 +2,2 @@ a +a diff --git a/a b/a --- a/a +++ b/a @@ -1,1 +1,2 @@ a +b the first hunk of the first block is matched with the metadata for the block "diff --git a/a b/c", then the second hunk of the first block is matched with the metadata of the second block "diff --git a/a b/a", because of the "or" in the code paste above. Turning the "or" into an "and" is not enough as we have to deal with /dev/null cases for each file. We I remove this broken piece of code: # copy/rename + modify should modify target, not source if gp.op in ('COPY', 'DELETE', 'RENAME', 'ADD') or gp.mode: afile = bfile because "afile = bfile" set "afile" to stuff like "b/file" instead of "a/file", and because this only happens for git patches, which afile/bfile are ignored anyway by applydiff(). v2: - Avoid a traceback on git metadata desynchronization

File last commit:

r11085:0c864629 default
r16506:fc4e0fec stable
Show More
similar.py
103 lines | 3.5 KiB | text/x-python | PythonLexer
# similar.py - mechanisms for finding similar files
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from i18n import _
import util
import mdiff
import bdiff
def _findexactmatches(repo, added, removed):
'''find renamed files that have no changes
Takes a list of new filectxs and a list of removed filectxs, and yields
(before, after) tuples of exact matches.
'''
numfiles = len(added) + len(removed)
# Get hashes of removed files.
hashes = {}
for i, fctx in enumerate(removed):
repo.ui.progress(_('searching for exact renames'), i, total=numfiles)
h = util.sha1(fctx.data()).digest()
hashes[h] = fctx
# For each added file, see if it corresponds to a removed file.
for i, fctx in enumerate(added):
repo.ui.progress(_('searching for exact renames'), i + len(removed),
total=numfiles)
h = util.sha1(fctx.data()).digest()
if h in hashes:
yield (hashes[h], fctx)
# Done
repo.ui.progress(_('searching for exact renames'), None)
def _findsimilarmatches(repo, added, removed, threshold):
'''find potentially renamed files based on similar file content
Takes a list of new filectxs and a list of removed filectxs, and yields
(before, after, score) tuples of partial matches.
'''
copies = {}
for i, r in enumerate(removed):
repo.ui.progress(_('searching for similar files'), i, total=len(removed))
# lazily load text
@util.cachefunc
def data():
orig = r.data()
return orig, mdiff.splitnewlines(orig)
def score(text):
orig, lines = data()
# bdiff.blocks() returns blocks of matching lines
# count the number of bytes in each
equal = 0
matches = bdiff.blocks(text, orig)
for x1, x2, y1, y2 in matches:
for line in lines[y1:y2]:
equal += len(line)
lengths = len(text) + len(orig)
return equal * 2.0 / lengths
for a in added:
bestscore = copies.get(a, (None, threshold))[1]
myscore = score(a.data())
if myscore >= bestscore:
copies[a] = (r, myscore)
repo.ui.progress(_('searching'), None)
for dest, v in copies.iteritems():
source, score = v
yield source, dest, score
def findrenames(repo, added, removed, threshold):
'''find renamed files -- yields (before, after, score) tuples'''
parentctx = repo['.']
workingctx = repo[None]
# Zero length files will be frequently unrelated to each other, and
# tracking the deletion/addition of such a file will probably cause more
# harm than good. We strip them out here to avoid matching them later on.
addedfiles = set([workingctx[fp] for fp in added
if workingctx[fp].size() > 0])
removedfiles = set([parentctx[fp] for fp in removed
if fp in parentctx and parentctx[fp].size() > 0])
# Find exact matches.
for (a, b) in _findexactmatches(repo,
sorted(addedfiles), sorted(removedfiles)):
addedfiles.remove(b)
yield (a.path(), b.path(), 1.0)
# If the user requested similar files to be matched, search for them also.
if threshold < 1.0:
for (a, b, score) in _findsimilarmatches(repo,
sorted(addedfiles), sorted(removedfiles), threshold):
yield (a.path(), b.path(), score)