##// END OF EJS Templates
Merge with crew-stable
Merge with crew-stable

File last commit:

r4361:99c853a1 default
r4640:178778ca merge default
Show More
mdiff.py
259 lines | 8.0 KiB | text/x-python | PythonLexer
mpm@selenic.com
mdiff.py: kill #! line, add copyright notice...
r239 # mdiff.py - diff and patch routines for mercurial
#
Vadim Gelfer
update copyrights.
r2859 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
mpm@selenic.com
mdiff.py: kill #! line, add copyright notice...
r239 #
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.
Thomas Arendsen Hein
merge with crew-stable
r4108 import bdiff, mpatch, re, struct, util, md5
mpm@selenic.com
Add back links from file revisions to changeset revisions...
r0
Vadim Gelfer
fix speed regression in mdiff caused by line split bugfix.
r2251 def splitnewlines(text):
Vadim Gelfer
fix diffs containing embedded "\r"....
r2248 '''like str.splitlines, but only split on newlines.'''
Vadim Gelfer
fix speed regression in mdiff caused by line split bugfix.
r2251 lines = [l + '\n' for l in text.split('\n')]
if lines:
if lines[-1] == '\n':
lines.pop()
else:
lines[-1] = lines[-1][:-1]
return lines
Vadim Gelfer
fix diffs containing embedded "\r"....
r2248
Vadim Gelfer
refactor text diff/patch code....
r2874 class diffopts(object):
'''context is the number of context lines
text treats all files as text
showfunc enables diff -p output
Brendan Cully
Add diff --git option
r2907 git enables the git extended patch format
Stephen Darnell
Add -D/--nodates options to hg diff/export that removes dates from diff headers...
r3199 nodates removes dates from diff headers
Vadim Gelfer
refactor text diff/patch code....
r2874 ignorews ignores all whitespace changes in the diff
ignorewsamount ignores changes in the amount of whitespace
ignoreblanklines ignores changes whose lines are all blank'''
Thomas Arendsen Hein
Show revisions in diffs like CVS, based on a patch from Goffredo Baroncelli....
r396
Vadim Gelfer
refactor text diff/patch code....
r2874 defaults = {
'context': 3,
'text': False,
'showfunc': True,
Brendan Cully
Add diff --git option
r2907 'git': False,
Stephen Darnell
Add -D/--nodates options to hg diff/export that removes dates from diff headers...
r3199 'nodates': False,
Vadim Gelfer
refactor text diff/patch code....
r2874 'ignorews': False,
'ignorewsamount': False,
'ignoreblanklines': False,
}
__slots__ = defaults.keys()
def __init__(self, **opts):
for k in self.__slots__:
v = opts.get(k)
if v is None:
v = self.defaults[k]
setattr(self, k, v)
defaultopts = diffopts()
def unidiff(a, ad, b, bd, fn, r=None, opts=defaultopts):
Brendan Cully
Remove dates from git export file lines - they confuse git-apply
r3026 def datetag(date):
Stephen Darnell
Add -D/--nodates options to hg diff/export that removes dates from diff headers...
r3199 return (opts.git or opts.nodates) and '\n' or '\t%s\n' % date
Brendan Cully
Remove dates from git export file lines - they confuse git-apply
r3026
mpm@selenic.com
unidiff: punt on comparing empty files
r35 if not a and not b: return ""
Matt Mackall
Clean up mdiff imports
r1379 epoch = util.datestr((0, 0))
mpm@selenic.com
Attempt to make diff deal with null sources properly...
r264
Vadim Gelfer
refactor text diff/patch code....
r2874 if not opts.text and (util.binary(a) or util.binary(b)):
tailgunner@smtp.ru
Don't lie that "binary file has changed"...
r4103 def h(v):
# md5 is used instead of sha1 because md5 is supposedly faster
return md5.new(v).digest()
if a and b and len(a) == len(b) and h(a) == h(b):
return ""
mpm@selenic.com
Add automatic binary file detection to diff and export...
r1015 l = ['Binary file %s has changed\n' % fn]
Thomas Arendsen Hein
Fix diff against an empty file (issue124) and add a test for this.
r1723 elif not a:
Vadim Gelfer
fix speed regression in mdiff caused by line split bugfix.
r2251 b = splitnewlines(b)
Thomas Arendsen Hein
Fix diff against an empty file (issue124) and add a test for this.
r1723 if a is None:
Brendan Cully
Remove dates from git export file lines - they confuse git-apply
r3026 l1 = '--- /dev/null%s' % datetag(epoch)
Thomas Arendsen Hein
Fix diff against an empty file (issue124) and add a test for this.
r1723 else:
Brendan Cully
Remove dates from git export file lines - they confuse git-apply
r3026 l1 = "--- %s%s" % ("a/" + fn, datetag(ad))
l2 = "+++ %s%s" % ("b/" + fn, datetag(bd))
mpm@selenic.com
Attempt to make diff deal with null sources properly...
r264 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
l = [l1, l2, l3] + ["+" + e for e in b]
Thomas Arendsen Hein
Fix diff against an empty file (issue124) and add a test for this.
r1723 elif not b:
Vadim Gelfer
fix speed regression in mdiff caused by line split bugfix.
r2251 a = splitnewlines(a)
Brendan Cully
Remove dates from git export file lines - they confuse git-apply
r3026 l1 = "--- %s%s" % ("a/" + fn, datetag(ad))
Thomas Arendsen Hein
Fix diff against an empty file (issue124) and add a test for this.
r1723 if b is None:
Brendan Cully
Remove dates from git export file lines - they confuse git-apply
r3026 l2 = '+++ /dev/null%s' % datetag(epoch)
Thomas Arendsen Hein
Fix diff against an empty file (issue124) and add a test for this.
r1723 else:
Brendan Cully
Remove dates from git export file lines - they confuse git-apply
r3026 l2 = "+++ %s%s" % ("b/" + fn, datetag(bd))
mpm@selenic.com
Attempt to make diff deal with null sources properly...
r264 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
l = [l1, l2, l3] + ["-" + e for e in a]
else:
Vadim Gelfer
fix speed regression in mdiff caused by line split bugfix.
r2251 al = splitnewlines(a)
bl = splitnewlines(b)
Vadim Gelfer
refactor text diff/patch code....
r2874 l = list(bunidiff(a, b, al, bl, "a/" + fn, "b/" + fn, opts=opts))
mpm@selenic.com
unidiff: handle empty diffs more gracefully...
r278 if not l: return ""
mpm@selenic.com
diff: use tab to separate date from filename...
r272 # difflib uses a space, rather than a tab
Brendan Cully
Remove dates from git export file lines - they confuse git-apply
r3026 l[0] = "%s%s" % (l[0][:-2], datetag(ad))
l[1] = "%s%s" % (l[1][:-2], datetag(bd))
mpm@selenic.com
hg diff: fix missing final newline bug
r170
for ln in xrange(len(l)):
if l[ln][-1] != '\n':
l[ln] += "\n\ No newline at end of file\n"
Thomas Arendsen Hein
Show revisions in diffs like CVS, based on a patch from Goffredo Baroncelli....
r396 if r:
l.insert(0, "diff %s %s\n" %
(' '.join(["-r %s" % rev for rev in r]), fn))
mpm@selenic.com
Add back links from file revisions to changeset revisions...
r0 return "".join(l)
mason@suse.com
Add new bdiff based unidiff generation.
r1637 # somewhat self contained replacement for difflib.unified_diff
# t1 and t2 are the text to be diffed
# l1 and l2 are the text broken up into lines
# header1 and header2 are the filenames for the diff output
Vadim Gelfer
refactor text diff/patch code....
r2874 def bunidiff(t1, t2, l1, l2, header1, header2, opts=defaultopts):
mason@suse.com
Add new bdiff based unidiff generation.
r1637 def contextend(l, len):
Vadim Gelfer
refactor text diff/patch code....
r2874 ret = l + opts.context
mason@suse.com
Add new bdiff based unidiff generation.
r1637 if ret > len:
ret = len
return ret
def contextstart(l):
Vadim Gelfer
refactor text diff/patch code....
r2874 ret = l - opts.context
mason@suse.com
Add new bdiff based unidiff generation.
r1637 if ret < 0:
return 0
return ret
def yieldhunk(hunk, header):
if header:
for x in header:
yield x
(astart, a2, bstart, b2, delta) = hunk
aend = contextend(a2, len(l1))
alen = aend - astart
blen = b2 - bstart + aend - a2
func = ""
Vadim Gelfer
refactor text diff/patch code....
r2874 if opts.showfunc:
mason@suse.com
Add new bdiff based unidiff generation.
r1637 # walk backwards from the start of the context
# to find a line starting with an alphanumeric char.
for x in xrange(astart, -1, -1):
t = l1[x].rstrip()
if funcre.match(t):
func = ' ' + t[:40]
break
yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
bstart + 1, blen, func)
for x in delta:
yield x
for x in xrange(a2, aend):
yield ' ' + l1[x]
header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]
Vadim Gelfer
refactor text diff/patch code....
r2874 if opts.showfunc:
mason@suse.com
Add new bdiff based unidiff generation.
r1637 funcre = re.compile('\w')
Vadim Gelfer
refactor text diff/patch code....
r2874 if opts.ignorewsamount:
Haakon Riiser
diff: add -b/-B options
r2580 wsamountre = re.compile('[ \t]+')
wsappendedre = re.compile(' \n')
Vadim Gelfer
refactor text diff/patch code....
r2874 if opts.ignoreblanklines:
Haakon Riiser
diff: add -b/-B options
r2580 wsblanklinesre = re.compile('\n')
Vadim Gelfer
refactor text diff/patch code....
r2874 if opts.ignorews:
mason@suse.com
Add new bdiff based unidiff generation.
r1637 wsre = re.compile('[ \t]')
# bdiff.blocks gives us the matching sequences in the files. The loop
# below finds the spaces between those matching sequences and translates
# them into diff output.
#
diff = bdiff.blocks(t1, t2)
hunk = None
for i in xrange(len(diff)):
# The first match is special.
# we've either found a match starting at line 0 or a match later
# in the file. If it starts later, old and new below will both be
# empty and we'll continue to the next match.
if i > 0:
s = diff[i-1]
else:
s = [0, 0, 0, 0]
delta = []
s1 = diff[i]
a1 = s[1]
a2 = s1[0]
b1 = s[3]
b2 = s1[2]
old = l1[a1:a2]
new = l2[b1:b2]
# bdiff sometimes gives huge matches past eof, this check eats them,
# and deals with the special first match case described above
if not old and not new:
continue
Vadim Gelfer
refactor text diff/patch code....
r2874 if opts.ignoreblanklines:
Haakon Riiser
diff: add -b/-B options
r2580 wsold = wsblanklinesre.sub('', "".join(old))
wsnew = wsblanklinesre.sub('', "".join(new))
if wsold == wsnew:
continue
Vadim Gelfer
refactor text diff/patch code....
r2874 if opts.ignorewsamount:
Haakon Riiser
diff: add -b/-B options
r2580 wsold = wsamountre.sub(' ', "".join(old))
wsold = wsappendedre.sub('\n', wsold)
wsnew = wsamountre.sub(' ', "".join(new))
wsnew = wsappendedre.sub('\n', wsnew)
if wsold == wsnew:
continue
Vadim Gelfer
refactor text diff/patch code....
r2874 if opts.ignorews:
mason@suse.com
Add new bdiff based unidiff generation.
r1637 wsold = wsre.sub('', "".join(old))
wsnew = wsre.sub('', "".join(new))
if wsold == wsnew:
continue
astart = contextstart(a1)
bstart = contextstart(b1)
prev = None
if hunk:
# join with the previous hunk if it falls inside the context
Vadim Gelfer
refactor text diff/patch code....
r2874 if astart < hunk[1] + opts.context + 1:
mason@suse.com
Add new bdiff based unidiff generation.
r1637 prev = hunk
astart = hunk[1]
bstart = hunk[3]
else:
for x in yieldhunk(hunk, header):
yield x
# we only want to yield the header if the files differ, and
# we only want to yield it once.
header = None
if prev:
# we've joined the previous hunk, record the new ending points.
hunk[1] = a2
hunk[3] = b2
delta = hunk[4]
else:
# create a new hunk
hunk = [ astart, a2, bstart, b2, delta ]
delta[len(delta):] = [ ' ' + x for x in l1[astart:a1] ]
delta[len(delta):] = [ '-' + x for x in old ]
delta[len(delta):] = [ '+' + x for x in new ]
if hunk:
for x in yieldhunk(hunk, header):
yield x
mpm@selenic.com
Add a function to return the new text from a binary diff
r120 def patchtext(bin):
pos = 0
t = []
while pos < len(bin):
p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
pos += 12
t.append(bin[pos:pos + l])
pos += l
return "".join(t)
mpm@selenic.com
Add back links from file revisions to changeset revisions...
r0 def patch(a, bin):
Matt Mackall
Clean up mdiff imports
r1379 return mpatch.patches(a, [bin])
mpm@selenic.com
Start using bdiff for generating deltas...
r432
Alexis S. L. Carvalho
add mdiff.get_matching_blocks
r4361 # similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
Matt Mackall
Clean up mdiff imports
r1379 patches = mpatch.patches
mason@suse.com
Fill in the uncompressed size during revlog.addgroup...
r2078 patchedsize = mpatch.patchedsize
mpm@selenic.com
Start using bdiff for generating deltas...
r432 textdiff = bdiff.bdiff