##// END OF EJS Templates
contrib: add Chris Mason's stand-alone diff tool...
contrib: add Chris Mason's stand-alone diff tool. This uses Mercurial's diff algorithm to generate unidiffs like the traditional diff tool.

File last commit:

r1636:7da32bb3 default
r1636:7da32bb3 default
Show More
hgdiff
224 lines | 6.8 KiB | text/plain | TextLexer
#!/usr/bin/env python
import os, sys, struct, stat
import difflib
import re
from optparse import OptionParser
from mercurial.bdiff import bdiff, blocks
VERSION = "0.2"

# Command-line interface: two positional paths (both files or both
# directories) plus a handful of diff(1)-like switches.
usage = "usage: %prog [options] file1 file2"
parser = OptionParser(usage=usage)
parser.add_option("-d", "--difflib", action="store_true", default=False,
                  help="use difflib.unified_diff instead of Mercurial's "
                       "bdiff")
# NOTE(review): --count is accepted but not read anywhere in the visible
# script; it is kept so existing invocations that pass it still parse.
parser.add_option('-x', '--count', default=1)
parser.add_option('-c', '--context', type="int", default=3,
                  help="number of context lines around each change")
parser.add_option('-p', '--show-c-function', action="store_true",
                  default=False,
                  help="show the preceding line that starts with a word "
                       "character in each hunk header")
parser.add_option('-w', '--ignore-all-space', action="store_true",
                  default=False,
                  help="ignore changes that only add or remove spaces "
                       "or tabs")

(options, args) = parser.parse_args()

# Both file1 and file2 are required.  Checking only for an empty list let a
# single argument through, which then failed with an IndexError on args[1].
if len(args) < 2:
    parser.print_help()
    sys.exit(1)
def bunidiff(t1, t2, l1, l2, header1, header2, context=3, showfunc=False,
             ignorews=False):
    """Generate unified diff output for two texts using bdiff.blocks.

    A somewhat self-contained replacement for difflib.unified_diff.  This
    is a generator: every yielded string is one piece of diff output (the
    two file header lines, a hunk header, or a prefixed text line).  The
    file header is only produced if at least one hunk is produced.

    t1, t2           -- the texts to be diffed
    l1, l2           -- the same texts broken up into lines; callers in this
                        script pass splitlines(1), so lines keep their
                        line endings
    header1, header2 -- the filenames for the diff output
    context          -- the number of context lines
    showfunc         -- enables diff -p style output: the hunk header gets
                        the nearest earlier line starting with a word
                        character (at most 40 characters of it)
    ignorews         -- skips changed regions that are identical once all
                        spaces and tabs are removed
    """
    def contextend(line, total):
        # end (exclusive) of the trailing context, clamped to the file end
        return min(line + context, total)

    def contextstart(line):
        # start of the leading context, clamped to the first line
        return max(line - context, 0)

    def yieldhunk(hunk, header):
        if header:
            for x in header:
                yield x
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if showfunc:
            # Walk backwards from the start of the context to find a line
            # starting with a word character.  astart may equal len(l1)
            # (empty old file, or a change at end of file without leading
            # context), so clamp the starting index to the last line.
            x = min(astart, len(l1) - 1)
            while x >= 0:
                # strip the line ending so it cannot leak into the header
                t = l1[x].rstrip()
                if funcre.match(t):
                    func = ' ' + t[:40]
                    break
                x -= 1

        # Line numbers are 1-based, except that a zero-length range is
        # reported at the line preceding it (e.g. "-0,0" for an empty old
        # side), which is what patch(1) expects.
        afirst = astart
        if alen:
            afirst += 1
        bfirst = bstart
        if blen:
            bfirst += 1

        yield "@@ -%d,%d +%d,%d @@%s\n" % (afirst, alen, bfirst, blen, func)
        for x in delta:
            yield x
        # trailing context
        for x in l1[a2:aend]:
            yield ' ' + x

    header = ["--- %s\t\n" % header1, "+++ %s\t\n" % header2]

    if showfunc:
        funcre = re.compile(r'\w')
    if ignorews:
        wsre = re.compile('[ \t]')

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    # A pending hunk is kept as the list [astart, a2, bstart, b2, delta] so
    # that a following change close enough to it can be merged in before it
    # is written out.
    hunk = None

    # The first match is special.
    # we've either found a match starting at line 0 or a match later
    # in the file.  If it starts at line 0, old and new below will both be
    # empty and we'll continue to the next match.
    s = [0, 0, 0, 0]
    for s1 in blocks(t1, t2):
        # the changed region lies between the end of the previous match (s)
        # and the start of the current one (s1)
        a1 = s[1]
        a2 = s1[0]
        b1 = s[3]
        b2 = s1[2]
        s = s1

        old = l1[a1:a2]
        new = l2[b1:b2]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if not old and not new:
            continue

        if ignorews:
            wsold = wsre.sub('', "".join(old))
            wsnew = wsre.sub('', "".join(new))
            if wsold == wsnew:
                continue

        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk is not None:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk, header):
                    yield x
                # we only want to yield the header if the files differ, and
                # we only want to yield it once.
                header = None

        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            delta = []
            hunk = [astart, a2, bstart, b2, delta]

        # delta is the list stored in the hunk, so extending it in place
        # updates the pending hunk as well
        delta.extend([' ' + x for x in l1[astart:a1]])
        delta.extend(['-' + x for x in old])
        delta.extend(['+' + x for x in new])

    if hunk is not None:
        for x in yieldhunk(hunk, header):
            yield x
def buildlist(names, top):
    """Record every regular file below the directory top in the dict names.

    Simple utility function to put all the files from a directory tree
    into a dict.

    names -- dict that is filled in place; each key is a file path relative
             to top and each value is the (st_dev, st_ino) pair from
             os.lstat() for that file
    top   -- directory to walk

    Only entries whose lstat() mode is a regular file are recorded, so
    symbolic links are skipped.
    """
    for root, dirs, files in os.walk(top):
        # Compute the relative directory with relpath rather than slicing
        # off len(top) + 1 characters: slicing dropped the first character
        # of every subdirectory name when top ended with a path separator.
        rel = os.path.relpath(root, top)
        if rel == os.curdir:
            rel = ""
        for x in files:
            st = os.lstat(os.path.join(root, x))
            if stat.S_ISREG(st.st_mode):
                names[os.path.join(rel, x)] = (st.st_dev, st.st_ino)
def diff_files(file1, file2):
    """Write a unified diff of file1 against file2 to standard output.

    file1 -- path of the old file, or None when the file only exists on the
             new side; every line of file2 is then shown as added
    file2 -- path of the new file, or None when the file only exists on the
             old side; every line of file1 is then shown as removed

    When both paths are given, the module-level options decide how the diff
    is produced: difflib.unified_diff if options.difflib is set, bunidiff
    otherwise (with options.context, options.show_c_function and
    options.ignore_all_space passed through).
    """
    def readtext(path):
        # close the file as soon as it has been read
        with open(path) as fp:
            return fp.read()

    if file1 is None:
        b = readtext(file2).splitlines(True)
        lines = ["--- %s\n" % (file2),
                 "+++ %s\n" % (file2),
                 "@@ -0,0 +1,%d @@\n" % len(b)]
        lines += ["+" + e for e in b]
    elif file2 is None:
        a = readtext(file1).splitlines(True)
        lines = ["--- %s\n" % (file1),
                 "+++ %s\n" % (file1),
                 "@@ -1,%d +0,0 @@\n" % len(a)]
        lines += ["-" + e for e in a]
    else:
        t1 = readtext(file1)
        t2 = readtext(file2)
        l1 = t1.splitlines(True)
        l2 = t2.splitlines(True)
        if options.difflib:
            lines = difflib.unified_diff(l1, l2, file1, file2)
        else:
            lines = bunidiff(t1, t2, l1, l2, file1, file2,
                             context=options.context,
                             showfunc=options.show_c_function,
                             ignorews=options.ignore_all_space)

    for x in lines:
        # only the last line of a file can lack its line ending; mark it
        # the way diff(1) does
        if not x.endswith('\n'):
            x += "\n\\ No newline at end of file\n"
        # x always ends with a newline here, so a plain write produces the
        # same output as the former "print x," statement
        sys.stdout.write(x)
# Script entry point: diff two files, or two directory trees file by file.
file1 = args[0]
file2 = args[1]

# Report a missing path explicitly instead of exiting silently.
for path in (file1, file2):
    if not os.path.exists(path):
        sys.stderr.write("%s: no such file or directory\n" % path)
        sys.exit(1)

if os.path.isfile(file1) and os.path.isfile(file2):
    diff_files(file1, file2)
elif os.path.isdir(file1) and os.path.isdir(file2):
    # map relative path -> (st_dev, st_ino) for each tree
    d1 = {}
    d2 = {}

    buildlist(d1, file1)
    buildlist(d2, file2)

    # files present in the first tree: changed, removed, or hard-linked
    for x in sorted(d1):
        if x not in d2:
            f2 = None
        else:
            f2 = os.path.join(file2, x)
            st1 = d1[x]
            # remove the entry so that only files unique to the second tree
            # are left in d2 afterwards
            st2 = d2.pop(x)
            # same device and inode: both names refer to the same file
            if st1[0] == st2[0] and st1[1] == st2[1]:
                sys.stderr.write("%s is a hard link\n" % x)
                continue
        diff_files(os.path.join(file1, x), f2)

    # whatever is left only exists in the second tree
    for x in sorted(d2):
        diff_files(None, os.path.join(file2, x))
else:
    # one argument is a file and the other a directory (or something that
    # is neither); previously only the "directory vs. non-directory" order
    # was reported and the other combinations did nothing
    sys.stderr.write("file types don't match\n")
    sys.exit(1)