#!/usr/bin/env python
"""Print a unified diff of two files or of two directory trees.

Usage: prog [options] file1 file2

The matching blocks come from mercurial.bdiff.blocks unless --difflib is
given, in which case difflib.unified_diff produces the output instead.
"""
import os
import sys
import struct
import stat
import difflib
import re
from optparse import OptionParser
from mercurial.bdiff import bdiff, blocks

VERSION = "0.2"

usage = "usage: %prog [options] file1 file2"
parser = OptionParser(usage=usage)
parser.add_option("-d", "--difflib", action="store_true", default=False)
parser.add_option('-x', '--count', default=1)
parser.add_option('-c', '--context', type="int", default=3)
parser.add_option('-p', '--show-c-function', action="store_true",
                  default=False)
parser.add_option('-w', '--ignore-all-space', action="store_true",
                  default=False)

(options, args) = parser.parse_args()

# Both paths are required; with a single argument the later args[1] lookup
# would otherwise die with an IndexError instead of showing the usage text.
if len(args) < 2:
    parser.print_help()
    sys.exit(1)


# somewhat self contained replacement for difflib.unified_diff
# t1 and t2 are the text to be diffed
# l1 and l2 are the text broken up into lines
# header1 and header2 are the filenames for the diff output
# context is the number of context lines
# showfunc enables diff -p output
# ignorews ignores all whitespace changes in the diff
def bunidiff(t1, t2, l1, l2, header1, header2, context=3, showfunc=False,
             ignorews=False):
    """Generate the lines of a unified diff between t1 and t2.

    This is a generator.  The two "---"/"+++" header lines are yielded
    once, immediately before the first hunk, so nothing at all is yielded
    when no hunk is produced.  Lines taken from l1/l2 are yielded exactly
    as they appear there, prefixed with ' ', '-' or '+'.
    """

    def contextend(l, limit):
        # last line (exclusive) of trailing context, clamped to the file end
        ret = l + context
        if ret > limit:
            ret = limit
        return ret

    def contextstart(l):
        # first line of leading context, clamped to the start of the file
        ret = l - context
        if ret < 0:
            return 0
        return ret

    def yieldhunk(hunk, header):
        if header:
            for x in header:
                yield x
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if showfunc:
            # walk backwards from the start of the context
            # to find a line starting with an alphanumeric char.
            # astart may equal len(l1) (empty old file, or an append at
            # EOF with no context), so clamp it to the last valid index.
            idx = min(astart, len(l1) - 1)
            while idx >= 0:
                # strip the line ending: a short line would otherwise
                # carry its newline into the "@@" header and leave a stray
                # blank line in the diff.
                t = l1[idx].rstrip()
                if funcre.match(t):
                    func = ' ' + t[:40]
                    break
                idx -= 1

        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
                                           bstart + 1, blen, func)
        for x in delta:
            yield x
        # trailing context
        for line in l1[a2:aend]:
            yield ' ' + line

    header = ["--- %s\t\n" % header1, "+++ %s\t\n" % header2]

    if showfunc:
        funcre = re.compile(r'\w')
    if ignorews:
        wsre = re.compile('[ \t]')

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    # Each block is indexed here as (start in a, end in a, start in b,
    # end in b).
    diff = blocks(t1, t2)
    hunk = None
    # The first match is special.
    # we've either found a match starting at line 0 or a match later
    # in the file.  If it starts at line 0, old and new below will both be
    # empty and we'll continue to the next match.
    s = [0, 0, 0, 0]
    for s1 in diff:
        a1 = s[1]
        a2 = s1[0]
        b1 = s[3]
        b2 = s1[2]
        # the next gap always starts where this match ends, whether or not
        # the current gap is skipped below
        s = s1

        old = l1[a1:a2]
        new = l2[b1:b2]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if not old and not new:
            continue

        if ignorews:
            wsold = wsre.sub('', "".join(old))
            wsnew = wsre.sub('', "".join(new))
            if wsold == wsnew:
                continue

        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk, header):
                    yield x
                # we only want to yield the header if the files differ, and
                # we only want to yield it once.
                header = None
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            delta = []
            hunk = [astart, a2, bstart, b2, delta]

        delta.extend([' ' + x for x in l1[astart:a1]])
        delta.extend(['-' + x for x in old])
        delta.extend(['+' + x for x in new])

    if hunk:
        for x in yieldhunk(hunk, header):
            yield x


# simple utility function to put all the
# files from a directory tree into a dict
def buildlist(names, top):
    """Record every regular file below top in the dict names.

    Keys are paths relative to top; values are (st_dev, st_ino) tuples
    taken from os.lstat, which the caller compares to detect hard links.
    """
    for root, dirs, files in os.walk(top):
        # Slicing root by len(top) + 1 eats the first character of every
        # subdirectory name when top ends in a path separator, so compute
        # the relative directory properly instead.
        rel = os.path.relpath(root, top)
        if rel == os.curdir:
            rel = ""
        for x in files:
            p = os.path.join(root, x)
            st = os.lstat(p)
            if stat.S_ISREG(st.st_mode):
                names[os.path.join(rel, x)] = (st.st_dev, st.st_ino)


def _readfile(path):
    # return the whole content of path, closing the file afterwards
    with open(path) as fp:
        return fp.read()


def diff_files(file1, file2):
    """Print the unified diff between file1 and file2 to stdout.

    Either argument may be None, meaning the file exists on one side only;
    the other file is then printed as entirely added (file1 is None) or
    entirely removed (file2 is None).  Reads the module-level options.
    """
    if file1 is None:
        b = _readfile(file2).splitlines(1)
        l1 = "--- %s\n" % (file2)
        l2 = "+++ %s\n" % (file2)
        l3 = "@@ -0,0 +1,%d @@\n" % len(b)
        l = [l1, l2, l3] + ["+" + e for e in b]
    elif file2 is None:
        a = _readfile(file1).splitlines(1)
        l1 = "--- %s\n" % (file1)
        l2 = "+++ %s\n" % (file1)
        l3 = "@@ -1,%d +0,0 @@\n" % len(a)
        l = [l1, l2, l3] + ["-" + e for e in a]
    else:
        t1 = _readfile(file1)
        t2 = _readfile(file2)
        l1 = t1.splitlines(1)
        l2 = t2.splitlines(1)
        if options.difflib:
            l = difflib.unified_diff(l1, l2, file1, file2)
        else:
            l = bunidiff(t1, t2, l1, l2, file1, file2,
                         context=options.context,
                         showfunc=options.show_c_function,
                         ignorews=options.ignore_all_space)
    for x in l:
        if not x.endswith('\n'):
            # the last line of a file without a trailing newline
            x += "\n\\ No newline at end of file\n"
        # every line now ends in a newline, so write it out verbatim
        sys.stdout.write(x)


file1 = args[0]
file2 = args[1]

if os.path.isfile(file1) and os.path.isfile(file2):
    diff_files(file1, file2)
elif os.path.isdir(file1):
    if not os.path.isdir(file2):
        sys.stderr.write("file types don't match\n")
        sys.exit(1)

    d1 = {}
    d2 = {}
    buildlist(d1, file1)
    buildlist(d2, file2)

    # files present in the first tree, in sorted order
    for x in sorted(d1):
        if x not in d2:
            f2 = None
        else:
            f2 = os.path.join(file2, x)
            st1 = d1[x]
            st2 = d2[x]
            # whatever is left in d2 afterwards exists only in the second tree
            del d2[x]
            # same device and inode: both names refer to the same file
            if st1[0] == st2[0] and st1[1] == st2[1]:
                sys.stderr.write("%s is a hard link\n" % x)
                continue
        x = os.path.join(file1, x)
        diff_files(x, f2)

    # files present only in the second tree
    for x in sorted(d2):
        f1 = None
        x = os.path.join(file2, x)
        diff_files(f1, x)
else:
    # file1 is neither a regular file paired with a regular file nor a
    # directory (this includes paths that do not exist); report it rather
    # than exiting silently with status 0.
    sys.stderr.write("file types don't match\n")
    sys.exit(1)