bdiff.py
76 lines
| 2.0 KiB
| text/x-python
|
PythonLexer
Martin Geisler
|
# bdiff.py - Python implementation of bdiff.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Martin Geisler
|
r7703 | |||
import struct, difflib | ||||
Matt Mackall
|
r7944 | |||
def splitnewlines(text):
    '''Split text into lines on '\\n' only, keeping the line endings.

    Unlike str.splitlines, other line-break characters such as '\\r' are
    left inside the lines. Every returned line ends with '\\n' except
    possibly the last one; an empty text gives an empty list.
    '''
    pieces = text.split('\n')
    # The last piece is whatever follows the final newline (possibly
    # nothing); it is the only one that was not terminated by '\n'.
    tail = pieces.pop()
    result = [piece + '\n' for piece in pieces]
    if tail:
        result.append(tail)
    return result
Martin Geisler
|
r7703 | |||
def _normalizeblocks(a, b, blocks): | ||||
prev = None | ||||
for curr in blocks: | ||||
if prev is None: | ||||
prev = curr | ||||
continue | ||||
shift = 0 | ||||
a1, b1, l1 = prev | ||||
a1end = a1 + l1 | ||||
b1end = b1 + l1 | ||||
a2, b2, l2 = curr | ||||
a2end = a2 + l2 | ||||
b2end = b2 + l2 | ||||
if a1end == a2: | ||||
while a1end+shift < a2end and a[a1end+shift] == b[b1end+shift]: | ||||
shift += 1 | ||||
elif b1end == b2: | ||||
while b1end+shift < b2end and a[a1end+shift] == b[b1end+shift]: | ||||
shift += 1 | ||||
yield a1, b1, l1+shift | ||||
prev = a2+shift, b2+shift, l2-shift | ||||
yield prev | ||||
def bdiff(a, b):
    '''Return a binary delta describing how to turn a into b.

    Both arguments are converted with str() and cut into lines with
    splitlines(True), so line endings stay attached. The result is the
    concatenation of hunks. Each hunk is a header of three big-endian
    signed 32-bit integers packed with ">lll" (start offset in a, end
    offset in a, length of the replacement data) followed by the
    replacement data taken from b. An empty result means no hunk was
    needed.
    '''
    alines = str(a).splitlines(True)
    blines = str(b).splitlines(True)

    if not alines:
        # Nothing to match against: all of b is one insertion at offset 0.
        added = "".join(blines)
        if not added:
            return added
        return struct.pack(">lll", 0, 0, len(added)) + added

    # offsets[i] is the position in a at which line i starts; the extra
    # last entry is the total length of a.
    offsets = [0]
    for line in alines:
        offsets.append(offsets[-1] + len(line))

    matcher = difflib.SequenceMatcher(None, alines, blines)
    matches = _normalizeblocks(alines, blines, matcher.get_matching_blocks())

    hunks = []
    aend = 0
    bend = 0
    for astart, bstart, length in matches:
        # Lines of b between the previous match and this one.
        replacement = "".join(blines[bend:bstart])
        # Emit a hunk when lines of a were skipped or lines of b inserted.
        if astart > aend or replacement:
            header = struct.pack(">lll", offsets[aend], offsets[astart],
                                 len(replacement))
            hunks.append(header + replacement)
        aend = astart + length
        bend = bstart + length

    return "".join(hunks)
def blocks(a, b):
    '''Return the matching line ranges of the texts a and b.

    Both texts are split with splitnewlines and matched with
    difflib.SequenceMatcher; the matches are then passed through
    _normalizeblocks. Each entry of the returned list is a tuple
    (astart, aend, bstart, bend) such that lines astart:aend of a
    equal lines bstart:bend of b.
    '''
    alines = splitnewlines(a)
    blines = splitnewlines(b)
    matcher = difflib.SequenceMatcher(None, alines, blines)
    matches = _normalizeblocks(alines, blines, matcher.get_matching_blocks())
    return [(astart, astart + length, bstart, bstart + length)
            for astart, bstart, length in matches]