bdiff.py
91 lines
| 2.3 KiB
| text/x-python
|
PythonLexer
Martin Geisler
|
r7703 | # bdiff.py - Python implementation of bdiff.c | ||
# | ||||
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others | ||||
# | ||||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Martin Geisler
|
r7703 | |||
Gregory Szorc
|
r27335 | from __future__ import absolute_import | ||
import difflib | ||||
import re | ||||
import struct | ||||
Matt Mackall
|
r7944 | |||
def splitnewlines(text): | ||||
'''like str.splitlines, but only split on newlines.''' | ||||
lines = [l + '\n' for l in text.split('\n')] | ||||
if lines: | ||||
if lines[-1] == '\n': | ||||
lines.pop() | ||||
else: | ||||
lines[-1] = lines[-1][:-1] | ||||
return lines | ||||
Martin Geisler
|
r7703 | |||
def _normalizeblocks(a, b, blocks): | ||||
prev = None | ||||
Dan Villiom Podlaski Christiansen
|
r14066 | r = [] | ||
Martin Geisler
|
r7703 | for curr in blocks: | ||
if prev is None: | ||||
prev = curr | ||||
continue | ||||
shift = 0 | ||||
a1, b1, l1 = prev | ||||
a1end = a1 + l1 | ||||
b1end = b1 + l1 | ||||
a2, b2, l2 = curr | ||||
a2end = a2 + l2 | ||||
b2end = b2 + l2 | ||||
if a1end == a2: | ||||
Matt Mackall
|
r10282 | while (a1end + shift < a2end and | ||
a[a1end + shift] == b[b1end + shift]): | ||||
Martin Geisler
|
r7703 | shift += 1 | ||
elif b1end == b2: | ||||
Matt Mackall
|
r10282 | while (b1end + shift < b2end and | ||
a[a1end + shift] == b[b1end + shift]): | ||||
Martin Geisler
|
r7703 | shift += 1 | ||
Dan Villiom Podlaski Christiansen
|
r14066 | r.append((a1, b1, l1 + shift)) | ||
Matt Mackall
|
r10282 | prev = a2 + shift, b2 + shift, l2 - shift | ||
Dan Villiom Podlaski Christiansen
|
r14066 | r.append(prev) | ||
return r | ||||
Martin Geisler
|
r7703 | |||
def bdiff(a, b): | ||||
Yuya Nishihara
|
r31641 | a = bytes(a).splitlines(True) | ||
b = bytes(b).splitlines(True) | ||||
Martin Geisler
|
r7703 | |||
if not a: | ||||
s = "".join(b) | ||||
return s and (struct.pack(">lll", 0, 0, len(s)) + s) | ||||
bin = [] | ||||
p = [0] | ||||
for i in a: p.append(p[-1] + len(i)) | ||||
d = difflib.SequenceMatcher(None, a, b).get_matching_blocks() | ||||
d = _normalizeblocks(a, b, d) | ||||
la = 0 | ||||
lb = 0 | ||||
for am, bm, size in d: | ||||
s = "".join(b[lb:bm]) | ||||
if am > la or s: | ||||
bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s) | ||||
la = am + size | ||||
lb = bm + size | ||||
return "".join(bin) | ||||
def blocks(a, b): | ||||
Matt Mackall
|
r7944 | an = splitnewlines(a) | ||
bn = splitnewlines(b) | ||||
Martin Geisler
|
r7703 | d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks() | ||
d = _normalizeblocks(an, bn, d) | ||||
return [(i, i + n, j, j + n) for (i, j, n) in d] | ||||
Patrick Mezard
|
r15530 | def fixws(text, allws): | ||
if allws: | ||||
text = re.sub('[ \t\r]+', '', text) | ||||
else: | ||||
text = re.sub('[ \t\r]+', ' ', text) | ||||
text = text.replace(' \n', '\n') | ||||
return text | ||||