bdiff.py
169 lines
| 4.9 KiB
| text/x-python
|
PythonLexer
Martin Geisler
|
r7703 | # bdiff.py - Python implementation of bdiff.c | ||
# | ||||
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others | ||||
# | ||||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Martin Geisler
|
r7703 | |||
Gregory Szorc
|
r27335 | from __future__ import absolute_import | ||
timeless
|
r28389 | import array | ||
Gregory Szorc
|
r27335 | import difflib | ||
import re | ||||
import struct | ||||
Matt Mackall
|
r7944 | |||
Maciej Fijalkowski
|
r29833 | from . import policy | ||
policynocffi = policy.policynocffi | ||||
modulepolicy = policy.policy | ||||
Matt Mackall
|
r7944 | def splitnewlines(text): | ||
'''like str.splitlines, but only split on newlines.''' | ||||
lines = [l + '\n' for l in text.split('\n')] | ||||
if lines: | ||||
if lines[-1] == '\n': | ||||
lines.pop() | ||||
else: | ||||
lines[-1] = lines[-1][:-1] | ||||
return lines | ||||
Martin Geisler
|
r7703 | |||
def _normalizeblocks(a, b, blocks): | ||||
prev = None | ||||
Dan Villiom Podlaski Christiansen
|
r14066 | r = [] | ||
Martin Geisler
|
r7703 | for curr in blocks: | ||
if prev is None: | ||||
prev = curr | ||||
continue | ||||
shift = 0 | ||||
a1, b1, l1 = prev | ||||
a1end = a1 + l1 | ||||
b1end = b1 + l1 | ||||
a2, b2, l2 = curr | ||||
a2end = a2 + l2 | ||||
b2end = b2 + l2 | ||||
if a1end == a2: | ||||
Matt Mackall
|
r10282 | while (a1end + shift < a2end and | ||
a[a1end + shift] == b[b1end + shift]): | ||||
Martin Geisler
|
r7703 | shift += 1 | ||
elif b1end == b2: | ||||
Matt Mackall
|
r10282 | while (b1end + shift < b2end and | ||
a[a1end + shift] == b[b1end + shift]): | ||||
Martin Geisler
|
r7703 | shift += 1 | ||
Dan Villiom Podlaski Christiansen
|
r14066 | r.append((a1, b1, l1 + shift)) | ||
Matt Mackall
|
r10282 | prev = a2 + shift, b2 + shift, l2 - shift | ||
Dan Villiom Podlaski Christiansen
|
r14066 | r.append(prev) | ||
return r | ||||
Martin Geisler
|
r7703 | |||
timeless
|
r28389 | def _tostring(c): | ||
if type(c) is array.array: | ||||
# this copy overhead isn't ideal | ||||
return c.tostring() | ||||
return str(c) | ||||
Martin Geisler
|
r7703 | def bdiff(a, b): | ||
timeless
|
r28389 | a = _tostring(a).splitlines(True) | ||
b = _tostring(b).splitlines(True) | ||||
Martin Geisler
|
r7703 | |||
if not a: | ||||
s = "".join(b) | ||||
return s and (struct.pack(">lll", 0, 0, len(s)) + s) | ||||
bin = [] | ||||
p = [0] | ||||
for i in a: p.append(p[-1] + len(i)) | ||||
d = difflib.SequenceMatcher(None, a, b).get_matching_blocks() | ||||
d = _normalizeblocks(a, b, d) | ||||
la = 0 | ||||
lb = 0 | ||||
for am, bm, size in d: | ||||
s = "".join(b[lb:bm]) | ||||
if am > la or s: | ||||
bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s) | ||||
la = am + size | ||||
lb = bm + size | ||||
return "".join(bin) | ||||
def blocks(a, b): | ||||
Matt Mackall
|
r7944 | an = splitnewlines(a) | ||
bn = splitnewlines(b) | ||||
Martin Geisler
|
r7703 | d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks() | ||
d = _normalizeblocks(an, bn, d) | ||||
return [(i, i + n, j, j + n) for (i, j, n) in d] | ||||
Patrick Mezard
|
r15530 | def fixws(text, allws): | ||
if allws: | ||||
text = re.sub('[ \t\r]+', '', text) | ||||
else: | ||||
text = re.sub('[ \t\r]+', ' ', text) | ||||
text = text.replace(' \n', '\n') | ||||
return text | ||||
Maciej Fijalkowski
|
r29833 | |||
if modulepolicy not in policynocffi: | ||||
try: | ||||
from _bdiff_cffi import ffi, lib | ||||
except ImportError: | ||||
if modulepolicy == 'cffi': # strict cffi import | ||||
raise | ||||
else: | ||||
def blocks(sa, sb): | ||||
a = ffi.new("struct bdiff_line**") | ||||
b = ffi.new("struct bdiff_line**") | ||||
ac = ffi.new("char[]", sa) | ||||
bc = ffi.new("char[]", sb) | ||||
Maciej Fijalkowski
|
r29834 | l = ffi.new("struct bdiff_hunk*") | ||
Maciej Fijalkowski
|
r29833 | try: | ||
an = lib.bdiff_splitlines(ac, len(sa), a) | ||||
bn = lib.bdiff_splitlines(bc, len(sb), b) | ||||
if not a[0] or not b[0]: | ||||
raise MemoryError | ||||
count = lib.bdiff_diff(a[0], an, b[0], bn, l) | ||||
if count < 0: | ||||
raise MemoryError | ||||
rl = [None] * count | ||||
h = l.next | ||||
i = 0 | ||||
while h: | ||||
rl[i] = (h.a1, h.a2, h.b1, h.b2) | ||||
h = h.next | ||||
i += 1 | ||||
finally: | ||||
lib.free(a[0]) | ||||
lib.free(b[0]) | ||||
lib.bdiff_freehunks(l.next) | ||||
return rl | ||||
Maciej Fijalkowski
|
r29834 | |||
def bdiff(sa, sb): | ||||
a = ffi.new("struct bdiff_line**") | ||||
b = ffi.new("struct bdiff_line**") | ||||
ac = ffi.new("char[]", sa) | ||||
bc = ffi.new("char[]", sb) | ||||
l = ffi.new("struct bdiff_hunk*") | ||||
try: | ||||
an = lib.bdiff_splitlines(ac, len(sa), a) | ||||
bn = lib.bdiff_splitlines(bc, len(sb), b) | ||||
if not a[0] or not b[0]: | ||||
raise MemoryError | ||||
count = lib.bdiff_diff(a[0], an, b[0], bn, l) | ||||
if count < 0: | ||||
raise MemoryError | ||||
rl = [] | ||||
h = l.next | ||||
la = lb = 0 | ||||
while h: | ||||
if h.a1 != la or h.b1 != lb: | ||||
lgt = (b[0] + h.b1).l - (b[0] + lb).l | ||||
rl.append(struct.pack(">lll", (a[0] + la).l - a[0].l, | ||||
(a[0] + h.a1).l - a[0].l, lgt)) | ||||
rl.append(str(ffi.buffer((b[0] + lb).l, lgt))) | ||||
la = h.a2 | ||||
lb = h.b2 | ||||
h = h.next | ||||
finally: | ||||
lib.free(a[0]) | ||||
lib.free(b[0]) | ||||
lib.bdiff_freehunks(l.next) | ||||
return "".join(rl) | ||||