bdiff.py
123 lines
| 3.1 KiB
| text/x-python
|
PythonLexer
Martin Geisler
|
r7703 | # bdiff.py - Python implementation of bdiff.c | ||
# | ||||
Raphaël Gomès
|
r47575 | # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others | ||
Martin Geisler
|
r7703 | # | ||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Martin Geisler
|
r7703 | |||
Matt Harbison
|
r52756 | from __future__ import annotations | ||
Gregory Szorc
|
r27335 | |||
import difflib | ||||
import re | ||||
import struct | ||||
Matt Harbison
|
r52827 | import typing | ||
Matt Mackall
|
r7944 | |||
Matt Harbison
|
r50493 | from typing import ( | ||
List, | ||||
Matt Harbison
|
r52827 | Optional, | ||
Matt Harbison
|
r50493 | Tuple, | ||
) | ||||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
r52827 | from ..interfaces import ( | ||
modules as intmod, | ||||
) | ||||
Matt Harbison
|
r50493 | |||
def splitnewlines(text: bytes) -> List[bytes]: | ||||
Matt Mackall
|
r7944 | '''like str.splitlines, but only split on newlines.''' | ||
Augie Fackler
|
r43347 | lines = [l + b'\n' for l in text.split(b'\n')] | ||
Matt Mackall
|
r7944 | if lines: | ||
Augie Fackler
|
r43347 | if lines[-1] == b'\n': | ||
Matt Mackall
|
r7944 | lines.pop() | ||
else: | ||||
lines[-1] = lines[-1][:-1] | ||||
return lines | ||||
Martin Geisler
|
r7703 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
r50493 | def _normalizeblocks( | ||
a: List[bytes], b: List[bytes], blocks | ||||
) -> List[Tuple[int, int, int]]: | ||||
Martin Geisler
|
r7703 | prev = None | ||
Dan Villiom Podlaski Christiansen
|
r14066 | r = [] | ||
Martin Geisler
|
r7703 | for curr in blocks: | ||
if prev is None: | ||||
prev = curr | ||||
continue | ||||
shift = 0 | ||||
a1, b1, l1 = prev | ||||
a1end = a1 + l1 | ||||
b1end = b1 + l1 | ||||
a2, b2, l2 = curr | ||||
a2end = a2 + l2 | ||||
b2end = b2 + l2 | ||||
if a1end == a2: | ||||
Augie Fackler
|
r43346 | while ( | ||
a1end + shift < a2end and a[a1end + shift] == b[b1end + shift] | ||||
): | ||||
Martin Geisler
|
r7703 | shift += 1 | ||
elif b1end == b2: | ||||
Augie Fackler
|
r43346 | while ( | ||
b1end + shift < b2end and a[a1end + shift] == b[b1end + shift] | ||||
): | ||||
Martin Geisler
|
r7703 | shift += 1 | ||
Dan Villiom Podlaski Christiansen
|
r14066 | r.append((a1, b1, l1 + shift)) | ||
Matt Mackall
|
r10282 | prev = a2 + shift, b2 + shift, l2 - shift | ||
Gregory Szorc
|
r46467 | |||
if prev is not None: | ||||
r.append(prev) | ||||
Dan Villiom Podlaski Christiansen
|
r14066 | return r | ||
Martin Geisler
|
r7703 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
r50493 | def bdiff(a: bytes, b: bytes) -> bytes: | ||
Yuya Nishihara
|
r31641 | a = bytes(a).splitlines(True) | ||
b = bytes(b).splitlines(True) | ||||
Martin Geisler
|
r7703 | |||
if not a: | ||||
Augie Fackler
|
r43347 | s = b"".join(b) | ||
return s and (struct.pack(b">lll", 0, 0, len(s)) + s) | ||||
Martin Geisler
|
r7703 | |||
bin = [] | ||||
p = [0] | ||||
Alex Gaynor
|
r34436 | for i in a: | ||
p.append(p[-1] + len(i)) | ||||
Martin Geisler
|
r7703 | |||
d = difflib.SequenceMatcher(None, a, b).get_matching_blocks() | ||||
d = _normalizeblocks(a, b, d) | ||||
la = 0 | ||||
lb = 0 | ||||
for am, bm, size in d: | ||||
Augie Fackler
|
r43347 | s = b"".join(b[lb:bm]) | ||
Martin Geisler
|
r7703 | if am > la or s: | ||
Augie Fackler
|
r43347 | bin.append(struct.pack(b">lll", p[la], p[am], len(s)) + s) | ||
Martin Geisler
|
r7703 | la = am + size | ||
lb = bm + size | ||||
Augie Fackler
|
r43347 | return b"".join(bin) | ||
Martin Geisler
|
r7703 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
r50493 | def blocks(a: bytes, b: bytes) -> List[Tuple[int, int, int, int]]: | ||
Matt Mackall
|
r7944 | an = splitnewlines(a) | ||
bn = splitnewlines(b) | ||||
Martin Geisler
|
r7703 | d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks() | ||
d = _normalizeblocks(an, bn, d) | ||||
return [(i, i + n, j, j + n) for (i, j, n) in d] | ||||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
r50493 | def fixws(text: bytes, allws: bool) -> bytes: | ||
Patrick Mezard
|
r15530 | if allws: | ||
Augie Fackler
|
r43347 | text = re.sub(b'[ \t\r]+', b'', text) | ||
Patrick Mezard
|
r15530 | else: | ||
Augie Fackler
|
r43347 | text = re.sub(b'[ \t\r]+', b' ', text) | ||
text = text.replace(b' \n', b'\n') | ||||
Patrick Mezard
|
r15530 | return text | ||
Matt Harbison
|
r52827 | |||
# In order to adhere to the module protocol, these functions must be visible to | ||||
# the type checker, though they aren't actually implemented by this | ||||
# implementation of the module protocol. Callers are responsible for | ||||
# checking that the implementation is available before using them. | ||||
if typing.TYPE_CHECKING: | ||||
xdiffblocks: Optional[intmod.BDiffBlocksFnc] = None | ||||