mpatch.py
144 lines
| 3.5 KiB
| text/x-python
|
PythonLexer
Martin Geisler
|
r7699 | # mpatch.py - Python implementation of mpatch.c | ||
# | ||||
Raphaël Gomès
|
r47575 | # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others | ||
Martin Geisler
|
r7699 | # | ||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Martin Geisler
|
r7699 | |||
Matt Harbison
|
r52756 | from __future__ import annotations | ||
Gregory Szorc
|
r27337 | |||
Gregory Szorc
|
r49728 | import io | ||
Martin Geisler
|
r7775 | import struct | ||
Gregory Szorc
|
r27337 | |||
Matt Harbison
|
r50494 | from typing import ( | ||
List, | ||||
Tuple, | ||||
) | ||||
Augie Fackler
|
r43346 | |||
Gregory Szorc
|
r49728 | stringio = io.BytesIO | ||
Martin Geisler
|
r7699 | |||
Augie Fackler
|
r43346 | |||
timeless
|
r28782 | class mpatchError(Exception): | ||
Augie Fackler
|
r46554 | """error raised when a delta cannot be decoded""" | ||
timeless
|
r28782 | |||
Augie Fackler
|
r43346 | |||
Martin Geisler
|
r7699 | # This attempts to apply a series of patches in time proportional to | ||
# the total size of the patches, rather than patches * len(text). This | ||||
# means rather than shuffling strings around, we shuffle around | ||||
# pointers to fragments with fragment lists. | ||||
# | ||||
# When the fragment lists get too long, we collapse them. To do this | ||||
# efficiently, we do all our operations inside a buffer created by | ||||
# mmap and simply use memmove. This avoids creating a bunch of large | ||||
# temporary string buffers. | ||||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
r50494 | def _pull( | ||
dst: List[Tuple[int, int]], src: List[Tuple[int, int]], l: int | ||||
) -> None: # pull l bytes from src | ||||
Augie Fackler
|
r28587 | while l: | ||
f = src.pop() | ||||
Augie Fackler
|
r43346 | if f[0] > l: # do we need to split? | ||
Augie Fackler
|
r28587 | src.append((f[0] - l, f[1] + l)) | ||
dst.append((l, f[1])) | ||||
return | ||||
dst.append(f) | ||||
l -= f[0] | ||||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
r50494 | def _move(m: stringio, dest: int, src: int, count: int) -> None: | ||
Augie Fackler
|
r28588 | """move count bytes from src to dest | ||
The file pointer is left at the end of dest. | ||||
""" | ||||
m.seek(src) | ||||
buf = m.read(count) | ||||
m.seek(dest) | ||||
m.write(buf) | ||||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
r50494 | def _collect( | ||
m: stringio, buf: int, list: List[Tuple[int, int]] | ||||
) -> Tuple[int, int]: | ||||
Augie Fackler
|
r28589 | start = buf | ||
for l, p in reversed(list): | ||||
_move(m, buf, p, l) | ||||
buf += l | ||||
return (buf - start, start) | ||||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
r50494 | def patches(a: bytes, bins: List[bytes]) -> bytes: | ||
Matt Mackall
|
r10282 | if not bins: | ||
return a | ||||
Martin Geisler
|
r7699 | |||
plens = [len(x) for x in bins] | ||||
pl = sum(plens) | ||||
bl = len(a) + pl | ||||
Augie Fackler
|
r43346 | tl = bl + bl + pl # enough for the patches and two working texts | ||
Martin Geisler
|
r7699 | b1, b2 = 0, bl | ||
Matt Mackall
|
r10282 | if not tl: | ||
return a | ||||
Martin Geisler
|
r7699 | |||
timeless
|
r28861 | m = stringio() | ||
Martin Geisler
|
r7699 | |||
# load our original text | ||||
m.write(a) | ||||
frags = [(len(a), b1)] | ||||
# copy all the patches into our segment so we can memmove from them | ||||
pos = b2 + bl | ||||
m.seek(pos) | ||||
Alex Gaynor
|
r34436 | for p in bins: | ||
m.write(p) | ||||
Martin Geisler
|
r7699 | |||
for plen in plens: | ||||
# if our list gets too long, execute it | ||||
if len(frags) > 128: | ||||
b2, b1 = b1, b2 | ||||
Augie Fackler
|
r28589 | frags = [_collect(m, b1, frags)] | ||
Martin Geisler
|
r7699 | |||
new = [] | ||||
end = pos + plen | ||||
last = 0 | ||||
while pos < end: | ||||
Martin Geisler
|
r7775 | m.seek(pos) | ||
timeless
|
r28782 | try: | ||
Augie Fackler
|
r43347 | p1, p2, l = struct.unpack(b">lll", m.read(12)) | ||
timeless
|
r28782 | except struct.error: | ||
Matt Harbison
|
r52634 | raise mpatchError("patch cannot be decoded") | ||
Augie Fackler
|
r43346 | _pull(new, frags, p1 - last) # what didn't change | ||
_pull([], frags, p2 - p1) # what got deleted | ||||
new.append((l, pos + 12)) # what got added | ||||
Martin Geisler
|
r7699 | pos += l + 12 | ||
last = p2 | ||||
Augie Fackler
|
r43346 | frags.extend(reversed(new)) # what was left at the end | ||
Martin Geisler
|
r7699 | |||
Augie Fackler
|
r28589 | t = _collect(m, b2, frags) | ||
Martin Geisler
|
r7699 | |||
Martin Geisler
|
r7775 | m.seek(t[1]) | ||
return m.read(t[0]) | ||||
Martin Geisler
|
r7699 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
r50494 | def patchedsize(orig: int, delta: bytes) -> int: | ||
Martin Geisler
|
r7699 | outlen, last, bin = 0, 0, 0 | ||
binend = len(delta) | ||||
data = 12 | ||||
while data <= binend: | ||||
Augie Fackler
|
r43346 | decode = delta[bin : bin + 12] | ||
Augie Fackler
|
r43347 | start, end, length = struct.unpack(b">lll", decode) | ||
Martin Geisler
|
r7699 | if start > end: | ||
break | ||||
bin = data + length | ||||
data = bin + 12 | ||||
outlen += start - last | ||||
last = end | ||||
outlen += length | ||||
if bin != binend: | ||||
Matt Harbison
|
r52634 | raise mpatchError("patch cannot be decoded") | ||
Martin Geisler
|
r7699 | |||
outlen += orig - last | ||||
return outlen | ||||