mpatch.py
127 lines
| 3.3 KiB
| text/x-python
|
PythonLexer
Martin Geisler
|
r7699 | # mpatch.py - Python implementation of mpatch.c | ||
# | ||||
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others | ||||
# | ||||
Martin Geisler
|
r8225 | # This software may be used and distributed according to the terms of the | ||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Martin Geisler
|
r7699 | |||
Gregory Szorc
|
r27337 | from __future__ import absolute_import | ||
import cStringIO | ||||
Martin Geisler
|
r7775 | import struct | ||
Gregory Szorc
|
r27337 | |||
StringIO = cStringIO.StringIO | ||||
Martin Geisler
|
r7699 | |||
timeless
|
r28782 | class mpatchError(Exception): | ||
"""error raised when a delta cannot be decoded | ||||
""" | ||||
Martin Geisler
|
r7699 | # This attempts to apply a series of patches in time proportional to | ||
# the total size of the patches, rather than patches * len(text). This | ||||
# means rather than shuffling strings around, we shuffle around | ||||
# pointers to fragments with fragment lists. | ||||
# | ||||
# When the fragment lists get too long, we collapse them. To do this | ||||
# efficiently, we do all our operations inside a buffer created by | ||||
# mmap and simply use memmove. This avoids creating a bunch of large | ||||
# temporary string buffers. | ||||
Augie Fackler
|
r28587 | def _pull(dst, src, l): # pull l bytes from src | ||
while l: | ||||
f = src.pop() | ||||
if f[0] > l: # do we need to split? | ||||
src.append((f[0] - l, f[1] + l)) | ||||
dst.append((l, f[1])) | ||||
return | ||||
dst.append(f) | ||||
l -= f[0] | ||||
Augie Fackler
|
r28588 | def _move(m, dest, src, count): | ||
"""move count bytes from src to dest | ||||
The file pointer is left at the end of dest. | ||||
""" | ||||
m.seek(src) | ||||
buf = m.read(count) | ||||
m.seek(dest) | ||||
m.write(buf) | ||||
Augie Fackler
|
r28589 | def _collect(m, buf, list): | ||
start = buf | ||||
for l, p in reversed(list): | ||||
_move(m, buf, p, l) | ||||
buf += l | ||||
return (buf - start, start) | ||||
Martin Geisler
|
r7699 | def patches(a, bins): | ||
Matt Mackall
|
r10282 | if not bins: | ||
return a | ||||
Martin Geisler
|
r7699 | |||
plens = [len(x) for x in bins] | ||||
pl = sum(plens) | ||||
bl = len(a) + pl | ||||
tl = bl + bl + pl # enough for the patches and two working texts | ||||
b1, b2 = 0, bl | ||||
Matt Mackall
|
r10282 | if not tl: | ||
return a | ||||
Martin Geisler
|
r7699 | |||
Martin Geisler
|
r7775 | m = StringIO() | ||
Martin Geisler
|
r7699 | |||
# load our original text | ||||
m.write(a) | ||||
frags = [(len(a), b1)] | ||||
# copy all the patches into our segment so we can memmove from them | ||||
pos = b2 + bl | ||||
m.seek(pos) | ||||
for p in bins: m.write(p) | ||||
for plen in plens: | ||||
# if our list gets too long, execute it | ||||
if len(frags) > 128: | ||||
b2, b1 = b1, b2 | ||||
Augie Fackler
|
r28589 | frags = [_collect(m, b1, frags)] | ||
Martin Geisler
|
r7699 | |||
new = [] | ||||
end = pos + plen | ||||
last = 0 | ||||
while pos < end: | ||||
Martin Geisler
|
r7775 | m.seek(pos) | ||
timeless
|
r28782 | try: | ||
p1, p2, l = struct.unpack(">lll", m.read(12)) | ||||
except struct.error: | ||||
raise mpatchError("patch cannot be decoded") | ||||
Augie Fackler
|
r28587 | _pull(new, frags, p1 - last) # what didn't change | ||
_pull([], frags, p2 - p1) # what got deleted | ||||
Brodie Rao
|
r16683 | new.append((l, pos + 12)) # what got added | ||
Martin Geisler
|
r7699 | pos += l + 12 | ||
last = p2 | ||||
Brodie Rao
|
r16683 | frags.extend(reversed(new)) # what was left at the end | ||
Martin Geisler
|
r7699 | |||
Augie Fackler
|
r28589 | t = _collect(m, b2, frags) | ||
Martin Geisler
|
r7699 | |||
Martin Geisler
|
r7775 | m.seek(t[1]) | ||
return m.read(t[0]) | ||||
Martin Geisler
|
r7699 | |||
def patchedsize(orig, delta): | ||||
outlen, last, bin = 0, 0, 0 | ||||
binend = len(delta) | ||||
data = 12 | ||||
while data <= binend: | ||||
decode = delta[bin:bin + 12] | ||||
start, end, length = struct.unpack(">lll", decode) | ||||
if start > end: | ||||
break | ||||
bin = data + length | ||||
data = bin + 12 | ||||
outlen += start - last | ||||
last = end | ||||
outlen += length | ||||
if bin != binend: | ||||
timeless
|
r28782 | raise mpatchError("patch cannot be decoded") | ||
Martin Geisler
|
r7699 | |||
outlen += orig - last | ||||
return outlen | ||||