mpatch.c
420 lines
| 8.5 KiB
| text/x-c
|
CLexer
/ mercurial / mpatch.c
mpm@selenic.com
|
r72 | /* | ||
mpatch.c - efficient binary patching for Mercurial | ||||
This implements a patch algorithm that's O(m + nlog n) where m is the | ||||
size of the output and n is the number of patches. | ||||
Given a list of binary patches, it unpacks each into a hunk list, | ||||
then combines the hunk lists with a treewise recursion to form a | ||||
single hunk list. This hunk list is then applied to the original | ||||
text. | ||||
The text (or binary) fragments are copied directly from their source | ||||
Python objects into a preallocated output string to avoid the | ||||
allocation of intermediate Python objects. Working memory is about 2x | ||||
the total number of hunks. | ||||
Vadim Gelfer
|
r2859 | Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> | ||
mpm@selenic.com
|
r72 | |||
This software may be used and distributed according to the terms | ||||
of the GNU General Public License, incorporated herein by reference. | ||||
*/ | ||||
Adrian Buehlmann
|
r16758 | #define PY_SSIZE_T_CLEAN | ||
mpm@selenic.com
|
r72 | #include <Python.h> | ||
#include <stdlib.h> | ||||
#include <string.h> | ||||
Vadim Gelfer
|
r2468 | |||
Renato Cunha
|
r11360 | #include "util.h" | ||
mpm@selenic.com
|
r72 | static char mpatch_doc[] = "Efficient binary patching."; | ||
Benoit Boissinot
|
r1722 | static PyObject *mpatch_Error; | ||
mpm@selenic.com
|
r72 | |||
struct frag { | ||||
int start, end, len; | ||||
Matt Mackall
|
r5444 | const char *data; | ||
mpm@selenic.com
|
r72 | }; | ||
struct flist { | ||||
struct frag *base, *head, *tail; | ||||
}; | ||||
Adrian Buehlmann
|
r16757 | static struct flist *lalloc(Py_ssize_t size) | ||
mpm@selenic.com
|
r72 | { | ||
mpm@selenic.com
|
r128 | struct flist *a = NULL; | ||
mpm@selenic.com
|
r72 | |||
Matt Mackall
|
r3138 | if (size < 1) | ||
size = 1; | ||||
TK Soh
|
r1978 | a = (struct flist *)malloc(sizeof(struct flist)); | ||
mpm@selenic.com
|
r128 | if (a) { | ||
TK Soh
|
r1978 | a->base = (struct frag *)malloc(sizeof(struct frag) * size); | ||
Thomas Arendsen Hein
|
r2048 | if (a->base) { | ||
mpm@selenic.com
|
r128 | a->head = a->tail = a->base; | ||
Thomas Arendsen Hein
|
r2048 | return a; | ||
} | ||||
free(a); | ||||
a = NULL; | ||||
mpm@selenic.com
|
r128 | } | ||
Benoit Boissinot
|
r1722 | if (!PyErr_Occurred()) | ||
PyErr_NoMemory(); | ||||
return NULL; | ||||
mpm@selenic.com
|
r72 | } | ||
static void lfree(struct flist *a) | ||||
{ | ||||
mpm@selenic.com
|
r128 | if (a) { | ||
free(a->base); | ||||
free(a); | ||||
} | ||||
mpm@selenic.com
|
r72 | } | ||
Adrian Buehlmann
|
r16757 | static Py_ssize_t lsize(struct flist *a) | ||
mpm@selenic.com
|
r72 | { | ||
return a->tail - a->head; | ||||
} | ||||
/* move hunks in source that are less cut to dest, compensating | ||||
for changes in offset. the last hunk may be split if necessary. | ||||
*/ | ||||
static int gather(struct flist *dest, struct flist *src, int cut, int offset) | ||||
{ | ||||
struct frag *d = dest->tail, *s = src->head; | ||||
int postend, c, l; | ||||
while (s != src->tail) { | ||||
if (s->start + offset >= cut) | ||||
mpm@selenic.com
|
r82 | break; /* we've gone far enough */ | ||
mpm@selenic.com
|
r72 | |||
postend = offset + s->start + s->len; | ||||
if (postend <= cut) { | ||||
/* save this hunk */ | ||||
offset += s->start + s->len - s->end; | ||||
*d++ = *s++; | ||||
} | ||||
else { | ||||
/* break up this hunk */ | ||||
c = cut - offset; | ||||
if (s->end < c) | ||||
c = s->end; | ||||
l = cut - offset - s->start; | ||||
if (s->len < l) | ||||
l = s->len; | ||||
offset += s->start + l - c; | ||||
d->start = s->start; | ||||
d->end = c; | ||||
d->len = l; | ||||
d->data = s->data; | ||||
d++; | ||||
s->start = c; | ||||
s->len = s->len - l; | ||||
s->data = s->data + l; | ||||
mpm@selenic.com
|
r82 | break; | ||
mpm@selenic.com
|
r72 | } | ||
} | ||||
dest->tail = d; | ||||
src->head = s; | ||||
return offset; | ||||
} | ||||
/* like gather, but with no output list */ | ||||
static int discard(struct flist *src, int cut, int offset) | ||||
{ | ||||
struct frag *s = src->head; | ||||
int postend, c, l; | ||||
while (s != src->tail) { | ||||
if (s->start + offset >= cut) | ||||
mpm@selenic.com
|
r82 | break; | ||
mpm@selenic.com
|
r72 | |||
postend = offset + s->start + s->len; | ||||
if (postend <= cut) { | ||||
offset += s->start + s->len - s->end; | ||||
s++; | ||||
} | ||||
else { | ||||
c = cut - offset; | ||||
if (s->end < c) | ||||
c = s->end; | ||||
l = cut - offset - s->start; | ||||
if (s->len < l) | ||||
l = s->len; | ||||
offset += s->start + l - c; | ||||
s->start = c; | ||||
s->len = s->len - l; | ||||
s->data = s->data + l; | ||||
mpm@selenic.com
|
r82 | break; | ||
mpm@selenic.com
|
r72 | } | ||
} | ||||
src->head = s; | ||||
return offset; | ||||
} | ||||
/* combine hunk lists a and b, while adjusting b for offset changes in a/ | ||||
this deletes a and b and returns the resultant list. */ | ||||
static struct flist *combine(struct flist *a, struct flist *b) | ||||
{ | ||||
mpm@selenic.com
|
r128 | struct flist *c = NULL; | ||
struct frag *bh, *ct; | ||||
mpm@selenic.com
|
r72 | int offset = 0, post; | ||
mpm@selenic.com
|
r128 | if (a && b) | ||
c = lalloc((lsize(a) + lsize(b)) * 2); | ||||
if (c) { | ||||
mpm@selenic.com
|
r72 | |||
mpm@selenic.com
|
r128 | for (bh = b->head; bh != b->tail; bh++) { | ||
/* save old hunks */ | ||||
offset = gather(c, a, bh->start, offset); | ||||
mpm@selenic.com
|
r72 | |||
mpm@selenic.com
|
r128 | /* discard replaced hunks */ | ||
post = discard(a, bh->end, offset); | ||||
mpm@selenic.com
|
r72 | |||
mpm@selenic.com
|
r128 | /* insert new hunk */ | ||
ct = c->tail; | ||||
ct->start = bh->start - offset; | ||||
ct->end = bh->end - post; | ||||
ct->len = bh->len; | ||||
ct->data = bh->data; | ||||
c->tail++; | ||||
offset = post; | ||||
} | ||||
/* hold on to tail from a */ | ||||
memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a)); | ||||
c->tail += lsize(a); | ||||
mpm@selenic.com
|
r72 | } | ||
lfree(a); | ||||
lfree(b); | ||||
return c; | ||||
} | ||||
/* decode a binary patch into a hunk list */ | ||||
Adrian Buehlmann
|
r16757 | static struct flist *decode(const char *bin, Py_ssize_t len) | ||
mpm@selenic.com
|
r72 | { | ||
struct flist *l; | ||||
struct frag *lt; | ||||
Matt Mackall
|
r20167 | int pos = 0; | ||
mpm@selenic.com
|
r72 | |||
/* assume worst case size, we won't have many of these lists */ | ||||
Matt Mackall
|
r28656 | l = lalloc(len / 12 + 1); | ||
Benoit Boissinot
|
r1722 | if (!l) | ||
return NULL; | ||||
mpm@selenic.com
|
r72 | lt = l->tail; | ||
Matt Mackall
|
r20167 | while (pos >= 0 && pos < len) { | ||
lt->start = getbe32(bin + pos); | ||||
lt->end = getbe32(bin + pos + 4); | ||||
lt->len = getbe32(bin + pos + 8); | ||||
lt->data = bin + pos + 12; | ||||
pos += 12 + lt->len; | ||||
Matt Mackall
|
r28657 | if (lt->start > lt->end || lt->len < 0) | ||
break; /* sanity check */ | ||||
mpm@selenic.com
|
r72 | lt++; | ||
} | ||||
Matt Mackall
|
r20167 | if (pos != len) { | ||
Benoit Boissinot
|
r1722 | if (!PyErr_Occurred()) | ||
PyErr_SetString(mpatch_Error, "patch cannot be decoded"); | ||||
lfree(l); | ||||
return NULL; | ||||
} | ||||
mpm@selenic.com
|
r72 | l->tail = lt; | ||
return l; | ||||
} | ||||
/* calculate the size of resultant text */ | ||||
Adrian Buehlmann
|
r16757 | static Py_ssize_t calcsize(Py_ssize_t len, struct flist *l) | ||
mpm@selenic.com
|
r72 | { | ||
Adrian Buehlmann
|
r16757 | Py_ssize_t outlen = 0, last = 0; | ||
mpm@selenic.com
|
r72 | struct frag *f = l->head; | ||
while (f != l->tail) { | ||||
Benoit Boissinot
|
r1722 | if (f->start < last || f->end > len) { | ||
if (!PyErr_Occurred()) | ||||
PyErr_SetString(mpatch_Error, | ||||
"invalid patch"); | ||||
return -1; | ||||
} | ||||
mpm@selenic.com
|
r72 | outlen += f->start - last; | ||
last = f->end; | ||||
outlen += f->len; | ||||
f++; | ||||
} | ||||
outlen += len - last; | ||||
return outlen; | ||||
} | ||||
Adrian Buehlmann
|
r16757 | static int apply(char *buf, const char *orig, Py_ssize_t len, struct flist *l) | ||
mpm@selenic.com
|
r72 | { | ||
struct frag *f = l->head; | ||||
int last = 0; | ||||
char *p = buf; | ||||
while (f != l->tail) { | ||||
Benoit Boissinot
|
r1722 | if (f->start < last || f->end > len) { | ||
if (!PyErr_Occurred()) | ||||
PyErr_SetString(mpatch_Error, | ||||
"invalid patch"); | ||||
return 0; | ||||
} | ||||
mpm@selenic.com
|
r72 | memcpy(p, orig + last, f->start - last); | ||
p += f->start - last; | ||||
memcpy(p, f->data, f->len); | ||||
last = f->end; | ||||
p += f->len; | ||||
f++; | ||||
} | ||||
memcpy(p, orig + last, len - last); | ||||
Benoit Boissinot
|
r1722 | return 1; | ||
mpm@selenic.com
|
r72 | } | ||
/* recursively generate a patch of all bins between start and end */ | ||||
Adrian Buehlmann
|
r16757 | static struct flist *fold(PyObject *bins, Py_ssize_t start, Py_ssize_t end) | ||
mpm@selenic.com
|
r72 | { | ||
Adrian Buehlmann
|
r16757 | Py_ssize_t len, blen; | ||
Matt Mackall
|
r5444 | const char *buffer; | ||
mpm@selenic.com
|
r72 | |||
if (start + 1 == end) { | ||||
/* trivial case, output a decoded list */ | ||||
PyObject *tmp = PyList_GetItem(bins, start); | ||||
mpm@selenic.com
|
r128 | if (!tmp) | ||
return NULL; | ||||
Matt Mackall
|
r5444 | if (PyObject_AsCharBuffer(tmp, &buffer, &blen)) | ||
return NULL; | ||||
return decode(buffer, blen); | ||||
mpm@selenic.com
|
r72 | } | ||
/* divide and conquer, memory management is elsewhere */ | ||||
len = (end - start) / 2; | ||||
return combine(fold(bins, start, start + len), | ||||
fold(bins, start + len, end)); | ||||
} | ||||
static PyObject * | ||||
patches(PyObject *self, PyObject *args) | ||||
{ | ||||
PyObject *text, *bins, *result; | ||||
struct flist *patch; | ||||
Matt Mackall
|
r5444 | const char *in; | ||
char *out; | ||||
Adrian Buehlmann
|
r16757 | Py_ssize_t len, outlen, inlen; | ||
mpm@selenic.com
|
r72 | |||
Matt Mackall
|
r5444 | if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins)) | ||
mpm@selenic.com
|
r72 | return NULL; | ||
len = PyList_Size(bins); | ||||
if (!len) { | ||||
/* nothing to do */ | ||||
Py_INCREF(text); | ||||
return text; | ||||
} | ||||
Matt Mackall
|
r5444 | if (PyObject_AsCharBuffer(text, &in, &inlen)) | ||
return NULL; | ||||
mpm@selenic.com
|
r72 | patch = fold(bins, 0, len); | ||
mpm@selenic.com
|
r128 | if (!patch) | ||
Benoit Boissinot
|
r1722 | return NULL; | ||
mpm@selenic.com
|
r128 | |||
Matt Mackall
|
r5444 | outlen = calcsize(inlen, patch); | ||
Benoit Boissinot
|
r1722 | if (outlen < 0) { | ||
result = NULL; | ||||
goto cleanup; | ||||
} | ||||
Renato Cunha
|
r11360 | result = PyBytes_FromStringAndSize(NULL, outlen); | ||
Benoit Boissinot
|
r1722 | if (!result) { | ||
result = NULL; | ||||
goto cleanup; | ||||
mpm@selenic.com
|
r128 | } | ||
Renato Cunha
|
r11360 | out = PyBytes_AsString(result); | ||
Matt Mackall
|
r5444 | if (!apply(out, in, inlen, patch)) { | ||
Benoit Boissinot
|
r1722 | Py_DECREF(result); | ||
result = NULL; | ||||
} | ||||
cleanup: | ||||
mpm@selenic.com
|
r72 | lfree(patch); | ||
return result; | ||||
} | ||||
mason@suse.com
|
r2078 | /* calculate size of a patched file directly */ | ||
static PyObject * | ||||
patchedsize(PyObject *self, PyObject *args) | ||||
{ | ||||
Matt Mackall
|
r20167 | long orig, start, end, len, outlen = 0, last = 0, pos = 0; | ||
Adrian Buehlmann
|
r16758 | Py_ssize_t patchlen; | ||
Matt Mackall
|
r20167 | char *bin; | ||
mason@suse.com
|
r2078 | |||
if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen)) | ||||
return NULL; | ||||
Matt Mackall
|
r20167 | while (pos >= 0 && pos < patchlen) { | ||
start = getbe32(bin + pos); | ||||
end = getbe32(bin + pos + 4); | ||||
len = getbe32(bin + pos + 8); | ||||
Thomas Arendsen Hein
|
r4358 | if (start > end) | ||
break; /* sanity check */ | ||||
Matt Mackall
|
r20167 | pos += 12 + len; | ||
mason@suse.com
|
r2078 | outlen += start - last; | ||
last = end; | ||||
outlen += len; | ||||
} | ||||
Matt Mackall
|
r20167 | if (pos != patchlen) { | ||
mason@suse.com
|
r2078 | if (!PyErr_Occurred()) | ||
PyErr_SetString(mpatch_Error, "patch cannot be decoded"); | ||||
return NULL; | ||||
} | ||||
outlen += orig - last; | ||||
return Py_BuildValue("l", outlen); | ||||
} | ||||
mpm@selenic.com
|
r72 | static PyMethodDef methods[] = { | ||
{"patches", patches, METH_VARARGS, "apply a series of patches\n"}, | ||||
mason@suse.com
|
r2078 | {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"}, | ||
mpm@selenic.com
|
r72 | {NULL, NULL} | ||
}; | ||||
Renato Cunha
|
r11360 | #ifdef IS_PY3K | ||
static struct PyModuleDef mpatch_module = { | ||||
PyModuleDef_HEAD_INIT, | ||||
"mpatch", | ||||
mpatch_doc, | ||||
-1, | ||||
methods | ||||
}; | ||||
PyMODINIT_FUNC PyInit_mpatch(void) | ||||
{ | ||||
PyObject *m; | ||||
m = PyModule_Create(&mpatch_module); | ||||
if (m == NULL) | ||||
return NULL; | ||||
mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL); | ||||
Py_INCREF(mpatch_Error); | ||||
PyModule_AddObject(m, "mpatchError", mpatch_Error); | ||||
return m; | ||||
} | ||||
#else | ||||
mpm@selenic.com
|
r72 | PyMODINIT_FUNC | ||
initmpatch(void) | ||||
{ | ||||
Py_InitModule3("mpatch", methods, mpatch_doc); | ||||
Benoit Boissinot
|
r1722 | mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL); | ||
mpm@selenic.com
|
r72 | } | ||
Renato Cunha
|
r11360 | #endif | ||