bdiff_module.c
213 lines
| 4.5 KiB
| text/x-c
|
CLexer
/ mercurial / bdiff_module.c
Maciej Fijalkowski
|
r29541 | /* | ||
bdiff.c - efficient binary diff extension for Mercurial | ||||
Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> | ||||
This software may be used and distributed according to the terms of | ||||
the GNU General Public License, incorporated herein by reference. | ||||
Based roughly on Python difflib | ||||
*/ | ||||
#define PY_SSIZE_T_CLEAN | ||||
#include <Python.h> | ||||
#include <stdlib.h> | ||||
#include <string.h> | ||||
#include <limits.h> | ||||
#include "bdiff.h" | ||||
#include "bitmanipulation.h" | ||||
Gregory Szorc
|
r30170 | #include "util.h" | ||
Maciej Fijalkowski
|
r29541 | |||
static PyObject *blocks(PyObject *self, PyObject *args) | ||||
{ | ||||
PyObject *sa, *sb, *rl = NULL, *m; | ||||
struct bdiff_line *a, *b; | ||||
struct bdiff_hunk l, *h; | ||||
int an, bn, count, pos = 0; | ||||
l.next = NULL; | ||||
if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) | ||||
return NULL; | ||||
an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a); | ||||
bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b); | ||||
if (!a || !b) | ||||
goto nomem; | ||||
count = bdiff_diff(a, an, b, bn, &l); | ||||
if (count < 0) | ||||
goto nomem; | ||||
rl = PyList_New(count); | ||||
if (!rl) | ||||
goto nomem; | ||||
for (h = l.next; h; h = h->next) { | ||||
m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2); | ||||
PyList_SetItem(rl, pos, m); | ||||
pos++; | ||||
} | ||||
nomem: | ||||
free(a); | ||||
free(b); | ||||
bdiff_freehunks(l.next); | ||||
return rl ? rl : PyErr_NoMemory(); | ||||
} | ||||
static PyObject *bdiff(PyObject *self, PyObject *args) | ||||
{ | ||||
Mads Kiilerich
|
r30561 | char *sa, *sb, *rb, *ia, *ib; | ||
Maciej Fijalkowski
|
r29541 | PyObject *result = NULL; | ||
struct bdiff_line *al, *bl; | ||||
struct bdiff_hunk l, *h; | ||||
int an, bn, count; | ||||
Mads Kiilerich
|
r30561 | Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax; | ||
Maciej Fijalkowski
|
r29541 | PyThreadState *_save; | ||
l.next = NULL; | ||||
if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb)) | ||||
return NULL; | ||||
if (la > UINT_MAX || lb > UINT_MAX) { | ||||
PyErr_SetString(PyExc_ValueError, "bdiff inputs too large"); | ||||
return NULL; | ||||
} | ||||
_save = PyEval_SaveThread(); | ||||
Mads Kiilerich
|
r30561 | |||
lmax = la > lb ? lb : la; | ||||
for (ia = sa, ib = sb; | ||||
li < lmax && *ia == *ib; | ||||
++li, ++ia, ++ib) | ||||
if (*ia == '\n') | ||||
lcommon = li + 1; | ||||
/* we can almost add: if (li == lmax) lcommon = li; */ | ||||
an = bdiff_splitlines(sa + lcommon, la - lcommon, &al); | ||||
bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl); | ||||
Maciej Fijalkowski
|
r29541 | if (!al || !bl) | ||
goto nomem; | ||||
count = bdiff_diff(al, an, bl, bn, &l); | ||||
if (count < 0) | ||||
goto nomem; | ||||
/* calculate length of output */ | ||||
la = lb = 0; | ||||
for (h = l.next; h; h = h->next) { | ||||
if (h->a1 != la || h->b1 != lb) | ||||
len += 12 + bl[h->b1].l - bl[lb].l; | ||||
la = h->a2; | ||||
lb = h->b2; | ||||
} | ||||
PyEval_RestoreThread(_save); | ||||
_save = NULL; | ||||
result = PyBytes_FromStringAndSize(NULL, len); | ||||
if (!result) | ||||
goto nomem; | ||||
/* build binary patch */ | ||||
rb = PyBytes_AsString(result); | ||||
la = lb = 0; | ||||
for (h = l.next; h; h = h->next) { | ||||
if (h->a1 != la || h->b1 != lb) { | ||||
len = bl[h->b1].l - bl[lb].l; | ||||
Mads Kiilerich
|
r30561 | putbe32((uint32_t)(al[la].l + lcommon - al->l), rb); | ||
putbe32((uint32_t)(al[h->a1].l + lcommon - al->l), rb + 4); | ||||
Maciej Fijalkowski
|
r29541 | putbe32((uint32_t)len, rb + 8); | ||
memcpy(rb + 12, bl[lb].l, len); | ||||
rb += 12 + len; | ||||
} | ||||
la = h->a2; | ||||
lb = h->b2; | ||||
} | ||||
nomem: | ||||
if (_save) | ||||
PyEval_RestoreThread(_save); | ||||
free(al); | ||||
free(bl); | ||||
bdiff_freehunks(l.next); | ||||
return result ? result : PyErr_NoMemory(); | ||||
} | ||||
/* | ||||
* If allws != 0, remove all whitespace (' ', \t and \r). Otherwise, | ||||
* reduce whitespace sequences to a single space and trim remaining whitespace | ||||
* from end of lines. | ||||
*/ | ||||
static PyObject *fixws(PyObject *self, PyObject *args) | ||||
{ | ||||
PyObject *s, *result = NULL; | ||||
char allws, c; | ||||
const char *r; | ||||
Py_ssize_t i, rlen, wlen = 0; | ||||
char *w; | ||||
if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws)) | ||||
return NULL; | ||||
r = PyBytes_AsString(s); | ||||
rlen = PyBytes_Size(s); | ||||
Gregory Szorc
|
r31467 | w = (char *)PyMem_Malloc(rlen ? rlen : 1); | ||
Maciej Fijalkowski
|
r29541 | if (!w) | ||
goto nomem; | ||||
for (i = 0; i != rlen; i++) { | ||||
c = r[i]; | ||||
if (c == ' ' || c == '\t' || c == '\r') { | ||||
if (!allws && (wlen == 0 || w[wlen - 1] != ' ')) | ||||
w[wlen++] = ' '; | ||||
} else if (c == '\n' && !allws | ||||
&& wlen > 0 && w[wlen - 1] == ' ') { | ||||
w[wlen - 1] = '\n'; | ||||
} else { | ||||
w[wlen++] = c; | ||||
} | ||||
} | ||||
result = PyBytes_FromStringAndSize(w, wlen); | ||||
nomem: | ||||
Gregory Szorc
|
r31467 | PyMem_Free(w); | ||
Maciej Fijalkowski
|
r29541 | return result ? result : PyErr_NoMemory(); | ||
} | ||||
static char mdiff_doc[] = "Efficient binary diff."; | ||||
static PyMethodDef methods[] = { | ||||
{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, | ||||
{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"}, | ||||
{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"}, | ||||
{NULL, NULL} | ||||
}; | ||||
#ifdef IS_PY3K | ||||
static struct PyModuleDef bdiff_module = { | ||||
PyModuleDef_HEAD_INIT, | ||||
"bdiff", | ||||
mdiff_doc, | ||||
-1, | ||||
methods | ||||
}; | ||||
PyMODINIT_FUNC PyInit_bdiff(void) | ||||
{ | ||||
return PyModule_Create(&bdiff_module); | ||||
} | ||||
#else | ||||
PyMODINIT_FUNC initbdiff(void) | ||||
{ | ||||
Py_InitModule3("bdiff", methods, mdiff_doc); | ||||
} | ||||
#endif | ||||