bdiff.c
348 lines
| 7.3 KiB
| text/x-c
|
CLexer
Yuya Nishihara
|
r32369 | /* | ||
bdiff.c - efficient binary diff extension for Mercurial | ||||
Raphaël Gomès
|
r47575 | Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com> | ||
Yuya Nishihara
|
r32369 | |||
This software may be used and distributed according to the terms of | ||||
the GNU General Public License, incorporated herein by reference. | ||||
Based roughly on Python difflib | ||||
*/ | ||||
#define PY_SSIZE_T_CLEAN | ||||
#include <Python.h> | ||||
Gregory Szorc
|
r34439 | #include <limits.h> | ||
Yuya Nishihara
|
r32369 | #include <stdlib.h> | ||
#include <string.h> | ||||
#include "bdiff.h" | ||||
#include "bitmanipulation.h" | ||||
Jun Wu
|
r36693 | #include "thirdparty/xdiff/xdiff.h" | ||
Yuya Nishihara
|
r32369 | #include "util.h" | ||
static PyObject *blocks(PyObject *self, PyObject *args) | ||||
{ | ||||
PyObject *sa, *sb, *rl = NULL, *m; | ||||
struct bdiff_line *a, *b; | ||||
struct bdiff_hunk l, *h; | ||||
int an, bn, count, pos = 0; | ||||
l.next = NULL; | ||||
Augie Fackler
|
r41367 | if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) { | ||
Yuya Nishihara
|
r32369 | return NULL; | ||
Augie Fackler
|
r41367 | } | ||
Yuya Nishihara
|
r32369 | |||
an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a); | ||||
bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b); | ||||
Augie Fackler
|
r41367 | if (!a || !b) { | ||
Yuya Nishihara
|
r32369 | goto nomem; | ||
Augie Fackler
|
r41367 | } | ||
Yuya Nishihara
|
r32369 | |||
count = bdiff_diff(a, an, b, bn, &l); | ||||
Augie Fackler
|
r41367 | if (count < 0) { | ||
Yuya Nishihara
|
r32369 | goto nomem; | ||
Augie Fackler
|
r41367 | } | ||
Yuya Nishihara
|
r32369 | |||
rl = PyList_New(count); | ||||
Augie Fackler
|
r41367 | if (!rl) { | ||
Yuya Nishihara
|
r32369 | goto nomem; | ||
Augie Fackler
|
r41367 | } | ||
Yuya Nishihara
|
r32369 | |||
for (h = l.next; h; h = h->next) { | ||||
m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2); | ||||
PyList_SetItem(rl, pos, m); | ||||
pos++; | ||||
} | ||||
nomem: | ||||
free(a); | ||||
free(b); | ||||
bdiff_freehunks(l.next); | ||||
return rl ? rl : PyErr_NoMemory(); | ||||
} | ||||
static PyObject *bdiff(PyObject *self, PyObject *args) | ||||
{ | ||||
Gregory Szorc
|
r36673 | Py_buffer ba, bb; | ||
char *rb, *ia, *ib; | ||||
Yuya Nishihara
|
r32369 | PyObject *result = NULL; | ||
Gregory Szorc
|
r36672 | struct bdiff_line *al = NULL, *bl = NULL; | ||
Yuya Nishihara
|
r32369 | struct bdiff_hunk l, *h; | ||
int an, bn, count; | ||||
Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax; | ||||
Gregory Szorc
|
r36672 | PyThreadState *_save = NULL; | ||
Yuya Nishihara
|
r32369 | |||
l.next = NULL; | ||||
Gregory Szorc
|
r49676 | if (!PyArg_ParseTuple(args, "y*y*:bdiff", &ba, &bb)) { | ||
Yuya Nishihara
|
r32369 | return NULL; | ||
Augie Fackler
|
r41367 | } | ||
Yuya Nishihara
|
r32369 | |||
Gregory Szorc
|
r36673 | if (!PyBuffer_IsContiguous(&ba, 'C') || ba.ndim > 1) { | ||
PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous"); | ||||
goto cleanup; | ||||
} | ||||
if (!PyBuffer_IsContiguous(&bb, 'C') || bb.ndim > 1) { | ||||
PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous"); | ||||
goto cleanup; | ||||
} | ||||
la = ba.len; | ||||
lb = bb.len; | ||||
Yuya Nishihara
|
r32369 | if (la > UINT_MAX || lb > UINT_MAX) { | ||
PyErr_SetString(PyExc_ValueError, "bdiff inputs too large"); | ||||
Gregory Szorc
|
r36673 | goto cleanup; | ||
Yuya Nishihara
|
r32369 | } | ||
_save = PyEval_SaveThread(); | ||||
lmax = la > lb ? lb : la; | ||||
Gregory Szorc
|
r36673 | for (ia = ba.buf, ib = bb.buf; li < lmax && *ia == *ib; | ||
++li, ++ia, ++ib) { | ||||
Augie Fackler
|
r41367 | if (*ia == '\n') { | ||
Yuya Nishihara
|
r32369 | lcommon = li + 1; | ||
Augie Fackler
|
r41367 | } | ||
Gregory Szorc
|
r36673 | } | ||
Yuya Nishihara
|
r32369 | /* we can almost add: if (li == lmax) lcommon = li; */ | ||
Matt Harbison
|
r36699 | an = bdiff_splitlines((char *)ba.buf + lcommon, la - lcommon, &al); | ||
bn = bdiff_splitlines((char *)bb.buf + lcommon, lb - lcommon, &bl); | ||||
Gregory Szorc
|
r36672 | if (!al || !bl) { | ||
PyErr_NoMemory(); | ||||
goto cleanup; | ||||
} | ||||
Yuya Nishihara
|
r32369 | |||
count = bdiff_diff(al, an, bl, bn, &l); | ||||
Gregory Szorc
|
r36672 | if (count < 0) { | ||
PyErr_NoMemory(); | ||||
goto cleanup; | ||||
} | ||||
Yuya Nishihara
|
r32369 | |||
/* calculate length of output */ | ||||
la = lb = 0; | ||||
for (h = l.next; h; h = h->next) { | ||||
Augie Fackler
|
r41367 | if (h->a1 != la || h->b1 != lb) { | ||
Yuya Nishihara
|
r32369 | len += 12 + bl[h->b1].l - bl[lb].l; | ||
Augie Fackler
|
r41367 | } | ||
Yuya Nishihara
|
r32369 | la = h->a2; | ||
lb = h->b2; | ||||
} | ||||
PyEval_RestoreThread(_save); | ||||
_save = NULL; | ||||
result = PyBytes_FromStringAndSize(NULL, len); | ||||
Augie Fackler
|
r41367 | if (!result) { | ||
Gregory Szorc
|
r36672 | goto cleanup; | ||
Augie Fackler
|
r41367 | } | ||
Yuya Nishihara
|
r32369 | |||
/* build binary patch */ | ||||
rb = PyBytes_AsString(result); | ||||
la = lb = 0; | ||||
for (h = l.next; h; h = h->next) { | ||||
if (h->a1 != la || h->b1 != lb) { | ||||
len = bl[h->b1].l - bl[lb].l; | ||||
putbe32((uint32_t)(al[la].l + lcommon - al->l), rb); | ||||
Augie Fackler
|
r36072 | putbe32((uint32_t)(al[h->a1].l + lcommon - al->l), | ||
rb + 4); | ||||
Yuya Nishihara
|
r32369 | putbe32((uint32_t)len, rb + 8); | ||
memcpy(rb + 12, bl[lb].l, len); | ||||
rb += 12 + len; | ||||
} | ||||
la = h->a2; | ||||
lb = h->b2; | ||||
} | ||||
Gregory Szorc
|
r36672 | cleanup: | ||
Augie Fackler
|
r41367 | if (_save) { | ||
Yuya Nishihara
|
r32369 | PyEval_RestoreThread(_save); | ||
Augie Fackler
|
r41367 | } | ||
Gregory Szorc
|
r36673 | PyBuffer_Release(&ba); | ||
PyBuffer_Release(&bb); | ||||
Josef 'Jeff' Sipek
|
r38320 | free(al); | ||
free(bl); | ||||
Yuya Nishihara
|
r38328 | bdiff_freehunks(l.next); | ||
Gregory Szorc
|
r36672 | return result; | ||
Yuya Nishihara
|
r32369 | } | ||
/* | ||||
* If allws != 0, remove all whitespace (' ', \t and \r). Otherwise, | ||||
* reduce whitespace sequences to a single space and trim remaining whitespace | ||||
* from end of lines. | ||||
*/ | ||||
static PyObject *fixws(PyObject *self, PyObject *args) | ||||
{ | ||||
PyObject *s, *result = NULL; | ||||
char allws, c; | ||||
const char *r; | ||||
Py_ssize_t i, rlen, wlen = 0; | ||||
char *w; | ||||
Augie Fackler
|
r41367 | if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws)) { | ||
Yuya Nishihara
|
r32369 | return NULL; | ||
Augie Fackler
|
r41367 | } | ||
Yuya Nishihara
|
r32369 | r = PyBytes_AsString(s); | ||
rlen = PyBytes_Size(s); | ||||
w = (char *)PyMem_Malloc(rlen ? rlen : 1); | ||||
Augie Fackler
|
r41367 | if (!w) { | ||
Yuya Nishihara
|
r32369 | goto nomem; | ||
Augie Fackler
|
r41367 | } | ||
Yuya Nishihara
|
r32369 | |||
for (i = 0; i != rlen; i++) { | ||||
c = r[i]; | ||||
if (c == ' ' || c == '\t' || c == '\r') { | ||||
Augie Fackler
|
r41367 | if (!allws && (wlen == 0 || w[wlen - 1] != ' ')) { | ||
Yuya Nishihara
|
r32369 | w[wlen++] = ' '; | ||
Augie Fackler
|
r41367 | } | ||
Augie Fackler
|
r36072 | } else if (c == '\n' && !allws && wlen > 0 && | ||
w[wlen - 1] == ' ') { | ||||
Yuya Nishihara
|
r32369 | w[wlen - 1] = '\n'; | ||
} else { | ||||
w[wlen++] = c; | ||||
} | ||||
} | ||||
result = PyBytes_FromStringAndSize(w, wlen); | ||||
nomem: | ||||
PyMem_Free(w); | ||||
return result ? result : PyErr_NoMemory(); | ||||
} | ||||
Augie Fackler
|
r36163 | static bool sliceintolist(PyObject *list, Py_ssize_t destidx, | ||
const char *source, Py_ssize_t len) | ||||
{ | ||||
PyObject *sliced = PyBytes_FromStringAndSize(source, len); | ||||
Augie Fackler
|
r41367 | if (sliced == NULL) { | ||
Augie Fackler
|
r36163 | return false; | ||
Augie Fackler
|
r41367 | } | ||
Augie Fackler
|
r36163 | PyList_SET_ITEM(list, destidx, sliced); | ||
return true; | ||||
} | ||||
static PyObject *splitnewlines(PyObject *self, PyObject *args) | ||||
{ | ||||
const char *text; | ||||
Py_ssize_t nelts = 0, size, i, start = 0; | ||||
PyObject *result = NULL; | ||||
Gregory Szorc
|
r49676 | if (!PyArg_ParseTuple(args, "y#", &text, &size)) { | ||
Augie Fackler
|
r36163 | goto abort; | ||
} | ||||
if (!size) { | ||||
return PyList_New(0); | ||||
} | ||||
/* This loops to size-1 because if the last byte is a newline, | ||||
* we don't want to perform a split there. */ | ||||
for (i = 0; i < size - 1; ++i) { | ||||
if (text[i] == '\n') { | ||||
++nelts; | ||||
} | ||||
} | ||||
Augie Fackler
|
r41367 | if ((result = PyList_New(nelts + 1)) == NULL) { | ||
Augie Fackler
|
r36163 | goto abort; | ||
Augie Fackler
|
r41367 | } | ||
Augie Fackler
|
r36163 | nelts = 0; | ||
for (i = 0; i < size - 1; ++i) { | ||||
if (text[i] == '\n') { | ||||
if (!sliceintolist(result, nelts++, text + start, | ||||
Augie Fackler
|
r41367 | i - start + 1)) { | ||
Augie Fackler
|
r36163 | goto abort; | ||
Augie Fackler
|
r41367 | } | ||
Augie Fackler
|
r36163 | start = i + 1; | ||
} | ||||
} | ||||
Augie Fackler
|
r41367 | if (!sliceintolist(result, nelts++, text + start, size - start)) { | ||
Augie Fackler
|
r36163 | goto abort; | ||
Augie Fackler
|
r41367 | } | ||
Augie Fackler
|
r36163 | return result; | ||
abort: | ||||
Py_XDECREF(result); | ||||
return NULL; | ||||
} | ||||
Matt Harbison
|
r36934 | static int hunk_consumer(int64_t a1, int64_t a2, int64_t b1, int64_t b2, | ||
void *priv) | ||||
Jun Wu
|
r36693 | { | ||
PyObject *rl = (PyObject *)priv; | ||||
Julien Cristau
|
r38016 | PyObject *m = Py_BuildValue("LLLL", a1, a2, b1, b2); | ||
Yuya Nishihara
|
r39484 | int r; | ||
Augie Fackler
|
r41367 | if (!m) { | ||
Jun Wu
|
r36693 | return -1; | ||
Augie Fackler
|
r41367 | } | ||
Yuya Nishihara
|
r39484 | r = PyList_Append(rl, m); | ||
Py_DECREF(m); | ||||
return r; | ||||
Jun Wu
|
r36693 | } | ||
static PyObject *xdiffblocks(PyObject *self, PyObject *args) | ||||
{ | ||||
Py_ssize_t la, lb; | ||||
mmfile_t a, b; | ||||
PyObject *rl; | ||||
xpparam_t xpp = { | ||||
XDF_INDENT_HEURISTIC, /* flags */ | ||||
}; | ||||
xdemitconf_t xecfg = { | ||||
XDL_EMIT_BDIFFHUNK, /* flags */ | ||||
hunk_consumer, /* hunk_consume_func */ | ||||
}; | ||||
xdemitcb_t ecb = { | ||||
NULL, /* priv */ | ||||
}; | ||||
Gregory Szorc
|
r49676 | if (!PyArg_ParseTuple(args, "y#y#", &a.ptr, &la, &b.ptr, &lb)) { | ||
Jun Wu
|
r36693 | return NULL; | ||
Augie Fackler
|
r41367 | } | ||
Jun Wu
|
r36693 | |||
a.size = la; | ||||
b.size = lb; | ||||
rl = PyList_New(0); | ||||
Augie Fackler
|
r41367 | if (!rl) { | ||
Jun Wu
|
r36693 | return PyErr_NoMemory(); | ||
Augie Fackler
|
r41367 | } | ||
Jun Wu
|
r36693 | |||
ecb.priv = rl; | ||||
if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) { | ||||
Py_DECREF(rl); | ||||
return PyErr_NoMemory(); | ||||
} | ||||
return rl; | ||||
} | ||||
Yuya Nishihara
|
r32369 | static char mdiff_doc[] = "Efficient binary diff."; | ||
static PyMethodDef methods[] = { | ||||
Augie Fackler
|
r36072 | {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, | ||
{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"}, | ||||
{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"}, | ||||
Augie Fackler
|
r36163 | {"splitnewlines", splitnewlines, METH_VARARGS, | ||
"like str.splitlines, but only split on newlines\n"}, | ||||
Jun Wu
|
r36693 | {"xdiffblocks", xdiffblocks, METH_VARARGS, | ||
"find a list of matching lines using xdiff algorithm\n"}, | ||||
Augie Fackler
|
r36072 | {NULL, NULL}, | ||
Yuya Nishihara
|
r32369 | }; | ||
Jun Wu
|
r36693 | static const int version = 3; | ||
Yuya Nishihara
|
r32369 | |||
static struct PyModuleDef bdiff_module = { | ||||
Augie Fackler
|
r36072 | PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods, | ||
Yuya Nishihara
|
r32369 | }; | ||
PyMODINIT_FUNC PyInit_bdiff(void) | ||||
{ | ||||
PyObject *m; | ||||
m = PyModule_Create(&bdiff_module); | ||||
PyModule_AddIntConstant(m, "version", version); | ||||
return m; | ||||
} | ||||