##// END OF EJS Templates
sha1dc: initial implementation of Python extension...
sha1dc: initial implementation of Python extension A future change will use this when available to avoid sha1 collision issues until we can get moved to something else. Differential Revision: https://phab.mercurial-scm.org/D7815

File last commit:

r41367:763b45bc default
r44510:bde1cd4c default
Show More
bdiff.c
359 lines | 7.6 KiB | text/x-c | CLexer
Yuya Nishihara
bdiff: switch to policy importer...
r32369 /*
bdiff.c - efficient binary diff extension for Mercurial
Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
This software may be used and distributed according to the terms of
the GNU General Public License, incorporated herein by reference.
Based roughly on Python difflib
*/
#define PY_SSIZE_T_CLEAN
#include <Python.h>
Gregory Szorc
cext: reorder #include...
r34439 #include <limits.h>
Yuya Nishihara
bdiff: switch to policy importer...
r32369 #include <stdlib.h>
#include <string.h>
#include "bdiff.h"
#include "bitmanipulation.h"
Jun Wu
bdiff: add a xdiffblocks method...
r36693 #include "thirdparty/xdiff/xdiff.h"
Yuya Nishihara
bdiff: switch to policy importer...
r32369 #include "util.h"
static PyObject *blocks(PyObject *self, PyObject *args)
{
PyObject *sa, *sb, *rl = NULL, *m;
struct bdiff_line *a, *b;
struct bdiff_hunk l, *h;
int an, bn, count, pos = 0;
l.next = NULL;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) {
Yuya Nishihara
bdiff: switch to policy importer...
r32369 return NULL;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Yuya Nishihara
bdiff: switch to policy importer...
r32369
an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (!a || !b) {
Yuya Nishihara
bdiff: switch to policy importer...
r32369 goto nomem;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Yuya Nishihara
bdiff: switch to policy importer...
r32369
count = bdiff_diff(a, an, b, bn, &l);
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (count < 0) {
Yuya Nishihara
bdiff: switch to policy importer...
r32369 goto nomem;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Yuya Nishihara
bdiff: switch to policy importer...
r32369
rl = PyList_New(count);
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (!rl) {
Yuya Nishihara
bdiff: switch to policy importer...
r32369 goto nomem;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Yuya Nishihara
bdiff: switch to policy importer...
r32369
for (h = l.next; h; h = h->next) {
m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
PyList_SetItem(rl, pos, m);
pos++;
}
nomem:
free(a);
free(b);
bdiff_freehunks(l.next);
return rl ? rl : PyErr_NoMemory();
}
static PyObject *bdiff(PyObject *self, PyObject *args)
{
Gregory Szorc
cext: accept arguments as Py_buffer...
r36673 Py_buffer ba, bb;
char *rb, *ia, *ib;
Yuya Nishihara
bdiff: switch to policy importer...
r32369 PyObject *result = NULL;
Gregory Szorc
cext: refactor cleanup code in bdiff()...
r36672 struct bdiff_line *al = NULL, *bl = NULL;
Yuya Nishihara
bdiff: switch to policy importer...
r32369 struct bdiff_hunk l, *h;
int an, bn, count;
Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
Gregory Szorc
cext: refactor cleanup code in bdiff()...
r36672 PyThreadState *_save = NULL;
Yuya Nishihara
bdiff: switch to policy importer...
r32369
l.next = NULL;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (!PyArg_ParseTuple(args, PY23("s*s*:bdiff", "y*y*:bdiff"), &ba,
&bb)) {
Yuya Nishihara
bdiff: switch to policy importer...
r32369 return NULL;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Yuya Nishihara
bdiff: switch to policy importer...
r32369
Gregory Szorc
cext: accept arguments as Py_buffer...
r36673 if (!PyBuffer_IsContiguous(&ba, 'C') || ba.ndim > 1) {
PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
goto cleanup;
}
if (!PyBuffer_IsContiguous(&bb, 'C') || bb.ndim > 1) {
PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
goto cleanup;
}
la = ba.len;
lb = bb.len;
Yuya Nishihara
bdiff: switch to policy importer...
r32369 if (la > UINT_MAX || lb > UINT_MAX) {
PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
Gregory Szorc
cext: accept arguments as Py_buffer...
r36673 goto cleanup;
Yuya Nishihara
bdiff: switch to policy importer...
r32369 }
_save = PyEval_SaveThread();
lmax = la > lb ? lb : la;
Gregory Szorc
cext: accept arguments as Py_buffer...
r36673 for (ia = ba.buf, ib = bb.buf; li < lmax && *ia == *ib;
++li, ++ia, ++ib) {
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (*ia == '\n') {
Yuya Nishihara
bdiff: switch to policy importer...
r32369 lcommon = li + 1;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Gregory Szorc
cext: accept arguments as Py_buffer...
r36673 }
Yuya Nishihara
bdiff: switch to policy importer...
r32369 /* we can almost add: if (li == lmax) lcommon = li; */
Matt Harbison
bdiff: avoid pointer arithmetic on void*...
r36699 an = bdiff_splitlines((char *)ba.buf + lcommon, la - lcommon, &al);
bn = bdiff_splitlines((char *)bb.buf + lcommon, lb - lcommon, &bl);
Gregory Szorc
cext: refactor cleanup code in bdiff()...
r36672 if (!al || !bl) {
PyErr_NoMemory();
goto cleanup;
}
Yuya Nishihara
bdiff: switch to policy importer...
r32369
count = bdiff_diff(al, an, bl, bn, &l);
Gregory Szorc
cext: refactor cleanup code in bdiff()...
r36672 if (count < 0) {
PyErr_NoMemory();
goto cleanup;
}
Yuya Nishihara
bdiff: switch to policy importer...
r32369
/* calculate length of output */
la = lb = 0;
for (h = l.next; h; h = h->next) {
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (h->a1 != la || h->b1 != lb) {
Yuya Nishihara
bdiff: switch to policy importer...
r32369 len += 12 + bl[h->b1].l - bl[lb].l;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Yuya Nishihara
bdiff: switch to policy importer...
r32369 la = h->a2;
lb = h->b2;
}
PyEval_RestoreThread(_save);
_save = NULL;
result = PyBytes_FromStringAndSize(NULL, len);
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (!result) {
Gregory Szorc
cext: refactor cleanup code in bdiff()...
r36672 goto cleanup;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Yuya Nishihara
bdiff: switch to policy importer...
r32369
/* build binary patch */
rb = PyBytes_AsString(result);
la = lb = 0;
for (h = l.next; h; h = h->next) {
if (h->a1 != la || h->b1 != lb) {
len = bl[h->b1].l - bl[lb].l;
putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
Augie Fackler
bdiff: add to clang-format oversight...
r36072 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l),
rb + 4);
Yuya Nishihara
bdiff: switch to policy importer...
r32369 putbe32((uint32_t)len, rb + 8);
memcpy(rb + 12, bl[lb].l, len);
rb += 12 + len;
}
la = h->a2;
lb = h->b2;
}
Gregory Szorc
cext: refactor cleanup code in bdiff()...
r36672 cleanup:
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (_save) {
Yuya Nishihara
bdiff: switch to policy importer...
r32369 PyEval_RestoreThread(_save);
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Gregory Szorc
cext: accept arguments as Py_buffer...
r36673 PyBuffer_Release(&ba);
PyBuffer_Release(&bb);
Josef 'Jeff' Sipek
cext: stop worrying and love the free(NULL)...
r38320 free(al);
free(bl);
Yuya Nishihara
bdiff: one more safe call of bdiff_freehunks(NULL)
r38328 bdiff_freehunks(l.next);
Gregory Szorc
cext: refactor cleanup code in bdiff()...
r36672 return result;
Yuya Nishihara
bdiff: switch to policy importer...
r32369 }
/*
* If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
* reduce whitespace sequences to a single space and trim remaining whitespace
* from end of lines.
*/
static PyObject *fixws(PyObject *self, PyObject *args)
{
PyObject *s, *result = NULL;
char allws, c;
const char *r;
Py_ssize_t i, rlen, wlen = 0;
char *w;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws)) {
Yuya Nishihara
bdiff: switch to policy importer...
r32369 return NULL;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Yuya Nishihara
bdiff: switch to policy importer...
r32369 r = PyBytes_AsString(s);
rlen = PyBytes_Size(s);
w = (char *)PyMem_Malloc(rlen ? rlen : 1);
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (!w) {
Yuya Nishihara
bdiff: switch to policy importer...
r32369 goto nomem;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Yuya Nishihara
bdiff: switch to policy importer...
r32369
for (i = 0; i != rlen; i++) {
c = r[i];
if (c == ' ' || c == '\t' || c == '\r') {
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (!allws && (wlen == 0 || w[wlen - 1] != ' ')) {
Yuya Nishihara
bdiff: switch to policy importer...
r32369 w[wlen++] = ' ';
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Augie Fackler
bdiff: add to clang-format oversight...
r36072 } else if (c == '\n' && !allws && wlen > 0 &&
w[wlen - 1] == ' ') {
Yuya Nishihara
bdiff: switch to policy importer...
r32369 w[wlen - 1] = '\n';
} else {
w[wlen++] = c;
}
}
result = PyBytes_FromStringAndSize(w, wlen);
nomem:
PyMem_Free(w);
return result ? result : PyErr_NoMemory();
}
Augie Fackler
bdiff: write a native version of splitnewlines...
r36163 static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
const char *source, Py_ssize_t len)
{
PyObject *sliced = PyBytes_FromStringAndSize(source, len);
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (sliced == NULL) {
Augie Fackler
bdiff: write a native version of splitnewlines...
r36163 return false;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Augie Fackler
bdiff: write a native version of splitnewlines...
r36163 PyList_SET_ITEM(list, destidx, sliced);
return true;
}
static PyObject *splitnewlines(PyObject *self, PyObject *args)
{
const char *text;
Py_ssize_t nelts = 0, size, i, start = 0;
PyObject *result = NULL;
Yuya Nishihara
py3: bulk-replace 'const char*' format specifier passed to PyArg_ParseTuple*()...
r36638 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &text, &size)) {
Augie Fackler
bdiff: write a native version of splitnewlines...
r36163 goto abort;
}
if (!size) {
return PyList_New(0);
}
/* This loops to size-1 because if the last byte is a newline,
* we don't want to perform a split there. */
for (i = 0; i < size - 1; ++i) {
if (text[i] == '\n') {
++nelts;
}
}
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if ((result = PyList_New(nelts + 1)) == NULL) {
Augie Fackler
bdiff: write a native version of splitnewlines...
r36163 goto abort;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Augie Fackler
bdiff: write a native version of splitnewlines...
r36163 nelts = 0;
for (i = 0; i < size - 1; ++i) {
if (text[i] == '\n') {
if (!sliceintolist(result, nelts++, text + start,
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 i - start + 1)) {
Augie Fackler
bdiff: write a native version of splitnewlines...
r36163 goto abort;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Augie Fackler
bdiff: write a native version of splitnewlines...
r36163 start = i + 1;
}
}
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (!sliceintolist(result, nelts++, text + start, size - start)) {
Augie Fackler
bdiff: write a native version of splitnewlines...
r36163 goto abort;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Augie Fackler
bdiff: write a native version of splitnewlines...
r36163 return result;
abort:
Py_XDECREF(result);
return NULL;
}
Matt Harbison
bdiff: convert more longs to int64_t...
r36934 static int hunk_consumer(int64_t a1, int64_t a2, int64_t b1, int64_t b2,
void *priv)
Jun Wu
bdiff: add a xdiffblocks method...
r36693 {
PyObject *rl = (PyObject *)priv;
Julien Cristau
bdiff: fix yet more fallout from xdiff long/int64 conversion (issue5885)...
r38016 PyObject *m = Py_BuildValue("LLLL", a1, a2, b1, b2);
Yuya Nishihara
xdiff: fix leak in hunk_consumer()...
r39484 int r;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (!m) {
Jun Wu
bdiff: add a xdiffblocks method...
r36693 return -1;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Yuya Nishihara
xdiff: fix leak in hunk_consumer()...
r39484 r = PyList_Append(rl, m);
Py_DECREF(m);
return r;
Jun Wu
bdiff: add a xdiffblocks method...
r36693 }
static PyObject *xdiffblocks(PyObject *self, PyObject *args)
{
Py_ssize_t la, lb;
mmfile_t a, b;
PyObject *rl;
xpparam_t xpp = {
XDF_INDENT_HEURISTIC, /* flags */
};
xdemitconf_t xecfg = {
XDL_EMIT_BDIFFHUNK, /* flags */
hunk_consumer, /* hunk_consume_func */
};
xdemitcb_t ecb = {
NULL, /* priv */
};
if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &a.ptr, &la, &b.ptr,
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 &lb)) {
Jun Wu
bdiff: add a xdiffblocks method...
r36693 return NULL;
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Jun Wu
bdiff: add a xdiffblocks method...
r36693
a.size = la;
b.size = lb;
rl = PyList_New(0);
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 if (!rl) {
Jun Wu
bdiff: add a xdiffblocks method...
r36693 return PyErr_NoMemory();
Augie Fackler
cleanup: use clang-tidy to add missing {} around one-line statements...
r41367 }
Jun Wu
bdiff: add a xdiffblocks method...
r36693
ecb.priv = rl;
if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) {
Py_DECREF(rl);
return PyErr_NoMemory();
}
return rl;
}
Yuya Nishihara
bdiff: switch to policy importer...
r32369 static char mdiff_doc[] = "Efficient binary diff.";
static PyMethodDef methods[] = {
Augie Fackler
bdiff: add to clang-format oversight...
r36072 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
Augie Fackler
bdiff: write a native version of splitnewlines...
r36163 {"splitnewlines", splitnewlines, METH_VARARGS,
"like str.splitlines, but only split on newlines\n"},
Jun Wu
bdiff: add a xdiffblocks method...
r36693 {"xdiffblocks", xdiffblocks, METH_VARARGS,
"find a list of matching lines using xdiff algorithm\n"},
Augie Fackler
bdiff: add to clang-format oversight...
r36072 {NULL, NULL},
Yuya Nishihara
bdiff: switch to policy importer...
r32369 };
Jun Wu
bdiff: add a xdiffblocks method...
r36693 static const int version = 3;
Yuya Nishihara
bdiff: switch to policy importer...
r32369
#ifdef IS_PY3K
static struct PyModuleDef bdiff_module = {
Augie Fackler
bdiff: add to clang-format oversight...
r36072 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods,
Yuya Nishihara
bdiff: switch to policy importer...
r32369 };
PyMODINIT_FUNC PyInit_bdiff(void)
{
PyObject *m;
m = PyModule_Create(&bdiff_module);
PyModule_AddIntConstant(m, "version", version);
return m;
}
#else
PyMODINIT_FUNC initbdiff(void)
{
PyObject *m;
m = Py_InitModule3("bdiff", methods, mdiff_doc);
PyModule_AddIntConstant(m, "version", version);
}
#endif