##// END OF EJS Templates
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes....
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes. We speed up 'findrenames' for the use case where a user specifies they want a similarity of 100% by matching files by their exact SHA1 hash value. This reduces the number of comparisons required to find exact matches from O(n^2) to O(n). While it would be nice if we could just use mercurial's pre-calculated SHA1 hash for existing files, this hash includes the file's ancestor information, making it unsuitable for our purposes. Instead, we calculate the hash of old content from scratch. The following benchmarks were taken on the current head of crew: addremove 100% similarity: rm -rf *; hg up -C; mv tests tests.new hg --time addremove -s100 --dry-run before: real 176.350 secs (user 128.890+0.000 sys 47.430+0.000) after: real 2.130 secs (user 1.890+0.000 sys 0.240+0.000) addremove 75% similarity: rm -rf *; hg up -C; mv tests tests.new; \ for i in tests.new/*; do echo x >> $i; done hg --time addremove -s75 --dry-run before: real 264.560 secs (user 215.130+0.000 sys 49.410+0.000) after: real 218.710 secs (user 172.790+0.000 sys 45.870+0.000)

File last commit:

r10282:08a0f04b default
r11060:e6df0177 default
Show More
diffhelpers.c
161 lines | 4.0 KiB | text/x-c | CLexer
Bryan O'Sullivan
Add Chris Mason's mpatch library....
/*
 * diffhelpers.c - helper routines for mpatch
 *
 * Copyright 2007 Chris Mason <chris.mason@oracle.com>
 *
 * This software may be used and distributed according to the terms
 * of the GNU General Public License v2, incorporated herein by reference.
 */
#include <Python.h>
#include <stdlib.h>
#include <string.h>
/* module docstring, handed to Py_InitModule3() in initdiffhelpers() */
static char diffhelpers_doc[] = "Efficient diff parsing";
/* exception class created in initdiffhelpers(); nothing else in this file
 * references it */
static PyObject *diffhelpers_Error;
/*
 * Fix up the last lines of hunk, a and b when the patch has no newline
 * at end of file.
 *
 * The final character of the last hunk line (expected to be the newline)
 * is dropped, together with a '\r' directly in front of it. The shortened
 * line replaces the last entry of hunk and, when its control char is ' '
 * or '-', the last entry of a. When the control char is ' ' or '+', the
 * last entry of b is replaced by the shortened line without its control
 * char.
 *
 * Nothing is returned. If the input is unusable (an argument is not a
 * list, hunk is empty, the last hunk line is not a string or is too
 * short, or the list that has to be updated is empty) a Python exception
 * is set and none of the lists is modified; callers can detect this with
 * PyErr_Occurred().
 */
static void _fix_newline(PyObject *hunk, PyObject *a, PyObject *b)
{
	PyObject *s, *hline, *rline;
	char *l, c;
	int hunksz, alen, blen, sz;
	int fix_a, fix_b;

	if (!PyList_Check(hunk) || !PyList_Check(a) || !PyList_Check(b)) {
		PyErr_SetString(PyExc_TypeError,
				"hunk, a and b must be lists");
		return;
	}
	hunksz = PyList_Size(hunk);
	alen = PyList_Size(a);
	blen = PyList_Size(b);
	if (hunksz < 1) {
		PyErr_SetString(PyExc_IndexError, "hunk is empty");
		return;
	}

	/* borrowed reference, kept alive by hunk until it is replaced */
	s = PyList_GET_ITEM(hunk, hunksz - 1);
	if (!PyString_Check(s)) {
		PyErr_SetString(PyExc_TypeError,
				"last hunk line is not a string");
		return;
	}
	l = PyString_AS_STRING(s);
	sz = PyString_GET_SIZE(s);
	if (sz < 1) {
		PyErr_SetString(PyExc_ValueError, "last hunk line is empty");
		return;
	}
	c = l[0];

	if (sz > 1 && l[sz-2] == '\r')
		/* tolerate CRLF in last line */
		sz -= 1;

	fix_a = (c == ' ' || c == '-');
	fix_b = (c == ' ' || c == '+');

	/* validate everything up front so a failure never leaves the
	 * lists half updated */
	if (fix_b && sz < 2) {
		/* the line for b would get a negative length */
		PyErr_SetString(PyExc_ValueError,
				"last hunk line is too short");
		return;
	}
	if ((fix_a && alen < 1) || (fix_b && blen < 1)) {
		PyErr_SetString(PyExc_IndexError,
				"no line to fix in a or b");
		return;
	}

	hline = PyString_FromStringAndSize(l, sz - 1);
	if (hline == NULL)
		return;

	if (fix_b) {
		rline = PyString_FromStringAndSize(l + 1, sz - 2);
		if (rline == NULL) {
			Py_DECREF(hline);
			return;
		}
		/* PyList_SetItem steals the reference to rline */
		PyList_SetItem(b, blen - 1, rline);
	}
	if (fix_a) {
		/* a and hunk each take over one reference to hline */
		Py_INCREF(hline);
		PyList_SetItem(a, alen - 1, hline);
	}
	PyList_SetItem(hunk, hunksz - 1, hline);
}
/*
 * Python callable form of _fix_newline.
 *
 * Arguments: (hunk, a, b), all of which must be lists.
 * Returns 0 on success. Returns NULL with an exception set when the
 * arguments cannot be parsed or when _fix_newline left an exception
 * pending.
 */
static PyObject *
fix_newline(PyObject *self, PyObject *args)
{
	PyObject *hunk, *a, *b;

	/* "O!" rejects non-list arguments before any list macro touches them */
	if (!PyArg_ParseTuple(args, "O!O!O!", &PyList_Type, &hunk,
			      &PyList_Type, &a, &PyList_Type, &b))
		return NULL;

	_fix_newline(hunk, a, b);
	/* the helper returns void, so a pending exception is the only
	 * failure signal */
	if (PyErr_Occurred())
		return NULL;

	/* "l" consumes a C long, hence the L suffix */
	return Py_BuildValue("l", 0L);
}
/*
 * Read lines from fp into the hunk. The hunk is parsed into two arrays
 * a and b. a gets the old state of the text, b gets the new state.
 * The control char from the hunk is saved when inserting into a, but not b
 * (for performance while deleting files).
 *
 * Arguments: (fp, hunk, lena, lenb, a, b). hunk, a and b must be lists;
 * fp is read one line at a time with PyFile_GetLine(). Lines are read
 * until a holds lena entries and b holds lenb entries.
 *
 * Returns 0 on success. Returns NULL with an exception set when:
 *  - the arguments cannot be parsed,
 *  - reading a line fails or does not yield a string (TypeError),
 *  - fp is exhausted before the hunk is complete (EOFError),
 *  - _fix_newline or a list append fails.
 */
static PyObject *
addlines(PyObject *self, PyObject *args)
{
	PyObject *fp, *hunk, *a, *b;
	PyObject *x = NULL;
	int i;
	int lena, lenb;
	int num;
	int todoa, todob;
	int rc;
	char *s, c;
	PyObject *l;

	if (!PyArg_ParseTuple(args, "OO!iiO!O!", &fp, &PyList_Type, &hunk,
			      &lena, &lenb, &PyList_Type, &a,
			      &PyList_Type, &b))
		return NULL;

	while (1) {
		todoa = lena - PyList_Size(a);
		todob = lenb - PyList_Size(b);
		num = todoa > todob ? todoa : todob;
		/* "<=" rather than "==": if both lists are already longer
		 * than requested, num is negative and the loop below would
		 * never run, spinning here forever */
		if (num <= 0)
			break;
		for (i = 0; i < num; i++) {
			x = PyFile_GetLine(fp, 0);
			if (x == NULL)
				return NULL;
			if (!PyString_Check(x)) {
				PyErr_SetString(PyExc_TypeError,
						"expected a string line");
				goto fail;
			}
			if (PyString_GET_SIZE(x) == 0) {
				/* with n == 0 PyFile_GetLine returns an empty
				 * string at end of file; s + 1 would point
				 * past the buffer */
				PyErr_SetString(PyExc_EOFError,
						"unexpected end of hunk");
				goto fail;
			}
			s = PyString_AS_STRING(x);
			c = *s;
			if (strcmp(s, "\\ No newline at end of file\n") == 0) {
				/* the marker itself is not stored anywhere,
				 * so release it before moving on */
				Py_DECREF(x);
				x = NULL;
				_fix_newline(hunk, a, b);
				if (PyErr_Occurred())
					return NULL;
				continue;
			}
			if (c == '\n') {
				/* Some patches may be missing the control char
				 * on empty lines. Supply a leading space. */
				Py_DECREF(x);
				x = PyString_FromString(" \n");
				if (x == NULL)
					return NULL;
				/* the old buffer is gone; s must follow x */
				s = PyString_AS_STRING(x);
			}
			if (PyList_Append(hunk, x) < 0)
				goto fail;
			if (c != '-') {
				/* added and context lines go to b without
				 * their control char */
				l = PyString_FromString(s + 1);
				if (l == NULL)
					goto fail;
				rc = PyList_Append(b, l);
				Py_DECREF(l);
				if (rc < 0)
					goto fail;
			}
			if (c != '+') {
				/* removed and context lines go to a with
				 * their control char */
				if (PyList_Append(a, x) < 0)
					goto fail;
			}
			Py_DECREF(x);
			x = NULL;
		}
	}
	return Py_BuildValue("l", 0L);

fail:
	Py_XDECREF(x);
	return NULL;
}
/*
 * Compare the lines in a with the lines in b, starting at index bstart
 * of b. a is assumed to have a control char at the start of each line;
 * this char is ignored in the compare.
 *
 * Arguments: (a, b, bstart). a and b must be lists of strings.
 *
 * Returns 0 when every line of a matches the corresponding line of b,
 * and -1 when a line differs, when bstart is negative, or when b has
 * fewer than len(a) lines from bstart on. Returns NULL with an exception
 * set when the arguments cannot be parsed or a compared item is not a
 * string.
 */
static PyObject *
testhunk(PyObject *self, PyObject *args)
{
	PyObject *a, *b;
	PyObject *aline, *bline;
	long bstart;
	int alen, blen;
	int i;
	char *sa, *sb;

	if (!PyArg_ParseTuple(args, "O!O!l", &PyList_Type, &a,
			      &PyList_Type, &b, &bstart))
		return NULL;

	alen = PyList_Size(a);
	blen = PyList_Size(b);
	/* a negative bstart would index in front of b's storage */
	if (bstart < 0 || alen > blen - bstart)
		return Py_BuildValue("l", -1L);

	for (i = 0; i < alen; i++) {
		aline = PyList_GET_ITEM(a, i);
		bline = PyList_GET_ITEM(b, i + bstart);
		if (!PyString_Check(aline) || !PyString_Check(bline)) {
			PyErr_SetString(PyExc_TypeError,
					"hunk lines must be strings");
			return NULL;
		}
		sa = PyString_AS_STRING(aline);
		sb = PyString_AS_STRING(bline);
		/* skip the control char; an empty line has none, and
		 * stepping over its terminator would read out of bounds */
		if (PyString_GET_SIZE(aline) > 0)
			sa++;
		if (strcmp(sa, sb) != 0)
			return Py_BuildValue("l", -1L);
	}
	return Py_BuildValue("l", 0L);
}
/* functions exported by the module; handed to Py_InitModule3() */
static PyMethodDef methods[] = {
	/* name, C function, calling convention, docstring */
	{"addlines",    addlines,    METH_VARARGS, "add lines to a hunk\n"},
	{"fix_newline", fix_newline, METH_VARARGS, "fixup newline counters\n"},
	{"testhunk",    testhunk,    METH_VARARGS, "test lines in a hunk\n"},
	/* sentinel entry terminating the table */
	{NULL, NULL, 0, NULL}
};
PyMODINIT_FUNC
initdiffhelpers(void)
{
Py_InitModule3("diffhelpers", methods, diffhelpers_doc);
diffhelpers_Error = PyErr_NewException("diffhelpers.diffhelpersError",
NULL, NULL);
}