##// END OF EJS Templates
bdiff: don't check border condition in loop...
bdiff: don't check border condition in loop `plast = a + len - 1`. So, this "for" loop iterates from "a" to "plast", inclusive. So, `p == plast` can only be true on the final iteration of the loop. So checking for it on every loop iteration is wasteful. This patch simply decreases the upper bound of the loop by 1 and adds an explicit check after iteration for the `p == plast` case. We can't simply add 1 to the initial value for "i" because that doesn't do the correct thing on empty input strings. `perfbdiff -m 3041e4d59df2` on the Firefox repo becomes significantly faster: ! wall 0.072763 comb 0.070000 user 0.070000 sys 0.000000 (best of 100) ! wall 0.053221 comb 0.060000 user 0.060000 sys 0.000000 (best of 100) For the curious, this code has its origins in 8b067bde6679, which is the changeset that introduced bdiff.c in 2005. Also, GNU diffutils is able to perform a similar line-based diff in under 20ms. So there's likely more perf wins to be found in this code. One of them is the hashing algorithm. But it looks like mpm spent some time testing hash collisions in d0c48891dd4a. I'd like to do the same before switching away from lyhash, just to be on the safe side.

File last commit:

r30167:1e5ff5ae default
r30308:d500ddae default
Show More
dirs.c
315 lines | 6.9 KiB | text/x-c | CLexer
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 /*
dirs.c - dynamic directory diddling for dirstates
Copyright 2013 Facebook
This software may be used and distributed according to the terms of
the GNU General Public License, incorporated herein by reference.
*/
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "util.h"
Gregory Szorc
dirs: port PyInt code to work on Python 3...
r30106 #ifdef IS_PY3K
#define PYLONG_VALUE(o) ((PyLongObject *)o)->ob_digit[1]
#else
#define PYLONG_VALUE(o) PyInt_AS_LONG(o)
#endif
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 /*
* This is a multiset of directory names, built from the files that
* appear in a dirstate or manifest.
Bryan O'Sullivan
dirs: use mutable integers internally...
r18901 *
* A few implementation notes:
*
* We modify Python integers for refcounting, but those integers are
* never visible to Python code.
Bryan O'Sullivan
dirs: use mutable strings internally...
r18902 *
* We mutate strings in-place, but leave them immutable once they can
* be seen by Python code.
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 */
typedef struct {
PyObject_HEAD
PyObject *dict;
} dirsObject;
Martin von Zweigbergk
dirs.c: pass C string, not Python string, to _finddir()...
r25093 static inline Py_ssize_t _finddir(const char *path, Py_ssize_t pos)
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 {
Martin von Zweigbergk
dirs: back out forward-searching in finddirs()...
r25015 while (pos != -1) {
Martin von Zweigbergk
dirs.c: pass C string, not Python string, to _finddir()...
r25093 if (path[pos] == '/')
Martin von Zweigbergk
dirs: back out forward-searching in finddirs()...
r25015 break;
pos -= 1;
}
return pos;
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 }
static int _addpath(PyObject *dirs, PyObject *path)
{
Gregory Szorc
dirs: convert PyString to PyBytes...
r30105 const char *cpath = PyBytes_AS_STRING(path);
Py_ssize_t pos = PyBytes_GET_SIZE(path);
Bryan O'Sullivan
dirs: use mutable integers internally...
r18901 PyObject *key = NULL;
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 int ret = -1;
Gregory Szorc
dirs: document performance reasons for bypassing Python C API...
r30107 /* This loop is super critical for performance. That's why we inline
* access to Python structs instead of going through a supported API.
* The implementation, therefore, is heavily dependent on CPython
* implementation details. We also commit violations of the Python
* "protocol" such as mutating immutable objects. But since we only
* mutate objects created in this function or in other well-defined
* locations, the references are known so these violations should go
Gregory Szorc
dirs: add comment about _PyBytes_Resize...
r30139 * unnoticed. The code for adjusting the length of a PyBytesObject is
* essentially a minimal version of _PyBytes_Resize. */
Martin von Zweigbergk
dirs.c: pass C string, not Python string, to _finddir()...
r25093 while ((pos = _finddir(cpath, pos - 1)) != -1) {
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 PyObject *val;
Bryan O'Sullivan
dirs: use mutable strings internally...
r18902 /* It's likely that every prefix already has an entry
in our dict. Try to avoid allocating and
deallocating a string for each prefix we check. */
if (key != NULL)
Gregory Szorc
dirs: convert PyString to PyBytes...
r30105 ((PyBytesObject *)key)->ob_shash = -1;
Martin von Zweigbergk
dirs: back out forward-searching in finddirs()...
r25015 else {
/* Force Python to not reuse a small shared string. */
Gregory Szorc
dirs: convert PyString to PyBytes...
r30105 key = PyBytes_FromStringAndSize(cpath,
Martin von Zweigbergk
dirs: back out forward-searching in finddirs()...
r25015 pos < 2 ? 2 : pos);
Bryan O'Sullivan
dirs: use mutable strings internally...
r18902 if (key == NULL)
goto bail;
}
Gregory Szorc
dirs: document Py_SIZE weirdness...
r30159 /* Py_SIZE(o) refers to the ob_size member of the struct. Yes,
* assigning to what looks like a function seems wrong. */
Gregory Szorc
dirs: inline string macros...
r30104 Py_SIZE(key) = pos;
Gregory Szorc
dirs: convert PyString to PyBytes...
r30105 ((PyBytesObject *)key)->ob_sval[pos] = '\0';
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900
val = PyDict_GetItem(dirs, key);
Bryan O'Sullivan
dirs: use mutable integers internally...
r18901 if (val != NULL) {
Gregory Szorc
dirs: port PyInt code to work on Python 3...
r30106 PYLONG_VALUE(val) += 1;
Martin von Zweigbergk
dirs: speed up by storing number of direct children per dir...
r25016 break;
Bryan O'Sullivan
dirs: use mutable integers internally...
r18901 }
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900
Bryan O'Sullivan
dirs: use mutable integers internally...
r18901 /* Force Python to not reuse a small shared int. */
Gregory Szorc
dirs: port PyInt code to work on Python 3...
r30106 #ifdef IS_PY3K
val = PyLong_FromLong(0x1eadbeef);
#else
Bryan O'Sullivan
dirs: use mutable integers internally...
r18901 val = PyInt_FromLong(0x1eadbeef);
Gregory Szorc
dirs: port PyInt code to work on Python 3...
r30106 #endif
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900
Bryan O'Sullivan
dirs: use mutable integers internally...
r18901 if (val == NULL)
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 goto bail;
Gregory Szorc
dirs: port PyInt code to work on Python 3...
r30106 PYLONG_VALUE(val) = 1;
Bryan O'Sullivan
dirs: use mutable integers internally...
r18901 ret = PyDict_SetItem(dirs, key, val);
Py_DECREF(val);
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 if (ret == -1)
goto bail;
Siddharth Agarwal
dirs._addpath: reinstate use of Py_CLEAR...
r24651 Py_CLEAR(key);
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 }
ret = 0;
bail:
Py_XDECREF(key);
return ret;
}
static int _delpath(PyObject *dirs, PyObject *path)
{
Gregory Szorc
dirs: convert PyString to PyBytes...
r30105 char *cpath = PyBytes_AS_STRING(path);
Py_ssize_t pos = PyBytes_GET_SIZE(path);
Bryan O'Sullivan
dirs: use mutable integers internally...
r18901 PyObject *key = NULL;
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 int ret = -1;
Martin von Zweigbergk
dirs.c: pass C string, not Python string, to _finddir()...
r25093 while ((pos = _finddir(cpath, pos - 1)) != -1) {
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 PyObject *val;
Gregory Szorc
dirs: convert PyString to PyBytes...
r30105 key = PyBytes_FromStringAndSize(cpath, pos);
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900
if (key == NULL)
goto bail;
val = PyDict_GetItem(dirs, key);
if (val == NULL) {
PyErr_SetString(PyExc_ValueError,
"expected a value, found none");
goto bail;
}
Gregory Szorc
dirs: port PyInt code to work on Python 3...
r30106 if (--PYLONG_VALUE(val) <= 0) {
Martin von Zweigbergk
dirs: speed up by storing number of direct children per dir...
r25016 if (PyDict_DelItem(dirs, key) == -1)
goto bail;
} else
break;
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 Py_CLEAR(key);
}
ret = 0;
bail:
Py_XDECREF(key);
return ret;
}
static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar)
{
PyObject *key, *value;
Py_ssize_t pos = 0;
while (PyDict_Next(source, &pos, &key, &value)) {
Gregory Szorc
dirs: convert PyString to PyBytes...
r30105 if (!PyBytes_Check(key)) {
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 PyErr_SetString(PyExc_TypeError, "expected string key");
return -1;
}
if (skipchar) {
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 if (!dirstate_tuple_check(value)) {
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 PyErr_SetString(PyExc_TypeError,
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 "expected a dirstate tuple");
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 return -1;
}
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 if (((dirstateTupleObject *)value)->state == skipchar)
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 continue;
}
if (_addpath(dirs, key) == -1)
return -1;
}
return 0;
}
static int dirs_fromiter(PyObject *dirs, PyObject *source)
{
PyObject *iter, *item = NULL;
int ret;
iter = PyObject_GetIter(source);
if (iter == NULL)
return -1;
while ((item = PyIter_Next(iter)) != NULL) {
Gregory Szorc
dirs: convert PyString to PyBytes...
r30105 if (!PyBytes_Check(item)) {
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 PyErr_SetString(PyExc_TypeError, "expected string");
break;
}
if (_addpath(dirs, item) == -1)
break;
Py_CLEAR(item);
}
ret = PyErr_Occurred() ? -1 : 0;
Augie Fackler
dirs: fix leak of iterator in dirs_fromiter...
r23960 Py_DECREF(iter);
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 Py_XDECREF(item);
return ret;
}
/*
* Calculate a refcounted set of directory names for the files in a
* dirstate.
*/
static int dirs_init(dirsObject *self, PyObject *args)
{
PyObject *dirs = NULL, *source = NULL;
char skipchar = 0;
int ret = -1;
self->dict = NULL;
if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar))
return -1;
dirs = PyDict_New();
if (dirs == NULL)
return -1;
if (source == NULL)
ret = 0;
else if (PyDict_Check(source))
ret = dirs_fromdict(dirs, source, skipchar);
else if (skipchar)
PyErr_SetString(PyExc_ValueError,
"skip character is only supported "
"with a dict source");
else
ret = dirs_fromiter(dirs, source);
if (ret == -1)
Py_XDECREF(dirs);
else
self->dict = dirs;
return ret;
}
PyObject *dirs_addpath(dirsObject *self, PyObject *args)
{
PyObject *path;
Gregory Szorc
dirs: convert PyString to PyBytes...
r30105 if (!PyArg_ParseTuple(args, "O!:addpath", &PyBytes_Type, &path))
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 return NULL;
if (_addpath(self->dict, path) == -1)
return NULL;
Py_RETURN_NONE;
}
static PyObject *dirs_delpath(dirsObject *self, PyObject *args)
{
PyObject *path;
Gregory Szorc
dirs: convert PyString to PyBytes...
r30105 if (!PyArg_ParseTuple(args, "O!:delpath", &PyBytes_Type, &path))
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 return NULL;
if (_delpath(self->dict, path) == -1)
return NULL;
Py_RETURN_NONE;
}
static int dirs_contains(dirsObject *self, PyObject *value)
{
Gregory Szorc
dirs: convert PyString to PyBytes...
r30105 return PyBytes_Check(value) ? PyDict_Contains(self->dict, value) : 0;
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 }
static void dirs_dealloc(dirsObject *self)
{
Py_XDECREF(self->dict);
PyObject_Del(self);
}
static PyObject *dirs_iter(dirsObject *self)
{
return PyObject_GetIter(self->dict);
}
static PySequenceMethods dirs_sequence_methods;
static PyMethodDef dirs_methods[] = {
{"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
{"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
{NULL} /* Sentinel */
};
Gregory Szorc
dirs: use PyVarObject_HEAD_INIT...
r30167 static PyTypeObject dirsType = { PyVarObject_HEAD_INIT(NULL, 0) };
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900
void dirs_module_init(PyObject *mod)
{
dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains;
dirsType.tp_name = "parsers.dirs";
dirsType.tp_new = PyType_GenericNew;
dirsType.tp_basicsize = sizeof(dirsObject);
dirsType.tp_dealloc = (destructor)dirs_dealloc;
dirsType.tp_as_sequence = &dirs_sequence_methods;
dirsType.tp_flags = Py_TPFLAGS_DEFAULT;
dirsType.tp_doc = "dirs";
dirsType.tp_iter = (getiterfunc)dirs_iter;
dirsType.tp_methods = dirs_methods;
dirsType.tp_init = (initproc)dirs_init;
if (PyType_Ready(&dirsType) < 0)
return;
Py_INCREF(&dirsType);
PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType);
}