# HG changeset patch # User Bryan O'Sullivan # Date 2012-05-12 08:55:08 # Node ID e410be8603932e46a51298748a4b874739037fad # Parent 5bc6edf71b39d40e46392dfe898b35284e77082b revlog: speed up prefix matching against nodes The radix tree already contains all the information we need to determine whether a short string is an unambiguous node identifier. We now make use of this information. In a kernel tree, this improves the performance of "hg log -q -r24bf01de75" from 0.27 seconds to 0.06. diff --git a/mercurial/parsers.c b/mercurial/parsers.c --- a/mercurial/parsers.c +++ b/mercurial/parsers.c @@ -566,7 +566,7 @@ static int nt_find(indexObject *self, co return -2; if (hex) - maxlevel = nodelen > 40 ? 40 : nodelen; + maxlevel = nodelen > 40 ? 40 : (int)nodelen; else maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2); @@ -795,6 +795,77 @@ static PyObject *index_getitem(indexObje return NULL; } +static int nt_partialmatch(indexObject *self, const char *node, + Py_ssize_t nodelen) +{ + int rev; + + if (nt_init(self) == -1) + return -3; + + if (self->ntrev > 0) { + /* ensure that the radix tree is fully populated */ + for (rev = self->ntrev - 1; rev >= 0; rev--) { + const char *n = index_node(self, rev); + if (n == NULL) + return -2; + if (nt_insert(self, n, rev) == -1) + return -3; + } + self->ntrev = rev; + } + + return nt_find(self, node, nodelen, 1); +} + +static PyObject *index_partialmatch(indexObject *self, PyObject *args) +{ + const char *fullnode; + int nodelen; + char *node; + int rev, i; + + if (!PyArg_ParseTuple(args, "s#", &node, &nodelen)) + return NULL; + + if (nodelen < 4) { + PyErr_SetString(PyExc_ValueError, "key too short"); + return NULL; + } + + if (nodelen > 40) + nodelen = 40; + + for (i = 0; i < nodelen; i++) + hexdigit(node, i); + if (PyErr_Occurred()) { + /* input contains non-hex characters */ + PyErr_Clear(); + Py_RETURN_NONE; + } + + rev = nt_partialmatch(self, node, nodelen); + + switch (rev) { + case -4: + raise_revlog_error(); + case -3: + return NULL; + case -2: + Py_RETURN_NONE; + case -1: + return PyString_FromStringAndSize(nullid, 20); + } + + fullnode = index_node(self, rev); + if (fullnode == NULL) { + PyErr_Format(PyExc_IndexError, + "could not access rev %d", rev); + return NULL; + } + return PyString_FromStringAndSize(fullnode, 20); +} + static PyObject *index_m_get(indexObject *self, PyObject *args) { char *node; @@ -1077,6 +1148,8 @@ static PyMethodDef index_methods[] = { "get an index entry"}, {"insert", (PyCFunction)index_insert, METH_VARARGS, "insert an index entry"}, + {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS, + "match a potentially ambiguous node ID"}, {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"}, {NULL} /* Sentinel */ diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -756,6 +756,15 @@ class revlog(object): pass def _partialmatch(self, id): + try: + return self.index.partialmatch(id) + except RevlogError: + # parsers.c radix tree lookup gave multiple matches + raise LookupError(id, self.indexfile, _("ambiguous identifier")) + except (AttributeError, ValueError): + # we are pure python, or key was too short to search radix tree + pass + if id in self._pcache: return self._pcache[id]