##// END OF EJS Templates
parsers: fix buffer overflow by invalid parent revision read from revlog...
parsers: fix buffer overflow by invalid parent revision read from revlog If revlog file is corrupted, it can have parent pointing to invalid revision. So we should validate it before updating nothead[], phases[], seen[], etc. Otherwise it would cause segfault at best. We could use "rev" instead of "maxrev" as upper bound, but I think the explicit "maxrev" can clarify that we just want to avoid possible buffer overflow vulnerability.

File last commit:

r25810:82d6a35c default
r25810:82d6a35c default
Show More
parsers.c
2649 lines | 63.1 KiB | text/x-c | CLexer
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 /*
parsers.c - efficient content parsing
Copyright 2008 Matt Mackall <mpm@selenic.com> and others
This software may be used and distributed according to the terms of
the GNU General Public License, incorporated herein by reference.
*/
#include <Python.h>
#include <ctype.h>
Bryan O'Sullivan
parsers: fix an integer size warning issued by clang
r17356 #include <stddef.h>
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 #include <string.h>
Renato Cunha
parsers.c: Added support for py3k....
r11361 #include "util.h"
Chris Jerdonek
parsers: fail fast if Python has wrong minor version (issue4110)...
r20742 static char *versionerrortext = "Python minor version mismatch";
Siddharth Agarwal
parsers: use a lookup table to convert hex to binary...
r19718 static int8_t hextable[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
Siddharth Agarwal
parsers: add a function to efficiently lowercase ASCII strings...
r22778 static char lowertable[128] = {
'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
'\x40',
'\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
'\x78', '\x79', '\x7a', /* X-Z */
'\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
};
Siddharth Agarwal
parsers: introduce an asciiupper function
r24577 static char uppertable[128] = {
'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
'\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
'\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
'\x60',
'\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
'\x58', '\x59', '\x5a', /* x-z */
'\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
};
Bryan O'Sullivan
parsers: change the type signature of hexdigit...
r16617 static inline int hexdigit(const char *p, Py_ssize_t off)
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 {
Siddharth Agarwal
parsers: use a lookup table to convert hex to binary...
r19718 int8_t val = hextable[(unsigned char)p[off]];
Bryan O'Sullivan
parsers: change the type signature of hexdigit...
r16617
Siddharth Agarwal
parsers: use a lookup table to convert hex to binary...
r19718 if (val >= 0) {
return val;
}
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389
Matt Mackall
parsers: speed up hex decoding for manifests
r7092 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
return 0;
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 }
/*
* Turn a hex-encoded string into binary.
*/
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyObject *unhexlify(const char *str, int len)
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 {
Matt Mackall
parsers: speed up hex decoding for manifests
r7092 PyObject *ret;
Benoit Boissinot
fix const annotation warning
r6395 char *d;
Bryan O'Sullivan
parsers: change the type signature of hexdigit...
r16617 int i;
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389
Renato Cunha
parsers.c: Added support for py3k....
r11361 ret = PyBytes_FromStringAndSize(NULL, len / 2);
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 if (!ret)
Matt Mackall
parsers: speed up hex decoding for manifests
r7092 return NULL;
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389
Renato Cunha
parsers.c: Added support for py3k....
r11361 d = PyBytes_AsString(ret);
Bryan O'Sullivan
parsers: change the type signature of hexdigit...
r16617 for (i = 0; i < len;) {
int hi = hexdigit(str, i++);
int lo = hexdigit(str, i++);
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 *d++ = (hi << 4) | lo;
}
Matt Mackall
parsers: clean up whitespace
r7091
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 return ret;
}
Siddharth Agarwal
parsers: make _asciilower a generic _asciitransform function...
r24576 static inline PyObject *_asciitransform(PyObject *str_obj,
Siddharth Agarwal
parsers._asciitransform: also accept a fallback function...
r24606 const char table[128],
PyObject *fallback_fn)
Siddharth Agarwal
parsers: add a function to efficiently lowercase ASCII strings...
r22778 {
char *str, *newstr;
Siddharth Agarwal
parsers: factor out most of asciilower into an internal function...
r24574 Py_ssize_t i, len;
Siddharth Agarwal
parsers: add a function to efficiently lowercase ASCII strings...
r22778 PyObject *newobj = NULL;
Siddharth Agarwal
parsers._asciilower: use an explicit return object...
r24575 PyObject *ret = NULL;
Siddharth Agarwal
parsers: add a function to efficiently lowercase ASCII strings...
r22778
Siddharth Agarwal
parsers: factor out most of asciilower into an internal function...
r24574 str = PyBytes_AS_STRING(str_obj);
len = PyBytes_GET_SIZE(str_obj);
Siddharth Agarwal
parsers: add a function to efficiently lowercase ASCII strings...
r22778
newobj = PyBytes_FromStringAndSize(NULL, len);
if (!newobj)
goto quit;
newstr = PyBytes_AS_STRING(newobj);
for (i = 0; i < len; i++) {
char c = str[i];
if (c & 0x80) {
Siddharth Agarwal
parsers._asciitransform: also accept a fallback function...
r24606 if (fallback_fn != NULL) {
ret = PyObject_CallFunctionObjArgs(fallback_fn,
str_obj, NULL);
} else {
PyObject *err = PyUnicodeDecodeError_Create(
"ascii", str, len, i, (i + 1),
"unexpected code byte");
PyErr_SetObject(PyExc_UnicodeDecodeError, err);
Py_XDECREF(err);
}
Siddharth Agarwal
parsers: add a function to efficiently lowercase ASCII strings...
r22778 goto quit;
}
Siddharth Agarwal
parsers: make _asciilower a generic _asciitransform function...
r24576 newstr[i] = table[(unsigned char)c];
Siddharth Agarwal
parsers: add a function to efficiently lowercase ASCII strings...
r22778 }
Siddharth Agarwal
parsers._asciilower: use an explicit return object...
r24575 ret = newobj;
Py_INCREF(ret);
Siddharth Agarwal
parsers: add a function to efficiently lowercase ASCII strings...
r22778 quit:
Py_XDECREF(newobj);
Siddharth Agarwal
parsers._asciilower: use an explicit return object...
r24575 return ret;
Siddharth Agarwal
parsers: add a function to efficiently lowercase ASCII strings...
r22778 }
Siddharth Agarwal
parsers: factor out most of asciilower into an internal function...
r24574 static PyObject *asciilower(PyObject *self, PyObject *args)
{
PyObject *str_obj;
if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
return NULL;
Siddharth Agarwal
parsers._asciitransform: also accept a fallback function...
r24606 return _asciitransform(str_obj, lowertable, NULL);
Siddharth Agarwal
parsers: factor out most of asciilower into an internal function...
r24574 }
Siddharth Agarwal
parsers: introduce an asciiupper function
r24577 static PyObject *asciiupper(PyObject *self, PyObject *args)
{
PyObject *str_obj;
if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
return NULL;
Siddharth Agarwal
parsers._asciitransform: also accept a fallback function...
r24606 return _asciitransform(str_obj, uppertable, NULL);
Siddharth Agarwal
parsers: introduce an asciiupper function
r24577 }
Siddharth Agarwal
parsers: factor out code to create a presized dict...
r25583 static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
{
/* _PyDict_NewPresized expects a minused parameter, but it actually
creates a dictionary that's the nearest power of two bigger than the
parameter. For example, with the initial minused = 1000, the
dictionary created has size 1024. Of course in a lot of cases that
can be greater than the maximum load factor Python's dict object
expects (= 2/3), so as soon as we cross the threshold we'll resize
anyway. So create a dictionary that's at least 3/2 the size. */
return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
}
Siddharth Agarwal
parsers: add an API to create a new presized dict
r25584 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
{
Py_ssize_t expected_size;
if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size))
return NULL;
return _dict_new_presized(expected_size);
}
Siddharth Agarwal
parsers: add a C function to create a file foldmap...
r24609 static PyObject *make_file_foldmap(PyObject *self, PyObject *args)
{
PyObject *dmap, *spec_obj, *normcase_fallback;
PyObject *file_foldmap = NULL;
enum normcase_spec spec;
PyObject *k, *v;
dirstateTupleObject *tuple;
Py_ssize_t pos = 0;
const char *table;
if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap",
&PyDict_Type, &dmap,
&PyInt_Type, &spec_obj,
&PyFunction_Type, &normcase_fallback))
goto quit;
André Sintzoff
parsers.c: avoid implicit conversion loses integer precision warning...
r24622 spec = (int)PyInt_AS_LONG(spec_obj);
Siddharth Agarwal
parsers: add a C function to create a file foldmap...
r24609 switch (spec) {
case NORMCASE_LOWER:
table = lowertable;
break;
case NORMCASE_UPPER:
table = uppertable;
break;
case NORMCASE_OTHER:
table = NULL;
break;
default:
PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
goto quit;
}
Siddharth Agarwal
parsers: factor out code to create a presized dict...
r25583 /* Add some more entries to deal with additions outside this
function. */
file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
Siddharth Agarwal
parsers: add a C function to create a file foldmap...
r24609 if (file_foldmap == NULL)
goto quit;
while (PyDict_Next(dmap, &pos, &k, &v)) {
if (!dirstate_tuple_check(v)) {
PyErr_SetString(PyExc_TypeError,
"expected a dirstate tuple");
goto quit;
}
tuple = (dirstateTupleObject *)v;
if (tuple->state != 'r') {
PyObject *normed;
if (table != NULL) {
normed = _asciitransform(k, table,
normcase_fallback);
} else {
normed = PyObject_CallFunctionObjArgs(
normcase_fallback, k, NULL);
}
if (normed == NULL)
goto quit;
if (PyDict_SetItem(file_foldmap, normed, k) == -1)
goto quit;
}
}
return file_foldmap;
quit:
Py_XDECREF(file_foldmap);
return NULL;
}
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 /*
* This code assumes that a manifest is stitched together with newline
* ('\n') characters.
*/
static PyObject *parse_manifest(PyObject *self, PyObject *args)
{
PyObject *mfdict, *fdict;
Siddharth Agarwal
parse_manifest: rewrite to use memchr...
r19728 char *str, *start, *end;
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 int len;
if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
&PyDict_Type, &mfdict,
&PyDict_Type, &fdict,
&str, &len))
goto quit;
Siddharth Agarwal
parse_manifest: rewrite to use memchr...
r19728 start = str;
end = str + len;
while (start < end) {
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 PyObject *file = NULL, *node = NULL;
PyObject *flags = NULL;
Siddharth Agarwal
parse_manifest: rewrite to use memchr...
r19728 char *zero = NULL, *newline = NULL;
Bryan O'Sullivan
parsers: fix an integer size warning issued by clang
r17356 ptrdiff_t nlen;
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389
Siddharth Agarwal
parse_manifest: rewrite to use memchr...
r19728 zero = memchr(start, '\0', end - start);
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 if (!zero) {
PyErr_SetString(PyExc_ValueError,
"manifest entry has no separator");
goto quit;
}
Siddharth Agarwal
parse_manifest: rewrite to use memchr...
r19728 newline = memchr(zero + 1, '\n', end - (zero + 1));
if (!newline) {
PyErr_SetString(PyExc_ValueError,
"manifest contains trailing garbage");
goto quit;
}
Renato Cunha
parsers.c: Added support for py3k....
r11361 file = PyBytes_FromStringAndSize(start, zero - start);
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 if (!file)
goto bail;
Siddharth Agarwal
parse_manifest: rewrite to use memchr...
r19728 nlen = newline - zero - 1;
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389
Bryan O'Sullivan
parsers: fix an integer size warning issued by clang
r17356 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 if (!node)
goto bail;
if (nlen > 40) {
Renato Cunha
parsers.c: Added support for py3k....
r11361 flags = PyBytes_FromStringAndSize(zero + 41,
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 nlen - 40);
if (!flags)
goto bail;
if (PyDict_SetItem(fdict, file, flags) == -1)
goto bail;
}
if (PyDict_SetItem(mfdict, file, node) == -1)
goto bail;
Siddharth Agarwal
parse_manifest: rewrite to use memchr...
r19728 start = newline + 1;
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389
Py_XDECREF(flags);
Py_XDECREF(node);
Py_XDECREF(file);
continue;
bail:
Py_XDECREF(flags);
Py_XDECREF(node);
Py_XDECREF(file);
goto quit;
}
Py_INCREF(Py_None);
return Py_None;
quit:
return NULL;
}
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
int size, int mtime)
{
dirstateTupleObject *t = PyObject_New(dirstateTupleObject,
&dirstateTupleType);
if (!t)
return NULL;
t->state = state;
t->mode = mode;
t->size = size;
t->mtime = mtime;
return t;
}
static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
PyObject *kwds)
{
/* We do all the initialization here and not a tp_init function because
* dirstate_tuple is immutable. */
dirstateTupleObject *t;
char state;
int size, mode, mtime;
if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
return NULL;
t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
if (!t)
return NULL;
t->state = state;
t->mode = mode;
t->size = size;
t->mtime = mtime;
return (PyObject *)t;
}
static void dirstate_tuple_dealloc(PyObject *o)
{
PyObject_Del(o);
}
static Py_ssize_t dirstate_tuple_length(PyObject *o)
{
return 4;
}
static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
{
dirstateTupleObject *t = (dirstateTupleObject *)o;
switch (i) {
case 0:
return PyBytes_FromStringAndSize(&t->state, 1);
case 1:
return PyInt_FromLong(t->mode);
case 2:
return PyInt_FromLong(t->size);
case 3:
return PyInt_FromLong(t->mtime);
default:
PyErr_SetString(PyExc_IndexError, "index out of range");
return NULL;
}
}
static PySequenceMethods dirstate_tuple_sq = {
dirstate_tuple_length, /* sq_length */
0, /* sq_concat */
0, /* sq_repeat */
dirstate_tuple_item, /* sq_item */
0, /* sq_ass_item */
0, /* sq_contains */
0, /* sq_inplace_concat */
0 /* sq_inplace_repeat */
};
PyTypeObject dirstateTupleType = {
PyVarObject_HEAD_INIT(NULL, 0)
"dirstate_tuple", /* tp_name */
sizeof(dirstateTupleObject),/* tp_basicsize */
0, /* tp_itemsize */
(destructor)dirstate_tuple_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
&dirstate_tuple_sq, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
"dirstate tuple", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
dirstate_tuple_new, /* tp_new */
};
Matt Mackall
dirstate: C parsing extension
r7093 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
{
PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
PyObject *fname = NULL, *cname = NULL, *entry = NULL;
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 char state, *cur, *str, *cpos;
Bryan O'Sullivan
parsers: state is a char, not an int
r19725 int mode, size, mtime;
Henrik Stuart
parsers: avoid signed/unsigned comparison mismatch...
r22403 unsigned int flen, len, pos = 40;
int readlen;
Matt Mackall
dirstate: C parsing extension
r7093
if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
&PyDict_Type, &dmap,
&PyDict_Type, &cmap,
Henrik Stuart
parsers: avoid signed/unsigned comparison mismatch...
r22403 &str, &readlen))
Matt Mackall
dirstate: C parsing extension
r7093 goto quit;
Henrik Stuart
parsers: avoid signed/unsigned comparison mismatch...
r22403 if (readlen < 0)
goto quit;
len = readlen;
Matt Mackall
dirstate: C parsing extension
r7093 /* read parents */
if (len < 40)
goto quit;
parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
if (!parents)
goto quit;
/* read filenames */
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 while (pos >= 40 && pos < len) {
cur = str + pos;
Matt Mackall
dirstate: C parsing extension
r7093 /* unpack header */
state = *cur;
Matt Mackall
util.h: replace ntohl/htonl with get/putbe32
r16437 mode = getbe32(cur + 1);
size = getbe32(cur + 5);
mtime = getbe32(cur + 9);
flen = getbe32(cur + 13);
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 pos += 17;
Matt Mackall
dirstate: C parsing extension
r7093 cur += 17;
David Soria Parra
parsers: fix 'unsigned expression is always true' warning (issue4142)...
r20316 if (flen > len - pos) {
Benoit Boissinot
parsers.c: fix integer overflows...
r7174 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
Matt Mackall
dirstate: C parsing extension
r7093 goto quit;
Benoit Boissinot
parsers.c: fix integer overflows...
r7174 }
Matt Mackall
dirstate: C parsing extension
r7093
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 entry = (PyObject *)make_dirstate_tuple(state, mode, size,
mtime);
Matt Mackall
dirstate: C parsing extension
r7093 cpos = memchr(cur, 0, flen);
if (cpos) {
Renato Cunha
parsers.c: Added support for py3k....
r11361 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
cname = PyBytes_FromStringAndSize(cpos + 1,
Matt Mackall
dirstate: C parsing extension
r7093 flen - (cpos - cur) - 1);
if (!fname || !cname ||
PyDict_SetItem(cmap, fname, cname) == -1 ||
PyDict_SetItem(dmap, fname, entry) == -1)
goto quit;
Py_DECREF(cname);
} else {
Renato Cunha
parsers.c: Added support for py3k....
r11361 fname = PyBytes_FromStringAndSize(cur, flen);
Matt Mackall
dirstate: C parsing extension
r7093 if (!fname ||
PyDict_SetItem(dmap, fname, entry) == -1)
goto quit;
}
Py_DECREF(fname);
Py_DECREF(entry);
fname = cname = entry = NULL;
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 pos += flen;
Matt Mackall
dirstate: C parsing extension
r7093 }
ret = parents;
Py_INCREF(ret);
quit:
Py_XDECREF(fname);
Py_XDECREF(cname);
Py_XDECREF(entry);
Py_XDECREF(parents);
return ret;
}
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 /*
* Efficiently pack a dirstate object into its on-disk format.
*/
static PyObject *pack_dirstate(PyObject *self, PyObject *args)
{
PyObject *packobj = NULL;
Siddharth Agarwal
pack_dirstate: in C version, for invalidation set dict to what we write to disk...
r21806 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 Py_ssize_t nbytes, pos, l;
Augie Fackler
parsers: don't leak a tuple in pack_dirstate...
r23946 PyObject *k, *v = NULL, *pn;
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 char *p, *s;
double now;
if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
&PyDict_Type, &map, &PyDict_Type, &copymap,
&pl, &now))
return NULL;
if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
return NULL;
}
/* Figure out how much we need to allocate. */
for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
PyObject *c;
if (!PyString_Check(k)) {
PyErr_SetString(PyExc_TypeError, "expected string key");
goto bail;
}
nbytes += PyString_GET_SIZE(k) + 17;
c = PyDict_GetItem(copymap, k);
if (c) {
if (!PyString_Check(c)) {
PyErr_SetString(PyExc_TypeError,
"expected string key");
goto bail;
}
nbytes += PyString_GET_SIZE(c) + 1;
}
}
packobj = PyString_FromStringAndSize(NULL, nbytes);
if (packobj == NULL)
goto bail;
p = PyString_AS_STRING(packobj);
pn = PySequence_ITEM(pl, 0);
if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
goto bail;
}
memcpy(p, s, l);
p += 20;
pn = PySequence_ITEM(pl, 1);
if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
goto bail;
}
memcpy(p, s, l);
p += 20;
for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 dirstateTupleObject *tuple;
char state;
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 uint32_t mode, size, mtime;
Py_ssize_t len, l;
PyObject *o;
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 char *t;
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 if (!dirstate_tuple_check(v)) {
PyErr_SetString(PyExc_TypeError,
"expected a dirstate tuple");
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 goto bail;
}
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 tuple = (dirstateTupleObject *)v;
state = tuple->state;
mode = tuple->mode;
size = tuple->size;
mtime = tuple->mtime;
if (state == 'n' && mtime == (uint32_t)now) {
Siddharth Agarwal
dirstate: move pure python dirstate packing to pure/parsers.py
r18567 /* See pure/parsers.py:pack_dirstate for why we do
* this. */
Siddharth Agarwal
pack_dirstate: in C version, for invalidation set dict to what we write to disk...
r21806 mtime = -1;
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 mtime_unset = (PyObject *)make_dirstate_tuple(
state, mode, size, mtime);
Siddharth Agarwal
pack_dirstate: in C version, for invalidation set dict to what we write to disk...
r21806 if (!mtime_unset)
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 goto bail;
Siddharth Agarwal
pack_dirstate: in C version, for invalidation set dict to what we write to disk...
r21806 if (PyDict_SetItem(map, k, mtime_unset) == -1)
goto bail;
Py_DECREF(mtime_unset);
mtime_unset = NULL;
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 }
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 *p++ = state;
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 putbe32(mode, p);
putbe32(size, p + 4);
putbe32(mtime, p + 8);
t = p + 12;
p += 16;
len = PyString_GET_SIZE(k);
memcpy(p, PyString_AS_STRING(k), len);
p += len;
o = PyDict_GetItem(copymap, k);
if (o) {
*p++ = '\0';
l = PyString_GET_SIZE(o);
memcpy(p, PyString_AS_STRING(o), l);
p += l;
len += l + 1;
}
putbe32((uint32_t)len, t);
}
pos = p - PyString_AS_STRING(packobj);
if (pos != nbytes) {
PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
(long)pos, (long)nbytes);
goto bail;
}
return packobj;
bail:
Siddharth Agarwal
pack_dirstate: in C version, for invalidation set dict to what we write to disk...
r21806 Py_XDECREF(mtime_unset);
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 Py_XDECREF(packobj);
Augie Fackler
parsers: don't leak a tuple in pack_dirstate...
r23946 Py_XDECREF(v);
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 return NULL;
}
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 /*
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 * A base-16 trie for fast node->rev mapping.
*
* Positive value is index of the next node in the trie
* Negative value is a leaf: -(rev + 1)
* Zero is empty
*/
typedef struct {
int children[16];
} nodetree;
/*
* This class has two behaviours.
*
* When used in a list-like way (with integer keys), we decode an
* entry in a RevlogNG index file on demand. Our last entry is a
* sentinel, always a nullid. We have limited support for
* integer-keyed insert and delete, only at elements right before the
* sentinel.
*
* With string keys, we lazily perform a reverse mapping from node to
* rev, using a base-16 trie.
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 */
typedef struct {
PyObject_HEAD
/* Type-specific fields go here. */
PyObject *data; /* raw bytes of index */
PyObject **cache; /* cached tuples */
const char **offsets; /* populated on demand */
Py_ssize_t raw_length; /* original number of elements */
Py_ssize_t length; /* current number of elements */
PyObject *added; /* populated on demand */
Bryan O'Sullivan
parsers: cache the result of index_headrevs...
r16787 PyObject *headrevs; /* cache, invalidated on changes */
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 PyObject *filteredrevs;/* filtered revs set */
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 nodetree *nt; /* base-16 trie */
int ntlength; /* # nodes in use */
int ntcapacity; /* # nodes allocated */
int ntdepth; /* maximum depth of tree */
int ntsplits; /* # splits performed */
int ntrev; /* last rev scanned */
int ntlookups; /* # lookups */
int ntmisses; /* # lookups that miss the cache */
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 int inlined;
} indexObject;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 static Py_ssize_t index_length(const indexObject *self)
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 {
if (self->added == NULL)
return self->length;
return self->length + PyList_GET_SIZE(self->added);
}
static PyObject *nullentry;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 static const char nullid[20];
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
Henrik Stuart
parsers: ensure correct return type for inline_scan...
r22401 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
#if LONG_MAX == 0x7fffffffL
Matt Mackall
util.h: more Python 2.4 fixes
r16393 static char *tuple_format = "Kiiiiiis#";
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 #else
Matt Mackall
util.h: more Python 2.4 fixes
r16393 static char *tuple_format = "kiiiiiis#";
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 #endif
Bernhard Leiner
C implementation of revlog index parsing
r7108
Bryan O'Sullivan
parsers: replace magic number 64 with symbolic constant
r16863 /* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_hdrsize = 64;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 /*
* Return a pointer to the beginning of a RevlogNG record.
*/
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
if (self->inlined && pos > 0) {
if (self->offsets == NULL) {
self->offsets = malloc(self->raw_length *
sizeof(*self->offsets));
if (self->offsets == NULL)
return (const char *)PyErr_NoMemory();
inline_scan(self, self->offsets);
}
return self->offsets[pos];
}
Bryan O'Sullivan
parsers: replace magic number 64 with symbolic constant
r16863 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 }
Yuya Nishihara
parsers: fix buffer overflow by invalid parent revision read from revlog...
r25810 static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
int *ps, int maxrev)
Laurent Charignon
parsers: move index_get_parents's declaration higher...
r25311 {
if (rev >= self->length - 1) {
PyObject *tuple = PyList_GET_ITEM(self->added,
rev - self->length + 1);
ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
} else {
const char *data = index_deref(self, rev);
ps[0] = getbe32(data + 24);
ps[1] = getbe32(data + 28);
}
Yuya Nishihara
parsers: fix buffer overflow by invalid parent revision read from revlog...
r25810 /* If index file is corrupted, ps[] may point to invalid revisions. So
* there is a risk of buffer overflow to trust them unconditionally. */
if (ps[0] > maxrev || ps[1] > maxrev) {
PyErr_SetString(PyExc_ValueError, "parent out of range");
return -1;
}
return 0;
Laurent Charignon
parsers: move index_get_parents's declaration higher...
r25311 }
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 /*
* RevlogNG format (all in big endian, data may be inlined):
Bernhard Leiner
C implementation of revlog index parsing
r7108 * 6 bytes: offset
* 2 bytes: flags
* 4 bytes: compressed length
* 4 bytes: uncompressed length
* 4 bytes: base revision
* 4 bytes: link revision
* 4 bytes: parent 1 revision
* 4 bytes: parent 2 revision
* 32 bytes: nodeid (only 20 bytes used)
*/
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
Bernhard Leiner
C implementation of revlog index parsing
r7108 {
Benoit Boissinot
index parser: fix refcounting in case of errors, refactor...
r7154 uint64_t offset_flags;
Bernhard Leiner
C implementation of revlog index parsing
r7108 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
Benoit Boissinot
index parser: fix refcounting in case of errors, refactor...
r7154 const char *c_node_id;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 const char *data;
Py_ssize_t length = index_length(self);
PyObject *entry;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 if (pos < 0)
pos += length;
if (pos < 0 || pos >= length) {
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
return NULL;
}
if (pos == length - 1) {
Py_INCREF(nullentry);
return nullentry;
}
Bernhard Leiner
C implementation of revlog index parsing
r7108
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 if (pos >= self->length - 1) {
PyObject *obj;
obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
Py_INCREF(obj);
return obj;
}
if (self->cache) {
if (self->cache[pos]) {
Py_INCREF(self->cache[pos]);
return self->cache[pos];
}
} else {
self->cache = calloc(self->raw_length, sizeof(PyObject *));
if (self->cache == NULL)
return PyErr_NoMemory();
}
Thomas Arendsen Hein
Some additional space/tab cleanups
r7190
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 data = index_deref(self, pos);
if (data == NULL)
return NULL;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
Matt Mackall
util.h: replace ntohl/htonl with get/putbe32
r16437 offset_flags = getbe32(data + 4);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 if (pos == 0) /* mask out version number for the first entry */
offset_flags &= 0xFFFF;
else {
Matt Mackall
util.h: replace ntohl/htonl with get/putbe32
r16437 uint32_t offset_high = getbe32(data);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 offset_flags |= ((uint64_t)offset_high) << 32;
}
Benoit Boissinot
index parser: fix refcounting in case of errors, refactor...
r7154
Matt Mackall
util.h: replace ntohl/htonl with get/putbe32
r16437 comp_len = getbe32(data + 8);
uncomp_len = getbe32(data + 12);
base_rev = getbe32(data + 16);
link_rev = getbe32(data + 20);
parent_1 = getbe32(data + 24);
parent_2 = getbe32(data + 28);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 c_node_id = data + 32;
entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
Matt Mackall
revlog: only build the nodemap on demand
r13254 uncomp_len, base_rev, link_rev,
parent_1, parent_2, c_node_id, 20);
Bryan O'Sullivan
parsers: use Py_INCREF safely
r19726 if (entry) {
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 PyObject_GC_UnTrack(entry);
Bryan O'Sullivan
parsers: use Py_INCREF safely
r19726 Py_INCREF(entry);
}
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
self->cache[pos] = entry;
return entry;
}
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 /*
* Return the 20-byte SHA of the node corresponding to the given rev.
*/
static const char *index_node(indexObject *self, Py_ssize_t pos)
{
Py_ssize_t length = index_length(self);
const char *data;
Bryan O'Sullivan
parsers: ensure that nullid is always present in the radix tree
r16664 if (pos == length - 1 || pos == INT_MAX)
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 return nullid;
if (pos >= length)
return NULL;
if (pos >= self->length - 1) {
PyObject *tuple, *str;
tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
str = PyTuple_GetItem(tuple, 7);
return str ? PyString_AS_STRING(str) : NULL;
}
data = index_deref(self, pos);
return data ? data + 32 : NULL;
}
static int nt_insert(indexObject *self, const char *node, int rev);
static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
{
if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
return -1;
if (*nodelen == 20)
return 0;
PyErr_SetString(PyExc_ValueError, "20-byte hash required");
return -1;
}
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 static PyObject *index_insert(indexObject *self, PyObject *args)
{
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 PyObject *obj;
char *node;
Matt Mackall
parsers: fix Py2.4 argument parsing issue...
r22604 int index;
Py_ssize_t len, nodelen;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
Matt Mackall
parsers: fix Py2.4 argument parsing issue...
r22604 if (!PyArg_ParseTuple(args, "iO", &index, &obj))
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 return NULL;
if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 PyErr_SetString(PyExc_TypeError, "8-tuple required");
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 return NULL;
}
Bernhard Leiner
C implementation of revlog index parsing
r7108
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 return NULL;
len = index_length(self);
Matt Mackall
parsers: fix Py2.4 argument parsing issue...
r22604 if (index < 0)
index += len;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
Matt Mackall
parsers: fix Py2.4 argument parsing issue...
r22604 if (index != len - 1) {
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 PyErr_SetString(PyExc_IndexError,
"insert only supported at index -1");
return NULL;
}
if (self->added == NULL) {
self->added = PyList_New(0);
if (self->added == NULL)
return NULL;
}
if (PyList_Append(self->added, obj) == -1)
return NULL;
Matt Mackall
revlog: only build the nodemap on demand
r13254
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 if (self->nt)
Matt Mackall
parsers: fix Py2.4 argument parsing issue...
r22604 nt_insert(self, node, index);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414
Bryan O'Sullivan
parsers: cache the result of index_headrevs...
r16787 Py_CLEAR(self->headrevs);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 Py_RETURN_NONE;
}
Bryan O'Sullivan
parsers: fix a memleak, and add a clearcaches method to the index...
r16370 static void _index_clearcaches(indexObject *self)
{
if (self->cache) {
Py_ssize_t i;
Bryan O'Sullivan
parsers: use Py_CLEAR where appropriate
r16732 for (i = 0; i < self->raw_length; i++)
Py_CLEAR(self->cache[i]);
Bryan O'Sullivan
parsers: fix a memleak, and add a clearcaches method to the index...
r16370 free(self->cache);
self->cache = NULL;
}
if (self->offsets) {
free(self->offsets);
self->offsets = NULL;
}
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 if (self->nt) {
free(self->nt);
self->nt = NULL;
}
Bryan O'Sullivan
parsers: cache the result of index_headrevs...
r16787 Py_CLEAR(self->headrevs);
Bryan O'Sullivan
parsers: fix a memleak, and add a clearcaches method to the index...
r16370 }
static PyObject *index_clearcaches(indexObject *self)
{
_index_clearcaches(self);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 self->ntlength = self->ntcapacity = 0;
self->ntdepth = self->ntsplits = 0;
self->ntrev = -1;
self->ntlookups = self->ntmisses = 0;
Bryan O'Sullivan
parsers: fix a memleak, and add a clearcaches method to the index...
r16370 Py_RETURN_NONE;
}
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 static PyObject *index_stats(indexObject *self)
{
PyObject *obj = PyDict_New();
Augie Fackler
parsers: avoid leaking several PyObjects in index_stats...
r23948 PyObject *t = NULL;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414
if (obj == NULL)
return NULL;
#define istat(__n, __d) \
Augie Fackler
parsers: avoid leaking several PyObjects in index_stats...
r23948 t = PyInt_FromSsize_t(self->__n); \
if (!t) \
goto bail; \
if (PyDict_SetItemString(obj, __d, t) == -1) \
goto bail; \
Py_DECREF(t);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414
if (self->added) {
Py_ssize_t len = PyList_GET_SIZE(self->added);
Augie Fackler
parsers: avoid leaking several PyObjects in index_stats...
r23948 t = PyInt_FromSsize_t(len);
if (!t)
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 goto bail;
Augie Fackler
parsers: avoid leaking several PyObjects in index_stats...
r23948 if (PyDict_SetItemString(obj, "index entries added", t) == -1)
goto bail;
Py_DECREF(t);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 }
if (self->raw_length != self->length - 1)
istat(raw_length, "revs on disk");
istat(length, "revs in memory");
istat(ntcapacity, "node trie capacity");
istat(ntdepth, "node trie depth");
istat(ntlength, "node trie count");
istat(ntlookups, "node trie lookups");
istat(ntmisses, "node trie misses");
istat(ntrev, "node trie last rev scanned");
istat(ntsplits, "node trie splits");
#undef istat
return obj;
bail:
Py_XDECREF(obj);
Augie Fackler
parsers: avoid leaking several PyObjects in index_stats...
r23948 Py_XDECREF(t);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 return NULL;
}
Bryan O'Sullivan
parsers: cache the result of index_headrevs...
r16787 /*
* When we cache a list, we want to be sure the caller can't mutate
* the cached copy.
*/
static PyObject *list_copy(PyObject *list)
{
Py_ssize_t len = PyList_GET_SIZE(list);
PyObject *newlist = PyList_New(len);
Py_ssize_t i;
if (newlist == NULL)
return NULL;
for (i = 0; i < len; i++) {
PyObject *obj = PyList_GET_ITEM(list, i);
Py_INCREF(obj);
PyList_SET_ITEM(newlist, i, obj);
}
return newlist;
}
Mads Kiilerich
parsers: use 'k' format for Py_BuildValue instead of 'n' because Python 2.4...
r23073 /* arg should be Py_ssize_t but Python 2.4 do not support the n format */
static int check_filter(PyObject *filter, unsigned long arg) {
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 if (filter) {
PyObject *arglist, *result;
int isfiltered;
Mads Kiilerich
parsers: use 'k' format for Py_BuildValue instead of 'n' because Python 2.4...
r23073 arglist = Py_BuildValue("(k)", arg);
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 if (!arglist) {
return -1;
}
result = PyEval_CallObject(filter, arglist);
Py_DECREF(arglist);
if (!result) {
return -1;
}
/* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
* same as this function, so we can just return it directly.*/
isfiltered = PyObject_IsTrue(result);
Py_DECREF(result);
return isfiltered;
} else {
return 0;
}
}
Laurent Charignon
phase: compute phases in C...
r24443 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
André Sintzoff
parsers.c: avoid implicit conversion loses integer warnings...
r24499 Py_ssize_t marker, char *phases)
Laurent Charignon
phase: compute phases in C...
r24443 {
PyObject *iter = NULL;
PyObject *iter_item = NULL;
Py_ssize_t min_idx = index_length(self) + 1;
long iter_item_long;
if (PyList_GET_SIZE(list) != 0) {
iter = PyObject_GetIter(list);
if (iter == NULL)
return -2;
while ((iter_item = PyIter_Next(iter)))
{
iter_item_long = PyInt_AS_LONG(iter_item);
Py_DECREF(iter_item);
if (iter_item_long < min_idx)
min_idx = iter_item_long;
phases[iter_item_long] = marker;
}
Py_DECREF(iter);
}
return min_idx;
}
static inline void set_phase_from_parents(char *phases, int parent_1,
André Sintzoff
parsers.c: avoid implicit conversion loses integer warnings...
r24499 int parent_2, Py_ssize_t i)
Laurent Charignon
phase: compute phases in C...
r24443 {
if (parent_1 >= 0 && phases[parent_1] > phases[i])
phases[i] = phases[parent_1];
if (parent_2 >= 0 && phases[parent_2] > phases[i])
phases[i] = phases[parent_2];
}
Laurent Charignon
phases: add set per phase in C phase computation...
r25190 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
Laurent Charignon
phase: compute phases in C...
r24443 {
PyObject *roots = Py_None;
Laurent Charignon
phases: add set per phase in C phase computation...
r25190 PyObject *ret = NULL;
Laurent Charignon
phase: compute phases in C...
r24443 PyObject *phaseslist = NULL;
PyObject *phaseroots = NULL;
Laurent Charignon
phases: add set per phase in C phase computation...
r25190 PyObject *phaseset = NULL;
PyObject *phasessetlist = NULL;
Laurent Charignon
phase: compute phases in C...
r24443 Py_ssize_t len = index_length(self) - 1;
Py_ssize_t numphase = 0;
Py_ssize_t minrevallphases = 0;
Py_ssize_t minrevphase = 0;
Py_ssize_t i = 0;
char *phases = NULL;
Laurent Charignon
phases: add set per phase in C phase computation...
r25190 long phase;
Laurent Charignon
phase: compute phases in C...
r24443
if (!PyArg_ParseTuple(args, "O", &roots))
goto release_none;
if (roots == NULL || !PyList_Check(roots))
goto release_none;
phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
if (phases == NULL)
goto release_none;
/* Put the phase information of all the roots in phases */
numphase = PyList_GET_SIZE(roots)+1;
minrevallphases = len + 1;
Laurent Charignon
phases: add set per phase in C phase computation...
r25190 phasessetlist = PyList_New(numphase);
if (phasessetlist == NULL)
goto release_none;
PyList_SET_ITEM(phasessetlist, 0, Py_None);
Py_INCREF(Py_None);
Laurent Charignon
phase: compute phases in C...
r24443 for (i = 0; i < numphase-1; i++) {
phaseroots = PyList_GET_ITEM(roots, i);
Laurent Charignon
phases: add set per phase in C phase computation...
r25190 phaseset = PySet_New(NULL);
if (phaseset == NULL)
goto release_phasesetlist;
PyList_SET_ITEM(phasessetlist, i+1, phaseset);
Laurent Charignon
phase: compute phases in C...
r24443 if (!PyList_Check(phaseroots))
Laurent Charignon
phases: add set per phase in C phase computation...
r25190 goto release_phasesetlist;
Laurent Charignon
phase: compute phases in C...
r24443 minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
if (minrevphase == -2) /* Error from add_roots_get_min */
Laurent Charignon
phases: add set per phase in C phase computation...
r25190 goto release_phasesetlist;
Laurent Charignon
phase: compute phases in C...
r24443 minrevallphases = MIN(minrevallphases, minrevphase);
}
/* Propagate the phase information from the roots to the revs */
if (minrevallphases != -1) {
Laurent Charignon
parsers: simplify the code computing the phases...
r25312 int parents[2];
for (i = minrevallphases; i < len; i++) {
Yuya Nishihara
parsers: fix buffer overflow by invalid parent revision read from revlog...
r25810 if (index_get_parents(self, i, parents, len - 1) < 0)
goto release_phasesetlist;
Laurent Charignon
parsers: simplify the code computing the phases...
r25312 set_phase_from_parents(phases, parents[0], parents[1], i);
Laurent Charignon
phase: compute phases in C...
r24443 }
}
/* Transform phase list to a python list */
phaseslist = PyList_New(len);
if (phaseslist == NULL)
Laurent Charignon
phases: add set per phase in C phase computation...
r25190 goto release_phasesetlist;
for (i = 0; i < len; i++) {
phase = phases[i];
/* We only store the sets of phase for non public phase, the public phase
* is computed as a difference */
if (phase != 0) {
phaseset = PyList_GET_ITEM(phasessetlist, phase);
PySet_Add(phaseset, PyInt_FromLong(i));
}
PyList_SET_ITEM(phaseslist, i, PyInt_FromLong(phase));
}
ret = PyList_New(2);
if (ret == NULL)
goto release_phaseslist;
Laurent Charignon
phase: compute phases in C...
r24443
Laurent Charignon
phases: add set per phase in C phase computation...
r25190 PyList_SET_ITEM(ret, 0, phaseslist);
PyList_SET_ITEM(ret, 1, phasessetlist);
/* We don't release phaseslist and phasessetlist as we return them to
* python */
goto release_phases;
release_phaseslist:
Py_XDECREF(phaseslist);
release_phasesetlist:
Py_XDECREF(phasessetlist);
Laurent Charignon
phase: compute phases in C...
r24443 release_phases:
free(phases);
release_none:
Laurent Charignon
phases: add set per phase in C phase computation...
r25190 return ret;
Laurent Charignon
phase: compute phases in C...
r24443 }
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 static PyObject *index_headrevs(indexObject *self, PyObject *args)
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 {
Laurent Charignon
changelog: fix bug in heads computation...
r25297 Py_ssize_t i, j, len;
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 char *nothead = NULL;
David Soria Parra
parsers: fix uninitialize variable warning...
r22540 PyObject *heads = NULL;
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 PyObject *filter = NULL;
PyObject *filteredrevs = Py_None;
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
return NULL;
}
if (self->headrevs && filteredrevs == self->filteredrevs)
Bryan O'Sullivan
parsers: cache the result of index_headrevs...
r16787 return list_copy(self->headrevs);
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 Py_DECREF(self->filteredrevs);
self->filteredrevs = filteredrevs;
Py_INCREF(filteredrevs);
if (filteredrevs != Py_None) {
filter = PyObject_GetAttrString(filteredrevs, "__contains__");
if (!filter) {
PyErr_SetString(PyExc_TypeError,
"filteredrevs has no attribute __contains__");
goto bail;
}
}
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 len = index_length(self) - 1;
heads = PyList_New(0);
if (heads == NULL)
goto bail;
if (len == 0) {
PyObject *nullid = PyInt_FromLong(-1);
if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
Py_XDECREF(nullid);
goto bail;
}
goto done;
}
nothead = calloc(len, 1);
if (nothead == NULL)
goto bail;
Laurent Charignon
changelog: fix bug in heads computation...
r25297 for (i = 0; i < len; i++) {
int isfiltered;
int parents[2];
Durham Goode
obsolete: use C code for headrevs calculation...
r22484
isfiltered = check_filter(filter, i);
if (isfiltered == -1) {
PyErr_SetString(PyExc_TypeError,
"unable to check filter");
goto bail;
}
if (isfiltered) {
nothead[i] = 1;
continue;
}
Yuya Nishihara
parsers: fix buffer overflow by invalid parent revision read from revlog...
r25810 if (index_get_parents(self, i, parents, len - 1) < 0)
goto bail;
Laurent Charignon
changelog: fix bug in heads computation...
r25297 for (j = 0; j < 2; j++) {
if (parents[j] >= 0)
nothead[parents[j]] = 1;
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 }
}
for (i = 0; i < len; i++) {
PyObject *head;
if (nothead[i])
continue;
Henrik Stuart
parsers: fix typing issue when constructing Python integer object...
r22400 head = PyInt_FromSsize_t(i);
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 if (head == NULL || PyList_Append(heads, head) == -1) {
Py_XDECREF(head);
goto bail;
}
}
done:
Bryan O'Sullivan
parsers: cache the result of index_headrevs...
r16787 self->headrevs = heads;
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 Py_XDECREF(filter);
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 free(nothead);
Bryan O'Sullivan
parsers: cache the result of index_headrevs...
r16787 return list_copy(self->headrevs);
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 bail:
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 Py_XDECREF(filter);
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 Py_XDECREF(heads);
free(nothead);
return NULL;
}
Bryan O'Sullivan
parsers: change the type of nt_level...
r16618 static inline int nt_level(const char *node, Py_ssize_t level)
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 {
int v = node[level>>1];
if (!(level & 1))
v >>= 4;
return v & 0xf;
}
Bryan O'Sullivan
parsers: allow nt_find to signal an ambiguous match
r16616 /*
* Return values:
*
* -4: match is ambiguous (multiple candidates)
* -2: not found
* rest: valid rev
*/
Bryan O'Sullivan
parsers: allow hex keys
r16663 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
int hex)
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 {
Bryan O'Sullivan
parsers: allow hex keys
r16663 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
Bryan O'Sullivan
parsers: use the correct maximum radix tree depth...
r16641 int level, maxlevel, off;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414
if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
return -1;
if (self->nt == NULL)
return -2;
Bryan O'Sullivan
parsers: allow hex keys
r16663 if (hex)
Bryan O'Sullivan
revlog: speed up prefix matching against nodes...
r16665 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
Bryan O'Sullivan
parsers: allow hex keys
r16663 else
maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
Bryan O'Sullivan
parsers: use the correct maximum radix tree depth...
r16641
for (level = off = 0; level < maxlevel; level++) {
Bryan O'Sullivan
parsers: allow hex keys
r16663 int k = getnybble(node, level);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 nodetree *n = &self->nt[off];
int v = n->children[k];
if (v < 0) {
const char *n;
Bryan O'Sullivan
parsers: allow hex keys
r16663 Py_ssize_t i;
Yuya Nishihara
parsers: avoid signed integer overflow in calculation of leaf-node index...
r24879 v = -(v + 1);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 n = index_node(self, v);
if (n == NULL)
return -2;
Bryan O'Sullivan
parsers: allow hex keys
r16663 for (i = level; i < maxlevel; i++)
if (getnybble(node, i) != nt_level(n, i))
return -2;
return v;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 }
if (v == 0)
return -2;
off = v;
}
Bryan O'Sullivan
parsers: allow nt_find to signal an ambiguous match
r16616 /* multiple matches against an ambiguous prefix */
return -4;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 }
static int nt_new(indexObject *self)
{
if (self->ntlength == self->ntcapacity) {
Bryan O'Sullivan
parsers: check for memory allocation overflows more carefully
r24623 if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) {
PyErr_SetString(PyExc_MemoryError,
"overflow in nt_new");
return -1;
}
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 self->ntcapacity *= 2;
self->nt = realloc(self->nt,
self->ntcapacity * sizeof(nodetree));
if (self->nt == NULL) {
PyErr_SetString(PyExc_MemoryError, "out of memory");
return -1;
}
memset(&self->nt[self->ntlength], 0,
sizeof(nodetree) * (self->ntcapacity - self->ntlength));
}
return self->ntlength++;
}
static int nt_insert(indexObject *self, const char *node, int rev)
{
int level = 0;
int off = 0;
Bryan O'Sullivan
parsers: use the correct maximum radix tree depth...
r16641 while (level < 40) {
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 int k = nt_level(node, level);
nodetree *n;
int v;
n = &self->nt[off];
v = n->children[k];
if (v == 0) {
n->children[k] = -rev - 1;
return 0;
}
if (v < 0) {
Yuya Nishihara
parsers: avoid signed integer overflow in calculation of leaf-node index...
r24879 const char *oldnode = index_node(self, -(v + 1));
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 int noff;
if (!oldnode || !memcmp(oldnode, node, 20)) {
n->children[k] = -rev - 1;
return 0;
}
noff = nt_new(self);
if (noff == -1)
return -1;
/* self->nt may have been changed by realloc */
self->nt[off].children[k] = noff;
off = noff;
n = &self->nt[off];
n->children[nt_level(oldnode, ++level)] = v;
if (level > self->ntdepth)
self->ntdepth = level;
self->ntsplits += 1;
} else {
level += 1;
off = v;
}
}
return -1;
}
Bryan O'Sullivan
parsers: factor out radix tree initialization
r16615 static int nt_init(indexObject *self)
{
if (self->nt == NULL) {
Bryan O'Sullivan
parsers: check for memory allocation overflows more carefully
r24623 if (self->raw_length > INT_MAX / sizeof(nodetree)) {
Abhay Kadam
mercurial/parsers.c: fix compiler warning...
r20110 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
return -1;
}
Bryan O'Sullivan
parsers: factor out radix tree initialization
r16615 self->ntcapacity = self->raw_length < 4
Abhay Kadam
mercurial/parsers.c: fix compiler warning...
r20110 ? 4 : (int)self->raw_length / 2;
Bryan O'Sullivan
parsers: factor out radix tree initialization
r16615 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
if (self->nt == NULL) {
PyErr_NoMemory();
return -1;
}
self->ntlength = 1;
self->ntrev = (int)index_length(self) - 1;
self->ntlookups = 1;
self->ntmisses = 0;
Bryan O'Sullivan
parsers: ensure that nullid is always present in the radix tree
r16664 if (nt_insert(self, nullid, INT_MAX) == -1)
return -1;
Bryan O'Sullivan
parsers: factor out radix tree initialization
r16615 }
return 0;
}
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 /*
* Return values:
*
* -3: error (exception set)
* -2: not found (no exception set)
* rest: valid rev
*/
static int index_find_node(indexObject *self,
const char *node, Py_ssize_t nodelen)
{
int rev;
self->ntlookups++;
Bryan O'Sullivan
parsers: allow hex keys
r16663 rev = nt_find(self, node, nodelen, 0);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 if (rev >= -1)
return rev;
Bryan O'Sullivan
parsers: factor out radix tree initialization
r16615 if (nt_init(self) == -1)
return -3;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414
/*
* For the first handful of lookups, we scan the entire index,
* and cache only the matching nodes. This optimizes for cases
* like "hg tip", where only a few nodes are accessed.
*
* After that, we cache every node we visit, using a single
* scan amortized over multiple lookups. This gives the best
* bulk performance, e.g. for "hg log".
*/
if (self->ntmisses++ < 4) {
for (rev = self->ntrev - 1; rev >= 0; rev--) {
const char *n = index_node(self, rev);
if (n == NULL)
return -2;
if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
if (nt_insert(self, n, rev) == -1)
return -3;
break;
}
}
} else {
for (rev = self->ntrev - 1; rev >= 0; rev--) {
const char *n = index_node(self, rev);
Bryan O'Sullivan
parsers: update ntrev when we stop scanning...
r16614 if (n == NULL) {
self->ntrev = rev + 1;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 return -2;
Bryan O'Sullivan
parsers: update ntrev when we stop scanning...
r16614 }
if (nt_insert(self, n, rev) == -1) {
self->ntrev = rev + 1;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 return -3;
Bryan O'Sullivan
parsers: update ntrev when we stop scanning...
r16614 }
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
break;
}
}
self->ntrev = rev;
}
if (rev >= 0)
return rev;
return -2;
}
Gregory Szorc
parsers: do not cache RevlogError type (issue4451)...
r25561 static void raise_revlog_error(void)
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 {
Gregory Szorc
parsers: do not cache RevlogError type (issue4451)...
r25561 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414
Gregory Szorc
parsers: do not cache RevlogError type (issue4451)...
r25561 mod = PyImport_ImportModule("mercurial.error");
if (mod == NULL) {
goto cleanup;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 }
Gregory Szorc
parsers: do not cache RevlogError type (issue4451)...
r25561 dict = PyModule_GetDict(mod);
if (dict == NULL) {
goto cleanup;
}
Py_INCREF(dict);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414
Gregory Szorc
parsers: do not cache RevlogError type (issue4451)...
r25561 errclass = PyDict_GetItemString(dict, "RevlogError");
if (errclass == NULL) {
PyErr_SetString(PyExc_SystemError,
"could not find RevlogError");
goto cleanup;
}
/* value of exception is ignored by callers */
PyErr_SetString(errclass, "RevlogError");
cleanup:
Py_XDECREF(dict);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 Py_XDECREF(mod);
}
static PyObject *index_getitem(indexObject *self, PyObject *value)
{
char *node;
Py_ssize_t nodelen;
int rev;
if (PyInt_Check(value))
return index_get(self, PyInt_AS_LONG(value));
Bryan O'Sullivan
parsers: strictly check for 20-byte hashes where they're required
r16679 if (node_check(value, &node, &nodelen) == -1)
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 return NULL;
rev = index_find_node(self, node, nodelen);
if (rev >= -1)
return PyInt_FromLong(rev);
if (rev == -2)
raise_revlog_error();
return NULL;
}
Bryan O'Sullivan
revlog: speed up prefix matching against nodes...
r16665 static int nt_partialmatch(indexObject *self, const char *node,
Py_ssize_t nodelen)
{
int rev;
if (nt_init(self) == -1)
return -3;
if (self->ntrev > 0) {
/* ensure that the radix tree is fully populated */
for (rev = self->ntrev - 1; rev >= 0; rev--) {
const char *n = index_node(self, rev);
if (n == NULL)
return -2;
if (nt_insert(self, n, rev) == -1)
return -3;
}
self->ntrev = rev;
}
return nt_find(self, node, nodelen, 1);
}
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
const char *fullnode;
int nodelen;
char *node;
int rev, i;
if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
return NULL;
if (nodelen < 4) {
PyErr_SetString(PyExc_ValueError, "key too short");
return NULL;
}
sorcerer
revlog: don't try to partialmatch strings those length > 40...
r17353 if (nodelen > 40) {
PyErr_SetString(PyExc_ValueError, "key too long");
return NULL;
}
Bryan O'Sullivan
revlog: speed up prefix matching against nodes...
r16665
for (i = 0; i < nodelen; i++)
hexdigit(node, i);
if (PyErr_Occurred()) {
/* input contains non-hex characters */
PyErr_Clear();
Py_RETURN_NONE;
}
rev = nt_partialmatch(self, node, nodelen);
switch (rev) {
case -4:
raise_revlog_error();
case -3:
return NULL;
case -2:
Py_RETURN_NONE;
case -1:
return PyString_FromStringAndSize(nullid, 20);
}
fullnode = index_node(self, rev);
if (fullnode == NULL) {
PyErr_Format(PyExc_IndexError,
"could not access rev %d", rev);
return NULL;
}
return PyString_FromStringAndSize(fullnode, 20);
}
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 static PyObject *index_m_get(indexObject *self, PyObject *args)
{
Bryan O'Sullivan
parsers: strictly check for 20-byte hashes where they're required
r16679 Py_ssize_t nodelen;
PyObject *val;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 char *node;
Bryan O'Sullivan
parsers: strictly check for 20-byte hashes where they're required
r16679 int rev;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414
Bryan O'Sullivan
parsers: strictly check for 20-byte hashes where they're required
r16679 if (!PyArg_ParseTuple(args, "O", &val))
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 return NULL;
Bryan O'Sullivan
parsers: strictly check for 20-byte hashes where they're required
r16679 if (node_check(val, &node, &nodelen) == -1)
return NULL;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 rev = index_find_node(self, node, nodelen);
if (rev == -3)
return NULL;
if (rev == -2)
Py_RETURN_NONE;
return PyInt_FromLong(rev);
}
static int index_contains(indexObject *self, PyObject *value)
{
char *node;
Py_ssize_t nodelen;
if (PyInt_Check(value)) {
long rev = PyInt_AS_LONG(value);
return rev >= -1 && rev < index_length(self);
}
Bryan O'Sullivan
parsers: strictly check for 20-byte hashes where they're required
r16679 if (node_check(value, &node, &nodelen) == -1)
return -1;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414
switch (index_find_node(self, node, nodelen)) {
case -3:
return -1;
case -2:
return 0;
default:
return 1;
}
}
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 typedef uint64_t bitmask;
/*
* Given a disjoint set of revs, return all candidates for the
* greatest common ancestor. In revset notation, this is the set
* "heads(::a and ::b and ...)"
*/
static PyObject *find_gca_candidates(indexObject *self, const int *revs,
int revcount)
{
const bitmask allseen = (1ull << revcount) - 1;
const bitmask poison = 1ull << revcount;
PyObject *gca = PyList_New(0);
Mads Kiilerich
ancestors: remove unnecessary handling of 'left'...
r20555 int i, v, interesting;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 int maxrev = -1;
Henrik Stuart
parsers: use bitmask type consistently in find_gca_candidates...
r22399 bitmask sp;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 bitmask *seen;
Bryan O'Sullivan
parsers: correctly handle a failed allocation
r19727 if (gca == NULL)
return PyErr_NoMemory();
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 for (i = 0; i < revcount; i++) {
if (revs[i] > maxrev)
maxrev = revs[i];
}
seen = calloc(sizeof(*seen), maxrev + 1);
Bryan O'Sullivan
parsers: correctly handle a failed allocation
r19727 if (seen == NULL) {
Py_DECREF(gca);
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 return PyErr_NoMemory();
Bryan O'Sullivan
parsers: correctly handle a failed allocation
r19727 }
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988
for (i = 0; i < revcount; i++)
seen[revs[i]] = 1ull << i;
Mads Kiilerich
ancestors: remove unnecessary handling of 'left'...
r20555 interesting = revcount;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988
for (v = maxrev; v >= 0 && interesting; v--) {
Henrik Stuart
parsers: use bitmask type consistently in find_gca_candidates...
r22399 bitmask sv = seen[v];
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 int parents[2];
if (!sv)
continue;
if (sv < poison) {
interesting -= 1;
if (sv == allseen) {
PyObject *obj = PyInt_FromLong(v);
if (obj == NULL)
goto bail;
if (PyList_Append(gca, obj) == -1) {
Py_DECREF(obj);
goto bail;
}
sv |= poison;
for (i = 0; i < revcount; i++) {
Mads Kiilerich
ancestors: remove unnecessary handling of 'left'...
r20555 if (revs[i] == v)
goto done;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 }
}
}
Yuya Nishihara
parsers: fix buffer overflow by invalid parent revision read from revlog...
r25810 if (index_get_parents(self, v, parents, maxrev) < 0)
goto bail;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988
for (i = 0; i < 2; i++) {
int p = parents[i];
if (p == -1)
continue;
Matt Mackall
parsers: fix variable declaration position issue
r19030 sp = seen[p];
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 if (sv < poison) {
if (sp == 0) {
seen[p] = sv;
interesting++;
}
else if (sp != sv)
seen[p] |= sv;
} else {
if (sp && sp < poison)
interesting--;
seen[p] = sv;
}
}
}
done:
free(seen);
return gca;
bail:
free(seen);
Py_XDECREF(gca);
return NULL;
}
/*
* Given a disjoint set of revs, return the subset with the longest
* path to the root.
*/
static PyObject *find_deepest(indexObject *self, PyObject *revs)
{
const Py_ssize_t revcount = PyList_GET_SIZE(revs);
static const Py_ssize_t capacity = 24;
int *depth, *interesting = NULL;
int i, j, v, ninteresting;
Danek Duvall
parsers.c: fix a couple of memory leaks
r21730 PyObject *dict = NULL, *keys = NULL;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 long *seen = NULL;
int maxrev = -1;
long final;
if (revcount > capacity) {
PyErr_Format(PyExc_OverflowError,
"bitset size (%ld) > capacity (%ld)",
André Sintzoff
parsers: remove warning: format ‘%ld’ expects argument of type ‘long int’...
r19062 (long)revcount, (long)capacity);
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 return NULL;
}
for (i = 0; i < revcount; i++) {
int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
if (n > maxrev)
maxrev = n;
}
depth = calloc(sizeof(*depth), maxrev + 1);
if (depth == NULL)
return PyErr_NoMemory();
seen = calloc(sizeof(*seen), maxrev + 1);
if (seen == NULL) {
PyErr_NoMemory();
goto bail;
}
interesting = calloc(sizeof(*interesting), 2 << revcount);
if (interesting == NULL) {
PyErr_NoMemory();
goto bail;
}
Siddharth Agarwal
ancestor.deepest: sort revs in C version...
r19502 if (PyList_Sort(revs) == -1)
goto bail;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 for (i = 0; i < revcount; i++) {
int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
long b = 1l << i;
depth[n] = 1;
seen[n] = b;
interesting[b] = 1;
}
ninteresting = (int)revcount;
for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
int dv = depth[v];
int parents[2];
long sv;
if (dv == 0)
continue;
sv = seen[v];
Yuya Nishihara
parsers: fix buffer overflow by invalid parent revision read from revlog...
r25810 if (index_get_parents(self, v, parents, maxrev) < 0)
goto bail;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988
for (i = 0; i < 2; i++) {
int p = parents[i];
long nsp, sp;
int dp;
if (p == -1)
continue;
dp = depth[p];
nsp = sp = seen[p];
if (dp <= dv) {
depth[p] = dv + 1;
if (sp != sv) {
interesting[sv] += 1;
nsp = seen[p] = sv;
if (sp) {
interesting[sp] -= 1;
if (interesting[sp] == 0)
ninteresting -= 1;
}
}
}
else if (dv == dp - 1) {
nsp = sp | sv;
if (nsp == sp)
continue;
seen[p] = nsp;
Wei, Elson
ancestor.deepest: decrement ninteresting correctly (issue3984)...
r19503 interesting[sp] -= 1;
if (interesting[sp] == 0 && interesting[nsp] > 0)
ninteresting -= 1;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 interesting[nsp] += 1;
}
}
interesting[sv] -= 1;
if (interesting[sv] == 0)
ninteresting -= 1;
}
final = 0;
j = ninteresting;
for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
if (interesting[i] == 0)
continue;
final |= i;
j -= 1;
}
Danek Duvall
parsers.c: fix a couple of memory leaks
r21730 if (final == 0) {
keys = PyList_New(0);
goto bail;
}
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988
dict = PyDict_New();
if (dict == NULL)
goto bail;
Siddharth Agarwal
ancestor.deepest: ignore ninteresting while building result (issue3984)...
r19504 for (i = 0; i < revcount; i++) {
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 PyObject *key;
if ((final & (1 << i)) == 0)
continue;
key = PyList_GET_ITEM(revs, i);
Py_INCREF(key);
Py_INCREF(Py_None);
if (PyDict_SetItem(dict, key, Py_None) == -1) {
Py_DECREF(key);
Py_DECREF(Py_None);
goto bail;
}
}
keys = PyDict_Keys(dict);
bail:
free(depth);
free(seen);
free(interesting);
Py_XDECREF(dict);
Danek Duvall
parsers.c: fix a couple of memory leaks
r21730 return keys;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 }
/*
Mads Kiilerich
parsers: introduce index_commonancestorsheads...
r21102 * Given a (possibly overlapping) set of revs, return all the
* common ancestors heads: heads(::args[0] and ::a[1] and ...)
*/
static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
{
Mads Kiilerich
parsers: remove unnecessary gca variable in index_commonancestorsheads
r21103 PyObject *ret = NULL;
Mads Kiilerich
parsers: introduce index_commonancestorsheads...
r21102 Py_ssize_t argcount, i, len;
bitmask repeat = 0;
int revcount = 0;
int *revs;
argcount = PySequence_Length(args);
revs = malloc(argcount * sizeof(*revs));
if (argcount > 0 && revs == NULL)
return PyErr_NoMemory();
len = index_length(self) - 1;
for (i = 0; i < argcount; i++) {
static const int capacity = 24;
PyObject *obj = PySequence_GetItem(args, i);
bitmask x;
long val;
if (!PyInt_Check(obj)) {
PyErr_SetString(PyExc_TypeError,
"arguments must all be ints");
Augie Fackler
parsers.c: fix a memory leak in index_commonancestorsheads...
r23945 Py_DECREF(obj);
Mads Kiilerich
parsers: introduce index_commonancestorsheads...
r21102 goto bail;
}
val = PyInt_AsLong(obj);
Augie Fackler
parsers.c: fix a memory leak in index_commonancestorsheads...
r23945 Py_DECREF(obj);
Mads Kiilerich
parsers: introduce index_commonancestorsheads...
r21102 if (val == -1) {
ret = PyList_New(0);
goto done;
}
if (val < 0 || val >= len) {
PyErr_SetString(PyExc_IndexError,
"index out of range");
goto bail;
}
/* this cheesy bloom filter lets us avoid some more
* expensive duplicate checks in the common set-is-disjoint
* case */
x = 1ull << (val & 0x3f);
if (repeat & x) {
int k;
for (k = 0; k < revcount; k++) {
if (val == revs[k])
goto duplicate;
}
}
else repeat |= x;
if (revcount >= capacity) {
PyErr_Format(PyExc_OverflowError,
"bitset size (%d) > capacity (%d)",
revcount, capacity);
goto bail;
}
revs[revcount++] = (int)val;
duplicate:;
}
if (revcount == 0) {
ret = PyList_New(0);
goto done;
}
if (revcount == 1) {
PyObject *obj;
ret = PyList_New(1);
if (ret == NULL)
goto bail;
obj = PyInt_FromLong(revs[0]);
if (obj == NULL)
goto bail;
PyList_SET_ITEM(ret, 0, obj);
goto done;
}
Mads Kiilerich
parsers: remove unnecessary gca variable in index_commonancestorsheads
r21103 ret = find_gca_candidates(self, revs, revcount);
if (ret == NULL)
Mads Kiilerich
parsers: introduce index_commonancestorsheads...
r21102 goto bail;
done:
free(revs);
return ret;
bail:
free(revs);
Py_XDECREF(ret);
return NULL;
}
/*
Martin von Zweigbergk
parsers: rewrite index_ancestors() in terms of index_commonancestorsheads()...
r24004 * Given a (possibly overlapping) set of revs, return the greatest
* common ancestors: those with the longest path to the root.
*/
static PyObject *index_ancestors(indexObject *self, PyObject *args)
{
PyObject *gca = index_commonancestorsheads(self, args);
if (gca == NULL)
return NULL;
if (PyList_GET_SIZE(gca) <= 1) {
Py_INCREF(gca);
return gca;
}
return find_deepest(self, gca);
}
/*
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 * Invalidate any trie entries introduced by added revs.
*/
static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
{
Py_ssize_t i, len = PyList_GET_SIZE(self->added);
for (i = start; i < len; i++) {
PyObject *tuple = PyList_GET_ITEM(self->added, i);
PyObject *node = PyTuple_GET_ITEM(tuple, 7);
nt_insert(self, PyString_AS_STRING(node), -1);
}
Bryan O'Sullivan
parsers: use Py_CLEAR where appropriate
r16732 if (start == 0)
Py_CLEAR(self->added);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 }
/*
* Delete a numeric range of revs, which must be at the end of the
* range, but exclude the sentinel nullid entry.
*/
static int index_slice_del(indexObject *self, PyObject *item)
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 {
Py_ssize_t start, stop, step, slicelength;
Py_ssize_t length = index_length(self);
Bryan O'Sullivan
parsers: reduce raw_length when truncating...
r16784 int ret = 0;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
if (PySlice_GetIndicesEx((PySliceObject*)item, length,
&start, &stop, &step, &slicelength) < 0)
return -1;
if (slicelength <= 0)
return 0;
if ((step < 0 && start < stop) || (step > 0 && start > stop))
stop = start;
Bernhard Leiner
C implementation of revlog index parsing
r7108
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 if (step < 0) {
stop = start + 1;
start = stop + step*(slicelength - 1) - 1;
step = -step;
}
if (step != 1) {
PyErr_SetString(PyExc_ValueError,
"revlog index delete requires step size of 1");
return -1;
Bernhard Leiner
C implementation of revlog index parsing
r7108 }
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
if (stop != length - 1) {
PyErr_SetString(PyExc_IndexError,
"revlog index deletion indices are invalid");
return -1;
}
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 if (start < self->length - 1) {
if (self->nt) {
Py_ssize_t i;
for (i = start + 1; i < self->length - 1; i++) {
const char *node = index_node(self, i);
if (node)
nt_insert(self, node, -1);
}
if (self->added)
nt_invalidate_added(self, 0);
if (self->ntrev > start)
self->ntrev = (int)start;
}
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 self->length = start + 1;
Yuya Nishihara
parsers: fix memleak of revlog cache entries on strip...
r18504 if (start < self->raw_length) {
if (self->cache) {
Py_ssize_t i;
for (i = start; i < self->raw_length; i++)
Py_CLEAR(self->cache[i]);
}
Bryan O'Sullivan
parsers: reduce raw_length when truncating...
r16784 self->raw_length = start;
Yuya Nishihara
parsers: fix memleak of revlog cache entries on strip...
r18504 }
Bryan O'Sullivan
parsers: reduce raw_length when truncating...
r16784 goto done;
Bernhard Leiner
C implementation of revlog index parsing
r7108 }
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 if (self->nt) {
nt_invalidate_added(self, start - self->length + 1);
if (self->ntrev > start)
self->ntrev = (int)start;
}
Bryan O'Sullivan
parsers: reduce raw_length when truncating...
r16784 if (self->added)
ret = PyList_SetSlice(self->added, start - self->length + 1,
PyList_GET_SIZE(self->added), NULL);
done:
Bryan O'Sullivan
parsers: cache the result of index_headrevs...
r16787 Py_CLEAR(self->headrevs);
Bryan O'Sullivan
parsers: reduce raw_length when truncating...
r16784 return ret;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 }
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 /*
* Supported ops:
*
* slice deletion
* string assignment (extend node->rev mapping)
* string deletion (shrink node->rev mapping)
*/
static int index_assign_subscript(indexObject *self, PyObject *item,
PyObject *value)
{
char *node;
Py_ssize_t nodelen;
long rev;
if (PySlice_Check(item) && value == NULL)
return index_slice_del(self, item);
if (node_check(item, &node, &nodelen) == -1)
return -1;
if (value == NULL)
return self->nt ? nt_insert(self, node, -1) : 0;
rev = PyInt_AsLong(value);
if (rev > INT_MAX || rev < 0) {
if (!PyErr_Occurred())
PyErr_SetString(PyExc_ValueError, "rev out of range");
return -1;
}
Mike Edgar
parsers: ensure revlog index node tree is initialized before insertion...
r23468
if (nt_init(self) == -1)
return -1;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 return nt_insert(self, node, (int)rev);
}
/*
* Find all RevlogNG entries in an index that has inline data. Update
* the optional "offsets" table with those entries.
*/
Henrik Stuart
parsers: ensure correct return type for inline_scan...
r22401 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 {
const char *data = PyString_AS_STRING(self->data);
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 Py_ssize_t pos = 0;
Py_ssize_t end = PyString_GET_SIZE(self->data);
Bryan O'Sullivan
parsers: replace magic number 64 with symbolic constant
r16863 long incr = v1_hdrsize;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 Py_ssize_t len = 0;
Matt Mackall
revlog: only build the nodemap on demand
r13254
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 while (pos + v1_hdrsize <= end && pos >= 0) {
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 uint32_t comp_len;
/* 3rd element of header is length of compressed inline data */
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 comp_len = getbe32(data + pos + 8);
Bryan O'Sullivan
parsers: replace magic number 64 with symbolic constant
r16863 incr = v1_hdrsize + comp_len;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 if (offsets)
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 offsets[len] = data + pos;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 len++;
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 pos += incr;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 }
Matt Mackall
revlog: only build the nodemap on demand
r13254
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 if (pos != end) {
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 if (!PyErr_Occurred())
PyErr_SetString(PyExc_ValueError, "corrupt index file");
return -1;
}
return len;
}
Matt Mackall
revlog: only build the nodemap on demand
r13254
Bryan O'Sullivan
parsers: fix refcount leak, simplify init of index (issue3417)...
r16572 static int index_init(indexObject *self, PyObject *args)
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 {
Bryan O'Sullivan
parsers: fix refcount leak, simplify init of index (issue3417)...
r16572 PyObject *data_obj, *inlined_obj;
Py_ssize_t size;
Chris Jerdonek
parse_index2: fix crash on bad argument type (issue4110)...
r20109 /* Initialize before argument-checking to avoid index_dealloc() crash. */
self->raw_length = 0;
self->added = NULL;
self->cache = NULL;
self->data = NULL;
self->headrevs = NULL;
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 self->filteredrevs = Py_None;
Py_INCREF(Py_None);
Chris Jerdonek
parse_index2: fix crash on bad argument type (issue4110)...
r20109 self->nt = NULL;
self->offsets = NULL;
Bryan O'Sullivan
parsers: fix refcount leak, simplify init of index (issue3417)...
r16572 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
return -1;
if (!PyString_Check(data_obj)) {
PyErr_SetString(PyExc_TypeError, "data is not a string");
return -1;
}
size = PyString_GET_SIZE(data_obj);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
self->data = data_obj;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 self->ntlength = self->ntcapacity = 0;
self->ntdepth = self->ntsplits = 0;
self->ntlookups = self->ntmisses = 0;
self->ntrev = -1;
Matt Mackall
parsers: fix refcount bug on corrupt index...
r16597 Py_INCREF(self->data);
Bernhard Leiner
C implementation of revlog index parsing
r7108
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 if (self->inlined) {
Henrik Stuart
parsers: ensure correct return type for inline_scan...
r22401 Py_ssize_t len = inline_scan(self, NULL);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 if (len == -1)
goto bail;
self->raw_length = len;
self->length = len + 1;
} else {
Bryan O'Sullivan
parsers: replace magic number 64 with symbolic constant
r16863 if (size % v1_hdrsize) {
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 PyErr_SetString(PyExc_ValueError, "corrupt index file");
goto bail;
}
Bryan O'Sullivan
parsers: replace magic number 64 with symbolic constant
r16863 self->raw_length = size / v1_hdrsize;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 self->length = self->raw_length + 1;
}
return 0;
bail:
return -1;
}
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 static PyObject *index_nodemap(indexObject *self)
{
Bryan O'Sullivan
parsers: fix refcount leak, simplify init of index (issue3417)...
r16572 Py_INCREF(self);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 return (PyObject *)self;
}
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 static void index_dealloc(indexObject *self)
{
Bryan O'Sullivan
parsers: fix a memleak, and add a clearcaches method to the index...
r16370 _index_clearcaches(self);
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 Py_XDECREF(self->filteredrevs);
Chris Jerdonek
parse_index2: fix crash on bad argument type (issue4110)...
r20109 Py_XDECREF(self->data);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 Py_XDECREF(self->added);
PyObject_Del(self);
}
static PySequenceMethods index_sequence_methods = {
(lenfunc)index_length, /* sq_length */
0, /* sq_concat */
0, /* sq_repeat */
(ssizeargfunc)index_get, /* sq_item */
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 0, /* sq_slice */
0, /* sq_ass_item */
0, /* sq_ass_slice */
(objobjproc)index_contains, /* sq_contains */
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 };
static PyMappingMethods index_mapping_methods = {
(lenfunc)index_length, /* mp_length */
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 (binaryfunc)index_getitem, /* mp_subscript */
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};
static PyMethodDef index_methods[] = {
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
"return the gca set of the given revs"},
Mads Kiilerich
parsers: introduce index_commonancestorsheads...
r21102 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
METH_VARARGS,
"return the heads of the common ancestors of the given revs"},
Bryan O'Sullivan
parsers: fix a memleak, and add a clearcaches method to the index...
r16370 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
"clear the index caches"},
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 {"get", (PyCFunction)index_m_get, METH_VARARGS,
"get an index entry"},
Laurent Charignon
phases: add set per phase in C phase computation...
r25190 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
METH_VARARGS, "compute phases"},
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
Mads Kiilerich
parsers: introduce headrevsfiltered in C extension...
r23087 "get head revisions"}, /* Can do filtering since 3.2 */
{"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
"get filtered head revisions"}, /* Can always do filtering */
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 {"insert", (PyCFunction)index_insert, METH_VARARGS,
"insert an index entry"},
Bryan O'Sullivan
revlog: speed up prefix matching against nodes...
r16665 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
"match a potentially ambiguous node ID"},
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 {"stats", (PyCFunction)index_stats, METH_NOARGS,
"stats for the index"},
{NULL} /* Sentinel */
};
static PyGetSetDef index_getset[] = {
{"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 {NULL} /* Sentinel */
};
static PyTypeObject indexType = {
PyObject_HEAD_INIT(NULL)
0, /* ob_size */
"parsers.index", /* tp_name */
sizeof(indexObject), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)index_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
&index_sequence_methods, /* tp_as_sequence */
&index_mapping_methods, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
"revlog index", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
index_methods, /* tp_methods */
0, /* tp_members */
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 index_getset, /* tp_getset */
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)index_init, /* tp_init */
0, /* tp_alloc */
};
/*
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 * returns a tuple of the form (index, index, cache) with elements as
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 * follows:
Bernhard Leiner
C implementation of revlog index parsing
r7108 *
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 * index: an index object that lazily parses RevlogNG records
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 * cache: if data is inlined, a tuple (index_file_content, 0), else None
*
* added complications are for backwards compatibility
Bernhard Leiner
C implementation of revlog index parsing
r7108 */
Matt Mackall
revlog: only build the nodemap on demand
r13254 static PyObject *parse_index2(PyObject *self, PyObject *args)
Bernhard Leiner
C implementation of revlog index parsing
r7108 {
Bryan O'Sullivan
parsers: fix refcount leak, simplify init of index (issue3417)...
r16572 PyObject *tuple = NULL, *cache = NULL;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 indexObject *idx;
Bryan O'Sullivan
parsers: fix refcount leak, simplify init of index (issue3417)...
r16572 int ret;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
idx = PyObject_New(indexObject, &indexType);
if (idx == NULL)
goto bail;
Bernhard Leiner
C implementation of revlog index parsing
r7108
Bryan O'Sullivan
parsers: fix refcount leak, simplify init of index (issue3417)...
r16572 ret = index_init(idx, args);
if (ret == -1)
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 goto bail;
Bernhard Leiner
C implementation of revlog index parsing
r7108
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 if (idx->inlined) {
cache = Py_BuildValue("iO", 0, idx->data);
if (cache == NULL)
goto bail;
Bernhard Leiner
C implementation of revlog index parsing
r7108 } else {
cache = Py_None;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 Py_INCREF(cache);
Bernhard Leiner
C implementation of revlog index parsing
r7108 }
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 tuple = Py_BuildValue("NN", idx, cache);
if (!tuple)
goto bail;
return tuple;
Bernhard Leiner
C implementation of revlog index parsing
r7108
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 bail:
Py_XDECREF(idx);
Bernhard Leiner
C implementation of revlog index parsing
r7108 Py_XDECREF(cache);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 Py_XDECREF(tuple);
Bernhard Leiner
C implementation of revlog index parsing
r7108 return NULL;
}
Augie Fackler
parsers: add fm1readmarker...
r24017 #define BUMPED_FIX 1
#define USING_SHA_256 2
static PyObject *readshas(
const char *source, unsigned char num, Py_ssize_t hashwidth)
{
int i;
PyObject *list = PyTuple_New(num);
if (list == NULL) {
return NULL;
}
for (i = 0; i < num; i++) {
PyObject *hash = PyString_FromStringAndSize(source, hashwidth);
if (hash == NULL) {
Py_DECREF(list);
return NULL;
}
PyTuple_SetItem(list, i, hash);
source += hashwidth;
}
return list;
}
Martin von Zweigbergk
_fm1readmarkers: generate list in C...
r24019 static PyObject *fm1readmarker(const char *data, uint32_t *msize)
Augie Fackler
parsers: add fm1readmarker...
r24017 {
const char *meta;
double mtime;
int16_t tz;
uint16_t flags;
unsigned char nsuccs, nparents, nmetadata;
Py_ssize_t hashwidth = 20;
PyObject *prec = NULL, *parents = NULL, *succs = NULL;
PyObject *metadata = NULL, *ret = NULL;
int i;
Martin von Zweigbergk
_fm1readmarkers: generate list in C...
r24019 *msize = getbe32(data);
Augie Fackler
parsers: add fm1readmarker...
r24017 data += 4;
mtime = getbefloat64(data);
data += 8;
tz = getbeint16(data);
data += 2;
flags = getbeuint16(data);
data += 2;
if (flags & USING_SHA_256) {
hashwidth = 32;
}
nsuccs = (unsigned char)(*data++);
nparents = (unsigned char)(*data++);
nmetadata = (unsigned char)(*data++);
prec = PyString_FromStringAndSize(data, hashwidth);
data += hashwidth;
if (prec == NULL) {
goto bail;
}
succs = readshas(data, nsuccs, hashwidth);
if (succs == NULL) {
goto bail;
}
data += nsuccs * hashwidth;
if (nparents == 1 || nparents == 2) {
parents = readshas(data, nparents, hashwidth);
if (parents == NULL) {
goto bail;
}
data += nparents * hashwidth;
} else {
parents = Py_None;
}
meta = data + (2 * nmetadata);
metadata = PyTuple_New(nmetadata);
if (metadata == NULL) {
goto bail;
}
for (i = 0; i < nmetadata; i++) {
PyObject *tmp, *left = NULL, *right = NULL;
Py_ssize_t metasize = (unsigned char)(*data++);
left = PyString_FromStringAndSize(meta, metasize);
meta += metasize;
metasize = (unsigned char)(*data++);
right = PyString_FromStringAndSize(meta, metasize);
meta += metasize;
if (!left || !right) {
Py_XDECREF(left);
Py_XDECREF(right);
goto bail;
}
tmp = PyTuple_Pack(2, left, right);
Py_DECREF(left);
Py_DECREF(right);
if (!tmp) {
goto bail;
}
PyTuple_SetItem(metadata, i, tmp);
}
Martin von Zweigbergk
_fm1readmarkers: generate list in C...
r24019 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags,
Augie Fackler
parsers: add fm1readmarker...
r24017 metadata, mtime, (int)tz * 60, parents);
bail:
Py_XDECREF(prec);
Py_XDECREF(succs);
Py_XDECREF(metadata);
if (parents != Py_None)
Py_XDECREF(parents);
return ret;
}
Martin von Zweigbergk
_fm1readmarkers: generate list in C...
r24019
static PyObject *fm1readmarkers(PyObject *self, PyObject *args) {
const char *data;
Augie Fackler
parsers: use k instead of n for PyArg_ParseTuple because python 2.4 is awful
r24032 Py_ssize_t datalen;
/* only unsigned long because python 2.4, should be Py_ssize_t */
unsigned long offset, stop;
Martin von Zweigbergk
_fm1readmarkers: generate list in C...
r24019 PyObject *markers = NULL;
Augie Fackler
parsers: use k instead of n for PyArg_ParseTuple because python 2.4 is awful
r24032 /* replace kk with nn when we drop Python 2.4 */
if (!PyArg_ParseTuple(args, "s#kk", &data, &datalen, &offset, &stop)) {
Martin von Zweigbergk
_fm1readmarkers: generate list in C...
r24019 return NULL;
}
data += offset;
markers = PyList_New(0);
if (!markers) {
return NULL;
}
while (offset < stop) {
uint32_t msize;
int error;
PyObject *record = fm1readmarker(data, &msize);
if (!record) {
goto bail;
}
error = PyList_Append(markers, record);
Py_DECREF(record);
if (error) {
goto bail;
}
data += msize;
offset += msize;
}
return markers;
bail:
Py_DECREF(markers);
return NULL;
}
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 static char parsers_doc[] = "Efficient content parsing.";
Adrian Buehlmann
pathencode: new C module with fast encodedir() function...
r17606 PyObject *encodedir(PyObject *self, PyObject *args);
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616 PyObject *pathencode(PyObject *self, PyObject *args);
Bryan O'Sullivan
store: implement lowerencode in C
r18430 PyObject *lowerencode(PyObject *self, PyObject *args);
Adrian Buehlmann
pathencode: new C module with fast encodedir() function...
r17606
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 static PyMethodDef methods[] = {
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
Matt Mackall
dirstate: C parsing extension
r7093 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
Matt Mackall
revlog: only build the nodemap on demand
r13254 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
Siddharth Agarwal
parsers: add a function to efficiently lowercase ASCII strings...
r22778 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
Siddharth Agarwal
parsers: introduce an asciiupper function
r24577 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
Siddharth Agarwal
parsers: add an API to create a new presized dict
r25584 {"dict_new_presized", dict_new_presized, METH_VARARGS,
"construct a dict with an expected size\n"},
Siddharth Agarwal
parsers: add a C function to create a file foldmap...
r24609 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
"make file foldmap\n"},
Adrian Buehlmann
pathencode: new C module with fast encodedir() function...
r17606 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
Bryan O'Sullivan
store: implement lowerencode in C
r18430 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
Martin von Zweigbergk
_fm1readmarkers: generate list in C...
r24019 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
"parse v1 obsolete markers\n"},
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 {NULL, NULL}
};
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 void dirs_module_init(PyObject *mod);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 void manifest_module_init(PyObject *mod);
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 static void module_init(PyObject *mod)
{
Chris Jerdonek
parsers: fail fast if Python has wrong minor version (issue4110)...
r20742 /* This module constant has two purposes. First, it lets us unit test
* the ImportError raised without hard-coding any error text. This
* means we can change the text in the future without breaking tests,
* even across changesets without a recompile. Second, its presence
* can be used to determine whether the version-checking logic is
* present, which also helps in testing across changesets without a
* recompile. Note that this means the pure-Python version of parsers
* should not have this module constant. */
PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 dirs_module_init(mod);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 manifest_module_init(mod);
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900
Adrian Buehlmann
parsers: statically initializing tp_new to PyType_GenericNew is not portable...
r16604 indexType.tp_new = PyType_GenericNew;
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 if (PyType_Ready(&indexType) < 0 ||
PyType_Ready(&dirstateTupleType) < 0)
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 return;
Py_INCREF(&indexType);
PyModule_AddObject(mod, "index", (PyObject *)&indexType);
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 Py_INCREF(&dirstateTupleType);
PyModule_AddObject(mod, "dirstatetuple",
(PyObject *)&dirstateTupleType);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
-1, -1, -1, -1, nullid, 20);
if (nullentry)
PyObject_GC_UnTrack(nullentry);
}
Chris Jerdonek
parsers: fail fast if Python has wrong minor version (issue4110)...
r20742 static int check_python_version(void)
{
Augie Fackler
parsers: don't leak references to sys et al in check_python_version...
r23943 PyObject *sys = PyImport_ImportModule("sys"), *ver;
long hexversion;
if (!sys)
return -1;
ver = PyObject_GetAttrString(sys, "hexversion");
Py_DECREF(sys);
if (!ver)
return -1;
hexversion = PyInt_AsLong(ver);
Py_DECREF(ver);
Chris Jerdonek
parsers: fail fast if Python has wrong minor version (issue4110)...
r20742 /* sys.hexversion is a 32-bit number by default, so the -1 case
* should only occur in unusual circumstances (e.g. if sys.hexversion
* is manually set to an invalid value). */
if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
"modules were compiled with Python " PY_VERSION ", but "
"Mercurial is currently using Python with sys.hexversion=%ld: "
"Python %s\n at: %s", versionerrortext, hexversion,
Py_GetVersion(), Py_GetProgramFullPath());
return -1;
}
return 0;
}
Renato Cunha
parsers.c: Added support for py3k....
r11361 #ifdef IS_PY3K
static struct PyModuleDef parsers_module = {
PyModuleDef_HEAD_INIT,
"parsers",
parsers_doc,
-1,
methods
};
PyMODINIT_FUNC PyInit_parsers(void)
{
Matt Harbison
parsers: fix compiler errors on MSVC 2008...
r20797 PyObject *mod;
Chris Jerdonek
parsers: fail fast if Python has wrong minor version (issue4110)...
r20742 if (check_python_version() == -1)
return;
Matt Harbison
parsers: fix compiler errors on MSVC 2008...
r20797 mod = PyModule_Create(&parsers_module);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 module_init(mod);
return mod;
Renato Cunha
parsers.c: Added support for py3k....
r11361 }
#else
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 PyMODINIT_FUNC initparsers(void)
{
Matt Harbison
parsers: fix compiler errors on MSVC 2008...
r20797 PyObject *mod;
Chris Jerdonek
parsers: fail fast if Python has wrong minor version (issue4110)...
r20742 if (check_python_version() == -1)
return;
Matt Harbison
parsers: fix compiler errors on MSVC 2008...
r20797 mod = Py_InitModule3("parsers", methods, parsers_doc);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 module_init(mod);
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 }
Renato Cunha
parsers.c: Added support for py3k....
r11361 #endif