##// END OF EJS Templates
revert: small refactoring in the way backup value are handled...
revert: small refactoring in the way backup value are handled The current backup value may have two different values: 1. Do not try to do backup 2. Do backup if applicable We are about to move to: 1. Do not try to do backup 2. Do backup if applicable 3. Do backup in all cases So we change the current values to make room for the new one.

File last commit:

r22604:5e0d1478 default
r22608:bf0ecb22 default
Show More
parsers.c
2253 lines | 51.5 KiB | text/x-c | CLexer
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 /*
parsers.c - efficient content parsing
Copyright 2008 Matt Mackall <mpm@selenic.com> and others
This software may be used and distributed according to the terms of
the GNU General Public License, incorporated herein by reference.
*/
#include <Python.h>
#include <ctype.h>
Bryan O'Sullivan
parsers: fix an integer size warning issued by clang
r17356 #include <stddef.h>
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 #include <string.h>
Renato Cunha
parsers.c: Added support for py3k....
r11361 #include "util.h"
Chris Jerdonek
parsers: fail fast if Python has wrong minor version (issue4110)...
r20742 static char *versionerrortext = "Python minor version mismatch";
Siddharth Agarwal
parsers: use a lookup table to convert hex to binary...
r19718 static int8_t hextable[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
Bryan O'Sullivan
parsers: change the type signature of hexdigit...
r16617 static inline int hexdigit(const char *p, Py_ssize_t off)
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 {
Siddharth Agarwal
parsers: use a lookup table to convert hex to binary...
r19718 int8_t val = hextable[(unsigned char)p[off]];
Bryan O'Sullivan
parsers: change the type signature of hexdigit...
r16617
Siddharth Agarwal
parsers: use a lookup table to convert hex to binary...
r19718 if (val >= 0) {
return val;
}
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389
Matt Mackall
parsers: speed up hex decoding for manifests
r7092 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
return 0;
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 }
/*
* Turn a hex-encoded string into binary.
*/
static PyObject *unhexlify(const char *str, int len)
{
Matt Mackall
parsers: speed up hex decoding for manifests
r7092 PyObject *ret;
Benoit Boissinot
fix const annotation warning
r6395 char *d;
Bryan O'Sullivan
parsers: change the type signature of hexdigit...
r16617 int i;
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389
Renato Cunha
parsers.c: Added support for py3k....
r11361 ret = PyBytes_FromStringAndSize(NULL, len / 2);
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 if (!ret)
Matt Mackall
parsers: speed up hex decoding for manifests
r7092 return NULL;
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389
Renato Cunha
parsers.c: Added support for py3k....
r11361 d = PyBytes_AsString(ret);
Bryan O'Sullivan
parsers: change the type signature of hexdigit...
r16617 for (i = 0; i < len;) {
int hi = hexdigit(str, i++);
int lo = hexdigit(str, i++);
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 *d++ = (hi << 4) | lo;
}
Matt Mackall
parsers: clean up whitespace
r7091
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 return ret;
}
/*
* This code assumes that a manifest is stitched together with newline
* ('\n') characters.
*/
static PyObject *parse_manifest(PyObject *self, PyObject *args)
{
PyObject *mfdict, *fdict;
Siddharth Agarwal
parse_manifest: rewrite to use memchr...
r19728 char *str, *start, *end;
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 int len;
if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
&PyDict_Type, &mfdict,
&PyDict_Type, &fdict,
&str, &len))
goto quit;
Siddharth Agarwal
parse_manifest: rewrite to use memchr...
r19728 start = str;
end = str + len;
while (start < end) {
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 PyObject *file = NULL, *node = NULL;
PyObject *flags = NULL;
Siddharth Agarwal
parse_manifest: rewrite to use memchr...
r19728 char *zero = NULL, *newline = NULL;
Bryan O'Sullivan
parsers: fix an integer size warning issued by clang
r17356 ptrdiff_t nlen;
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389
Siddharth Agarwal
parse_manifest: rewrite to use memchr...
r19728 zero = memchr(start, '\0', end - start);
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 if (!zero) {
PyErr_SetString(PyExc_ValueError,
"manifest entry has no separator");
goto quit;
}
Siddharth Agarwal
parse_manifest: rewrite to use memchr...
r19728 newline = memchr(zero + 1, '\n', end - (zero + 1));
if (!newline) {
PyErr_SetString(PyExc_ValueError,
"manifest contains trailing garbage");
goto quit;
}
Renato Cunha
parsers.c: Added support for py3k....
r11361 file = PyBytes_FromStringAndSize(start, zero - start);
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 if (!file)
goto bail;
Siddharth Agarwal
parse_manifest: rewrite to use memchr...
r19728 nlen = newline - zero - 1;
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389
Bryan O'Sullivan
parsers: fix an integer size warning issued by clang
r17356 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 if (!node)
goto bail;
if (nlen > 40) {
Renato Cunha
parsers.c: Added support for py3k....
r11361 flags = PyBytes_FromStringAndSize(zero + 41,
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 nlen - 40);
if (!flags)
goto bail;
if (PyDict_SetItem(fdict, file, flags) == -1)
goto bail;
}
if (PyDict_SetItem(mfdict, file, node) == -1)
goto bail;
Siddharth Agarwal
parse_manifest: rewrite to use memchr...
r19728 start = newline + 1;
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389
Py_XDECREF(flags);
Py_XDECREF(node);
Py_XDECREF(file);
continue;
bail:
Py_XDECREF(flags);
Py_XDECREF(node);
Py_XDECREF(file);
goto quit;
}
Py_INCREF(Py_None);
return Py_None;
quit:
return NULL;
}
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
int size, int mtime)
{
dirstateTupleObject *t = PyObject_New(dirstateTupleObject,
&dirstateTupleType);
if (!t)
return NULL;
t->state = state;
t->mode = mode;
t->size = size;
t->mtime = mtime;
return t;
}
static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
PyObject *kwds)
{
/* We do all the initialization here and not a tp_init function because
* dirstate_tuple is immutable. */
dirstateTupleObject *t;
char state;
int size, mode, mtime;
if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
return NULL;
t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
if (!t)
return NULL;
t->state = state;
t->mode = mode;
t->size = size;
t->mtime = mtime;
return (PyObject *)t;
}
static void dirstate_tuple_dealloc(PyObject *o)
{
PyObject_Del(o);
}
static Py_ssize_t dirstate_tuple_length(PyObject *o)
{
return 4;
}
static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
{
dirstateTupleObject *t = (dirstateTupleObject *)o;
switch (i) {
case 0:
return PyBytes_FromStringAndSize(&t->state, 1);
case 1:
return PyInt_FromLong(t->mode);
case 2:
return PyInt_FromLong(t->size);
case 3:
return PyInt_FromLong(t->mtime);
default:
PyErr_SetString(PyExc_IndexError, "index out of range");
return NULL;
}
}
static PySequenceMethods dirstate_tuple_sq = {
dirstate_tuple_length, /* sq_length */
0, /* sq_concat */
0, /* sq_repeat */
dirstate_tuple_item, /* sq_item */
0, /* sq_ass_item */
0, /* sq_contains */
0, /* sq_inplace_concat */
0 /* sq_inplace_repeat */
};
PyTypeObject dirstateTupleType = {
PyVarObject_HEAD_INIT(NULL, 0)
"dirstate_tuple", /* tp_name */
sizeof(dirstateTupleObject),/* tp_basicsize */
0, /* tp_itemsize */
(destructor)dirstate_tuple_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
&dirstate_tuple_sq, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
"dirstate tuple", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
dirstate_tuple_new, /* tp_new */
};
Matt Mackall
dirstate: C parsing extension
r7093 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
{
PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
PyObject *fname = NULL, *cname = NULL, *entry = NULL;
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 char state, *cur, *str, *cpos;
Bryan O'Sullivan
parsers: state is a char, not an int
r19725 int mode, size, mtime;
Henrik Stuart
parsers: avoid signed/unsigned comparison mismatch...
r22403 unsigned int flen, len, pos = 40;
int readlen;
Matt Mackall
dirstate: C parsing extension
r7093
if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
&PyDict_Type, &dmap,
&PyDict_Type, &cmap,
Henrik Stuart
parsers: avoid signed/unsigned comparison mismatch...
r22403 &str, &readlen))
Matt Mackall
dirstate: C parsing extension
r7093 goto quit;
Henrik Stuart
parsers: avoid signed/unsigned comparison mismatch...
r22403 if (readlen < 0)
goto quit;
len = readlen;
Matt Mackall
dirstate: C parsing extension
r7093 /* read parents */
if (len < 40)
goto quit;
parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
if (!parents)
goto quit;
/* read filenames */
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 while (pos >= 40 && pos < len) {
cur = str + pos;
Matt Mackall
dirstate: C parsing extension
r7093 /* unpack header */
state = *cur;
Matt Mackall
util.h: replace ntohl/htonl with get/putbe32
r16437 mode = getbe32(cur + 1);
size = getbe32(cur + 5);
mtime = getbe32(cur + 9);
flen = getbe32(cur + 13);
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 pos += 17;
Matt Mackall
dirstate: C parsing extension
r7093 cur += 17;
David Soria Parra
parsers: fix 'unsigned expression is always true' warning (issue4142)...
r20316 if (flen > len - pos) {
Benoit Boissinot
parsers.c: fix integer overflows...
r7174 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
Matt Mackall
dirstate: C parsing extension
r7093 goto quit;
Benoit Boissinot
parsers.c: fix integer overflows...
r7174 }
Matt Mackall
dirstate: C parsing extension
r7093
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 entry = (PyObject *)make_dirstate_tuple(state, mode, size,
mtime);
Matt Mackall
dirstate: C parsing extension
r7093 cpos = memchr(cur, 0, flen);
if (cpos) {
Renato Cunha
parsers.c: Added support for py3k....
r11361 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
cname = PyBytes_FromStringAndSize(cpos + 1,
Matt Mackall
dirstate: C parsing extension
r7093 flen - (cpos - cur) - 1);
if (!fname || !cname ||
PyDict_SetItem(cmap, fname, cname) == -1 ||
PyDict_SetItem(dmap, fname, entry) == -1)
goto quit;
Py_DECREF(cname);
} else {
Renato Cunha
parsers.c: Added support for py3k....
r11361 fname = PyBytes_FromStringAndSize(cur, flen);
Matt Mackall
dirstate: C parsing extension
r7093 if (!fname ||
PyDict_SetItem(dmap, fname, entry) == -1)
goto quit;
}
Py_DECREF(fname);
Py_DECREF(entry);
fname = cname = entry = NULL;
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 pos += flen;
Matt Mackall
dirstate: C parsing extension
r7093 }
ret = parents;
Py_INCREF(ret);
quit:
Py_XDECREF(fname);
Py_XDECREF(cname);
Py_XDECREF(entry);
Py_XDECREF(parents);
return ret;
}
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 /*
* Efficiently pack a dirstate object into its on-disk format.
*/
static PyObject *pack_dirstate(PyObject *self, PyObject *args)
{
PyObject *packobj = NULL;
Siddharth Agarwal
pack_dirstate: in C version, for invalidation set dict to what we write to disk...
r21806 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 Py_ssize_t nbytes, pos, l;
PyObject *k, *v, *pn;
char *p, *s;
double now;
if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
&PyDict_Type, &map, &PyDict_Type, &copymap,
&pl, &now))
return NULL;
if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
return NULL;
}
/* Figure out how much we need to allocate. */
for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
PyObject *c;
if (!PyString_Check(k)) {
PyErr_SetString(PyExc_TypeError, "expected string key");
goto bail;
}
nbytes += PyString_GET_SIZE(k) + 17;
c = PyDict_GetItem(copymap, k);
if (c) {
if (!PyString_Check(c)) {
PyErr_SetString(PyExc_TypeError,
"expected string key");
goto bail;
}
nbytes += PyString_GET_SIZE(c) + 1;
}
}
packobj = PyString_FromStringAndSize(NULL, nbytes);
if (packobj == NULL)
goto bail;
p = PyString_AS_STRING(packobj);
pn = PySequence_ITEM(pl, 0);
if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
goto bail;
}
memcpy(p, s, l);
p += 20;
pn = PySequence_ITEM(pl, 1);
if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
goto bail;
}
memcpy(p, s, l);
p += 20;
for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 dirstateTupleObject *tuple;
char state;
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 uint32_t mode, size, mtime;
Py_ssize_t len, l;
PyObject *o;
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 char *t;
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 if (!dirstate_tuple_check(v)) {
PyErr_SetString(PyExc_TypeError,
"expected a dirstate tuple");
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 goto bail;
}
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 tuple = (dirstateTupleObject *)v;
state = tuple->state;
mode = tuple->mode;
size = tuple->size;
mtime = tuple->mtime;
if (state == 'n' && mtime == (uint32_t)now) {
Siddharth Agarwal
dirstate: move pure python dirstate packing to pure/parsers.py
r18567 /* See pure/parsers.py:pack_dirstate for why we do
* this. */
Siddharth Agarwal
pack_dirstate: in C version, for invalidation set dict to what we write to disk...
r21806 mtime = -1;
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 mtime_unset = (PyObject *)make_dirstate_tuple(
state, mode, size, mtime);
Siddharth Agarwal
pack_dirstate: in C version, for invalidation set dict to what we write to disk...
r21806 if (!mtime_unset)
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 goto bail;
Siddharth Agarwal
pack_dirstate: in C version, for invalidation set dict to what we write to disk...
r21806 if (PyDict_SetItem(map, k, mtime_unset) == -1)
goto bail;
Py_DECREF(mtime_unset);
mtime_unset = NULL;
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 }
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 *p++ = state;
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 putbe32(mode, p);
putbe32(size, p + 4);
putbe32(mtime, p + 8);
t = p + 12;
p += 16;
len = PyString_GET_SIZE(k);
memcpy(p, PyString_AS_STRING(k), len);
p += len;
o = PyDict_GetItem(copymap, k);
if (o) {
*p++ = '\0';
l = PyString_GET_SIZE(o);
memcpy(p, PyString_AS_STRING(o), l);
p += l;
len += l + 1;
}
putbe32((uint32_t)len, t);
}
pos = p - PyString_AS_STRING(packobj);
if (pos != nbytes) {
PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
(long)pos, (long)nbytes);
goto bail;
}
return packobj;
bail:
Siddharth Agarwal
pack_dirstate: in C version, for invalidation set dict to what we write to disk...
r21806 Py_XDECREF(mtime_unset);
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 Py_XDECREF(packobj);
return NULL;
}
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 /*
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 * A base-16 trie for fast node->rev mapping.
*
* Positive value is index of the next node in the trie
* Negative value is a leaf: -(rev + 1)
* Zero is empty
*/
typedef struct {
int children[16];
} nodetree;
/*
* This class has two behaviours.
*
* When used in a list-like way (with integer keys), we decode an
* entry in a RevlogNG index file on demand. Our last entry is a
* sentinel, always a nullid. We have limited support for
* integer-keyed insert and delete, only at elements right before the
* sentinel.
*
* With string keys, we lazily perform a reverse mapping from node to
* rev, using a base-16 trie.
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 */
typedef struct {
PyObject_HEAD
/* Type-specific fields go here. */
PyObject *data; /* raw bytes of index */
PyObject **cache; /* cached tuples */
const char **offsets; /* populated on demand */
Py_ssize_t raw_length; /* original number of elements */
Py_ssize_t length; /* current number of elements */
PyObject *added; /* populated on demand */
Bryan O'Sullivan
parsers: cache the result of index_headrevs...
r16787 PyObject *headrevs; /* cache, invalidated on changes */
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 PyObject *filteredrevs;/* filtered revs set */
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 nodetree *nt; /* base-16 trie */
int ntlength; /* # nodes in use */
int ntcapacity; /* # nodes allocated */
int ntdepth; /* maximum depth of tree */
int ntsplits; /* # splits performed */
int ntrev; /* last rev scanned */
int ntlookups; /* # lookups */
int ntmisses; /* # lookups that miss the cache */
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 int inlined;
} indexObject;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 static Py_ssize_t index_length(const indexObject *self)
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 {
if (self->added == NULL)
return self->length;
return self->length + PyList_GET_SIZE(self->added);
}
static PyObject *nullentry;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 static const char nullid[20];
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
Henrik Stuart
parsers: ensure correct return type for inline_scan...
r22401 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
#if LONG_MAX == 0x7fffffffL
Matt Mackall
util.h: more Python 2.4 fixes
r16393 static char *tuple_format = "Kiiiiiis#";
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 #else
Matt Mackall
util.h: more Python 2.4 fixes
r16393 static char *tuple_format = "kiiiiiis#";
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 #endif
Bernhard Leiner
C implementation of revlog index parsing
r7108
Bryan O'Sullivan
parsers: replace magic number 64 with symbolic constant
r16863 /* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_hdrsize = 64;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 /*
* Return a pointer to the beginning of a RevlogNG record.
*/
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
if (self->inlined && pos > 0) {
if (self->offsets == NULL) {
self->offsets = malloc(self->raw_length *
sizeof(*self->offsets));
if (self->offsets == NULL)
return (const char *)PyErr_NoMemory();
inline_scan(self, self->offsets);
}
return self->offsets[pos];
}
Bryan O'Sullivan
parsers: replace magic number 64 with symbolic constant
r16863 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 }
/*
* RevlogNG format (all in big endian, data may be inlined):
Bernhard Leiner
C implementation of revlog index parsing
r7108 * 6 bytes: offset
* 2 bytes: flags
* 4 bytes: compressed length
* 4 bytes: uncompressed length
* 4 bytes: base revision
* 4 bytes: link revision
* 4 bytes: parent 1 revision
* 4 bytes: parent 2 revision
* 32 bytes: nodeid (only 20 bytes used)
*/
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
Bernhard Leiner
C implementation of revlog index parsing
r7108 {
Benoit Boissinot
index parser: fix refcounting in case of errors, refactor...
r7154 uint64_t offset_flags;
Bernhard Leiner
C implementation of revlog index parsing
r7108 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
Benoit Boissinot
index parser: fix refcounting in case of errors, refactor...
r7154 const char *c_node_id;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 const char *data;
Py_ssize_t length = index_length(self);
PyObject *entry;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 if (pos < 0)
pos += length;
if (pos < 0 || pos >= length) {
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
return NULL;
}
if (pos == length - 1) {
Py_INCREF(nullentry);
return nullentry;
}
Bernhard Leiner
C implementation of revlog index parsing
r7108
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 if (pos >= self->length - 1) {
PyObject *obj;
obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
Py_INCREF(obj);
return obj;
}
if (self->cache) {
if (self->cache[pos]) {
Py_INCREF(self->cache[pos]);
return self->cache[pos];
}
} else {
self->cache = calloc(self->raw_length, sizeof(PyObject *));
if (self->cache == NULL)
return PyErr_NoMemory();
}
Thomas Arendsen Hein
Some additional space/tab cleanups
r7190
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 data = index_deref(self, pos);
if (data == NULL)
return NULL;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
Matt Mackall
util.h: replace ntohl/htonl with get/putbe32
r16437 offset_flags = getbe32(data + 4);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 if (pos == 0) /* mask out version number for the first entry */
offset_flags &= 0xFFFF;
else {
Matt Mackall
util.h: replace ntohl/htonl with get/putbe32
r16437 uint32_t offset_high = getbe32(data);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 offset_flags |= ((uint64_t)offset_high) << 32;
}
Benoit Boissinot
index parser: fix refcounting in case of errors, refactor...
r7154
Matt Mackall
util.h: replace ntohl/htonl with get/putbe32
r16437 comp_len = getbe32(data + 8);
uncomp_len = getbe32(data + 12);
base_rev = getbe32(data + 16);
link_rev = getbe32(data + 20);
parent_1 = getbe32(data + 24);
parent_2 = getbe32(data + 28);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 c_node_id = data + 32;
entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
Matt Mackall
revlog: only build the nodemap on demand
r13254 uncomp_len, base_rev, link_rev,
parent_1, parent_2, c_node_id, 20);
Bryan O'Sullivan
parsers: use Py_INCREF safely
r19726 if (entry) {
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 PyObject_GC_UnTrack(entry);
Bryan O'Sullivan
parsers: use Py_INCREF safely
r19726 Py_INCREF(entry);
}
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
self->cache[pos] = entry;
return entry;
}
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 /*
* Return the 20-byte SHA of the node corresponding to the given rev.
*/
static const char *index_node(indexObject *self, Py_ssize_t pos)
{
Py_ssize_t length = index_length(self);
const char *data;
Bryan O'Sullivan
parsers: ensure that nullid is always present in the radix tree
r16664 if (pos == length - 1 || pos == INT_MAX)
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 return nullid;
if (pos >= length)
return NULL;
if (pos >= self->length - 1) {
PyObject *tuple, *str;
tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
str = PyTuple_GetItem(tuple, 7);
return str ? PyString_AS_STRING(str) : NULL;
}
data = index_deref(self, pos);
return data ? data + 32 : NULL;
}
static int nt_insert(indexObject *self, const char *node, int rev);
static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
{
if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
return -1;
if (*nodelen == 20)
return 0;
PyErr_SetString(PyExc_ValueError, "20-byte hash required");
return -1;
}
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 static PyObject *index_insert(indexObject *self, PyObject *args)
{
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 PyObject *obj;
char *node;
Matt Mackall
parsers: fix Py2.4 argument parsing issue...
r22604 int index;
Py_ssize_t len, nodelen;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
Matt Mackall
parsers: fix Py2.4 argument parsing issue...
r22604 if (!PyArg_ParseTuple(args, "iO", &index, &obj))
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 return NULL;
if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 PyErr_SetString(PyExc_TypeError, "8-tuple required");
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 return NULL;
}
Bernhard Leiner
C implementation of revlog index parsing
r7108
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 return NULL;
len = index_length(self);
Matt Mackall
parsers: fix Py2.4 argument parsing issue...
r22604 if (index < 0)
index += len;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
Matt Mackall
parsers: fix Py2.4 argument parsing issue...
r22604 if (index != len - 1) {
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 PyErr_SetString(PyExc_IndexError,
"insert only supported at index -1");
return NULL;
}
if (self->added == NULL) {
self->added = PyList_New(0);
if (self->added == NULL)
return NULL;
}
if (PyList_Append(self->added, obj) == -1)
return NULL;
Matt Mackall
revlog: only build the nodemap on demand
r13254
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 if (self->nt)
Matt Mackall
parsers: fix Py2.4 argument parsing issue...
r22604 nt_insert(self, node, index);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414
Bryan O'Sullivan
parsers: cache the result of index_headrevs...
r16787 Py_CLEAR(self->headrevs);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 Py_RETURN_NONE;
}
Bryan O'Sullivan
parsers: fix a memleak, and add a clearcaches method to the index...
r16370 static void _index_clearcaches(indexObject *self)
{
if (self->cache) {
Py_ssize_t i;
Bryan O'Sullivan
parsers: use Py_CLEAR where appropriate
r16732 for (i = 0; i < self->raw_length; i++)
Py_CLEAR(self->cache[i]);
Bryan O'Sullivan
parsers: fix a memleak, and add a clearcaches method to the index...
r16370 free(self->cache);
self->cache = NULL;
}
if (self->offsets) {
free(self->offsets);
self->offsets = NULL;
}
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 if (self->nt) {
free(self->nt);
self->nt = NULL;
}
Bryan O'Sullivan
parsers: cache the result of index_headrevs...
r16787 Py_CLEAR(self->headrevs);
Bryan O'Sullivan
parsers: fix a memleak, and add a clearcaches method to the index...
r16370 }
static PyObject *index_clearcaches(indexObject *self)
{
_index_clearcaches(self);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 self->ntlength = self->ntcapacity = 0;
self->ntdepth = self->ntsplits = 0;
self->ntrev = -1;
self->ntlookups = self->ntmisses = 0;
Bryan O'Sullivan
parsers: fix a memleak, and add a clearcaches method to the index...
r16370 Py_RETURN_NONE;
}
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 static PyObject *index_stats(indexObject *self)
{
PyObject *obj = PyDict_New();
if (obj == NULL)
return NULL;
#define istat(__n, __d) \
Adrian Buehlmann
parser: use PyInt_FromSsize_t in index_stats...
r16629 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 goto bail;
if (self->added) {
Py_ssize_t len = PyList_GET_SIZE(self->added);
if (PyDict_SetItemString(obj, "index entries added",
Adrian Buehlmann
parser: use PyInt_FromSsize_t in index_stats...
r16629 PyInt_FromSsize_t(len)) == -1)
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 goto bail;
}
if (self->raw_length != self->length - 1)
istat(raw_length, "revs on disk");
istat(length, "revs in memory");
istat(ntcapacity, "node trie capacity");
istat(ntdepth, "node trie depth");
istat(ntlength, "node trie count");
istat(ntlookups, "node trie lookups");
istat(ntmisses, "node trie misses");
istat(ntrev, "node trie last rev scanned");
istat(ntsplits, "node trie splits");
#undef istat
return obj;
bail:
Py_XDECREF(obj);
return NULL;
}
Bryan O'Sullivan
parsers: cache the result of index_headrevs...
r16787 /*
* When we cache a list, we want to be sure the caller can't mutate
* the cached copy.
*/
static PyObject *list_copy(PyObject *list)
{
Py_ssize_t len = PyList_GET_SIZE(list);
PyObject *newlist = PyList_New(len);
Py_ssize_t i;
if (newlist == NULL)
return NULL;
for (i = 0; i < len; i++) {
PyObject *obj = PyList_GET_ITEM(list, i);
Py_INCREF(obj);
PyList_SET_ITEM(newlist, i, obj);
}
return newlist;
}
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 static int check_filter(PyObject *filter, Py_ssize_t arg) {
if (filter) {
PyObject *arglist, *result;
int isfiltered;
arglist = Py_BuildValue("(n)", arg);
if (!arglist) {
return -1;
}
result = PyEval_CallObject(filter, arglist);
Py_DECREF(arglist);
if (!result) {
return -1;
}
/* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
* same as this function, so we can just return it directly.*/
isfiltered = PyObject_IsTrue(result);
Py_DECREF(result);
return isfiltered;
} else {
return 0;
}
}
static PyObject *index_headrevs(indexObject *self, PyObject *args)
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 {
Py_ssize_t i, len, addlen;
char *nothead = NULL;
David Soria Parra
parsers: fix uninitialize variable warning...
r22540 PyObject *heads = NULL;
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 PyObject *filter = NULL;
PyObject *filteredrevs = Py_None;
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
return NULL;
}
if (self->headrevs && filteredrevs == self->filteredrevs)
Bryan O'Sullivan
parsers: cache the result of index_headrevs...
r16787 return list_copy(self->headrevs);
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 Py_DECREF(self->filteredrevs);
self->filteredrevs = filteredrevs;
Py_INCREF(filteredrevs);
if (filteredrevs != Py_None) {
filter = PyObject_GetAttrString(filteredrevs, "__contains__");
if (!filter) {
PyErr_SetString(PyExc_TypeError,
"filteredrevs has no attribute __contains__");
goto bail;
}
}
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 len = index_length(self) - 1;
heads = PyList_New(0);
if (heads == NULL)
goto bail;
if (len == 0) {
PyObject *nullid = PyInt_FromLong(-1);
if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
Py_XDECREF(nullid);
goto bail;
}
goto done;
}
nothead = calloc(len, 1);
if (nothead == NULL)
goto bail;
for (i = 0; i < self->raw_length; i++) {
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 const char *data;
int parent_1, parent_2, isfiltered;
isfiltered = check_filter(filter, i);
if (isfiltered == -1) {
PyErr_SetString(PyExc_TypeError,
"unable to check filter");
goto bail;
}
if (isfiltered) {
nothead[i] = 1;
continue;
}
data = index_deref(self, i);
parent_1 = getbe32(data + 24);
parent_2 = getbe32(data + 28);
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 if (parent_1 >= 0)
nothead[parent_1] = 1;
if (parent_2 >= 0)
nothead[parent_2] = 1;
}
addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
for (i = 0; i < addlen; i++) {
PyObject *rev = PyList_GET_ITEM(self->added, i);
PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
long parent_1, parent_2;
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 int isfiltered;
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786
if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
PyErr_SetString(PyExc_TypeError,
"revlog parents are invalid");
goto bail;
}
Durham Goode
obsolete: use C code for headrevs calculation...
r22484
isfiltered = check_filter(filter, i);
if (isfiltered == -1) {
PyErr_SetString(PyExc_TypeError,
"unable to check filter");
goto bail;
}
if (isfiltered) {
nothead[i] = 1;
continue;
}
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 parent_1 = PyInt_AS_LONG(p1);
parent_2 = PyInt_AS_LONG(p2);
if (parent_1 >= 0)
nothead[parent_1] = 1;
if (parent_2 >= 0)
nothead[parent_2] = 1;
}
for (i = 0; i < len; i++) {
PyObject *head;
if (nothead[i])
continue;
Henrik Stuart
parsers: fix typing issue when constructing Python integer object...
r22400 head = PyInt_FromSsize_t(i);
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 if (head == NULL || PyList_Append(heads, head) == -1) {
Py_XDECREF(head);
goto bail;
}
}
done:
Bryan O'Sullivan
parsers: cache the result of index_headrevs...
r16787 self->headrevs = heads;
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 Py_XDECREF(filter);
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 free(nothead);
Bryan O'Sullivan
parsers: cache the result of index_headrevs...
r16787 return list_copy(self->headrevs);
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 bail:
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 Py_XDECREF(filter);
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 Py_XDECREF(heads);
free(nothead);
return NULL;
}
Bryan O'Sullivan
parsers: change the type of nt_level...
r16618 static inline int nt_level(const char *node, Py_ssize_t level)
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 {
int v = node[level>>1];
if (!(level & 1))
v >>= 4;
return v & 0xf;
}
Bryan O'Sullivan
parsers: allow nt_find to signal an ambiguous match
r16616 /*
* Return values:
*
* -4: match is ambiguous (multiple candidates)
* -2: not found
* rest: valid rev
*/
Bryan O'Sullivan
parsers: allow hex keys
r16663 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
int hex)
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 {
Bryan O'Sullivan
parsers: allow hex keys
r16663 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
Bryan O'Sullivan
parsers: use the correct maximum radix tree depth...
r16641 int level, maxlevel, off;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414
if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
return -1;
if (self->nt == NULL)
return -2;
Bryan O'Sullivan
parsers: allow hex keys
r16663 if (hex)
Bryan O'Sullivan
revlog: speed up prefix matching against nodes...
r16665 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
Bryan O'Sullivan
parsers: allow hex keys
r16663 else
maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
Bryan O'Sullivan
parsers: use the correct maximum radix tree depth...
r16641
for (level = off = 0; level < maxlevel; level++) {
Bryan O'Sullivan
parsers: allow hex keys
r16663 int k = getnybble(node, level);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 nodetree *n = &self->nt[off];
int v = n->children[k];
if (v < 0) {
const char *n;
Bryan O'Sullivan
parsers: allow hex keys
r16663 Py_ssize_t i;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 v = -v - 1;
n = index_node(self, v);
if (n == NULL)
return -2;
Bryan O'Sullivan
parsers: allow hex keys
r16663 for (i = level; i < maxlevel; i++)
if (getnybble(node, i) != nt_level(n, i))
return -2;
return v;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 }
if (v == 0)
return -2;
off = v;
}
Bryan O'Sullivan
parsers: allow nt_find to signal an ambiguous match
r16616 /* multiple matches against an ambiguous prefix */
return -4;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 }
static int nt_new(indexObject *self)
{
if (self->ntlength == self->ntcapacity) {
self->ntcapacity *= 2;
self->nt = realloc(self->nt,
self->ntcapacity * sizeof(nodetree));
if (self->nt == NULL) {
PyErr_SetString(PyExc_MemoryError, "out of memory");
return -1;
}
memset(&self->nt[self->ntlength], 0,
sizeof(nodetree) * (self->ntcapacity - self->ntlength));
}
return self->ntlength++;
}
static int nt_insert(indexObject *self, const char *node, int rev)
{
int level = 0;
int off = 0;
Bryan O'Sullivan
parsers: use the correct maximum radix tree depth...
r16641 while (level < 40) {
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 int k = nt_level(node, level);
nodetree *n;
int v;
n = &self->nt[off];
v = n->children[k];
if (v == 0) {
n->children[k] = -rev - 1;
return 0;
}
if (v < 0) {
const char *oldnode = index_node(self, -v - 1);
int noff;
if (!oldnode || !memcmp(oldnode, node, 20)) {
n->children[k] = -rev - 1;
return 0;
}
noff = nt_new(self);
if (noff == -1)
return -1;
/* self->nt may have been changed by realloc */
self->nt[off].children[k] = noff;
off = noff;
n = &self->nt[off];
n->children[nt_level(oldnode, ++level)] = v;
if (level > self->ntdepth)
self->ntdepth = level;
self->ntsplits += 1;
} else {
level += 1;
off = v;
}
}
return -1;
}
Bryan O'Sullivan
parsers: factor out radix tree initialization
r16615 static int nt_init(indexObject *self)
{
if (self->nt == NULL) {
Abhay Kadam
mercurial/parsers.c: fix compiler warning...
r20110 if (self->raw_length > INT_MAX) {
PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
return -1;
}
Bryan O'Sullivan
parsers: factor out radix tree initialization
r16615 self->ntcapacity = self->raw_length < 4
Abhay Kadam
mercurial/parsers.c: fix compiler warning...
r20110 ? 4 : (int)self->raw_length / 2;
Bryan O'Sullivan
parsers: factor out radix tree initialization
r16615 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
if (self->nt == NULL) {
PyErr_NoMemory();
return -1;
}
self->ntlength = 1;
self->ntrev = (int)index_length(self) - 1;
self->ntlookups = 1;
self->ntmisses = 0;
Bryan O'Sullivan
parsers: ensure that nullid is always present in the radix tree
r16664 if (nt_insert(self, nullid, INT_MAX) == -1)
return -1;
Bryan O'Sullivan
parsers: factor out radix tree initialization
r16615 }
return 0;
}
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 /*
* Return values:
*
* -3: error (exception set)
* -2: not found (no exception set)
* rest: valid rev
*/
static int index_find_node(indexObject *self,
const char *node, Py_ssize_t nodelen)
{
int rev;
self->ntlookups++;
Bryan O'Sullivan
parsers: allow hex keys
r16663 rev = nt_find(self, node, nodelen, 0);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 if (rev >= -1)
return rev;
Bryan O'Sullivan
parsers: factor out radix tree initialization
r16615 if (nt_init(self) == -1)
return -3;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414
/*
* For the first handful of lookups, we scan the entire index,
* and cache only the matching nodes. This optimizes for cases
* like "hg tip", where only a few nodes are accessed.
*
* After that, we cache every node we visit, using a single
* scan amortized over multiple lookups. This gives the best
* bulk performance, e.g. for "hg log".
*/
if (self->ntmisses++ < 4) {
for (rev = self->ntrev - 1; rev >= 0; rev--) {
const char *n = index_node(self, rev);
if (n == NULL)
return -2;
if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
if (nt_insert(self, n, rev) == -1)
return -3;
break;
}
}
} else {
for (rev = self->ntrev - 1; rev >= 0; rev--) {
const char *n = index_node(self, rev);
Bryan O'Sullivan
parsers: update ntrev when we stop scanning...
r16614 if (n == NULL) {
self->ntrev = rev + 1;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 return -2;
Bryan O'Sullivan
parsers: update ntrev when we stop scanning...
r16614 }
if (nt_insert(self, n, rev) == -1) {
self->ntrev = rev + 1;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 return -3;
Bryan O'Sullivan
parsers: update ntrev when we stop scanning...
r16614 }
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
break;
}
}
self->ntrev = rev;
}
if (rev >= 0)
return rev;
return -2;
}
static PyObject *raise_revlog_error(void)
{
static PyObject *errclass;
PyObject *mod = NULL, *errobj;
if (errclass == NULL) {
PyObject *dict;
mod = PyImport_ImportModule("mercurial.error");
if (mod == NULL)
goto classfail;
dict = PyModule_GetDict(mod);
if (dict == NULL)
goto classfail;
errclass = PyDict_GetItemString(dict, "RevlogError");
if (errclass == NULL) {
PyErr_SetString(PyExc_SystemError,
"could not find RevlogError");
goto classfail;
}
Py_INCREF(errclass);
}
errobj = PyObject_CallFunction(errclass, NULL);
if (errobj == NULL)
return NULL;
PyErr_SetObject(errclass, errobj);
return errobj;
classfail:
Py_XDECREF(mod);
return NULL;
}
static PyObject *index_getitem(indexObject *self, PyObject *value)
{
char *node;
Py_ssize_t nodelen;
int rev;
if (PyInt_Check(value))
return index_get(self, PyInt_AS_LONG(value));
Bryan O'Sullivan
parsers: strictly check for 20-byte hashes where they're required
r16679 if (node_check(value, &node, &nodelen) == -1)
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 return NULL;
rev = index_find_node(self, node, nodelen);
if (rev >= -1)
return PyInt_FromLong(rev);
if (rev == -2)
raise_revlog_error();
return NULL;
}
Bryan O'Sullivan
revlog: speed up prefix matching against nodes...
r16665 static int nt_partialmatch(indexObject *self, const char *node,
Py_ssize_t nodelen)
{
int rev;
if (nt_init(self) == -1)
return -3;
if (self->ntrev > 0) {
/* ensure that the radix tree is fully populated */
for (rev = self->ntrev - 1; rev >= 0; rev--) {
const char *n = index_node(self, rev);
if (n == NULL)
return -2;
if (nt_insert(self, n, rev) == -1)
return -3;
}
self->ntrev = rev;
}
return nt_find(self, node, nodelen, 1);
}
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
const char *fullnode;
int nodelen;
char *node;
int rev, i;
if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
return NULL;
if (nodelen < 4) {
PyErr_SetString(PyExc_ValueError, "key too short");
return NULL;
}
sorcerer
revlog: don't try to partialmatch strings those length > 40...
r17353 if (nodelen > 40) {
PyErr_SetString(PyExc_ValueError, "key too long");
return NULL;
}
Bryan O'Sullivan
revlog: speed up prefix matching against nodes...
r16665
for (i = 0; i < nodelen; i++)
hexdigit(node, i);
if (PyErr_Occurred()) {
/* input contains non-hex characters */
PyErr_Clear();
Py_RETURN_NONE;
}
rev = nt_partialmatch(self, node, nodelen);
switch (rev) {
case -4:
raise_revlog_error();
case -3:
return NULL;
case -2:
Py_RETURN_NONE;
case -1:
return PyString_FromStringAndSize(nullid, 20);
}
fullnode = index_node(self, rev);
if (fullnode == NULL) {
PyErr_Format(PyExc_IndexError,
"could not access rev %d", rev);
return NULL;
}
return PyString_FromStringAndSize(fullnode, 20);
}
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 static PyObject *index_m_get(indexObject *self, PyObject *args)
{
Bryan O'Sullivan
parsers: strictly check for 20-byte hashes where they're required
r16679 Py_ssize_t nodelen;
PyObject *val;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 char *node;
Bryan O'Sullivan
parsers: strictly check for 20-byte hashes where they're required
r16679 int rev;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414
Bryan O'Sullivan
parsers: strictly check for 20-byte hashes where they're required
r16679 if (!PyArg_ParseTuple(args, "O", &val))
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 return NULL;
Bryan O'Sullivan
parsers: strictly check for 20-byte hashes where they're required
r16679 if (node_check(val, &node, &nodelen) == -1)
return NULL;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 rev = index_find_node(self, node, nodelen);
if (rev == -3)
return NULL;
if (rev == -2)
Py_RETURN_NONE;
return PyInt_FromLong(rev);
}
static int index_contains(indexObject *self, PyObject *value)
{
char *node;
Py_ssize_t nodelen;
if (PyInt_Check(value)) {
long rev = PyInt_AS_LONG(value);
return rev >= -1 && rev < index_length(self);
}
Bryan O'Sullivan
parsers: strictly check for 20-byte hashes where they're required
r16679 if (node_check(value, &node, &nodelen) == -1)
return -1;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414
switch (index_find_node(self, node, nodelen)) {
case -3:
return -1;
case -2:
return 0;
default:
return 1;
}
}
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 static inline void index_get_parents(indexObject *self, int rev, int *ps)
{
if (rev >= self->length - 1) {
PyObject *tuple = PyList_GET_ITEM(self->added,
rev - self->length + 1);
ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
} else {
const char *data = index_deref(self, rev);
ps[0] = getbe32(data + 24);
ps[1] = getbe32(data + 28);
}
}
typedef uint64_t bitmask;
/*
* Given a disjoint set of revs, return all candidates for the
* greatest common ancestor. In revset notation, this is the set
* "heads(::a and ::b and ...)"
*/
static PyObject *find_gca_candidates(indexObject *self, const int *revs,
int revcount)
{
const bitmask allseen = (1ull << revcount) - 1;
const bitmask poison = 1ull << revcount;
PyObject *gca = PyList_New(0);
Mads Kiilerich
ancestors: remove unnecessary handling of 'left'...
r20555 int i, v, interesting;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 int maxrev = -1;
Henrik Stuart
parsers: use bitmask type consistently in find_gca_candidates...
r22399 bitmask sp;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 bitmask *seen;
Bryan O'Sullivan
parsers: correctly handle a failed allocation
r19727 if (gca == NULL)
return PyErr_NoMemory();
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 for (i = 0; i < revcount; i++) {
if (revs[i] > maxrev)
maxrev = revs[i];
}
seen = calloc(sizeof(*seen), maxrev + 1);
Bryan O'Sullivan
parsers: correctly handle a failed allocation
r19727 if (seen == NULL) {
Py_DECREF(gca);
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 return PyErr_NoMemory();
Bryan O'Sullivan
parsers: correctly handle a failed allocation
r19727 }
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988
for (i = 0; i < revcount; i++)
seen[revs[i]] = 1ull << i;
Mads Kiilerich
ancestors: remove unnecessary handling of 'left'...
r20555 interesting = revcount;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988
for (v = maxrev; v >= 0 && interesting; v--) {
Henrik Stuart
parsers: use bitmask type consistently in find_gca_candidates...
r22399 bitmask sv = seen[v];
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 int parents[2];
if (!sv)
continue;
if (sv < poison) {
interesting -= 1;
if (sv == allseen) {
PyObject *obj = PyInt_FromLong(v);
if (obj == NULL)
goto bail;
if (PyList_Append(gca, obj) == -1) {
Py_DECREF(obj);
goto bail;
}
sv |= poison;
for (i = 0; i < revcount; i++) {
Mads Kiilerich
ancestors: remove unnecessary handling of 'left'...
r20555 if (revs[i] == v)
goto done;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 }
}
}
index_get_parents(self, v, parents);
for (i = 0; i < 2; i++) {
int p = parents[i];
if (p == -1)
continue;
Matt Mackall
parsers: fix variable declaration position issue
r19030 sp = seen[p];
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 if (sv < poison) {
if (sp == 0) {
seen[p] = sv;
interesting++;
}
else if (sp != sv)
seen[p] |= sv;
} else {
if (sp && sp < poison)
interesting--;
seen[p] = sv;
}
}
}
done:
free(seen);
return gca;
bail:
free(seen);
Py_XDECREF(gca);
return NULL;
}
/*
* Given a disjoint set of revs, return the subset with the longest
* path to the root.
*/
static PyObject *find_deepest(indexObject *self, PyObject *revs)
{
const Py_ssize_t revcount = PyList_GET_SIZE(revs);
static const Py_ssize_t capacity = 24;
int *depth, *interesting = NULL;
int i, j, v, ninteresting;
Danek Duvall
parsers.c: fix a couple of memory leaks
r21730 PyObject *dict = NULL, *keys = NULL;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 long *seen = NULL;
int maxrev = -1;
long final;
if (revcount > capacity) {
PyErr_Format(PyExc_OverflowError,
"bitset size (%ld) > capacity (%ld)",
André Sintzoff
parsers: remove warning: format ‘%ld’ expects argument of type ‘long int’...
r19062 (long)revcount, (long)capacity);
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 return NULL;
}
for (i = 0; i < revcount; i++) {
int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
if (n > maxrev)
maxrev = n;
}
depth = calloc(sizeof(*depth), maxrev + 1);
if (depth == NULL)
return PyErr_NoMemory();
seen = calloc(sizeof(*seen), maxrev + 1);
if (seen == NULL) {
PyErr_NoMemory();
goto bail;
}
interesting = calloc(sizeof(*interesting), 2 << revcount);
if (interesting == NULL) {
PyErr_NoMemory();
goto bail;
}
Siddharth Agarwal
ancestor.deepest: sort revs in C version...
r19502 if (PyList_Sort(revs) == -1)
goto bail;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 for (i = 0; i < revcount; i++) {
int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
long b = 1l << i;
depth[n] = 1;
seen[n] = b;
interesting[b] = 1;
}
ninteresting = (int)revcount;
for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
int dv = depth[v];
int parents[2];
long sv;
if (dv == 0)
continue;
sv = seen[v];
index_get_parents(self, v, parents);
for (i = 0; i < 2; i++) {
int p = parents[i];
long nsp, sp;
int dp;
if (p == -1)
continue;
dp = depth[p];
nsp = sp = seen[p];
if (dp <= dv) {
depth[p] = dv + 1;
if (sp != sv) {
interesting[sv] += 1;
nsp = seen[p] = sv;
if (sp) {
interesting[sp] -= 1;
if (interesting[sp] == 0)
ninteresting -= 1;
}
}
}
else if (dv == dp - 1) {
nsp = sp | sv;
if (nsp == sp)
continue;
seen[p] = nsp;
Wei, Elson
ancestor.deepest: decrement ninteresting correctly (issue3984)...
r19503 interesting[sp] -= 1;
if (interesting[sp] == 0 && interesting[nsp] > 0)
ninteresting -= 1;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 interesting[nsp] += 1;
}
}
interesting[sv] -= 1;
if (interesting[sv] == 0)
ninteresting -= 1;
}
final = 0;
j = ninteresting;
for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
if (interesting[i] == 0)
continue;
final |= i;
j -= 1;
}
Danek Duvall
parsers.c: fix a couple of memory leaks
r21730 if (final == 0) {
keys = PyList_New(0);
goto bail;
}
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988
dict = PyDict_New();
if (dict == NULL)
goto bail;
Siddharth Agarwal
ancestor.deepest: ignore ninteresting while building result (issue3984)...
r19504 for (i = 0; i < revcount; i++) {
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 PyObject *key;
if ((final & (1 << i)) == 0)
continue;
key = PyList_GET_ITEM(revs, i);
Py_INCREF(key);
Py_INCREF(Py_None);
if (PyDict_SetItem(dict, key, Py_None) == -1) {
Py_DECREF(key);
Py_DECREF(Py_None);
goto bail;
}
}
keys = PyDict_Keys(dict);
bail:
free(depth);
free(seen);
free(interesting);
Py_XDECREF(dict);
Danek Duvall
parsers.c: fix a couple of memory leaks
r21730 return keys;
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 }
/*
* Given a (possibly overlapping) set of revs, return the greatest
* common ancestors: those with the longest path to the root.
*/
static PyObject *index_ancestors(indexObject *self, PyObject *args)
{
PyObject *ret = NULL, *gca = NULL;
Py_ssize_t argcount, i, len;
bitmask repeat = 0;
int revcount = 0;
int *revs;
argcount = PySequence_Length(args);
revs = malloc(argcount * sizeof(*revs));
if (argcount > 0 && revs == NULL)
return PyErr_NoMemory();
len = index_length(self) - 1;
for (i = 0; i < argcount; i++) {
static const int capacity = 24;
PyObject *obj = PySequence_GetItem(args, i);
bitmask x;
long val;
if (!PyInt_Check(obj)) {
PyErr_SetString(PyExc_TypeError,
"arguments must all be ints");
goto bail;
}
val = PyInt_AsLong(obj);
if (val == -1) {
ret = PyList_New(0);
goto done;
}
if (val < 0 || val >= len) {
PyErr_SetString(PyExc_IndexError,
"index out of range");
goto bail;
}
/* this cheesy bloom filter lets us avoid some more
* expensive duplicate checks in the common set-is-disjoint
* case */
x = 1ull << (val & 0x3f);
if (repeat & x) {
int k;
for (k = 0; k < revcount; k++) {
if (val == revs[k])
goto duplicate;
}
}
else repeat |= x;
if (revcount >= capacity) {
PyErr_Format(PyExc_OverflowError,
"bitset size (%d) > capacity (%d)",
revcount, capacity);
goto bail;
}
revs[revcount++] = (int)val;
duplicate:;
}
if (revcount == 0) {
ret = PyList_New(0);
goto done;
}
if (revcount == 1) {
PyObject *obj;
ret = PyList_New(1);
if (ret == NULL)
goto bail;
obj = PyInt_FromLong(revs[0]);
if (obj == NULL)
goto bail;
PyList_SET_ITEM(ret, 0, obj);
goto done;
}
gca = find_gca_candidates(self, revs, revcount);
if (gca == NULL)
goto bail;
if (PyList_GET_SIZE(gca) <= 1) {
ret = gca;
Py_INCREF(gca);
}
else ret = find_deepest(self, gca);
done:
free(revs);
Py_XDECREF(gca);
return ret;
bail:
free(revs);
Py_XDECREF(gca);
Py_XDECREF(ret);
return NULL;
}
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 /*
Mads Kiilerich
parsers: introduce index_commonancestorsheads...
r21102 * Given a (possibly overlapping) set of revs, return all the
* common ancestors heads: heads(::args[0] and ::a[1] and ...)
*/
static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
{
Mads Kiilerich
parsers: remove unnecessary gca variable in index_commonancestorsheads
r21103 PyObject *ret = NULL;
Mads Kiilerich
parsers: introduce index_commonancestorsheads...
r21102 Py_ssize_t argcount, i, len;
bitmask repeat = 0;
int revcount = 0;
int *revs;
argcount = PySequence_Length(args);
revs = malloc(argcount * sizeof(*revs));
if (argcount > 0 && revs == NULL)
return PyErr_NoMemory();
len = index_length(self) - 1;
for (i = 0; i < argcount; i++) {
static const int capacity = 24;
PyObject *obj = PySequence_GetItem(args, i);
bitmask x;
long val;
if (!PyInt_Check(obj)) {
PyErr_SetString(PyExc_TypeError,
"arguments must all be ints");
goto bail;
}
val = PyInt_AsLong(obj);
if (val == -1) {
ret = PyList_New(0);
goto done;
}
if (val < 0 || val >= len) {
PyErr_SetString(PyExc_IndexError,
"index out of range");
goto bail;
}
/* this cheesy bloom filter lets us avoid some more
* expensive duplicate checks in the common set-is-disjoint
* case */
x = 1ull << (val & 0x3f);
if (repeat & x) {
int k;
for (k = 0; k < revcount; k++) {
if (val == revs[k])
goto duplicate;
}
}
else repeat |= x;
if (revcount >= capacity) {
PyErr_Format(PyExc_OverflowError,
"bitset size (%d) > capacity (%d)",
revcount, capacity);
goto bail;
}
revs[revcount++] = (int)val;
duplicate:;
}
if (revcount == 0) {
ret = PyList_New(0);
goto done;
}
if (revcount == 1) {
PyObject *obj;
ret = PyList_New(1);
if (ret == NULL)
goto bail;
obj = PyInt_FromLong(revs[0]);
if (obj == NULL)
goto bail;
PyList_SET_ITEM(ret, 0, obj);
goto done;
}
Mads Kiilerich
parsers: remove unnecessary gca variable in index_commonancestorsheads
r21103 ret = find_gca_candidates(self, revs, revcount);
if (ret == NULL)
Mads Kiilerich
parsers: introduce index_commonancestorsheads...
r21102 goto bail;
done:
free(revs);
return ret;
bail:
free(revs);
Py_XDECREF(ret);
return NULL;
}
/*
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 * Invalidate any trie entries introduced by added revs.
*/
static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
{
Py_ssize_t i, len = PyList_GET_SIZE(self->added);
for (i = start; i < len; i++) {
PyObject *tuple = PyList_GET_ITEM(self->added, i);
PyObject *node = PyTuple_GET_ITEM(tuple, 7);
nt_insert(self, PyString_AS_STRING(node), -1);
}
Bryan O'Sullivan
parsers: use Py_CLEAR where appropriate
r16732 if (start == 0)
Py_CLEAR(self->added);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 }
/*
* Delete a numeric range of revs, which must be at the end of the
* range, but exclude the sentinel nullid entry.
*/
static int index_slice_del(indexObject *self, PyObject *item)
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 {
Py_ssize_t start, stop, step, slicelength;
Py_ssize_t length = index_length(self);
Bryan O'Sullivan
parsers: reduce raw_length when truncating...
r16784 int ret = 0;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
if (PySlice_GetIndicesEx((PySliceObject*)item, length,
&start, &stop, &step, &slicelength) < 0)
return -1;
if (slicelength <= 0)
return 0;
if ((step < 0 && start < stop) || (step > 0 && start > stop))
stop = start;
Bernhard Leiner
C implementation of revlog index parsing
r7108
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 if (step < 0) {
stop = start + 1;
start = stop + step*(slicelength - 1) - 1;
step = -step;
}
if (step != 1) {
PyErr_SetString(PyExc_ValueError,
"revlog index delete requires step size of 1");
return -1;
Bernhard Leiner
C implementation of revlog index parsing
r7108 }
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
if (stop != length - 1) {
PyErr_SetString(PyExc_IndexError,
"revlog index deletion indices are invalid");
return -1;
}
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 if (start < self->length - 1) {
if (self->nt) {
Py_ssize_t i;
for (i = start + 1; i < self->length - 1; i++) {
const char *node = index_node(self, i);
if (node)
nt_insert(self, node, -1);
}
if (self->added)
nt_invalidate_added(self, 0);
if (self->ntrev > start)
self->ntrev = (int)start;
}
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 self->length = start + 1;
Yuya Nishihara
parsers: fix memleak of revlog cache entries on strip...
r18504 if (start < self->raw_length) {
if (self->cache) {
Py_ssize_t i;
for (i = start; i < self->raw_length; i++)
Py_CLEAR(self->cache[i]);
}
Bryan O'Sullivan
parsers: reduce raw_length when truncating...
r16784 self->raw_length = start;
Yuya Nishihara
parsers: fix memleak of revlog cache entries on strip...
r18504 }
Bryan O'Sullivan
parsers: reduce raw_length when truncating...
r16784 goto done;
Bernhard Leiner
C implementation of revlog index parsing
r7108 }
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 if (self->nt) {
nt_invalidate_added(self, start - self->length + 1);
if (self->ntrev > start)
self->ntrev = (int)start;
}
Bryan O'Sullivan
parsers: reduce raw_length when truncating...
r16784 if (self->added)
ret = PyList_SetSlice(self->added, start - self->length + 1,
PyList_GET_SIZE(self->added), NULL);
done:
Bryan O'Sullivan
parsers: cache the result of index_headrevs...
r16787 Py_CLEAR(self->headrevs);
Bryan O'Sullivan
parsers: reduce raw_length when truncating...
r16784 return ret;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 }
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 /*
* Supported ops:
*
* slice deletion
* string assignment (extend node->rev mapping)
* string deletion (shrink node->rev mapping)
*/
static int index_assign_subscript(indexObject *self, PyObject *item,
PyObject *value)
{
char *node;
Py_ssize_t nodelen;
long rev;
if (PySlice_Check(item) && value == NULL)
return index_slice_del(self, item);
if (node_check(item, &node, &nodelen) == -1)
return -1;
if (value == NULL)
return self->nt ? nt_insert(self, node, -1) : 0;
rev = PyInt_AsLong(value);
if (rev > INT_MAX || rev < 0) {
if (!PyErr_Occurred())
PyErr_SetString(PyExc_ValueError, "rev out of range");
return -1;
}
return nt_insert(self, node, (int)rev);
}
/*
* Find all RevlogNG entries in an index that has inline data. Update
* the optional "offsets" table with those entries.
*/
Henrik Stuart
parsers: ensure correct return type for inline_scan...
r22401 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 {
const char *data = PyString_AS_STRING(self->data);
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 Py_ssize_t pos = 0;
Py_ssize_t end = PyString_GET_SIZE(self->data);
Bryan O'Sullivan
parsers: replace magic number 64 with symbolic constant
r16863 long incr = v1_hdrsize;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 Py_ssize_t len = 0;
Matt Mackall
revlog: only build the nodemap on demand
r13254
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 while (pos + v1_hdrsize <= end && pos >= 0) {
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 uint32_t comp_len;
/* 3rd element of header is length of compressed inline data */
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 comp_len = getbe32(data + pos + 8);
Bryan O'Sullivan
parsers: replace magic number 64 with symbolic constant
r16863 incr = v1_hdrsize + comp_len;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 if (offsets)
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 offsets[len] = data + pos;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 len++;
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 pos += incr;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 }
Matt Mackall
revlog: only build the nodemap on demand
r13254
Matt Mackall
mpatch: rewrite pointer overflow checks
r20167 if (pos != end) {
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 if (!PyErr_Occurred())
PyErr_SetString(PyExc_ValueError, "corrupt index file");
return -1;
}
return len;
}
Matt Mackall
revlog: only build the nodemap on demand
r13254
Bryan O'Sullivan
parsers: fix refcount leak, simplify init of index (issue3417)...
r16572 static int index_init(indexObject *self, PyObject *args)
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 {
Bryan O'Sullivan
parsers: fix refcount leak, simplify init of index (issue3417)...
r16572 PyObject *data_obj, *inlined_obj;
Py_ssize_t size;
Chris Jerdonek
parse_index2: fix crash on bad argument type (issue4110)...
r20109 /* Initialize before argument-checking to avoid index_dealloc() crash. */
self->raw_length = 0;
self->added = NULL;
self->cache = NULL;
self->data = NULL;
self->headrevs = NULL;
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 self->filteredrevs = Py_None;
Py_INCREF(Py_None);
Chris Jerdonek
parse_index2: fix crash on bad argument type (issue4110)...
r20109 self->nt = NULL;
self->offsets = NULL;
Bryan O'Sullivan
parsers: fix refcount leak, simplify init of index (issue3417)...
r16572 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
return -1;
if (!PyString_Check(data_obj)) {
PyErr_SetString(PyExc_TypeError, "data is not a string");
return -1;
}
size = PyString_GET_SIZE(data_obj);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
self->data = data_obj;
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 self->ntlength = self->ntcapacity = 0;
self->ntdepth = self->ntsplits = 0;
self->ntlookups = self->ntmisses = 0;
self->ntrev = -1;
Matt Mackall
parsers: fix refcount bug on corrupt index...
r16597 Py_INCREF(self->data);
Bernhard Leiner
C implementation of revlog index parsing
r7108
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 if (self->inlined) {
Henrik Stuart
parsers: ensure correct return type for inline_scan...
r22401 Py_ssize_t len = inline_scan(self, NULL);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 if (len == -1)
goto bail;
self->raw_length = len;
self->length = len + 1;
} else {
Bryan O'Sullivan
parsers: replace magic number 64 with symbolic constant
r16863 if (size % v1_hdrsize) {
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 PyErr_SetString(PyExc_ValueError, "corrupt index file");
goto bail;
}
Bryan O'Sullivan
parsers: replace magic number 64 with symbolic constant
r16863 self->raw_length = size / v1_hdrsize;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 self->length = self->raw_length + 1;
}
return 0;
bail:
return -1;
}
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 static PyObject *index_nodemap(indexObject *self)
{
Bryan O'Sullivan
parsers: fix refcount leak, simplify init of index (issue3417)...
r16572 Py_INCREF(self);
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 return (PyObject *)self;
}
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 static void index_dealloc(indexObject *self)
{
Bryan O'Sullivan
parsers: fix a memleak, and add a clearcaches method to the index...
r16370 _index_clearcaches(self);
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 Py_XDECREF(self->filteredrevs);
Chris Jerdonek
parse_index2: fix crash on bad argument type (issue4110)...
r20109 Py_XDECREF(self->data);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 Py_XDECREF(self->added);
PyObject_Del(self);
}
static PySequenceMethods index_sequence_methods = {
(lenfunc)index_length, /* sq_length */
0, /* sq_concat */
0, /* sq_repeat */
(ssizeargfunc)index_get, /* sq_item */
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 0, /* sq_slice */
0, /* sq_ass_item */
0, /* sq_ass_slice */
(objobjproc)index_contains, /* sq_contains */
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 };
static PyMappingMethods index_mapping_methods = {
(lenfunc)index_length, /* mp_length */
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 (binaryfunc)index_getitem, /* mp_subscript */
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};
static PyMethodDef index_methods[] = {
Bryan O'Sullivan
parsers: a C implementation of the new ancestors algorithm...
r18988 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
"return the gca set of the given revs"},
Mads Kiilerich
parsers: introduce index_commonancestorsheads...
r21102 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
METH_VARARGS,
"return the heads of the common ancestors of the given revs"},
Bryan O'Sullivan
parsers: fix a memleak, and add a clearcaches method to the index...
r16370 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
"clear the index caches"},
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 {"get", (PyCFunction)index_m_get, METH_VARARGS,
"get an index entry"},
Durham Goode
obsolete: use C code for headrevs calculation...
r22484 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
Bryan O'Sullivan
revlog: switch to a C version of headrevs...
r16786 "get head revisions"},
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 {"insert", (PyCFunction)index_insert, METH_VARARGS,
"insert an index entry"},
Bryan O'Sullivan
revlog: speed up prefix matching against nodes...
r16665 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
"match a potentially ambiguous node ID"},
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 {"stats", (PyCFunction)index_stats, METH_NOARGS,
"stats for the index"},
{NULL} /* Sentinel */
};
static PyGetSetDef index_getset[] = {
{"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 {NULL} /* Sentinel */
};
static PyTypeObject indexType = {
PyObject_HEAD_INIT(NULL)
0, /* ob_size */
"parsers.index", /* tp_name */
sizeof(indexObject), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)index_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
&index_sequence_methods, /* tp_as_sequence */
&index_mapping_methods, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
"revlog index", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
index_methods, /* tp_methods */
0, /* tp_members */
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 index_getset, /* tp_getset */
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)index_init, /* tp_init */
0, /* tp_alloc */
};
/*
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 * returns a tuple of the form (index, index, cache) with elements as
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 * follows:
Bernhard Leiner
C implementation of revlog index parsing
r7108 *
Bryan O'Sullivan
parsers: use base-16 trie for faster node->rev mapping...
r16414 * index: an index object that lazily parses RevlogNG records
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 * cache: if data is inlined, a tuple (index_file_content, 0), else None
*
* added complications are for backwards compatibility
Bernhard Leiner
C implementation of revlog index parsing
r7108 */
Matt Mackall
revlog: only build the nodemap on demand
r13254 static PyObject *parse_index2(PyObject *self, PyObject *args)
Bernhard Leiner
C implementation of revlog index parsing
r7108 {
Bryan O'Sullivan
parsers: fix refcount leak, simplify init of index (issue3417)...
r16572 PyObject *tuple = NULL, *cache = NULL;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 indexObject *idx;
Bryan O'Sullivan
parsers: fix refcount leak, simplify init of index (issue3417)...
r16572 int ret;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
idx = PyObject_New(indexObject, &indexType);
if (idx == NULL)
goto bail;
Bernhard Leiner
C implementation of revlog index parsing
r7108
Bryan O'Sullivan
parsers: fix refcount leak, simplify init of index (issue3417)...
r16572 ret = index_init(idx, args);
if (ret == -1)
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 goto bail;
Bernhard Leiner
C implementation of revlog index parsing
r7108
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 if (idx->inlined) {
cache = Py_BuildValue("iO", 0, idx->data);
if (cache == NULL)
goto bail;
Bernhard Leiner
C implementation of revlog index parsing
r7108 } else {
cache = Py_None;
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 Py_INCREF(cache);
Bernhard Leiner
C implementation of revlog index parsing
r7108 }
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 tuple = Py_BuildValue("NN", idx, cache);
if (!tuple)
goto bail;
return tuple;
Bernhard Leiner
C implementation of revlog index parsing
r7108
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 bail:
Py_XDECREF(idx);
Bernhard Leiner
C implementation of revlog index parsing
r7108 Py_XDECREF(cache);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 Py_XDECREF(tuple);
Bernhard Leiner
C implementation of revlog index parsing
r7108 return NULL;
}
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 static char parsers_doc[] = "Efficient content parsing.";
Adrian Buehlmann
pathencode: new C module with fast encodedir() function...
r17606 PyObject *encodedir(PyObject *self, PyObject *args);
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616 PyObject *pathencode(PyObject *self, PyObject *args);
Bryan O'Sullivan
store: implement lowerencode in C
r18430 PyObject *lowerencode(PyObject *self, PyObject *args);
Adrian Buehlmann
pathencode: new C module with fast encodedir() function...
r17606
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 static PyMethodDef methods[] = {
Bryan O'Sullivan
parsers: add a C function to pack the dirstate...
r16955 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
Matt Mackall
dirstate: C parsing extension
r7093 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
Matt Mackall
revlog: only build the nodemap on demand
r13254 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
Adrian Buehlmann
pathencode: new C module with fast encodedir() function...
r17606 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
Bryan O'Sullivan
store: implement fncache basic path encoding in C...
r17616 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
Bryan O'Sullivan
store: implement lowerencode in C
r18430 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 {NULL, NULL}
};
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 void dirs_module_init(PyObject *mod);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 static void module_init(PyObject *mod)
{
Chris Jerdonek
parsers: fail fast if Python has wrong minor version (issue4110)...
r20742 /* This module constant has two purposes. First, it lets us unit test
* the ImportError raised without hard-coding any error text. This
* means we can change the text in the future without breaking tests,
* even across changesets without a recompile. Second, its presence
* can be used to determine whether the version-checking logic is
* present, which also helps in testing across changesets without a
* recompile. Note that this means the pure-Python version of parsers
* should not have this module constant. */
PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
Bryan O'Sullivan
scmutil: rewrite dirs in C, use if available...
r18900 dirs_module_init(mod);
Adrian Buehlmann
parsers: statically initializing tp_new to PyType_GenericNew is not portable...
r16604 indexType.tp_new = PyType_GenericNew;
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 if (PyType_Ready(&indexType) < 0 ||
PyType_Ready(&dirstateTupleType) < 0)
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 return;
Py_INCREF(&indexType);
PyModule_AddObject(mod, "index", (PyObject *)&indexType);
Siddharth Agarwal
parsers: inline fields of dirstate values in C version...
r21809 Py_INCREF(&dirstateTupleType);
PyModule_AddObject(mod, "dirstatetuple",
(PyObject *)&dirstateTupleType);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363
nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
-1, -1, -1, -1, nullid, 20);
if (nullentry)
PyObject_GC_UnTrack(nullentry);
}
Chris Jerdonek
parsers: fail fast if Python has wrong minor version (issue4110)...
r20742 static int check_python_version(void)
{
PyObject *sys = PyImport_ImportModule("sys");
long hexversion = PyInt_AsLong(PyObject_GetAttrString(sys, "hexversion"));
/* sys.hexversion is a 32-bit number by default, so the -1 case
* should only occur in unusual circumstances (e.g. if sys.hexversion
* is manually set to an invalid value). */
if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
"modules were compiled with Python " PY_VERSION ", but "
"Mercurial is currently using Python with sys.hexversion=%ld: "
"Python %s\n at: %s", versionerrortext, hexversion,
Py_GetVersion(), Py_GetProgramFullPath());
return -1;
}
return 0;
}
Renato Cunha
parsers.c: Added support for py3k....
r11361 #ifdef IS_PY3K
static struct PyModuleDef parsers_module = {
PyModuleDef_HEAD_INIT,
"parsers",
parsers_doc,
-1,
methods
};
PyMODINIT_FUNC PyInit_parsers(void)
{
Matt Harbison
parsers: fix compiler errors on MSVC 2008...
r20797 PyObject *mod;
Chris Jerdonek
parsers: fail fast if Python has wrong minor version (issue4110)...
r20742 if (check_python_version() == -1)
return;
Matt Harbison
parsers: fix compiler errors on MSVC 2008...
r20797 mod = PyModule_Create(&parsers_module);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 module_init(mod);
return mod;
Renato Cunha
parsers.c: Added support for py3k....
r11361 }
#else
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 PyMODINIT_FUNC initparsers(void)
{
Matt Harbison
parsers: fix compiler errors on MSVC 2008...
r20797 PyObject *mod;
Chris Jerdonek
parsers: fail fast if Python has wrong minor version (issue4110)...
r20742 if (check_python_version() == -1)
return;
Matt Harbison
parsers: fix compiler errors on MSVC 2008...
r20797 mod = Py_InitModule3("parsers", methods, parsers_doc);
Bryan O'Sullivan
parsers: incrementally parse the revlog index in C...
r16363 module_init(mod);
Bryan O'Sullivan
manifest: improve parsing performance by 8x via a new C extension
r6389 }
Renato Cunha
parsers.c: Added support for py3k....
r11361 #endif