parsers.c
2197 lines
| 50.4 KiB
| text/x-c
|
CLexer
/ mercurial / parsers.c
Bryan O'Sullivan
|
r6389 | /* | ||
parsers.c - efficient content parsing | ||||
Copyright 2008 Matt Mackall <mpm@selenic.com> and others | ||||
This software may be used and distributed according to the terms of | ||||
the GNU General Public License, incorporated herein by reference. | ||||
*/ | ||||
#include <Python.h> | ||||
#include <ctype.h> | ||||
Bryan O'Sullivan
|
r17356 | #include <stddef.h> | ||
Bryan O'Sullivan
|
r6389 | #include <string.h> | ||
Renato Cunha
|
r11361 | #include "util.h" | ||
Chris Jerdonek
|
/* Error text for a Python minor version mismatch (used outside this part
 * of the file). */
static char *versionerrortext = "Python minor version mismatch";

/*
 * Maps every possible byte value to the value of the hexadecimal digit
 * it represents (0-15). Bytes that are not hex digits map to -1.
 * Each row below covers 16 consecutive byte values.
 */
static int8_t hextable[256] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* '0'-'9' */
	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
	/* 'A'-'F' */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 'a'-'f' */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
Bryan O'Sullivan
|
r16617 | static inline int hexdigit(const char *p, Py_ssize_t off) | ||
Bryan O'Sullivan
|
r6389 | { | ||
Siddharth Agarwal
|
r19718 | int8_t val = hextable[(unsigned char)p[off]]; | ||
Bryan O'Sullivan
|
r16617 | |||
Siddharth Agarwal
|
r19718 | if (val >= 0) { | ||
return val; | ||||
} | ||||
Bryan O'Sullivan
|
r6389 | |||
Matt Mackall
|
r7092 | PyErr_SetString(PyExc_ValueError, "input contains non-hex character"); | ||
return 0; | ||||
Bryan O'Sullivan
|
r6389 | } | ||
/* | ||||
* Turn a hex-encoded string into binary. | ||||
*/ | ||||
static PyObject *unhexlify(const char *str, int len) | ||||
{ | ||||
Matt Mackall
|
r7092 | PyObject *ret; | ||
Benoit Boissinot
|
r6395 | char *d; | ||
Bryan O'Sullivan
|
r16617 | int i; | ||
Bryan O'Sullivan
|
r6389 | |||
Renato Cunha
|
r11361 | ret = PyBytes_FromStringAndSize(NULL, len / 2); | ||
Bryan O'Sullivan
|
r6389 | if (!ret) | ||
Matt Mackall
|
r7092 | return NULL; | ||
Bryan O'Sullivan
|
r6389 | |||
Renato Cunha
|
r11361 | d = PyBytes_AsString(ret); | ||
Bryan O'Sullivan
|
r16617 | for (i = 0; i < len;) { | ||
int hi = hexdigit(str, i++); | ||||
int lo = hexdigit(str, i++); | ||||
Bryan O'Sullivan
|
r6389 | *d++ = (hi << 4) | lo; | ||
} | ||||
Matt Mackall
|
r7091 | |||
Bryan O'Sullivan
|
r6389 | return ret; | ||
} | ||||
/* | ||||
* This code assumes that a manifest is stitched together with newline | ||||
* ('\n') characters. | ||||
*/ | ||||
static PyObject *parse_manifest(PyObject *self, PyObject *args) | ||||
{ | ||||
PyObject *mfdict, *fdict; | ||||
Siddharth Agarwal
|
r19728 | char *str, *start, *end; | ||
Bryan O'Sullivan
|
r6389 | int len; | ||
if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest", | ||||
&PyDict_Type, &mfdict, | ||||
&PyDict_Type, &fdict, | ||||
&str, &len)) | ||||
goto quit; | ||||
Siddharth Agarwal
|
r19728 | start = str; | ||
end = str + len; | ||||
while (start < end) { | ||||
Bryan O'Sullivan
|
r6389 | PyObject *file = NULL, *node = NULL; | ||
PyObject *flags = NULL; | ||||
Siddharth Agarwal
|
r19728 | char *zero = NULL, *newline = NULL; | ||
Bryan O'Sullivan
|
r17356 | ptrdiff_t nlen; | ||
Bryan O'Sullivan
|
r6389 | |||
Siddharth Agarwal
|
r19728 | zero = memchr(start, '\0', end - start); | ||
Bryan O'Sullivan
|
r6389 | if (!zero) { | ||
PyErr_SetString(PyExc_ValueError, | ||||
"manifest entry has no separator"); | ||||
goto quit; | ||||
} | ||||
Siddharth Agarwal
|
r19728 | newline = memchr(zero + 1, '\n', end - (zero + 1)); | ||
if (!newline) { | ||||
PyErr_SetString(PyExc_ValueError, | ||||
"manifest contains trailing garbage"); | ||||
goto quit; | ||||
} | ||||
Renato Cunha
|
r11361 | file = PyBytes_FromStringAndSize(start, zero - start); | ||
Bryan O'Sullivan
|
r6389 | if (!file) | ||
goto bail; | ||||
Siddharth Agarwal
|
r19728 | nlen = newline - zero - 1; | ||
Bryan O'Sullivan
|
r6389 | |||
Bryan O'Sullivan
|
r17356 | node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen); | ||
Bryan O'Sullivan
|
r6389 | if (!node) | ||
goto bail; | ||||
if (nlen > 40) { | ||||
Renato Cunha
|
r11361 | flags = PyBytes_FromStringAndSize(zero + 41, | ||
Bryan O'Sullivan
|
r6389 | nlen - 40); | ||
if (!flags) | ||||
goto bail; | ||||
if (PyDict_SetItem(fdict, file, flags) == -1) | ||||
goto bail; | ||||
} | ||||
if (PyDict_SetItem(mfdict, file, node) == -1) | ||||
goto bail; | ||||
Siddharth Agarwal
|
r19728 | start = newline + 1; | ||
Bryan O'Sullivan
|
r6389 | |||
Py_XDECREF(flags); | ||||
Py_XDECREF(node); | ||||
Py_XDECREF(file); | ||||
continue; | ||||
bail: | ||||
Py_XDECREF(flags); | ||||
Py_XDECREF(node); | ||||
Py_XDECREF(file); | ||||
goto quit; | ||||
} | ||||
Py_INCREF(Py_None); | ||||
return Py_None; | ||||
quit: | ||||
return NULL; | ||||
} | ||||
Siddharth Agarwal
|
r21809 | static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode, | ||
int size, int mtime) | ||||
{ | ||||
dirstateTupleObject *t = PyObject_New(dirstateTupleObject, | ||||
&dirstateTupleType); | ||||
if (!t) | ||||
return NULL; | ||||
t->state = state; | ||||
t->mode = mode; | ||||
t->size = size; | ||||
t->mtime = mtime; | ||||
return t; | ||||
} | ||||
/*
 * tp_new for dirstate_tuple: parses (state, mode, size, mtime) from
 * args and returns a freshly allocated instance of subtype.
 *
 * All initialization happens here rather than in a tp_init function
 * because dirstate_tuple is immutable. kwds is not consulted.
 */
static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
				    PyObject *kwds)
{
	dirstateTupleObject *t;
	char state;
	int size, mode, mtime;

	if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
		return NULL;

	t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
	if (t != NULL) {
		t->state = state;
		t->mode = mode;
		t->size = size;
		t->mtime = mtime;
	}

	return (PyObject *)t;
}
/* tp_dealloc for dirstate_tuple: the object holds no references of its
 * own, so releasing its memory is all that is needed. */
static void dirstate_tuple_dealloc(PyObject *o)
{
	PyObject_Del(o);
}
/* sq_length for dirstate_tuple: always four items
 * (state, mode, size, mtime). */
static Py_ssize_t dirstate_tuple_length(PyObject *o)
{
	return 4;
}
/*
 * sq_item for dirstate_tuple.
 *
 * Index 0 yields state as a one-byte string; indexes 1, 2 and 3 yield
 * mode, size and mtime as ints. Any other index raises IndexError.
 */
static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
{
	dirstateTupleObject *t = (dirstateTupleObject *)o;

	if (i == 0)
		return PyBytes_FromStringAndSize(&t->state, 1);
	if (i == 1)
		return PyInt_FromLong(t->mode);
	if (i == 2)
		return PyInt_FromLong(t->size);
	if (i == 3)
		return PyInt_FromLong(t->mtime);

	PyErr_SetString(PyExc_IndexError, "index out of range");
	return NULL;
}
static PySequenceMethods dirstate_tuple_sq = { | ||||
dirstate_tuple_length, /* sq_length */ | ||||
0, /* sq_concat */ | ||||
0, /* sq_repeat */ | ||||
dirstate_tuple_item, /* sq_item */ | ||||
0, /* sq_ass_item */ | ||||
0, /* sq_contains */ | ||||
0, /* sq_inplace_concat */ | ||||
0 /* sq_inplace_repeat */ | ||||
}; | ||||
PyTypeObject dirstateTupleType = { | ||||
PyVarObject_HEAD_INIT(NULL, 0) | ||||
"dirstate_tuple", /* tp_name */ | ||||
sizeof(dirstateTupleObject),/* tp_basicsize */ | ||||
0, /* tp_itemsize */ | ||||
(destructor)dirstate_tuple_dealloc, /* tp_dealloc */ | ||||
0, /* tp_print */ | ||||
0, /* tp_getattr */ | ||||
0, /* tp_setattr */ | ||||
0, /* tp_compare */ | ||||
0, /* tp_repr */ | ||||
0, /* tp_as_number */ | ||||
&dirstate_tuple_sq, /* tp_as_sequence */ | ||||
0, /* tp_as_mapping */ | ||||
0, /* tp_hash */ | ||||
0, /* tp_call */ | ||||
0, /* tp_str */ | ||||
0, /* tp_getattro */ | ||||
0, /* tp_setattro */ | ||||
0, /* tp_as_buffer */ | ||||
Py_TPFLAGS_DEFAULT, /* tp_flags */ | ||||
"dirstate tuple", /* tp_doc */ | ||||
0, /* tp_traverse */ | ||||
0, /* tp_clear */ | ||||
0, /* tp_richcompare */ | ||||
0, /* tp_weaklistoffset */ | ||||
0, /* tp_iter */ | ||||
0, /* tp_iternext */ | ||||
0, /* tp_methods */ | ||||
0, /* tp_members */ | ||||
0, /* tp_getset */ | ||||
0, /* tp_base */ | ||||
0, /* tp_dict */ | ||||
0, /* tp_descr_get */ | ||||
0, /* tp_descr_set */ | ||||
0, /* tp_dictoffset */ | ||||
0, /* tp_init */ | ||||
0, /* tp_alloc */ | ||||
dirstate_tuple_new, /* tp_new */ | ||||
}; | ||||
Matt Mackall
|
r7093 | static PyObject *parse_dirstate(PyObject *self, PyObject *args) | ||
{ | ||||
PyObject *dmap, *cmap, *parents = NULL, *ret = NULL; | ||||
PyObject *fname = NULL, *cname = NULL, *entry = NULL; | ||||
Matt Mackall
|
r20167 | char state, *cur, *str, *cpos; | ||
Bryan O'Sullivan
|
r19725 | int mode, size, mtime; | ||
Benoit Boissinot
|
r7174 | unsigned int flen; | ||
Matt Mackall
|
r20167 | int len, pos = 40; | ||
Matt Mackall
|
r7093 | |||
if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate", | ||||
&PyDict_Type, &dmap, | ||||
&PyDict_Type, &cmap, | ||||
&str, &len)) | ||||
goto quit; | ||||
/* read parents */ | ||||
if (len < 40) | ||||
goto quit; | ||||
parents = Py_BuildValue("s#s#", str, 20, str + 20, 20); | ||||
if (!parents) | ||||
goto quit; | ||||
/* read filenames */ | ||||
Matt Mackall
|
r20167 | while (pos >= 40 && pos < len) { | ||
cur = str + pos; | ||||
Matt Mackall
|
r7093 | /* unpack header */ | ||
state = *cur; | ||||
Matt Mackall
|
r16437 | mode = getbe32(cur + 1); | ||
size = getbe32(cur + 5); | ||||
mtime = getbe32(cur + 9); | ||||
flen = getbe32(cur + 13); | ||||
Matt Mackall
|
r20167 | pos += 17; | ||
Matt Mackall
|
r7093 | cur += 17; | ||
David Soria Parra
|
r20316 | if (flen > len - pos) { | ||
Benoit Boissinot
|
r7174 | PyErr_SetString(PyExc_ValueError, "overflow in dirstate"); | ||
Matt Mackall
|
r7093 | goto quit; | ||
Benoit Boissinot
|
r7174 | } | ||
Matt Mackall
|
r7093 | |||
Siddharth Agarwal
|
r21809 | entry = (PyObject *)make_dirstate_tuple(state, mode, size, | ||
mtime); | ||||
Matt Mackall
|
r7093 | cpos = memchr(cur, 0, flen); | ||
if (cpos) { | ||||
Renato Cunha
|
r11361 | fname = PyBytes_FromStringAndSize(cur, cpos - cur); | ||
cname = PyBytes_FromStringAndSize(cpos + 1, | ||||
Matt Mackall
|
r7093 | flen - (cpos - cur) - 1); | ||
if (!fname || !cname || | ||||
PyDict_SetItem(cmap, fname, cname) == -1 || | ||||
PyDict_SetItem(dmap, fname, entry) == -1) | ||||
goto quit; | ||||
Py_DECREF(cname); | ||||
} else { | ||||
Renato Cunha
|
r11361 | fname = PyBytes_FromStringAndSize(cur, flen); | ||
Matt Mackall
|
r7093 | if (!fname || | ||
PyDict_SetItem(dmap, fname, entry) == -1) | ||||
goto quit; | ||||
} | ||||
Py_DECREF(fname); | ||||
Py_DECREF(entry); | ||||
fname = cname = entry = NULL; | ||||
Matt Mackall
|
r20167 | pos += flen; | ||
Matt Mackall
|
r7093 | } | ||
ret = parents; | ||||
Py_INCREF(ret); | ||||
quit: | ||||
Py_XDECREF(fname); | ||||
Py_XDECREF(cname); | ||||
Py_XDECREF(entry); | ||||
Py_XDECREF(parents); | ||||
return ret; | ||||
} | ||||
Bryan O'Sullivan
|
r16955 | static inline int getintat(PyObject *tuple, int off, uint32_t *v) | ||
{ | ||||
PyObject *o = PyTuple_GET_ITEM(tuple, off); | ||||
long val; | ||||
if (PyInt_Check(o)) | ||||
val = PyInt_AS_LONG(o); | ||||
else if (PyLong_Check(o)) { | ||||
val = PyLong_AsLong(o); | ||||
if (val == -1 && PyErr_Occurred()) | ||||
return -1; | ||||
} else { | ||||
PyErr_SetString(PyExc_TypeError, "expected an int or long"); | ||||
return -1; | ||||
} | ||||
if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) { | ||||
PyErr_SetString(PyExc_OverflowError, | ||||
"Python value to large to convert to uint32_t"); | ||||
return -1; | ||||
} | ||||
*v = (uint32_t)val; | ||||
return 0; | ||||
} | ||||
/* | ||||
* Efficiently pack a dirstate object into its on-disk format. | ||||
*/ | ||||
static PyObject *pack_dirstate(PyObject *self, PyObject *args) | ||||
{ | ||||
PyObject *packobj = NULL; | ||||
Siddharth Agarwal
|
r21806 | PyObject *map, *copymap, *pl, *mtime_unset = NULL; | ||
Bryan O'Sullivan
|
r16955 | Py_ssize_t nbytes, pos, l; | ||
PyObject *k, *v, *pn; | ||||
char *p, *s; | ||||
double now; | ||||
if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate", | ||||
&PyDict_Type, &map, &PyDict_Type, ©map, | ||||
&pl, &now)) | ||||
return NULL; | ||||
if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) { | ||||
PyErr_SetString(PyExc_TypeError, "expected 2-element sequence"); | ||||
return NULL; | ||||
} | ||||
/* Figure out how much we need to allocate. */ | ||||
for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) { | ||||
PyObject *c; | ||||
if (!PyString_Check(k)) { | ||||
PyErr_SetString(PyExc_TypeError, "expected string key"); | ||||
goto bail; | ||||
} | ||||
nbytes += PyString_GET_SIZE(k) + 17; | ||||
c = PyDict_GetItem(copymap, k); | ||||
if (c) { | ||||
if (!PyString_Check(c)) { | ||||
PyErr_SetString(PyExc_TypeError, | ||||
"expected string key"); | ||||
goto bail; | ||||
} | ||||
nbytes += PyString_GET_SIZE(c) + 1; | ||||
} | ||||
} | ||||
packobj = PyString_FromStringAndSize(NULL, nbytes); | ||||
if (packobj == NULL) | ||||
goto bail; | ||||
p = PyString_AS_STRING(packobj); | ||||
pn = PySequence_ITEM(pl, 0); | ||||
if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) { | ||||
PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash"); | ||||
goto bail; | ||||
} | ||||
memcpy(p, s, l); | ||||
p += 20; | ||||
pn = PySequence_ITEM(pl, 1); | ||||
if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) { | ||||
PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash"); | ||||
goto bail; | ||||
} | ||||
memcpy(p, s, l); | ||||
p += 20; | ||||
for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) { | ||||
Siddharth Agarwal
|
r21809 | dirstateTupleObject *tuple; | ||
char state; | ||||
Bryan O'Sullivan
|
r16955 | uint32_t mode, size, mtime; | ||
Py_ssize_t len, l; | ||||
PyObject *o; | ||||
Siddharth Agarwal
|
r21809 | char *t; | ||
Bryan O'Sullivan
|
r16955 | |||
Siddharth Agarwal
|
r21809 | if (!dirstate_tuple_check(v)) { | ||
PyErr_SetString(PyExc_TypeError, | ||||
"expected a dirstate tuple"); | ||||
Bryan O'Sullivan
|
r16955 | goto bail; | ||
} | ||||
Siddharth Agarwal
|
r21809 | tuple = (dirstateTupleObject *)v; | ||
state = tuple->state; | ||||
mode = tuple->mode; | ||||
size = tuple->size; | ||||
mtime = tuple->mtime; | ||||
if (state == 'n' && mtime == (uint32_t)now) { | ||||
Siddharth Agarwal
|
r18567 | /* See pure/parsers.py:pack_dirstate for why we do | ||
* this. */ | ||||
Siddharth Agarwal
|
r21806 | mtime = -1; | ||
Siddharth Agarwal
|
r21809 | mtime_unset = (PyObject *)make_dirstate_tuple( | ||
state, mode, size, mtime); | ||||
Siddharth Agarwal
|
r21806 | if (!mtime_unset) | ||
Bryan O'Sullivan
|
r16955 | goto bail; | ||
Siddharth Agarwal
|
r21806 | if (PyDict_SetItem(map, k, mtime_unset) == -1) | ||
goto bail; | ||||
Py_DECREF(mtime_unset); | ||||
mtime_unset = NULL; | ||||
Bryan O'Sullivan
|
r16955 | } | ||
Siddharth Agarwal
|
r21809 | *p++ = state; | ||
Bryan O'Sullivan
|
r16955 | putbe32(mode, p); | ||
putbe32(size, p + 4); | ||||
putbe32(mtime, p + 8); | ||||
t = p + 12; | ||||
p += 16; | ||||
len = PyString_GET_SIZE(k); | ||||
memcpy(p, PyString_AS_STRING(k), len); | ||||
p += len; | ||||
o = PyDict_GetItem(copymap, k); | ||||
if (o) { | ||||
*p++ = '\0'; | ||||
l = PyString_GET_SIZE(o); | ||||
memcpy(p, PyString_AS_STRING(o), l); | ||||
p += l; | ||||
len += l + 1; | ||||
} | ||||
putbe32((uint32_t)len, t); | ||||
} | ||||
pos = p - PyString_AS_STRING(packobj); | ||||
if (pos != nbytes) { | ||||
PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld", | ||||
(long)pos, (long)nbytes); | ||||
goto bail; | ||||
} | ||||
return packobj; | ||||
bail: | ||||
Siddharth Agarwal
|
r21806 | Py_XDECREF(mtime_unset); | ||
Bryan O'Sullivan
|
r16955 | Py_XDECREF(packobj); | ||
return NULL; | ||||
} | ||||
Bryan O'Sullivan
|
/*
 * One node of a base-16 trie for fast node->rev mapping.
 *
 * Each of the 16 child slots holds one of:
 *   positive value - index of the next node in the trie
 *   negative value - a leaf, encoded as -(rev + 1)
 *   zero           - empty
 */
typedef struct {
	int children[16];
} nodetree;
/* | ||||
* This class has two behaviours. | ||||
* | ||||
* When used in a list-like way (with integer keys), we decode an | ||||
* entry in a RevlogNG index file on demand. Our last entry is a | ||||
* sentinel, always a nullid. We have limited support for | ||||
* integer-keyed insert and delete, only at elements right before the | ||||
* sentinel. | ||||
* | ||||
* With string keys, we lazily perform a reverse mapping from node to | ||||
* rev, using a base-16 trie. | ||||
Bryan O'Sullivan
|
r16363 | */ | ||
typedef struct { | ||||
PyObject_HEAD | ||||
/* Type-specific fields go here. */ | ||||
PyObject *data; /* raw bytes of index */ | ||||
PyObject **cache; /* cached tuples */ | ||||
const char **offsets; /* populated on demand */ | ||||
Py_ssize_t raw_length; /* original number of elements */ | ||||
Py_ssize_t length; /* current number of elements */ | ||||
PyObject *added; /* populated on demand */ | ||||
Bryan O'Sullivan
|
r16787 | PyObject *headrevs; /* cache, invalidated on changes */ | ||
Bryan O'Sullivan
|
r16414 | nodetree *nt; /* base-16 trie */ | ||
int ntlength; /* # nodes in use */ | ||||
int ntcapacity; /* # nodes allocated */ | ||||
int ntdepth; /* maximum depth of tree */ | ||||
int ntsplits; /* # splits performed */ | ||||
int ntrev; /* last rev scanned */ | ||||
int ntlookups; /* # lookups */ | ||||
int ntmisses; /* # lookups that miss the cache */ | ||||
Bryan O'Sullivan
|
r16363 | int inlined; | ||
} indexObject; | ||||
Bryan O'Sullivan
|
r16414 | static Py_ssize_t index_length(const indexObject *self) | ||
Bryan O'Sullivan
|
r16363 | { | ||
if (self->added == NULL) | ||||
return self->length; | ||||
return self->length + PyList_GET_SIZE(self->added); | ||||
} | ||||
static PyObject *nullentry; | ||||
Bryan O'Sullivan
|
r16414 | static const char nullid[20]; | ||
Bryan O'Sullivan
|
r16363 | |||
static long inline_scan(indexObject *self, const char **offsets); | ||||
#if LONG_MAX == 0x7fffffffL | ||||
Matt Mackall
|
r16393 | static char *tuple_format = "Kiiiiiis#"; | ||
Bryan O'Sullivan
|
r16363 | #else | ||
Matt Mackall
|
r16393 | static char *tuple_format = "kiiiiiis#"; | ||
Bryan O'Sullivan
|
r16363 | #endif | ||
Bernhard Leiner
|
r7108 | |||
Bryan O'Sullivan
|
r16863 | /* A RevlogNG v1 index entry is 64 bytes long. */ | ||
static const long v1_hdrsize = 64; | ||||
Bryan O'Sullivan
|
r16414 | /* | ||
* Return a pointer to the beginning of a RevlogNG record. | ||||
*/ | ||||
static const char *index_deref(indexObject *self, Py_ssize_t pos) | ||||
{ | ||||
if (self->inlined && pos > 0) { | ||||
if (self->offsets == NULL) { | ||||
self->offsets = malloc(self->raw_length * | ||||
sizeof(*self->offsets)); | ||||
if (self->offsets == NULL) | ||||
return (const char *)PyErr_NoMemory(); | ||||
inline_scan(self, self->offsets); | ||||
} | ||||
return self->offsets[pos]; | ||||
} | ||||
Bryan O'Sullivan
|
r16863 | return PyString_AS_STRING(self->data) + pos * v1_hdrsize; | ||
Bryan O'Sullivan
|
r16414 | } | ||
/* | ||||
* RevlogNG format (all in big endian, data may be inlined): | ||||
Bernhard Leiner
|
r7108 | * 6 bytes: offset | ||
* 2 bytes: flags | ||||
* 4 bytes: compressed length | ||||
* 4 bytes: uncompressed length | ||||
* 4 bytes: base revision | ||||
* 4 bytes: link revision | ||||
* 4 bytes: parent 1 revision | ||||
* 4 bytes: parent 2 revision | ||||
* 32 bytes: nodeid (only 20 bytes used) | ||||
*/ | ||||
Bryan O'Sullivan
|
r16363 | static PyObject *index_get(indexObject *self, Py_ssize_t pos) | ||
Bernhard Leiner
|
r7108 | { | ||
Benoit Boissinot
|
r7154 | uint64_t offset_flags; | ||
Bernhard Leiner
|
r7108 | int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2; | ||
Benoit Boissinot
|
r7154 | const char *c_node_id; | ||
Bryan O'Sullivan
|
r16363 | const char *data; | ||
Py_ssize_t length = index_length(self); | ||||
PyObject *entry; | ||||
Bryan O'Sullivan
|
r16414 | if (pos < 0) | ||
pos += length; | ||||
if (pos < 0 || pos >= length) { | ||||
Bryan O'Sullivan
|
r16363 | PyErr_SetString(PyExc_IndexError, "revlog index out of range"); | ||
return NULL; | ||||
} | ||||
if (pos == length - 1) { | ||||
Py_INCREF(nullentry); | ||||
return nullentry; | ||||
} | ||||
Bernhard Leiner
|
r7108 | |||
Bryan O'Sullivan
|
r16363 | if (pos >= self->length - 1) { | ||
PyObject *obj; | ||||
obj = PyList_GET_ITEM(self->added, pos - self->length + 1); | ||||
Py_INCREF(obj); | ||||
return obj; | ||||
} | ||||
if (self->cache) { | ||||
if (self->cache[pos]) { | ||||
Py_INCREF(self->cache[pos]); | ||||
return self->cache[pos]; | ||||
} | ||||
} else { | ||||
self->cache = calloc(self->raw_length, sizeof(PyObject *)); | ||||
if (self->cache == NULL) | ||||
return PyErr_NoMemory(); | ||||
} | ||||
Thomas Arendsen Hein
|
r7190 | |||
Bryan O'Sullivan
|
r16414 | data = index_deref(self, pos); | ||
if (data == NULL) | ||||
return NULL; | ||||
Bryan O'Sullivan
|
r16363 | |||
Matt Mackall
|
r16437 | offset_flags = getbe32(data + 4); | ||
Bryan O'Sullivan
|
r16363 | if (pos == 0) /* mask out version number for the first entry */ | ||
offset_flags &= 0xFFFF; | ||||
else { | ||||
Matt Mackall
|
r16437 | uint32_t offset_high = getbe32(data); | ||
Bryan O'Sullivan
|
r16363 | offset_flags |= ((uint64_t)offset_high) << 32; | ||
} | ||||
Benoit Boissinot
|
r7154 | |||
Matt Mackall
|
r16437 | comp_len = getbe32(data + 8); | ||
uncomp_len = getbe32(data + 12); | ||||
base_rev = getbe32(data + 16); | ||||
link_rev = getbe32(data + 20); | ||||
parent_1 = getbe32(data + 24); | ||||
parent_2 = getbe32(data + 28); | ||||
Bryan O'Sullivan
|
r16363 | c_node_id = data + 32; | ||
entry = Py_BuildValue(tuple_format, offset_flags, comp_len, | ||||
Matt Mackall
|
r13254 | uncomp_len, base_rev, link_rev, | ||
parent_1, parent_2, c_node_id, 20); | ||||
Bryan O'Sullivan
|
r19726 | if (entry) { | ||
Bryan O'Sullivan
|
r16363 | PyObject_GC_UnTrack(entry); | ||
Bryan O'Sullivan
|
r19726 | Py_INCREF(entry); | ||
} | ||||
Bryan O'Sullivan
|
r16363 | |||
self->cache[pos] = entry; | ||||
return entry; | ||||
} | ||||
Bryan O'Sullivan
|
r16414 | /* | ||
* Return the 20-byte SHA of the node corresponding to the given rev. | ||||
*/ | ||||
static const char *index_node(indexObject *self, Py_ssize_t pos) | ||||
{ | ||||
Py_ssize_t length = index_length(self); | ||||
const char *data; | ||||
Bryan O'Sullivan
|
r16664 | if (pos == length - 1 || pos == INT_MAX) | ||
Bryan O'Sullivan
|
r16414 | return nullid; | ||
if (pos >= length) | ||||
return NULL; | ||||
if (pos >= self->length - 1) { | ||||
PyObject *tuple, *str; | ||||
tuple = PyList_GET_ITEM(self->added, pos - self->length + 1); | ||||
str = PyTuple_GetItem(tuple, 7); | ||||
return str ? PyString_AS_STRING(str) : NULL; | ||||
} | ||||
data = index_deref(self, pos); | ||||
return data ? data + 32 : NULL; | ||||
} | ||||
static int nt_insert(indexObject *self, const char *node, int rev); | ||||
static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen) | ||||
{ | ||||
if (PyString_AsStringAndSize(obj, node, nodelen) == -1) | ||||
return -1; | ||||
if (*nodelen == 20) | ||||
return 0; | ||||
PyErr_SetString(PyExc_ValueError, "20-byte hash required"); | ||||
return -1; | ||||
} | ||||
Bryan O'Sullivan
|
r16363 | static PyObject *index_insert(indexObject *self, PyObject *args) | ||
{ | ||||
Bryan O'Sullivan
|
r16414 | PyObject *obj; | ||
char *node; | ||||
Bryan O'Sullivan
|
r16363 | long offset; | ||
Bryan O'Sullivan
|
r16414 | Py_ssize_t len, nodelen; | ||
Bryan O'Sullivan
|
r16363 | |||
if (!PyArg_ParseTuple(args, "lO", &offset, &obj)) | ||||
return NULL; | ||||
if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) { | ||||
Bryan O'Sullivan
|
r16414 | PyErr_SetString(PyExc_TypeError, "8-tuple required"); | ||
Bryan O'Sullivan
|
r16363 | return NULL; | ||
} | ||||
Bernhard Leiner
|
r7108 | |||
Bryan O'Sullivan
|
r16414 | if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1) | ||
Bryan O'Sullivan
|
r16363 | return NULL; | ||
len = index_length(self); | ||||
if (offset < 0) | ||||
offset += len; | ||||
if (offset != len - 1) { | ||||
PyErr_SetString(PyExc_IndexError, | ||||
"insert only supported at index -1"); | ||||
return NULL; | ||||
} | ||||
Bryan O'Sullivan
|
r16414 | if (offset > INT_MAX) { | ||
PyErr_SetString(PyExc_ValueError, | ||||
"currently only 2**31 revs supported"); | ||||
return NULL; | ||||
} | ||||
Bryan O'Sullivan
|
r16363 | if (self->added == NULL) { | ||
self->added = PyList_New(0); | ||||
if (self->added == NULL) | ||||
return NULL; | ||||
} | ||||
if (PyList_Append(self->added, obj) == -1) | ||||
return NULL; | ||||
Matt Mackall
|
r13254 | |||
Bryan O'Sullivan
|
r16414 | if (self->nt) | ||
nt_insert(self, node, (int)offset); | ||||
Bryan O'Sullivan
|
r16787 | Py_CLEAR(self->headrevs); | ||
Bryan O'Sullivan
|
r16363 | Py_RETURN_NONE; | ||
} | ||||
Bryan O'Sullivan
|
r16370 | static void _index_clearcaches(indexObject *self) | ||
{ | ||||
if (self->cache) { | ||||
Py_ssize_t i; | ||||
Bryan O'Sullivan
|
r16732 | for (i = 0; i < self->raw_length; i++) | ||
Py_CLEAR(self->cache[i]); | ||||
Bryan O'Sullivan
|
r16370 | free(self->cache); | ||
self->cache = NULL; | ||||
} | ||||
if (self->offsets) { | ||||
free(self->offsets); | ||||
self->offsets = NULL; | ||||
} | ||||
Bryan O'Sullivan
|
r16414 | if (self->nt) { | ||
free(self->nt); | ||||
self->nt = NULL; | ||||
} | ||||
Bryan O'Sullivan
|
r16787 | Py_CLEAR(self->headrevs); | ||
Bryan O'Sullivan
|
r16370 | } | ||
/*
 * Drop all caches and reset the node trie bookkeeping and statistics
 * to their initial values. Returns None.
 */
static PyObject *index_clearcaches(indexObject *self)
{
	_index_clearcaches(self);

	self->ntlength = 0;
	self->ntcapacity = 0;
	self->ntdepth = 0;
	self->ntsplits = 0;
	self->ntrev = -1;
	self->ntlookups = 0;
	self->ntmisses = 0;

	Py_RETURN_NONE;
}
Bryan O'Sullivan
|
r16414 | static PyObject *index_stats(indexObject *self) | ||
{ | ||||
PyObject *obj = PyDict_New(); | ||||
if (obj == NULL) | ||||
return NULL; | ||||
#define istat(__n, __d) \ | ||||
Adrian Buehlmann
|
r16629 | if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \ | ||
Bryan O'Sullivan
|
r16414 | goto bail; | ||
if (self->added) { | ||||
Py_ssize_t len = PyList_GET_SIZE(self->added); | ||||
if (PyDict_SetItemString(obj, "index entries added", | ||||
Adrian Buehlmann
|
r16629 | PyInt_FromSsize_t(len)) == -1) | ||
Bryan O'Sullivan
|
r16414 | goto bail; | ||
} | ||||
if (self->raw_length != self->length - 1) | ||||
istat(raw_length, "revs on disk"); | ||||
istat(length, "revs in memory"); | ||||
istat(ntcapacity, "node trie capacity"); | ||||
istat(ntdepth, "node trie depth"); | ||||
istat(ntlength, "node trie count"); | ||||
istat(ntlookups, "node trie lookups"); | ||||
istat(ntmisses, "node trie misses"); | ||||
istat(ntrev, "node trie last rev scanned"); | ||||
istat(ntsplits, "node trie splits"); | ||||
#undef istat | ||||
return obj; | ||||
bail: | ||||
Py_XDECREF(obj); | ||||
return NULL; | ||||
} | ||||
Bryan O'Sullivan
|
r16787 | /* | ||
* When we cache a list, we want to be sure the caller can't mutate | ||||
* the cached copy. | ||||
*/ | ||||
static PyObject *list_copy(PyObject *list) | ||||
{ | ||||
Py_ssize_t len = PyList_GET_SIZE(list); | ||||
PyObject *newlist = PyList_New(len); | ||||
Py_ssize_t i; | ||||
if (newlist == NULL) | ||||
return NULL; | ||||
for (i = 0; i < len; i++) { | ||||
PyObject *obj = PyList_GET_ITEM(list, i); | ||||
Py_INCREF(obj); | ||||
PyList_SET_ITEM(newlist, i, obj); | ||||
} | ||||
return newlist; | ||||
} | ||||
Bryan O'Sullivan
|
r16786 | static PyObject *index_headrevs(indexObject *self) | ||
{ | ||||
Py_ssize_t i, len, addlen; | ||||
char *nothead = NULL; | ||||
PyObject *heads; | ||||
Bryan O'Sullivan
|
r16787 | if (self->headrevs) | ||
return list_copy(self->headrevs); | ||||
Bryan O'Sullivan
|
r16786 | len = index_length(self) - 1; | ||
heads = PyList_New(0); | ||||
if (heads == NULL) | ||||
goto bail; | ||||
if (len == 0) { | ||||
PyObject *nullid = PyInt_FromLong(-1); | ||||
if (nullid == NULL || PyList_Append(heads, nullid) == -1) { | ||||
Py_XDECREF(nullid); | ||||
goto bail; | ||||
} | ||||
goto done; | ||||
} | ||||
nothead = calloc(len, 1); | ||||
if (nothead == NULL) | ||||
goto bail; | ||||
for (i = 0; i < self->raw_length; i++) { | ||||
const char *data = index_deref(self, i); | ||||
int parent_1 = getbe32(data + 24); | ||||
int parent_2 = getbe32(data + 28); | ||||
if (parent_1 >= 0) | ||||
nothead[parent_1] = 1; | ||||
if (parent_2 >= 0) | ||||
nothead[parent_2] = 1; | ||||
} | ||||
addlen = self->added ? PyList_GET_SIZE(self->added) : 0; | ||||
for (i = 0; i < addlen; i++) { | ||||
PyObject *rev = PyList_GET_ITEM(self->added, i); | ||||
PyObject *p1 = PyTuple_GET_ITEM(rev, 5); | ||||
PyObject *p2 = PyTuple_GET_ITEM(rev, 6); | ||||
long parent_1, parent_2; | ||||
if (!PyInt_Check(p1) || !PyInt_Check(p2)) { | ||||
PyErr_SetString(PyExc_TypeError, | ||||
"revlog parents are invalid"); | ||||
goto bail; | ||||
} | ||||
parent_1 = PyInt_AS_LONG(p1); | ||||
parent_2 = PyInt_AS_LONG(p2); | ||||
if (parent_1 >= 0) | ||||
nothead[parent_1] = 1; | ||||
if (parent_2 >= 0) | ||||
nothead[parent_2] = 1; | ||||
} | ||||
for (i = 0; i < len; i++) { | ||||
PyObject *head; | ||||
if (nothead[i]) | ||||
continue; | ||||
head = PyInt_FromLong(i); | ||||
if (head == NULL || PyList_Append(heads, head) == -1) { | ||||
Py_XDECREF(head); | ||||
goto bail; | ||||
} | ||||
} | ||||
done: | ||||
Bryan O'Sullivan
|
r16787 | self->headrevs = heads; | ||
Bryan O'Sullivan
|
r16786 | free(nothead); | ||
Bryan O'Sullivan
|
r16787 | return list_copy(self->headrevs); | ||
Bryan O'Sullivan
|
r16786 | bail: | ||
Py_XDECREF(heads); | ||||
free(nothead); | ||||
return NULL; | ||||
} | ||||
Bryan O'Sullivan
|
r16618 | static inline int nt_level(const char *node, Py_ssize_t level) | ||
Bryan O'Sullivan
|
r16414 | { | ||
int v = node[level>>1]; | ||||
if (!(level & 1)) | ||||
v >>= 4; | ||||
return v & 0xf; | ||||
} | ||||
Bryan O'Sullivan
|
r16616 | /* | ||
* Return values: | ||||
* | ||||
* -4: match is ambiguous (multiple candidates) | ||||
* -2: not found | ||||
* rest: valid rev | ||||
*/ | ||||
Bryan O'Sullivan
|
r16663 | static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen, | ||
int hex) | ||||
Bryan O'Sullivan
|
r16414 | { | ||
Bryan O'Sullivan
|
r16663 | int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level; | ||
Bryan O'Sullivan
|
r16641 | int level, maxlevel, off; | ||
Bryan O'Sullivan
|
r16414 | |||
if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0) | ||||
return -1; | ||||
if (self->nt == NULL) | ||||
return -2; | ||||
Bryan O'Sullivan
|
r16663 | if (hex) | ||
Bryan O'Sullivan
|
r16665 | maxlevel = nodelen > 40 ? 40 : (int)nodelen; | ||
Bryan O'Sullivan
|
r16663 | else | ||
maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2); | ||||
Bryan O'Sullivan
|
r16641 | |||
for (level = off = 0; level < maxlevel; level++) { | ||||
Bryan O'Sullivan
|
r16663 | int k = getnybble(node, level); | ||
Bryan O'Sullivan
|
r16414 | nodetree *n = &self->nt[off]; | ||
int v = n->children[k]; | ||||
if (v < 0) { | ||||
const char *n; | ||||
Bryan O'Sullivan
|
r16663 | Py_ssize_t i; | ||
Bryan O'Sullivan
|
r16414 | v = -v - 1; | ||
n = index_node(self, v); | ||||
if (n == NULL) | ||||
return -2; | ||||
Bryan O'Sullivan
|
r16663 | for (i = level; i < maxlevel; i++) | ||
if (getnybble(node, i) != nt_level(n, i)) | ||||
return -2; | ||||
return v; | ||||
Bryan O'Sullivan
|
r16414 | } | ||
if (v == 0) | ||||
return -2; | ||||
off = v; | ||||
} | ||||
Bryan O'Sullivan
|
r16616 | /* multiple matches against an ambiguous prefix */ | ||
return -4; | ||||
Bryan O'Sullivan
|
r16414 | } | ||
/*
 * Reserve one more radix tree node, doubling the backing array when it
 * is full.
 *
 * Returns the offset of the new node, or -1 with MemoryError set. On
 * failure the existing tree and its recorded capacity are left intact.
 */
static int nt_new(indexObject *self)
{
	if (self->ntlength == self->ntcapacity) {
		/*
		 * Keep the old pointer until realloc succeeds: assigning the
		 * result straight to self->nt would leak the tree and leave
		 * self->nt NULL with a capacity that no longer matches.
		 */
		nodetree *grown = realloc(self->nt,
			(size_t)self->ntcapacity * 2 * sizeof(nodetree));
		if (grown == NULL) {
			PyErr_SetString(PyExc_MemoryError, "out of memory");
			return -1;
		}
		self->nt = grown;
		self->ntcapacity *= 2;
		/* realloc does not clear the newly added half */
		memset(&self->nt[self->ntlength], 0,
		       sizeof(nodetree) * (self->ntcapacity - self->ntlength));
	}
	return self->ntlength++;
}
/*
 * Record node -> rev in the radix tree.
 *
 * A slot holds 0 when empty, a positive offset of a child tree node, or
 * -rev - 1 for a leaf. Inserting rev -1 therefore stores 0, which
 * empties the slot.
 *
 * Returns 0 on success and -1 on failure (nt_new failed, or all 40
 * levels were walked without finding a slot).
 */
static int nt_insert(indexObject *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 40) {
		int k = nt_level(node, level);
		nodetree *n;
		int v;

		n = &self->nt[off];
		v = n->children[k];

		if (v == 0) {
			n->children[k] = -rev - 1;
			return 0;
		}
		if (v < 0) {
			/*
			 * Decode the leaf as -(v + 1): "-v - 1" overflows
			 * when v is INT_MIN, which is how rev INT_MAX (used
			 * for the null id) is stored.
			 */
			const char *oldnode = index_node(self, -(v + 1));
			int noff;

			if (!oldnode || !memcmp(oldnode, node, 20)) {
				/* stale leaf or same node: overwrite in place */
				n->children[k] = -rev - 1;
				return 0;
			}
			/* two different nodes share this prefix: split */
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nt may have been changed by realloc */
			self->nt[off].children[k] = noff;
			off = noff;
			n = &self->nt[off];
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->ntdepth)
				self->ntdepth = level;
			self->ntsplits += 1;
		} else {
			level += 1;
			off = v;
		}
	}

	return -1;
}
Bryan O'Sullivan
|
r16615 | static int nt_init(indexObject *self) | ||
{ | ||||
if (self->nt == NULL) { | ||||
Abhay Kadam
|
r20110 | if (self->raw_length > INT_MAX) { | ||
PyErr_SetString(PyExc_ValueError, "overflow in nt_init"); | ||||
return -1; | ||||
} | ||||
Bryan O'Sullivan
|
r16615 | self->ntcapacity = self->raw_length < 4 | ||
Abhay Kadam
|
r20110 | ? 4 : (int)self->raw_length / 2; | ||
Bryan O'Sullivan
|
r16615 | self->nt = calloc(self->ntcapacity, sizeof(nodetree)); | ||
if (self->nt == NULL) { | ||||
PyErr_NoMemory(); | ||||
return -1; | ||||
} | ||||
self->ntlength = 1; | ||||
self->ntrev = (int)index_length(self) - 1; | ||||
self->ntlookups = 1; | ||||
self->ntmisses = 0; | ||||
Bryan O'Sullivan
|
r16664 | if (nt_insert(self, nullid, INT_MAX) == -1) | ||
return -1; | ||||
Bryan O'Sullivan
|
r16615 | } | ||
return 0; | ||||
} | ||||
Bryan O'Sullivan
|
r16414 | /* | ||
* Return values: | ||||
* | ||||
* -3: error (exception set) | ||||
* -2: not found (no exception set) | ||||
* rest: valid rev | ||||
*/ | ||||
static int index_find_node(indexObject *self, | ||||
const char *node, Py_ssize_t nodelen) | ||||
{ | ||||
int rev; | ||||
self->ntlookups++; | ||||
Bryan O'Sullivan
|
r16663 | rev = nt_find(self, node, nodelen, 0); | ||
Bryan O'Sullivan
|
r16414 | if (rev >= -1) | ||
return rev; | ||||
Bryan O'Sullivan
|
r16615 | if (nt_init(self) == -1) | ||
return -3; | ||||
Bryan O'Sullivan
|
r16414 | |||
/* | ||||
* For the first handful of lookups, we scan the entire index, | ||||
* and cache only the matching nodes. This optimizes for cases | ||||
* like "hg tip", where only a few nodes are accessed. | ||||
* | ||||
* After that, we cache every node we visit, using a single | ||||
* scan amortized over multiple lookups. This gives the best | ||||
* bulk performance, e.g. for "hg log". | ||||
*/ | ||||
if (self->ntmisses++ < 4) { | ||||
for (rev = self->ntrev - 1; rev >= 0; rev--) { | ||||
const char *n = index_node(self, rev); | ||||
if (n == NULL) | ||||
return -2; | ||||
if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) { | ||||
if (nt_insert(self, n, rev) == -1) | ||||
return -3; | ||||
break; | ||||
} | ||||
} | ||||
} else { | ||||
for (rev = self->ntrev - 1; rev >= 0; rev--) { | ||||
const char *n = index_node(self, rev); | ||||
Bryan O'Sullivan
|
r16614 | if (n == NULL) { | ||
self->ntrev = rev + 1; | ||||
Bryan O'Sullivan
|
r16414 | return -2; | ||
Bryan O'Sullivan
|
r16614 | } | ||
if (nt_insert(self, n, rev) == -1) { | ||||
self->ntrev = rev + 1; | ||||
Bryan O'Sullivan
|
r16414 | return -3; | ||
Bryan O'Sullivan
|
r16614 | } | ||
Bryan O'Sullivan
|
r16414 | if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) { | ||
break; | ||||
} | ||||
} | ||||
self->ntrev = rev; | ||||
} | ||||
if (rev >= 0) | ||||
return rev; | ||||
return -2; | ||||
} | ||||
static PyObject *raise_revlog_error(void) | ||||
{ | ||||
static PyObject *errclass; | ||||
PyObject *mod = NULL, *errobj; | ||||
if (errclass == NULL) { | ||||
PyObject *dict; | ||||
mod = PyImport_ImportModule("mercurial.error"); | ||||
if (mod == NULL) | ||||
goto classfail; | ||||
dict = PyModule_GetDict(mod); | ||||
if (dict == NULL) | ||||
goto classfail; | ||||
errclass = PyDict_GetItemString(dict, "RevlogError"); | ||||
if (errclass == NULL) { | ||||
PyErr_SetString(PyExc_SystemError, | ||||
"could not find RevlogError"); | ||||
goto classfail; | ||||
} | ||||
Py_INCREF(errclass); | ||||
} | ||||
errobj = PyObject_CallFunction(errclass, NULL); | ||||
if (errobj == NULL) | ||||
return NULL; | ||||
PyErr_SetObject(errclass, errobj); | ||||
return errobj; | ||||
classfail: | ||||
Py_XDECREF(mod); | ||||
return NULL; | ||||
} | ||||
/*
 * Mapping subscript: index[rev] returns the entry for an integer rev,
 * index[node] returns the rev of a node and raises RevlogError when
 * the node is unknown.
 */
static PyObject *index_getitem(indexObject *self, PyObject *value)
{
	char *node;
	Py_ssize_t nodelen;
	int rev;

	if (PyInt_Check(value))
		return index_get(self, PyInt_AS_LONG(value));

	if (node_check(value, &node, &nodelen) == -1)
		return NULL;

	rev = index_find_node(self, node, nodelen);
	if (rev >= -1)
		return PyInt_FromLong(rev);

	if (rev == -2) {
		/*
		 * raise_revlog_error() hands back an owned reference to
		 * the exception instance; release it, only the pending
		 * exception matters here.
		 */
		PyObject *exc = raise_revlog_error();
		Py_XDECREF(exc);
	}
	return NULL;
}
Bryan O'Sullivan
|
r16665 | static int nt_partialmatch(indexObject *self, const char *node, | ||
Py_ssize_t nodelen) | ||||
{ | ||||
int rev; | ||||
if (nt_init(self) == -1) | ||||
return -3; | ||||
if (self->ntrev > 0) { | ||||
/* ensure that the radix tree is fully populated */ | ||||
for (rev = self->ntrev - 1; rev >= 0; rev--) { | ||||
const char *n = index_node(self, rev); | ||||
if (n == NULL) | ||||
return -2; | ||||
if (nt_insert(self, n, rev) == -1) | ||||
return -3; | ||||
} | ||||
self->ntrev = rev; | ||||
} | ||||
return nt_find(self, node, nodelen, 1); | ||||
} | ||||
/*
 * partialmatch(prefix): resolve a hex node prefix of 4 to 40 characters.
 *
 * Returns the full 20-byte binary node, or None when the prefix contains
 * non-hex characters or matches nothing. Raises ValueError for a prefix
 * of the wrong length and RevlogError for an ambiguous one.
 */
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	int nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
		return NULL;

	if (nodelen < 4) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 40) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	/* hexdigit() flags bad characters by setting an exception */
	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	rev = nt_partialmatch(self, node, nodelen);

	switch (rev) {
	case -4: {
		/*
		 * Ambiguous prefix. raise_revlog_error() returns an owned
		 * reference to the exception instance; release it.
		 */
		PyObject *exc = raise_revlog_error();
		Py_XDECREF(exc);
		return NULL;
	}
	case -3:
		return NULL;
	case -2:
		Py_RETURN_NONE;
	case -1:
		return PyString_FromStringAndSize(nullid, 20);
	}

	fullnode = index_node(self, rev);
	if (fullnode == NULL) {
		PyErr_Format(PyExc_IndexError,
			     "could not access rev %d", rev);
		return NULL;
	}
	return PyString_FromStringAndSize(fullnode, 20);
}
Bryan O'Sullivan
|
r16414 | static PyObject *index_m_get(indexObject *self, PyObject *args) | ||
{ | ||||
Bryan O'Sullivan
|
r16679 | Py_ssize_t nodelen; | ||
PyObject *val; | ||||
Bryan O'Sullivan
|
r16414 | char *node; | ||
Bryan O'Sullivan
|
r16679 | int rev; | ||
Bryan O'Sullivan
|
r16414 | |||
Bryan O'Sullivan
|
r16679 | if (!PyArg_ParseTuple(args, "O", &val)) | ||
Bryan O'Sullivan
|
r16414 | return NULL; | ||
Bryan O'Sullivan
|
r16679 | if (node_check(val, &node, &nodelen) == -1) | ||
return NULL; | ||||
Bryan O'Sullivan
|
r16414 | rev = index_find_node(self, node, nodelen); | ||
if (rev == -3) | ||||
return NULL; | ||||
if (rev == -2) | ||||
Py_RETURN_NONE; | ||||
return PyInt_FromLong(rev); | ||||
} | ||||
/*
 * "x in index": an integer is checked against the valid rev range
 * (-1 up to, but not including, the index length); anything else is
 * treated as a node and looked up.
 *
 * Returns 1 if present, 0 if absent, -1 on error.
 */
static int index_contains(indexObject *self, PyObject *value)
{
	char *node;
	Py_ssize_t nodelen;
	int found;

	if (PyInt_Check(value)) {
		long rev = PyInt_AS_LONG(value);

		if (rev < -1)
			return 0;
		return rev < index_length(self);
	}

	if (node_check(value, &node, &nodelen) == -1)
		return -1;

	found = index_find_node(self, node, nodelen);
	if (found == -3)
		return -1;
	return found != -2;
}
Bryan O'Sullivan
|
r18988 | static inline void index_get_parents(indexObject *self, int rev, int *ps) | ||
{ | ||||
if (rev >= self->length - 1) { | ||||
PyObject *tuple = PyList_GET_ITEM(self->added, | ||||
rev - self->length + 1); | ||||
ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5)); | ||||
ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6)); | ||||
} else { | ||||
const char *data = index_deref(self, rev); | ||||
ps[0] = getbe32(data + 24); | ||||
ps[1] = getbe32(data + 28); | ||||
} | ||||
} | ||||
typedef uint64_t bitmask; | ||||
/* | ||||
* Given a disjoint set of revs, return all candidates for the | ||||
* greatest common ancestor. In revset notation, this is the set | ||||
* "heads(::a and ::b and ...)" | ||||
*/ | ||||
static PyObject *find_gca_candidates(indexObject *self, const int *revs, | ||||
int revcount) | ||||
{ | ||||
const bitmask allseen = (1ull << revcount) - 1; | ||||
const bitmask poison = 1ull << revcount; | ||||
PyObject *gca = PyList_New(0); | ||||
Mads Kiilerich
|
r20555 | int i, v, interesting; | ||
Bryan O'Sullivan
|
r18988 | int maxrev = -1; | ||
Matt Mackall
|
r19030 | long sp; | ||
Bryan O'Sullivan
|
r18988 | bitmask *seen; | ||
Bryan O'Sullivan
|
r19727 | if (gca == NULL) | ||
return PyErr_NoMemory(); | ||||
Bryan O'Sullivan
|
r18988 | for (i = 0; i < revcount; i++) { | ||
if (revs[i] > maxrev) | ||||
maxrev = revs[i]; | ||||
} | ||||
seen = calloc(sizeof(*seen), maxrev + 1); | ||||
Bryan O'Sullivan
|
r19727 | if (seen == NULL) { | ||
Py_DECREF(gca); | ||||
Bryan O'Sullivan
|
r18988 | return PyErr_NoMemory(); | ||
Bryan O'Sullivan
|
r19727 | } | ||
Bryan O'Sullivan
|
r18988 | |||
for (i = 0; i < revcount; i++) | ||||
seen[revs[i]] = 1ull << i; | ||||
Mads Kiilerich
|
r20555 | interesting = revcount; | ||
Bryan O'Sullivan
|
r18988 | |||
for (v = maxrev; v >= 0 && interesting; v--) { | ||||
long sv = seen[v]; | ||||
int parents[2]; | ||||
if (!sv) | ||||
continue; | ||||
if (sv < poison) { | ||||
interesting -= 1; | ||||
if (sv == allseen) { | ||||
PyObject *obj = PyInt_FromLong(v); | ||||
if (obj == NULL) | ||||
goto bail; | ||||
if (PyList_Append(gca, obj) == -1) { | ||||
Py_DECREF(obj); | ||||
goto bail; | ||||
} | ||||
sv |= poison; | ||||
for (i = 0; i < revcount; i++) { | ||||
Mads Kiilerich
|
r20555 | if (revs[i] == v) | ||
goto done; | ||||
Bryan O'Sullivan
|
r18988 | } | ||
} | ||||
} | ||||
index_get_parents(self, v, parents); | ||||
for (i = 0; i < 2; i++) { | ||||
int p = parents[i]; | ||||
if (p == -1) | ||||
continue; | ||||
Matt Mackall
|
r19030 | sp = seen[p]; | ||
Bryan O'Sullivan
|
r18988 | if (sv < poison) { | ||
if (sp == 0) { | ||||
seen[p] = sv; | ||||
interesting++; | ||||
} | ||||
else if (sp != sv) | ||||
seen[p] |= sv; | ||||
} else { | ||||
if (sp && sp < poison) | ||||
interesting--; | ||||
seen[p] = sv; | ||||
} | ||||
} | ||||
} | ||||
done: | ||||
free(seen); | ||||
return gca; | ||||
bail: | ||||
free(seen); | ||||
Py_XDECREF(gca); | ||||
return NULL; | ||||
} | ||||
/* | ||||
* Given a disjoint set of revs, return the subset with the longest | ||||
* path to the root. | ||||
*/ | ||||
static PyObject *find_deepest(indexObject *self, PyObject *revs) | ||||
{ | ||||
const Py_ssize_t revcount = PyList_GET_SIZE(revs); | ||||
static const Py_ssize_t capacity = 24; | ||||
int *depth, *interesting = NULL; | ||||
int i, j, v, ninteresting; | ||||
Danek Duvall
|
r21730 | PyObject *dict = NULL, *keys = NULL; | ||
Bryan O'Sullivan
|
r18988 | long *seen = NULL; | ||
int maxrev = -1; | ||||
long final; | ||||
if (revcount > capacity) { | ||||
PyErr_Format(PyExc_OverflowError, | ||||
"bitset size (%ld) > capacity (%ld)", | ||||
André Sintzoff
|
r19062 | (long)revcount, (long)capacity); | ||
Bryan O'Sullivan
|
r18988 | return NULL; | ||
} | ||||
for (i = 0; i < revcount; i++) { | ||||
int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i)); | ||||
if (n > maxrev) | ||||
maxrev = n; | ||||
} | ||||
depth = calloc(sizeof(*depth), maxrev + 1); | ||||
if (depth == NULL) | ||||
return PyErr_NoMemory(); | ||||
seen = calloc(sizeof(*seen), maxrev + 1); | ||||
if (seen == NULL) { | ||||
PyErr_NoMemory(); | ||||
goto bail; | ||||
} | ||||
interesting = calloc(sizeof(*interesting), 2 << revcount); | ||||
if (interesting == NULL) { | ||||
PyErr_NoMemory(); | ||||
goto bail; | ||||
} | ||||
Siddharth Agarwal
|
r19502 | if (PyList_Sort(revs) == -1) | ||
goto bail; | ||||
Bryan O'Sullivan
|
r18988 | for (i = 0; i < revcount; i++) { | ||
int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i)); | ||||
long b = 1l << i; | ||||
depth[n] = 1; | ||||
seen[n] = b; | ||||
interesting[b] = 1; | ||||
} | ||||
ninteresting = (int)revcount; | ||||
for (v = maxrev; v >= 0 && ninteresting > 1; v--) { | ||||
int dv = depth[v]; | ||||
int parents[2]; | ||||
long sv; | ||||
if (dv == 0) | ||||
continue; | ||||
sv = seen[v]; | ||||
index_get_parents(self, v, parents); | ||||
for (i = 0; i < 2; i++) { | ||||
int p = parents[i]; | ||||
long nsp, sp; | ||||
int dp; | ||||
if (p == -1) | ||||
continue; | ||||
dp = depth[p]; | ||||
nsp = sp = seen[p]; | ||||
if (dp <= dv) { | ||||
depth[p] = dv + 1; | ||||
if (sp != sv) { | ||||
interesting[sv] += 1; | ||||
nsp = seen[p] = sv; | ||||
if (sp) { | ||||
interesting[sp] -= 1; | ||||
if (interesting[sp] == 0) | ||||
ninteresting -= 1; | ||||
} | ||||
} | ||||
} | ||||
else if (dv == dp - 1) { | ||||
nsp = sp | sv; | ||||
if (nsp == sp) | ||||
continue; | ||||
seen[p] = nsp; | ||||
Wei, Elson
|
r19503 | interesting[sp] -= 1; | ||
if (interesting[sp] == 0 && interesting[nsp] > 0) | ||||
ninteresting -= 1; | ||||
Bryan O'Sullivan
|
r18988 | interesting[nsp] += 1; | ||
} | ||||
} | ||||
interesting[sv] -= 1; | ||||
if (interesting[sv] == 0) | ||||
ninteresting -= 1; | ||||
} | ||||
final = 0; | ||||
j = ninteresting; | ||||
for (i = 0; i < (int)(2 << revcount) && j > 0; i++) { | ||||
if (interesting[i] == 0) | ||||
continue; | ||||
final |= i; | ||||
j -= 1; | ||||
} | ||||
Danek Duvall
|
r21730 | if (final == 0) { | ||
keys = PyList_New(0); | ||||
goto bail; | ||||
} | ||||
Bryan O'Sullivan
|
r18988 | |||
dict = PyDict_New(); | ||||
if (dict == NULL) | ||||
goto bail; | ||||
Siddharth Agarwal
|
r19504 | for (i = 0; i < revcount; i++) { | ||
Bryan O'Sullivan
|
r18988 | PyObject *key; | ||
if ((final & (1 << i)) == 0) | ||||
continue; | ||||
key = PyList_GET_ITEM(revs, i); | ||||
Py_INCREF(key); | ||||
Py_INCREF(Py_None); | ||||
if (PyDict_SetItem(dict, key, Py_None) == -1) { | ||||
Py_DECREF(key); | ||||
Py_DECREF(Py_None); | ||||
goto bail; | ||||
} | ||||
} | ||||
keys = PyDict_Keys(dict); | ||||
bail: | ||||
free(depth); | ||||
free(seen); | ||||
free(interesting); | ||||
Py_XDECREF(dict); | ||||
Danek Duvall
|
r21730 | return keys; | ||
Bryan O'Sullivan
|
r18988 | } | ||
/* | ||||
* Given a (possibly overlapping) set of revs, return the greatest | ||||
* common ancestors: those with the longest path to the root. | ||||
*/ | ||||
static PyObject *index_ancestors(indexObject *self, PyObject *args) | ||||
{ | ||||
PyObject *ret = NULL, *gca = NULL; | ||||
Py_ssize_t argcount, i, len; | ||||
bitmask repeat = 0; | ||||
int revcount = 0; | ||||
int *revs; | ||||
argcount = PySequence_Length(args); | ||||
revs = malloc(argcount * sizeof(*revs)); | ||||
if (argcount > 0 && revs == NULL) | ||||
return PyErr_NoMemory(); | ||||
len = index_length(self) - 1; | ||||
for (i = 0; i < argcount; i++) { | ||||
static const int capacity = 24; | ||||
PyObject *obj = PySequence_GetItem(args, i); | ||||
bitmask x; | ||||
long val; | ||||
if (!PyInt_Check(obj)) { | ||||
PyErr_SetString(PyExc_TypeError, | ||||
"arguments must all be ints"); | ||||
goto bail; | ||||
} | ||||
val = PyInt_AsLong(obj); | ||||
if (val == -1) { | ||||
ret = PyList_New(0); | ||||
goto done; | ||||
} | ||||
if (val < 0 || val >= len) { | ||||
PyErr_SetString(PyExc_IndexError, | ||||
"index out of range"); | ||||
goto bail; | ||||
} | ||||
/* this cheesy bloom filter lets us avoid some more | ||||
* expensive duplicate checks in the common set-is-disjoint | ||||
* case */ | ||||
x = 1ull << (val & 0x3f); | ||||
if (repeat & x) { | ||||
int k; | ||||
for (k = 0; k < revcount; k++) { | ||||
if (val == revs[k]) | ||||
goto duplicate; | ||||
} | ||||
} | ||||
else repeat |= x; | ||||
if (revcount >= capacity) { | ||||
PyErr_Format(PyExc_OverflowError, | ||||
"bitset size (%d) > capacity (%d)", | ||||
revcount, capacity); | ||||
goto bail; | ||||
} | ||||
revs[revcount++] = (int)val; | ||||
duplicate:; | ||||
} | ||||
if (revcount == 0) { | ||||
ret = PyList_New(0); | ||||
goto done; | ||||
} | ||||
if (revcount == 1) { | ||||
PyObject *obj; | ||||
ret = PyList_New(1); | ||||
if (ret == NULL) | ||||
goto bail; | ||||
obj = PyInt_FromLong(revs[0]); | ||||
if (obj == NULL) | ||||
goto bail; | ||||
PyList_SET_ITEM(ret, 0, obj); | ||||
goto done; | ||||
} | ||||
gca = find_gca_candidates(self, revs, revcount); | ||||
if (gca == NULL) | ||||
goto bail; | ||||
if (PyList_GET_SIZE(gca) <= 1) { | ||||
ret = gca; | ||||
Py_INCREF(gca); | ||||
} | ||||
else ret = find_deepest(self, gca); | ||||
done: | ||||
free(revs); | ||||
Py_XDECREF(gca); | ||||
return ret; | ||||
bail: | ||||
free(revs); | ||||
Py_XDECREF(gca); | ||||
Py_XDECREF(ret); | ||||
return NULL; | ||||
} | ||||
Bryan O'Sullivan
|
r16414 | /* | ||
Mads Kiilerich
|
r21102 | * Given a (possibly overlapping) set of revs, return all the | ||
* common ancestors heads: heads(::args[0] and ::a[1] and ...) | ||||
*/ | ||||
static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args) | ||||
{ | ||||
Mads Kiilerich
|
r21103 | PyObject *ret = NULL; | ||
Mads Kiilerich
|
r21102 | Py_ssize_t argcount, i, len; | ||
bitmask repeat = 0; | ||||
int revcount = 0; | ||||
int *revs; | ||||
argcount = PySequence_Length(args); | ||||
revs = malloc(argcount * sizeof(*revs)); | ||||
if (argcount > 0 && revs == NULL) | ||||
return PyErr_NoMemory(); | ||||
len = index_length(self) - 1; | ||||
for (i = 0; i < argcount; i++) { | ||||
static const int capacity = 24; | ||||
PyObject *obj = PySequence_GetItem(args, i); | ||||
bitmask x; | ||||
long val; | ||||
if (!PyInt_Check(obj)) { | ||||
PyErr_SetString(PyExc_TypeError, | ||||
"arguments must all be ints"); | ||||
goto bail; | ||||
} | ||||
val = PyInt_AsLong(obj); | ||||
if (val == -1) { | ||||
ret = PyList_New(0); | ||||
goto done; | ||||
} | ||||
if (val < 0 || val >= len) { | ||||
PyErr_SetString(PyExc_IndexError, | ||||
"index out of range"); | ||||
goto bail; | ||||
} | ||||
/* this cheesy bloom filter lets us avoid some more | ||||
* expensive duplicate checks in the common set-is-disjoint | ||||
* case */ | ||||
x = 1ull << (val & 0x3f); | ||||
if (repeat & x) { | ||||
int k; | ||||
for (k = 0; k < revcount; k++) { | ||||
if (val == revs[k]) | ||||
goto duplicate; | ||||
} | ||||
} | ||||
else repeat |= x; | ||||
if (revcount >= capacity) { | ||||
PyErr_Format(PyExc_OverflowError, | ||||
"bitset size (%d) > capacity (%d)", | ||||
revcount, capacity); | ||||
goto bail; | ||||
} | ||||
revs[revcount++] = (int)val; | ||||
duplicate:; | ||||
} | ||||
if (revcount == 0) { | ||||
ret = PyList_New(0); | ||||
goto done; | ||||
} | ||||
if (revcount == 1) { | ||||
PyObject *obj; | ||||
ret = PyList_New(1); | ||||
if (ret == NULL) | ||||
goto bail; | ||||
obj = PyInt_FromLong(revs[0]); | ||||
if (obj == NULL) | ||||
goto bail; | ||||
PyList_SET_ITEM(ret, 0, obj); | ||||
goto done; | ||||
} | ||||
Mads Kiilerich
|
r21103 | ret = find_gca_candidates(self, revs, revcount); | ||
if (ret == NULL) | ||||
Mads Kiilerich
|
r21102 | goto bail; | ||
done: | ||||
free(revs); | ||||
return ret; | ||||
bail: | ||||
free(revs); | ||||
Py_XDECREF(ret); | ||||
return NULL; | ||||
} | ||||
/* | ||||
Bryan O'Sullivan
|
r16414 | * Invalidate any trie entries introduced by added revs. | ||
*/ | ||||
static void nt_invalidate_added(indexObject *self, Py_ssize_t start) | ||||
{ | ||||
Py_ssize_t i, len = PyList_GET_SIZE(self->added); | ||||
for (i = start; i < len; i++) { | ||||
PyObject *tuple = PyList_GET_ITEM(self->added, i); | ||||
PyObject *node = PyTuple_GET_ITEM(tuple, 7); | ||||
nt_insert(self, PyString_AS_STRING(node), -1); | ||||
} | ||||
Bryan O'Sullivan
|
r16732 | if (start == 0) | ||
Py_CLEAR(self->added); | ||||
Bryan O'Sullivan
|
r16414 | } | ||
/* | ||||
* Delete a numeric range of revs, which must be at the end of the | ||||
* range, but exclude the sentinel nullid entry. | ||||
*/ | ||||
static int index_slice_del(indexObject *self, PyObject *item) | ||||
Bryan O'Sullivan
|
r16363 | { | ||
Py_ssize_t start, stop, step, slicelength; | ||||
Py_ssize_t length = index_length(self); | ||||
Bryan O'Sullivan
|
r16784 | int ret = 0; | ||
Bryan O'Sullivan
|
r16363 | |||
if (PySlice_GetIndicesEx((PySliceObject*)item, length, | ||||
&start, &stop, &step, &slicelength) < 0) | ||||
return -1; | ||||
if (slicelength <= 0) | ||||
return 0; | ||||
if ((step < 0 && start < stop) || (step > 0 && start > stop)) | ||||
stop = start; | ||||
Bernhard Leiner
|
r7108 | |||
Bryan O'Sullivan
|
r16363 | if (step < 0) { | ||
stop = start + 1; | ||||
start = stop + step*(slicelength - 1) - 1; | ||||
step = -step; | ||||
} | ||||
if (step != 1) { | ||||
PyErr_SetString(PyExc_ValueError, | ||||
"revlog index delete requires step size of 1"); | ||||
return -1; | ||||
Bernhard Leiner
|
r7108 | } | ||
Bryan O'Sullivan
|
r16363 | |||
if (stop != length - 1) { | ||||
PyErr_SetString(PyExc_IndexError, | ||||
"revlog index deletion indices are invalid"); | ||||
return -1; | ||||
} | ||||
Bryan O'Sullivan
|
r16414 | if (start < self->length - 1) { | ||
if (self->nt) { | ||||
Py_ssize_t i; | ||||
for (i = start + 1; i < self->length - 1; i++) { | ||||
const char *node = index_node(self, i); | ||||
if (node) | ||||
nt_insert(self, node, -1); | ||||
} | ||||
if (self->added) | ||||
nt_invalidate_added(self, 0); | ||||
if (self->ntrev > start) | ||||
self->ntrev = (int)start; | ||||
} | ||||
Bryan O'Sullivan
|
r16363 | self->length = start + 1; | ||
Yuya Nishihara
|
r18504 | if (start < self->raw_length) { | ||
if (self->cache) { | ||||
Py_ssize_t i; | ||||
for (i = start; i < self->raw_length; i++) | ||||
Py_CLEAR(self->cache[i]); | ||||
} | ||||
Bryan O'Sullivan
|
r16784 | self->raw_length = start; | ||
Yuya Nishihara
|
r18504 | } | ||
Bryan O'Sullivan
|
r16784 | goto done; | ||
Bernhard Leiner
|
r7108 | } | ||
Bryan O'Sullivan
|
r16414 | if (self->nt) { | ||
nt_invalidate_added(self, start - self->length + 1); | ||||
if (self->ntrev > start) | ||||
self->ntrev = (int)start; | ||||
} | ||||
Bryan O'Sullivan
|
r16784 | if (self->added) | ||
ret = PyList_SetSlice(self->added, start - self->length + 1, | ||||
PyList_GET_SIZE(self->added), NULL); | ||||
done: | ||||
Bryan O'Sullivan
|
r16787 | Py_CLEAR(self->headrevs); | ||
Bryan O'Sullivan
|
r16784 | return ret; | ||
Bryan O'Sullivan
|
r16363 | } | ||
Bryan O'Sullivan
|
r16414 | /* | ||
* Supported ops: | ||||
* | ||||
* slice deletion | ||||
* string assignment (extend node->rev mapping) | ||||
* string deletion (shrink node->rev mapping) | ||||
*/ | ||||
static int index_assign_subscript(indexObject *self, PyObject *item, | ||||
PyObject *value) | ||||
{ | ||||
char *node; | ||||
Py_ssize_t nodelen; | ||||
long rev; | ||||
if (PySlice_Check(item) && value == NULL) | ||||
return index_slice_del(self, item); | ||||
if (node_check(item, &node, &nodelen) == -1) | ||||
return -1; | ||||
if (value == NULL) | ||||
return self->nt ? nt_insert(self, node, -1) : 0; | ||||
rev = PyInt_AsLong(value); | ||||
if (rev > INT_MAX || rev < 0) { | ||||
if (!PyErr_Occurred()) | ||||
PyErr_SetString(PyExc_ValueError, "rev out of range"); | ||||
return -1; | ||||
} | ||||
return nt_insert(self, node, (int)rev); | ||||
} | ||||
/* | ||||
* Find all RevlogNG entries in an index that has inline data. Update | ||||
* the optional "offsets" table with those entries. | ||||
*/ | ||||
Bryan O'Sullivan
|
r16363 | static long inline_scan(indexObject *self, const char **offsets) | ||
{ | ||||
const char *data = PyString_AS_STRING(self->data); | ||||
Matt Mackall
|
r20167 | Py_ssize_t pos = 0; | ||
Py_ssize_t end = PyString_GET_SIZE(self->data); | ||||
Bryan O'Sullivan
|
r16863 | long incr = v1_hdrsize; | ||
Bryan O'Sullivan
|
r16363 | Py_ssize_t len = 0; | ||
Matt Mackall
|
r13254 | |||
Matt Mackall
|
r20167 | while (pos + v1_hdrsize <= end && pos >= 0) { | ||
Bryan O'Sullivan
|
r16363 | uint32_t comp_len; | ||
/* 3rd element of header is length of compressed inline data */ | ||||
Matt Mackall
|
r20167 | comp_len = getbe32(data + pos + 8); | ||
Bryan O'Sullivan
|
r16863 | incr = v1_hdrsize + comp_len; | ||
Bryan O'Sullivan
|
r16363 | if (offsets) | ||
Matt Mackall
|
r20167 | offsets[len] = data + pos; | ||
Bryan O'Sullivan
|
r16363 | len++; | ||
Matt Mackall
|
r20167 | pos += incr; | ||
Bryan O'Sullivan
|
r16363 | } | ||
Matt Mackall
|
r13254 | |||
Matt Mackall
|
r20167 | if (pos != end) { | ||
Bryan O'Sullivan
|
r16363 | if (!PyErr_Occurred()) | ||
PyErr_SetString(PyExc_ValueError, "corrupt index file"); | ||||
return -1; | ||||
} | ||||
return len; | ||||
} | ||||
Matt Mackall
|
r13254 | |||
Bryan O'Sullivan
|
r16572 | static int index_init(indexObject *self, PyObject *args) | ||
Bryan O'Sullivan
|
r16363 | { | ||
Bryan O'Sullivan
|
r16572 | PyObject *data_obj, *inlined_obj; | ||
Py_ssize_t size; | ||||
Chris Jerdonek
|
r20109 | /* Initialize before argument-checking to avoid index_dealloc() crash. */ | ||
self->raw_length = 0; | ||||
self->added = NULL; | ||||
self->cache = NULL; | ||||
self->data = NULL; | ||||
self->headrevs = NULL; | ||||
self->nt = NULL; | ||||
self->offsets = NULL; | ||||
Bryan O'Sullivan
|
r16572 | if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj)) | ||
return -1; | ||||
if (!PyString_Check(data_obj)) { | ||||
PyErr_SetString(PyExc_TypeError, "data is not a string"); | ||||
return -1; | ||||
} | ||||
size = PyString_GET_SIZE(data_obj); | ||||
Bryan O'Sullivan
|
r16363 | self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj); | ||
self->data = data_obj; | ||||
Bryan O'Sullivan
|
r16414 | self->ntlength = self->ntcapacity = 0; | ||
self->ntdepth = self->ntsplits = 0; | ||||
self->ntlookups = self->ntmisses = 0; | ||||
self->ntrev = -1; | ||||
Matt Mackall
|
r16597 | Py_INCREF(self->data); | ||
Bernhard Leiner
|
r7108 | |||
Bryan O'Sullivan
|
r16363 | if (self->inlined) { | ||
long len = inline_scan(self, NULL); | ||||
if (len == -1) | ||||
goto bail; | ||||
self->raw_length = len; | ||||
self->length = len + 1; | ||||
} else { | ||||
Bryan O'Sullivan
|
r16863 | if (size % v1_hdrsize) { | ||
Bryan O'Sullivan
|
r16363 | PyErr_SetString(PyExc_ValueError, "corrupt index file"); | ||
goto bail; | ||||
} | ||||
Bryan O'Sullivan
|
r16863 | self->raw_length = size / v1_hdrsize; | ||
Bryan O'Sullivan
|
r16363 | self->length = self->raw_length + 1; | ||
} | ||||
return 0; | ||||
bail: | ||||
return -1; | ||||
} | ||||
Bryan O'Sullivan
|
r16414 | static PyObject *index_nodemap(indexObject *self) | ||
{ | ||||
Bryan O'Sullivan
|
r16572 | Py_INCREF(self); | ||
Bryan O'Sullivan
|
r16414 | return (PyObject *)self; | ||
} | ||||
Bryan O'Sullivan
|
r16363 | static void index_dealloc(indexObject *self) | ||
{ | ||||
Bryan O'Sullivan
|
r16370 | _index_clearcaches(self); | ||
Chris Jerdonek
|
r20109 | Py_XDECREF(self->data); | ||
Bryan O'Sullivan
|
r16363 | Py_XDECREF(self->added); | ||
PyObject_Del(self); | ||||
} | ||||
static PySequenceMethods index_sequence_methods = { | ||||
(lenfunc)index_length, /* sq_length */ | ||||
0, /* sq_concat */ | ||||
0, /* sq_repeat */ | ||||
(ssizeargfunc)index_get, /* sq_item */ | ||||
Bryan O'Sullivan
|
r16414 | 0, /* sq_slice */ | ||
0, /* sq_ass_item */ | ||||
0, /* sq_ass_slice */ | ||||
(objobjproc)index_contains, /* sq_contains */ | ||||
Bryan O'Sullivan
|
r16363 | }; | ||
static PyMappingMethods index_mapping_methods = { | ||||
(lenfunc)index_length, /* mp_length */ | ||||
Bryan O'Sullivan
|
r16414 | (binaryfunc)index_getitem, /* mp_subscript */ | ||
Bryan O'Sullivan
|
r16363 | (objobjargproc)index_assign_subscript, /* mp_ass_subscript */ | ||
}; | ||||
static PyMethodDef index_methods[] = { | ||||
Bryan O'Sullivan
|
r18988 | {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS, | ||
"return the gca set of the given revs"}, | ||||
Mads Kiilerich
|
r21102 | {"commonancestorsheads", (PyCFunction)index_commonancestorsheads, | ||
METH_VARARGS, | ||||
"return the heads of the common ancestors of the given revs"}, | ||||
Bryan O'Sullivan
|
r16370 | {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS, | ||
"clear the index caches"}, | ||||
Bryan O'Sullivan
|
r16414 | {"get", (PyCFunction)index_m_get, METH_VARARGS, | ||
"get an index entry"}, | ||||
Bryan O'Sullivan
|
r16786 | {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS, | ||
"get head revisions"}, | ||||
Bryan O'Sullivan
|
r16363 | {"insert", (PyCFunction)index_insert, METH_VARARGS, | ||
"insert an index entry"}, | ||||
Bryan O'Sullivan
|
r16665 | {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS, | ||
"match a potentially ambiguous node ID"}, | ||||
Bryan O'Sullivan
|
r16414 | {"stats", (PyCFunction)index_stats, METH_NOARGS, | ||
"stats for the index"}, | ||||
{NULL} /* Sentinel */ | ||||
}; | ||||
static PyGetSetDef index_getset[] = { | ||||
{"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL}, | ||||
Bryan O'Sullivan
|
r16363 | {NULL} /* Sentinel */ | ||
}; | ||||
static PyTypeObject indexType = { | ||||
PyObject_HEAD_INIT(NULL) | ||||
0, /* ob_size */ | ||||
"parsers.index", /* tp_name */ | ||||
sizeof(indexObject), /* tp_basicsize */ | ||||
0, /* tp_itemsize */ | ||||
(destructor)index_dealloc, /* tp_dealloc */ | ||||
0, /* tp_print */ | ||||
0, /* tp_getattr */ | ||||
0, /* tp_setattr */ | ||||
0, /* tp_compare */ | ||||
0, /* tp_repr */ | ||||
0, /* tp_as_number */ | ||||
&index_sequence_methods, /* tp_as_sequence */ | ||||
&index_mapping_methods, /* tp_as_mapping */ | ||||
0, /* tp_hash */ | ||||
0, /* tp_call */ | ||||
0, /* tp_str */ | ||||
0, /* tp_getattro */ | ||||
0, /* tp_setattro */ | ||||
0, /* tp_as_buffer */ | ||||
Py_TPFLAGS_DEFAULT, /* tp_flags */ | ||||
"revlog index", /* tp_doc */ | ||||
0, /* tp_traverse */ | ||||
0, /* tp_clear */ | ||||
0, /* tp_richcompare */ | ||||
0, /* tp_weaklistoffset */ | ||||
0, /* tp_iter */ | ||||
0, /* tp_iternext */ | ||||
index_methods, /* tp_methods */ | ||||
0, /* tp_members */ | ||||
Bryan O'Sullivan
|
r16414 | index_getset, /* tp_getset */ | ||
Bryan O'Sullivan
|
r16363 | 0, /* tp_base */ | ||
0, /* tp_dict */ | ||||
0, /* tp_descr_get */ | ||||
0, /* tp_descr_set */ | ||||
0, /* tp_dictoffset */ | ||||
(initproc)index_init, /* tp_init */ | ||||
0, /* tp_alloc */ | ||||
}; | ||||
/* | ||||
Bryan O'Sullivan
|
r16414 | * returns a tuple of the form (index, index, cache) with elements as | ||
Bryan O'Sullivan
|
r16363 | * follows: | ||
Bernhard Leiner
|
r7108 | * | ||
Bryan O'Sullivan
|
r16414 | * index: an index object that lazily parses RevlogNG records | ||
Bryan O'Sullivan
|
r16363 | * cache: if data is inlined, a tuple (index_file_content, 0), else None | ||
* | ||||
* added complications are for backwards compatibility | ||||
Bernhard Leiner
|
r7108 | */ | ||
Matt Mackall
|
r13254 | static PyObject *parse_index2(PyObject *self, PyObject *args) | ||
Bernhard Leiner
|
r7108 | { | ||
Bryan O'Sullivan
|
r16572 | PyObject *tuple = NULL, *cache = NULL; | ||
Bryan O'Sullivan
|
r16363 | indexObject *idx; | ||
Bryan O'Sullivan
|
r16572 | int ret; | ||
Bryan O'Sullivan
|
r16363 | |||
idx = PyObject_New(indexObject, &indexType); | ||||
if (idx == NULL) | ||||
goto bail; | ||||
Bernhard Leiner
|
r7108 | |||
Bryan O'Sullivan
|
r16572 | ret = index_init(idx, args); | ||
if (ret == -1) | ||||
Bryan O'Sullivan
|
r16363 | goto bail; | ||
Bernhard Leiner
|
r7108 | |||
Bryan O'Sullivan
|
r16363 | if (idx->inlined) { | ||
cache = Py_BuildValue("iO", 0, idx->data); | ||||
if (cache == NULL) | ||||
goto bail; | ||||
Bernhard Leiner
|
r7108 | } else { | ||
cache = Py_None; | ||||
Bryan O'Sullivan
|
r16363 | Py_INCREF(cache); | ||
Bernhard Leiner
|
r7108 | } | ||
Bryan O'Sullivan
|
r16363 | tuple = Py_BuildValue("NN", idx, cache); | ||
if (!tuple) | ||||
goto bail; | ||||
return tuple; | ||||
Bernhard Leiner
|
r7108 | |||
Bryan O'Sullivan
|
r16363 | bail: | ||
Py_XDECREF(idx); | ||||
Bernhard Leiner
|
r7108 | Py_XDECREF(cache); | ||
Bryan O'Sullivan
|
r16363 | Py_XDECREF(tuple); | ||
Bernhard Leiner
|
r7108 | return NULL; | ||
} | ||||
Bryan O'Sullivan
|
r6389 | static char parsers_doc[] = "Efficient content parsing."; | ||
Adrian Buehlmann
|
r17606 | PyObject *encodedir(PyObject *self, PyObject *args); | ||
Bryan O'Sullivan
|
r17616 | PyObject *pathencode(PyObject *self, PyObject *args); | ||
Bryan O'Sullivan
|
r18430 | PyObject *lowerencode(PyObject *self, PyObject *args); | ||
Adrian Buehlmann
|
r17606 | |||
Bryan O'Sullivan
|
r6389 | static PyMethodDef methods[] = { | ||
Bryan O'Sullivan
|
r16955 | {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"}, | ||
Bryan O'Sullivan
|
r6389 | {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"}, | ||
Matt Mackall
|
r7093 | {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"}, | ||
Matt Mackall
|
r13254 | {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"}, | ||
Adrian Buehlmann
|
r17606 | {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"}, | ||
Bryan O'Sullivan
|
r17616 | {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"}, | ||
Bryan O'Sullivan
|
r18430 | {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"}, | ||
Bryan O'Sullivan
|
r6389 | {NULL, NULL} | ||
}; | ||||
Bryan O'Sullivan
|
r18900 | void dirs_module_init(PyObject *mod); | ||
Bryan O'Sullivan
|
r16363 | static void module_init(PyObject *mod) | ||
{ | ||||
Chris Jerdonek
|
r20742 | /* This module constant has two purposes. First, it lets us unit test | ||
* the ImportError raised without hard-coding any error text. This | ||||
* means we can change the text in the future without breaking tests, | ||||
* even across changesets without a recompile. Second, its presence | ||||
* can be used to determine whether the version-checking logic is | ||||
* present, which also helps in testing across changesets without a | ||||
* recompile. Note that this means the pure-Python version of parsers | ||||
* should not have this module constant. */ | ||||
PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext); | ||||
Bryan O'Sullivan
|
r18900 | dirs_module_init(mod); | ||
Adrian Buehlmann
|
r16604 | indexType.tp_new = PyType_GenericNew; | ||
Siddharth Agarwal
|
r21809 | if (PyType_Ready(&indexType) < 0 || | ||
PyType_Ready(&dirstateTupleType) < 0) | ||||
Bryan O'Sullivan
|
r16363 | return; | ||
Py_INCREF(&indexType); | ||||
PyModule_AddObject(mod, "index", (PyObject *)&indexType); | ||||
Siddharth Agarwal
|
r21809 | Py_INCREF(&dirstateTupleType); | ||
PyModule_AddObject(mod, "dirstatetuple", | ||||
(PyObject *)&dirstateTupleType); | ||||
Bryan O'Sullivan
|
r16363 | |||
nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0, | ||||
-1, -1, -1, -1, nullid, 20); | ||||
if (nullentry) | ||||
PyObject_GC_UnTrack(nullentry); | ||||
} | ||||
Chris Jerdonek
|
r20742 | static int check_python_version(void) | ||
{ | ||||
PyObject *sys = PyImport_ImportModule("sys"); | ||||
long hexversion = PyInt_AsLong(PyObject_GetAttrString(sys, "hexversion")); | ||||
/* sys.hexversion is a 32-bit number by default, so the -1 case | ||||
* should only occur in unusual circumstances (e.g. if sys.hexversion | ||||
* is manually set to an invalid value). */ | ||||
if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) { | ||||
PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension " | ||||
"modules were compiled with Python " PY_VERSION ", but " | ||||
"Mercurial is currently using Python with sys.hexversion=%ld: " | ||||
"Python %s\n at: %s", versionerrortext, hexversion, | ||||
Py_GetVersion(), Py_GetProgramFullPath()); | ||||
return -1; | ||||
} | ||||
return 0; | ||||
} | ||||
Renato Cunha
|
r11361 | #ifdef IS_PY3K | ||
static struct PyModuleDef parsers_module = { | ||||
PyModuleDef_HEAD_INIT, | ||||
"parsers", | ||||
parsers_doc, | ||||
-1, | ||||
methods | ||||
}; | ||||
PyMODINIT_FUNC PyInit_parsers(void) | ||||
{ | ||||
Matt Harbison
|
r20797 | PyObject *mod; | ||
Chris Jerdonek
|
r20742 | if (check_python_version() == -1) | ||
return; | ||||
Matt Harbison
|
r20797 | mod = PyModule_Create(&parsers_module); | ||
Bryan O'Sullivan
|
r16363 | module_init(mod); | ||
return mod; | ||||
Renato Cunha
|
r11361 | } | ||
#else | ||||
Bryan O'Sullivan
|
r6389 | PyMODINIT_FUNC initparsers(void) | ||
{ | ||||
Matt Harbison
|
r20797 | PyObject *mod; | ||
Chris Jerdonek
|
r20742 | if (check_python_version() == -1) | ||
return; | ||||
Matt Harbison
|
r20797 | mod = Py_InitModule3("parsers", methods, parsers_doc); | ||
Bryan O'Sullivan
|
r16363 | module_init(mod); | ||
Bryan O'Sullivan
|
r6389 | } | ||
Renato Cunha
|
r11361 | #endif | ||