parsers.c
248 lines
| 5.1 KiB
| text/x-c
|
CLexer
/ mercurial / parsers.c
|
r6389 | /* | ||
parsers.c - efficient content parsing | ||||
Copyright 2008 Matt Mackall <mpm@selenic.com> and others | ||||
This software may be used and distributed according to the terms of | ||||
the GNU General Public License, incorporated herein by reference. | ||||
*/ | ||||
#include <Python.h> | ||||
#include <ctype.h> | ||||
#include <string.h> | ||||
static int hexdigit(char c) | ||||
{ | ||||
if (c >= '0' && c <= '9') | ||||
return c - '0'; | ||||
|
r7092 | if (c >= 'a' && c <= 'f') | ||
return c - 'a' + 10; | ||||
|
r6389 | if (c >= 'A' && c <= 'F') | ||
return c - 'A' + 10; | ||||
|
r7092 | PyErr_SetString(PyExc_ValueError, "input contains non-hex character"); | ||
return 0; | ||||
|
r6389 | } | ||
/* | ||||
* Turn a hex-encoded string into binary. | ||||
*/ | ||||
static PyObject *unhexlify(const char *str, int len) | ||||
{ | ||||
|
r7092 | PyObject *ret; | ||
|
r6395 | const char *c; | ||
char *d; | ||||
|
r6389 | |||
ret = PyString_FromStringAndSize(NULL, len / 2); | ||||
if (!ret) | ||||
|
r7092 | return NULL; | ||
|
r6389 | |||
|
r7092 | d = PyString_AS_STRING(ret); | ||
|
r6389 | for (c = str; c < str + len;) { | ||
int hi = hexdigit(*c++); | ||||
int lo = hexdigit(*c++); | ||||
*d++ = (hi << 4) | lo; | ||||
} | ||||
|
r7091 | |||
|
r6389 | return ret; | ||
} | ||||
/* | ||||
* This code assumes that a manifest is stitched together with newline | ||||
* ('\n') characters. | ||||
*/ | ||||
static PyObject *parse_manifest(PyObject *self, PyObject *args) | ||||
{ | ||||
PyObject *mfdict, *fdict; | ||||
char *str, *cur, *start, *zero; | ||||
int len; | ||||
if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest", | ||||
&PyDict_Type, &mfdict, | ||||
&PyDict_Type, &fdict, | ||||
&str, &len)) | ||||
goto quit; | ||||
for (start = cur = str, zero = NULL; cur < str + len; cur++) { | ||||
PyObject *file = NULL, *node = NULL; | ||||
PyObject *flags = NULL; | ||||
int nlen; | ||||
if (!*cur) { | ||||
zero = cur; | ||||
continue; | ||||
} | ||||
else if (*cur != '\n') | ||||
continue; | ||||
if (!zero) { | ||||
PyErr_SetString(PyExc_ValueError, | ||||
"manifest entry has no separator"); | ||||
goto quit; | ||||
} | ||||
file = PyString_FromStringAndSize(start, zero - start); | ||||
if (!file) | ||||
goto bail; | ||||
nlen = cur - zero - 1; | ||||
node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen); | ||||
if (!node) | ||||
goto bail; | ||||
if (nlen > 40) { | ||||
PyObject *flags; | ||||
flags = PyString_FromStringAndSize(zero + 41, | ||||
nlen - 40); | ||||
if (!flags) | ||||
goto bail; | ||||
if (PyDict_SetItem(fdict, file, flags) == -1) | ||||
goto bail; | ||||
} | ||||
if (PyDict_SetItem(mfdict, file, node) == -1) | ||||
goto bail; | ||||
start = cur + 1; | ||||
zero = NULL; | ||||
Py_XDECREF(flags); | ||||
Py_XDECREF(node); | ||||
Py_XDECREF(file); | ||||
continue; | ||||
bail: | ||||
Py_XDECREF(flags); | ||||
Py_XDECREF(node); | ||||
Py_XDECREF(file); | ||||
goto quit; | ||||
} | ||||
if (len > 0 && *(cur - 1) != '\n') { | ||||
PyErr_SetString(PyExc_ValueError, | ||||
"manifest contains trailing garbage"); | ||||
goto quit; | ||||
} | ||||
Py_INCREF(Py_None); | ||||
return Py_None; | ||||
quit: | ||||
return NULL; | ||||
} | ||||
|
r7093 | #ifdef _WIN32 | ||
# ifdef _MSC_VER | ||||
/* msvc 6.0 has problems */ | ||||
# define inline __inline | ||||
typedef unsigned long uint32_t; | ||||
# else | ||||
# include <stdint.h> | ||||
# endif | ||||
static uint32_t ntohl(uint32_t x) | ||||
{ | ||||
return ((x & 0x000000ffUL) << 24) | | ||||
((x & 0x0000ff00UL) << 8) | | ||||
((x & 0x00ff0000UL) >> 8) | | ||||
((x & 0xff000000UL) >> 24); | ||||
} | ||||
#else | ||||
/* not windows */ | ||||
# include <sys/types.h> | ||||
# if defined __BEOS__ && !defined __HAIKU__ | ||||
# include <ByteOrder.h> | ||||
# else | ||||
# include <arpa/inet.h> | ||||
# endif | ||||
# include <inttypes.h> | ||||
#endif | ||||
static PyObject *parse_dirstate(PyObject *self, PyObject *args) | ||||
{ | ||||
PyObject *dmap, *cmap, *parents = NULL, *ret = NULL; | ||||
PyObject *fname = NULL, *cname = NULL, *entry = NULL; | ||||
char *str, *cur, *end, *cpos; | ||||
int state, mode, size, mtime, flen; | ||||
int len; | ||||
char decode[16]; /* for alignment */ | ||||
if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate", | ||||
&PyDict_Type, &dmap, | ||||
&PyDict_Type, &cmap, | ||||
&str, &len)) | ||||
goto quit; | ||||
/* read parents */ | ||||
if (len < 40) | ||||
goto quit; | ||||
parents = Py_BuildValue("s#s#", str, 20, str + 20, 20); | ||||
if (!parents) | ||||
goto quit; | ||||
/* read filenames */ | ||||
cur = str + 40; | ||||
end = str + len; | ||||
while (cur < end - 17) { | ||||
/* unpack header */ | ||||
state = *cur; | ||||
memcpy(decode, cur + 1, 16); | ||||
mode = ntohl(*(uint32_t *)(decode)); | ||||
size = ntohl(*(uint32_t *)(decode + 4)); | ||||
mtime = ntohl(*(uint32_t *)(decode + 8)); | ||||
flen = ntohl(*(uint32_t *)(decode + 12)); | ||||
cur += 17; | ||||
if (cur + flen > end) | ||||
goto quit; | ||||
entry = Py_BuildValue("ciii", state, mode, size, mtime); | ||||
PyObject_GC_UnTrack(entry); /* don't waste time with this */ | ||||
if (!entry) | ||||
goto quit; | ||||
cpos = memchr(cur, 0, flen); | ||||
if (cpos) { | ||||
fname = PyString_FromStringAndSize(cur, cpos - cur); | ||||
cname = PyString_FromStringAndSize(cpos + 1, | ||||
flen - (cpos - cur) - 1); | ||||
if (!fname || !cname || | ||||
PyDict_SetItem(cmap, fname, cname) == -1 || | ||||
PyDict_SetItem(dmap, fname, entry) == -1) | ||||
goto quit; | ||||
Py_DECREF(cname); | ||||
} else { | ||||
fname = PyString_FromStringAndSize(cur, flen); | ||||
if (!fname || | ||||
PyDict_SetItem(dmap, fname, entry) == -1) | ||||
goto quit; | ||||
} | ||||
cur += flen; | ||||
Py_DECREF(fname); | ||||
Py_DECREF(entry); | ||||
fname = cname = entry = NULL; | ||||
} | ||||
ret = parents; | ||||
Py_INCREF(ret); | ||||
quit: | ||||
Py_XDECREF(fname); | ||||
Py_XDECREF(cname); | ||||
Py_XDECREF(entry); | ||||
Py_XDECREF(parents); | ||||
return ret; | ||||
} | ||||
|
r6389 | static char parsers_doc[] = "Efficient content parsing."; | ||
static PyMethodDef methods[] = { | ||||
{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"}, | ||||
|
r7093 | {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"}, | ||
|
r6389 | {NULL, NULL} | ||
}; | ||||
PyMODINIT_FUNC initparsers(void) | ||||
{ | ||||
Py_InitModule3("parsers", methods, parsers_doc); | ||||
} | ||||