parsers.c
169 lines
| 3.0 KiB
| text/x-c
|
CLexer
/ mercurial / parsers.c
Bryan O'Sullivan
|
/*
 parsers.c - efficient content parsing

 Copyright 2008 Matt Mackall <mpm@selenic.com> and others

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.
*/
#include <Python.h> | ||||
#include <ctype.h> | ||||
#include <string.h> | ||||
/*
 * Convert a single ASCII hexadecimal digit to its numeric value.
 *
 * Accepts '0'-'9', 'A'-'F' and 'a'-'f'. Returns the digit's value
 * (0..15), or -1 if c is not a hexadecimal digit.
 */
static int hexdigit(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	return -1;
}
/* | ||||
* Turn a hex-encoded string into binary. | ||||
*/ | ||||
static PyObject *unhexlify(const char *str, int len) | ||||
{ | ||||
PyObject *ret = NULL; | ||||
Benoit Boissinot
|
r6395 | const char *c; | ||
char *d; | ||||
Bryan O'Sullivan
|
r6389 | |||
if (len % 2) { | ||||
PyErr_SetString(PyExc_ValueError, | ||||
"input is not even in length"); | ||||
goto bail; | ||||
} | ||||
ret = PyString_FromStringAndSize(NULL, len / 2); | ||||
if (!ret) | ||||
goto bail; | ||||
d = PyString_AsString(ret); | ||||
if (!d) | ||||
goto bail; | ||||
for (c = str; c < str + len;) { | ||||
int hi = hexdigit(*c++); | ||||
int lo = hexdigit(*c++); | ||||
if (hi == -1 || lo == -1) { | ||||
PyErr_SetString(PyExc_ValueError, | ||||
"input contains non-hex character"); | ||||
goto bail; | ||||
} | ||||
*d++ = (hi << 4) | lo; | ||||
} | ||||
goto done; | ||||
bail: | ||||
Py_XDECREF(ret); | ||||
ret = NULL; | ||||
done: | ||||
return ret; | ||||
} | ||||
/* | ||||
* This code assumes that a manifest is stitched together with newline | ||||
* ('\n') characters. | ||||
*/ | ||||
static PyObject *parse_manifest(PyObject *self, PyObject *args) | ||||
{ | ||||
PyObject *mfdict, *fdict; | ||||
char *str, *cur, *start, *zero; | ||||
int len; | ||||
if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest", | ||||
&PyDict_Type, &mfdict, | ||||
&PyDict_Type, &fdict, | ||||
&str, &len)) | ||||
goto quit; | ||||
for (start = cur = str, zero = NULL; cur < str + len; cur++) { | ||||
PyObject *file = NULL, *node = NULL; | ||||
PyObject *flags = NULL; | ||||
int nlen; | ||||
if (!*cur) { | ||||
zero = cur; | ||||
continue; | ||||
} | ||||
else if (*cur != '\n') | ||||
continue; | ||||
if (!zero) { | ||||
PyErr_SetString(PyExc_ValueError, | ||||
"manifest entry has no separator"); | ||||
goto quit; | ||||
} | ||||
file = PyString_FromStringAndSize(start, zero - start); | ||||
if (!file) | ||||
goto bail; | ||||
nlen = cur - zero - 1; | ||||
node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen); | ||||
if (!node) | ||||
goto bail; | ||||
if (nlen > 40) { | ||||
PyObject *flags; | ||||
flags = PyString_FromStringAndSize(zero + 41, | ||||
nlen - 40); | ||||
if (!flags) | ||||
goto bail; | ||||
if (PyDict_SetItem(fdict, file, flags) == -1) | ||||
goto bail; | ||||
} | ||||
if (PyDict_SetItem(mfdict, file, node) == -1) | ||||
goto bail; | ||||
start = cur + 1; | ||||
zero = NULL; | ||||
Py_XDECREF(flags); | ||||
Py_XDECREF(node); | ||||
Py_XDECREF(file); | ||||
continue; | ||||
bail: | ||||
Py_XDECREF(flags); | ||||
Py_XDECREF(node); | ||||
Py_XDECREF(file); | ||||
goto quit; | ||||
} | ||||
if (len > 0 && *(cur - 1) != '\n') { | ||||
PyErr_SetString(PyExc_ValueError, | ||||
"manifest contains trailing garbage"); | ||||
goto quit; | ||||
} | ||||
Py_INCREF(Py_None); | ||||
return Py_None; | ||||
quit: | ||||
return NULL; | ||||
} | ||||
static char parsers_doc[] = "Efficient content parsing."; | ||||
static PyMethodDef methods[] = { | ||||
{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"}, | ||||
{NULL, NULL} | ||||
}; | ||||
PyMODINIT_FUNC initparsers(void) | ||||
{ | ||||
Py_InitModule3("parsers", methods, parsers_doc); | ||||
} | ||||