##// END OF EJS Templates
plan9: initial support for plan 9 from bell labs...
plan9: initial support for plan 9 from bell labs This patch contains support for Plan 9 from Bell Labs. A README is provided in contrib/plan9 which describes the port in greater detail. A new extension is also provided named factotum which permits the factotum(4) authentication agent to provide credentials for HTTP repositories. This extension is also applicable to other POSIX platforms which make use of Plan 9 from User Space (aka plan9ports).

File last commit:

r16370:28bb4daf default
r16383:f5dd179b default
Show More
parsers.c
740 lines | 16.9 KiB | text/x-c | CLexer
/*
parsers.c - efficient content parsing
Copyright 2008 Matt Mackall <mpm@selenic.com> and others
This software may be used and distributed according to the terms of
the GNU General Public License, incorporated herein by reference.
*/
#include <Python.h>
#include <ctype.h>
#include <string.h>
#include "util.h"
static int hexdigit(char c)
{
if (c >= '0' && c <= '9')
return c - '0';
if (c >= 'a' && c <= 'f')
return c - 'a' + 10;
if (c >= 'A' && c <= 'F')
return c - 'A' + 10;
PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
return 0;
}
/*
* Turn a hex-encoded string into binary.
*/
static PyObject *unhexlify(const char *str, int len)
{
PyObject *ret;
const char *c;
char *d;
ret = PyBytes_FromStringAndSize(NULL, len / 2);
if (!ret)
return NULL;
d = PyBytes_AsString(ret);
for (c = str; c < str + len;) {
int hi = hexdigit(*c++);
int lo = hexdigit(*c++);
*d++ = (hi << 4) | lo;
}
return ret;
}
/*
* This code assumes that a manifest is stitched together with newline
* ('\n') characters.
*/
static PyObject *parse_manifest(PyObject *self, PyObject *args)
{
PyObject *mfdict, *fdict;
char *str, *cur, *start, *zero;
int len;
if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
&PyDict_Type, &mfdict,
&PyDict_Type, &fdict,
&str, &len))
goto quit;
for (start = cur = str, zero = NULL; cur < str + len; cur++) {
PyObject *file = NULL, *node = NULL;
PyObject *flags = NULL;
int nlen;
if (!*cur) {
zero = cur;
continue;
}
else if (*cur != '\n')
continue;
if (!zero) {
PyErr_SetString(PyExc_ValueError,
"manifest entry has no separator");
goto quit;
}
file = PyBytes_FromStringAndSize(start, zero - start);
if (!file)
goto bail;
nlen = cur - zero - 1;
node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
if (!node)
goto bail;
if (nlen > 40) {
flags = PyBytes_FromStringAndSize(zero + 41,
nlen - 40);
if (!flags)
goto bail;
if (PyDict_SetItem(fdict, file, flags) == -1)
goto bail;
}
if (PyDict_SetItem(mfdict, file, node) == -1)
goto bail;
start = cur + 1;
zero = NULL;
Py_XDECREF(flags);
Py_XDECREF(node);
Py_XDECREF(file);
continue;
bail:
Py_XDECREF(flags);
Py_XDECREF(node);
Py_XDECREF(file);
goto quit;
}
if (len > 0 && *(cur - 1) != '\n') {
PyErr_SetString(PyExc_ValueError,
"manifest contains trailing garbage");
goto quit;
}
Py_INCREF(Py_None);
return Py_None;
quit:
return NULL;
}
#ifdef _WIN32
#ifdef _MSC_VER
/* msvc 6.0 has problems */
#define inline __inline
typedef unsigned long uint32_t;
typedef unsigned __int64 uint64_t;
#else
#include <stdint.h>
#endif
static uint32_t ntohl(uint32_t x)
{
return ((x & 0x000000ffUL) << 24) |
((x & 0x0000ff00UL) << 8) |
((x & 0x00ff0000UL) >> 8) |
((x & 0xff000000UL) >> 24);
}
#else
/* not windows */
#include <sys/types.h>
#if defined __BEOS__ && !defined __HAIKU__
#include <ByteOrder.h>
#else
#include <arpa/inet.h>
#endif
#include <inttypes.h>
#endif
static PyObject *parse_dirstate(PyObject *self, PyObject *args)
{
PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
PyObject *fname = NULL, *cname = NULL, *entry = NULL;
char *str, *cur, *end, *cpos;
int state, mode, size, mtime;
unsigned int flen;
int len;
uint32_t decode[4]; /* for alignment */
if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
&PyDict_Type, &dmap,
&PyDict_Type, &cmap,
&str, &len))
goto quit;
/* read parents */
if (len < 40)
goto quit;
parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
if (!parents)
goto quit;
/* read filenames */
cur = str + 40;
end = str + len;
while (cur < end - 17) {
/* unpack header */
state = *cur;
memcpy(decode, cur + 1, 16);
mode = ntohl(decode[0]);
size = ntohl(decode[1]);
mtime = ntohl(decode[2]);
flen = ntohl(decode[3]);
cur += 17;
if (cur + flen > end || cur + flen < cur) {
PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
goto quit;
}
entry = Py_BuildValue("ciii", state, mode, size, mtime);
if (!entry)
goto quit;
PyObject_GC_UnTrack(entry); /* don't waste time with this */
cpos = memchr(cur, 0, flen);
if (cpos) {
fname = PyBytes_FromStringAndSize(cur, cpos - cur);
cname = PyBytes_FromStringAndSize(cpos + 1,
flen - (cpos - cur) - 1);
if (!fname || !cname ||
PyDict_SetItem(cmap, fname, cname) == -1 ||
PyDict_SetItem(dmap, fname, entry) == -1)
goto quit;
Py_DECREF(cname);
} else {
fname = PyBytes_FromStringAndSize(cur, flen);
if (!fname ||
PyDict_SetItem(dmap, fname, entry) == -1)
goto quit;
}
cur += flen;
Py_DECREF(fname);
Py_DECREF(entry);
fname = cname = entry = NULL;
}
ret = parents;
Py_INCREF(ret);
quit:
Py_XDECREF(fname);
Py_XDECREF(cname);
Py_XDECREF(entry);
Py_XDECREF(parents);
return ret;
}
/*
* A list-like object that decodes the contents of a RevlogNG index
* file on demand. It has limited support for insert and delete at the
* last element before the end. The last entry is always a sentinel
* nullid.
*/
typedef struct {
PyObject_HEAD
/* Type-specific fields go here. */
PyObject *data; /* raw bytes of index */
PyObject **cache; /* cached tuples */
const char **offsets; /* populated on demand */
Py_ssize_t raw_length; /* original number of elements */
Py_ssize_t length; /* current number of elements */
PyObject *added; /* populated on demand */
int inlined;
} indexObject;
static Py_ssize_t index_length(indexObject *self)
{
if (self->added == NULL)
return self->length;
return self->length + PyList_GET_SIZE(self->added);
}
static PyObject *nullentry;
static long inline_scan(indexObject *self, const char **offsets);
#if LONG_MAX == 0x7fffffffL
static const char *tuple_format = "Kiiiiiis#";
#else
static const char *tuple_format = "kiiiiiis#";
#endif
/* RevlogNG format (all in big endian, data may be inlined):
* 6 bytes: offset
* 2 bytes: flags
* 4 bytes: compressed length
* 4 bytes: uncompressed length
* 4 bytes: base revision
* 4 bytes: link revision
* 4 bytes: parent 1 revision
* 4 bytes: parent 2 revision
* 32 bytes: nodeid (only 20 bytes used)
*/
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
uint32_t decode[8]; /* to enforce alignment with inline data */
uint64_t offset_flags;
int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
const char *c_node_id;
const char *data;
Py_ssize_t length = index_length(self);
PyObject *entry;
if (pos >= length) {
PyErr_SetString(PyExc_IndexError, "revlog index out of range");
return NULL;
}
if (pos == length - 1) {
Py_INCREF(nullentry);
return nullentry;
}
if (pos >= self->length - 1) {
PyObject *obj;
obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
Py_INCREF(obj);
return obj;
}
if (self->cache) {
if (self->cache[pos]) {
Py_INCREF(self->cache[pos]);
return self->cache[pos];
}
} else {
self->cache = calloc(self->raw_length, sizeof(PyObject *));
if (self->cache == NULL)
return PyErr_NoMemory();
}
if (self->inlined && pos > 0) {
if (self->offsets == NULL) {
self->offsets = malloc(self->raw_length *
sizeof(*self->offsets));
if (self->offsets == NULL)
return PyErr_NoMemory();
inline_scan(self, self->offsets);
}
data = self->offsets[pos];
} else
data = PyString_AS_STRING(self->data) + pos * 64;
memcpy(decode, data, 8 * sizeof(uint32_t));
offset_flags = ntohl(decode[1]);
if (pos == 0) /* mask out version number for the first entry */
offset_flags &= 0xFFFF;
else {
uint32_t offset_high = ntohl(decode[0]);
offset_flags |= ((uint64_t)offset_high) << 32;
}
comp_len = ntohl(decode[2]);
uncomp_len = ntohl(decode[3]);
base_rev = ntohl(decode[4]);
link_rev = ntohl(decode[5]);
parent_1 = ntohl(decode[6]);
parent_2 = ntohl(decode[7]);
c_node_id = data + 32;
entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
uncomp_len, base_rev, link_rev,
parent_1, parent_2, c_node_id, 20);
if (entry)
PyObject_GC_UnTrack(entry);
self->cache[pos] = entry;
Py_INCREF(entry);
return entry;
}
static PyObject *index_insert(indexObject *self, PyObject *args)
{
PyObject *obj, *node;
long offset;
Py_ssize_t len;
if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
return NULL;
if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
PyErr_SetString(PyExc_ValueError, "8-tuple required");
return NULL;
}
node = PyTuple_GET_ITEM(obj, 7);
if (!PyString_Check(node) || PyString_GET_SIZE(node) != 20) {
PyErr_SetString(PyExc_ValueError,
"20-byte hash required as last element");
return NULL;
}
len = index_length(self);
if (offset < 0)
offset += len;
if (offset != len - 1) {
PyErr_SetString(PyExc_IndexError,
"insert only supported at index -1");
return NULL;
}
if (self->added == NULL) {
self->added = PyList_New(0);
if (self->added == NULL)
return NULL;
}
if (PyList_Append(self->added, obj) == -1)
return NULL;
Py_RETURN_NONE;
}
static void _index_clearcaches(indexObject *self)
{
if (self->cache) {
Py_ssize_t i;
for (i = 0; i < self->raw_length; i++) {
Py_XDECREF(self->cache[i]);
self->cache[i] = NULL;
}
free(self->cache);
self->cache = NULL;
}
if (self->offsets) {
free(self->offsets);
self->offsets = NULL;
}
}
static PyObject *index_clearcaches(indexObject *self)
{
_index_clearcaches(self);
Py_RETURN_NONE;
}
static int index_assign_subscript(indexObject *self, PyObject *item,
PyObject *value)
{
Py_ssize_t start, stop, step, slicelength;
Py_ssize_t length = index_length(self);
if (!PySlice_Check(item) || value != NULL) {
PyErr_SetString(PyExc_TypeError,
"revlog index only supports slice deletion");
return -1;
}
if (PySlice_GetIndicesEx((PySliceObject*)item, length,
&start, &stop, &step, &slicelength) < 0)
return -1;
if (slicelength <= 0)
return 0;
if ((step < 0 && start < stop) || (step > 0 && start > stop))
stop = start;
if (step < 0) {
stop = start + 1;
start = stop + step*(slicelength - 1) - 1;
step = -step;
}
if (step != 1) {
PyErr_SetString(PyExc_ValueError,
"revlog index delete requires step size of 1");
return -1;
}
if (stop != length - 1) {
PyErr_SetString(PyExc_IndexError,
"revlog index deletion indices are invalid");
return -1;
}
if (start < self->length) {
self->length = start + 1;
if (self->added) {
Py_DECREF(self->added);
self->added = NULL;
}
return 0;
}
return PyList_SetSlice(self->added, start - self->length + 1,
PyList_GET_SIZE(self->added),
NULL);
}
static long inline_scan(indexObject *self, const char **offsets)
{
const char *data = PyString_AS_STRING(self->data);
const char *end = data + PyString_GET_SIZE(self->data);
const long hdrsize = 64;
long incr = hdrsize;
Py_ssize_t len = 0;
while (data + hdrsize <= end) {
uint32_t comp_len;
const char *old_data;
/* 3rd element of header is length of compressed inline data */
memcpy(&comp_len, data + 8, sizeof(uint32_t));
incr = hdrsize + ntohl(comp_len);
if (incr < hdrsize)
break;
if (offsets)
offsets[len] = data;
len++;
old_data = data;
data += incr;
if (data <= old_data)
break;
}
if (data != end && data + hdrsize != end) {
if (!PyErr_Occurred())
PyErr_SetString(PyExc_ValueError, "corrupt index file");
return -1;
}
return len;
}
static int index_real_init(indexObject *self, const char *data, int size,
PyObject *inlined_obj, PyObject *data_obj)
{
self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
self->data = data_obj;
self->cache = NULL;
self->added = NULL;
self->offsets = NULL;
Py_INCREF(self->data);
if (self->inlined) {
long len = inline_scan(self, NULL);
if (len == -1)
goto bail;
self->raw_length = len;
self->length = len + 1;
} else {
if (size % 64) {
PyErr_SetString(PyExc_ValueError, "corrupt index file");
goto bail;
}
self->raw_length = size / 64;
self->length = self->raw_length + 1;
}
return 0;
bail:
return -1;
}
static int index_init(indexObject *self, PyObject *args, PyObject *kwds)
{
const char *data;
int size;
PyObject *inlined_obj;
if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
return -1;
return index_real_init(self, data, size, inlined_obj,
PyTuple_GET_ITEM(args, 0));
}
static void index_dealloc(indexObject *self)
{
_index_clearcaches(self);
Py_DECREF(self->data);
Py_XDECREF(self->added);
PyObject_Del(self);
}
static PySequenceMethods index_sequence_methods = {
(lenfunc)index_length, /* sq_length */
0, /* sq_concat */
0, /* sq_repeat */
(ssizeargfunc)index_get, /* sq_item */
};
static PyMappingMethods index_mapping_methods = {
(lenfunc)index_length, /* mp_length */
NULL, /* mp_subscript */
(objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};
static PyMethodDef index_methods[] = {
{"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
"clear the index caches"},
{"insert", (PyCFunction)index_insert, METH_VARARGS,
"insert an index entry"},
{NULL} /* Sentinel */
};
static PyTypeObject indexType = {
PyObject_HEAD_INIT(NULL)
0, /* ob_size */
"parsers.index", /* tp_name */
sizeof(indexObject), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)index_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
&index_sequence_methods, /* tp_as_sequence */
&index_mapping_methods, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
"revlog index", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
index_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)index_init, /* tp_init */
0, /* tp_alloc */
PyType_GenericNew, /* tp_new */
};
/*
* returns a tuple of the form (index, None, cache) with elements as
* follows:
*
* index: an index object that lazily parses the RevlogNG records
* cache: if data is inlined, a tuple (index_file_content, 0), else None
*
* added complications are for backwards compatibility
*/
static PyObject *parse_index2(PyObject *self, PyObject *args)
{
const char *data;
int size, ret;
PyObject *inlined_obj, *tuple = NULL, *cache = NULL;
indexObject *idx;
if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
return NULL;
idx = PyObject_New(indexObject, &indexType);
if (idx == NULL)
goto bail;
ret = index_real_init(idx, data, size, inlined_obj,
PyTuple_GET_ITEM(args, 0));
if (ret)
goto bail;
if (idx->inlined) {
Py_INCREF(idx->data);
cache = Py_BuildValue("iO", 0, idx->data);
if (cache == NULL)
goto bail;
} else {
cache = Py_None;
Py_INCREF(cache);
}
tuple = Py_BuildValue("NN", idx, cache);
if (!tuple)
goto bail;
return tuple;
bail:
Py_XDECREF(idx);
Py_XDECREF(cache);
Py_XDECREF(tuple);
return NULL;
}
static char parsers_doc[] = "Efficient content parsing.";
static PyMethodDef methods[] = {
{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
{"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
{"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
{NULL, NULL}
};
static void module_init(PyObject *mod)
{
static const char nullid[20];
if (PyType_Ready(&indexType) < 0)
return;
Py_INCREF(&indexType);
PyModule_AddObject(mod, "index", (PyObject *)&indexType);
nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
-1, -1, -1, -1, nullid, 20);
if (nullentry)
PyObject_GC_UnTrack(nullentry);
}
#ifdef IS_PY3K
static struct PyModuleDef parsers_module = {
PyModuleDef_HEAD_INIT,
"parsers",
parsers_doc,
-1,
methods
};
PyMODINIT_FUNC PyInit_parsers(void)
{
PyObject *mod = PyModule_Create(&parsers_module);
module_init(mod);
return mod;
}
#else
PyMODINIT_FUNC initparsers(void)
{
PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
module_init(mod);
}
#endif