manifest.c
939 lines
| 24.5 KiB
| text/x-c
|
CLexer
/ mercurial / manifest.c
Augie Fackler
|
r24214 | /* | ||
* manifest.c - manifest type that does on-demand parsing. | ||||
* | ||||
* Copyright 2015, Google Inc. | ||||
* | ||||
* This software may be used and distributed according to the terms of | ||||
* the GNU General Public License, incorporated herein by reference. | ||||
*/ | ||||
Drew Gottlieb
|
r24352 | #include <Python.h> | ||
Augie Fackler
|
r24214 | #include <assert.h> | ||
#include <string.h> | ||||
#include <stdlib.h> | ||||
Matt Mackall
|
r24441 | #include "util.h" | ||
Augie Fackler
|
r24214 | #define DEFAULT_LINES 100000 | ||
typedef struct { | ||||
char *start; | ||||
Py_ssize_t len; /* length of line including terminal newline */ | ||||
char hash_suffix; | ||||
bool from_malloc; | ||||
bool deleted; | ||||
} line; | ||||
typedef struct { | ||||
PyObject_HEAD | ||||
PyObject *pydata; | ||||
line *lines; | ||||
int numlines; /* number of line entries */ | ||||
int livelines; /* number of non-deleted lines */ | ||||
int maxlines; /* allocated number of lines */ | ||||
bool dirty; | ||||
} lazymanifest; | ||||
#define MANIFEST_OOM -1 | ||||
#define MANIFEST_NOT_SORTED -2 | ||||
#define MANIFEST_MALFORMED -3 | ||||
/* defined in parsers.c */ | ||||
PyObject *unhexlify(const char *str, int len); | ||||
/* get the length of the path for a line */ | ||||
static size_t pathlen(line *l) { | ||||
return strlen(l->start); | ||||
} | ||||
/* get the node value of a single line */ | ||||
static PyObject *nodeof(line *l) { | ||||
char *s = l->start; | ||||
ssize_t llen = pathlen(l); | ||||
PyObject *hash = unhexlify(s + llen + 1, 40); | ||||
if (!hash) { | ||||
return NULL; | ||||
} | ||||
if (l->hash_suffix != '\0') { | ||||
char newhash[21]; | ||||
Gregory Szorc
|
r30097 | memcpy(newhash, PyBytes_AsString(hash), 20); | ||
Augie Fackler
|
r24214 | Py_DECREF(hash); | ||
newhash[20] = l->hash_suffix; | ||||
Gregory Szorc
|
r30097 | hash = PyBytes_FromStringAndSize(newhash, 21); | ||
Augie Fackler
|
r24214 | } | ||
return hash; | ||||
} | ||||
/* get the node hash and flags of a line as a tuple */ | ||||
static PyObject *hashflags(line *l) | ||||
{ | ||||
char *s = l->start; | ||||
size_t plen = pathlen(l); | ||||
PyObject *hash = nodeof(l); | ||||
/* 40 for hash, 1 for null byte, 1 for newline */ | ||||
size_t hplen = plen + 42; | ||||
Py_ssize_t flen = l->len - hplen; | ||||
PyObject *flags; | ||||
PyObject *tup; | ||||
if (!hash) | ||||
return NULL; | ||||
Gregory Szorc
|
r30097 | flags = PyBytes_FromStringAndSize(s + hplen - 1, flen); | ||
Augie Fackler
|
r24214 | if (!flags) { | ||
Py_DECREF(hash); | ||||
return NULL; | ||||
} | ||||
tup = PyTuple_Pack(2, hash, flags); | ||||
Py_DECREF(flags); | ||||
Py_DECREF(hash); | ||||
return tup; | ||||
} | ||||
/* if we're about to run out of space in the line index, add more */ | ||||
static bool realloc_if_full(lazymanifest *self) | ||||
{ | ||||
if (self->numlines == self->maxlines) { | ||||
self->maxlines *= 2; | ||||
self->lines = realloc(self->lines, self->maxlines * sizeof(line)); | ||||
} | ||||
Matt Harbison
|
r24312 | return !!self->lines; | ||
Augie Fackler
|
r24214 | } | ||
/* | ||||
* Find the line boundaries in the manifest that 'data' points to and store | ||||
* information about each line in 'self'. | ||||
*/ | ||||
static int find_lines(lazymanifest *self, char *data, Py_ssize_t len) | ||||
{ | ||||
char *prev = NULL; | ||||
while (len > 0) { | ||||
line *l; | ||||
char *next = memchr(data, '\n', len); | ||||
if (!next) { | ||||
return MANIFEST_MALFORMED; | ||||
} | ||||
next++; /* advance past newline */ | ||||
if (!realloc_if_full(self)) { | ||||
return MANIFEST_OOM; /* no memory */ | ||||
} | ||||
if (prev && strcmp(prev, data) > -1) { | ||||
/* This data isn't sorted, so we have to abort. */ | ||||
return MANIFEST_NOT_SORTED; | ||||
} | ||||
l = self->lines + ((self->numlines)++); | ||||
l->start = data; | ||||
l->len = next - data; | ||||
l->hash_suffix = '\0'; | ||||
l->from_malloc = false; | ||||
l->deleted = false; | ||||
len = len - l->len; | ||||
prev = data; | ||||
data = next; | ||||
} | ||||
self->livelines = self->numlines; | ||||
return 0; | ||||
} | ||||
static int lazymanifest_init(lazymanifest *self, PyObject *args) | ||||
{ | ||||
char *data; | ||||
Py_ssize_t len; | ||||
int err, ret; | ||||
PyObject *pydata; | ||||
if (!PyArg_ParseTuple(args, "S", &pydata)) { | ||||
return -1; | ||||
} | ||||
Gregory Szorc
|
r30097 | err = PyBytes_AsStringAndSize(pydata, &data, &len); | ||
Augie Fackler
|
r24214 | |||
self->dirty = false; | ||||
if (err == -1) | ||||
return -1; | ||||
self->pydata = pydata; | ||||
Py_INCREF(self->pydata); | ||||
Py_BEGIN_ALLOW_THREADS | ||||
self->lines = malloc(DEFAULT_LINES * sizeof(line)); | ||||
self->maxlines = DEFAULT_LINES; | ||||
self->numlines = 0; | ||||
if (!self->lines) | ||||
ret = MANIFEST_OOM; | ||||
else | ||||
ret = find_lines(self, data, len); | ||||
Py_END_ALLOW_THREADS | ||||
switch (ret) { | ||||
case 0: | ||||
break; | ||||
case MANIFEST_OOM: | ||||
PyErr_NoMemory(); | ||||
break; | ||||
case MANIFEST_NOT_SORTED: | ||||
PyErr_Format(PyExc_ValueError, | ||||
"Manifest lines not in sorted order."); | ||||
break; | ||||
case MANIFEST_MALFORMED: | ||||
PyErr_Format(PyExc_ValueError, | ||||
"Manifest did not end in a newline."); | ||||
break; | ||||
default: | ||||
PyErr_Format(PyExc_ValueError, | ||||
"Unknown problem parsing manifest."); | ||||
} | ||||
return ret == 0 ? 0 : -1; | ||||
} | ||||
static void lazymanifest_dealloc(lazymanifest *self) | ||||
{ | ||||
/* free any extra lines we had to allocate */ | ||||
int i; | ||||
for (i = 0; i < self->numlines; i++) { | ||||
if (self->lines[i].from_malloc) { | ||||
free(self->lines[i].start); | ||||
} | ||||
} | ||||
if (self->lines) { | ||||
free(self->lines); | ||||
self->lines = NULL; | ||||
} | ||||
if (self->pydata) { | ||||
Py_DECREF(self->pydata); | ||||
self->pydata = NULL; | ||||
} | ||||
PyObject_Del(self); | ||||
} | ||||
/* iteration support */ | ||||
typedef struct { | ||||
PyObject_HEAD lazymanifest *m; | ||||
Py_ssize_t pos; | ||||
} lmIter; | ||||
static void lmiter_dealloc(PyObject *o) | ||||
{ | ||||
lmIter *self = (lmIter *)o; | ||||
Py_DECREF(self->m); | ||||
PyObject_Del(self); | ||||
} | ||||
Martin von Zweigbergk
|
r24294 | static line *lmiter_nextline(lmIter *self) | ||
{ | ||||
do { | ||||
self->pos++; | ||||
if (self->pos >= self->m->numlines) { | ||||
return NULL; | ||||
} | ||||
/* skip over deleted manifest entries */ | ||||
} while (self->m->lines[self->pos].deleted); | ||||
return self->m->lines + self->pos; | ||||
} | ||||
Martin von Zweigbergk
|
r24298 | static PyObject *lmiter_iterentriesnext(PyObject *o) | ||
Augie Fackler
|
r24214 | { | ||
size_t pl; | ||||
line *l; | ||||
Py_ssize_t consumed; | ||||
Mike Hommey
|
r24699 | PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL; | ||
Martin von Zweigbergk
|
r24294 | l = lmiter_nextline((lmIter *)o); | ||
if (!l) { | ||||
Martin von Zweigbergk
|
r24974 | goto done; | ||
Martin von Zweigbergk
|
r24294 | } | ||
Augie Fackler
|
r24214 | pl = pathlen(l); | ||
Gregory Szorc
|
r30097 | path = PyBytes_FromStringAndSize(l->start, pl); | ||
Augie Fackler
|
r24214 | hash = nodeof(l); | ||
consumed = pl + 41; | ||||
Gregory Szorc
|
r30097 | flags = PyBytes_FromStringAndSize(l->start + consumed, | ||
Bryan O'Sullivan
|
r27340 | l->len - consumed - 1); | ||
Martin von Zweigbergk
|
r24293 | if (!path || !hash || !flags) { | ||
Martin von Zweigbergk
|
r24974 | goto done; | ||
Augie Fackler
|
r24214 | } | ||
Mike Hommey
|
r24699 | ret = PyTuple_Pack(3, path, hash, flags); | ||
Martin von Zweigbergk
|
r24975 | done: | ||
Augie Fackler
|
r24214 | Py_XDECREF(path); | ||
Py_XDECREF(hash); | ||||
Py_XDECREF(flags); | ||||
Mike Hommey
|
r24699 | return ret; | ||
Augie Fackler
|
r24214 | } | ||
Gregory Szorc
|
r30079 | #ifdef IS_PY3K | ||
#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT | ||||
#else | ||||
#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \ | ||||
| Py_TPFLAGS_HAVE_ITER | ||||
#endif | ||||
Martin von Zweigbergk
|
r24298 | static PyTypeObject lazymanifestEntriesIterator = { | ||
Gregory Szorc
|
r30168 | PyVarObject_HEAD_INIT(NULL, 0) | ||
Martin von Zweigbergk
|
r24298 | "parsers.lazymanifest.entriesiterator", /*tp_name */ | ||
Augie Fackler
|
r24214 | sizeof(lmIter), /*tp_basicsize */ | ||
0, /*tp_itemsize */ | ||||
lmiter_dealloc, /*tp_dealloc */ | ||||
0, /*tp_print */ | ||||
0, /*tp_getattr */ | ||||
0, /*tp_setattr */ | ||||
0, /*tp_compare */ | ||||
0, /*tp_repr */ | ||||
0, /*tp_as_number */ | ||||
0, /*tp_as_sequence */ | ||||
0, /*tp_as_mapping */ | ||||
0, /*tp_hash */ | ||||
0, /*tp_call */ | ||||
0, /*tp_str */ | ||||
0, /*tp_getattro */ | ||||
0, /*tp_setattro */ | ||||
0, /*tp_as_buffer */ | ||||
Gregory Szorc
|
r30079 | LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */ | ||
Martin von Zweigbergk
|
r24298 | "Iterator for 3-tuples in a lazymanifest.", /* tp_doc */ | ||
Augie Fackler
|
r24214 | 0, /* tp_traverse */ | ||
0, /* tp_clear */ | ||||
0, /* tp_richcompare */ | ||||
0, /* tp_weaklistoffset */ | ||||
PyObject_SelfIter, /* tp_iter: __iter__() method */ | ||||
Martin von Zweigbergk
|
r24298 | lmiter_iterentriesnext, /* tp_iternext: next() method */ | ||
Augie Fackler
|
r24214 | }; | ||
Martin von Zweigbergk
|
r24295 | static PyObject *lmiter_iterkeysnext(PyObject *o) | ||
{ | ||||
size_t pl; | ||||
line *l = lmiter_nextline((lmIter *)o); | ||||
if (!l) { | ||||
return NULL; | ||||
} | ||||
pl = pathlen(l); | ||||
Gregory Szorc
|
r30097 | return PyBytes_FromStringAndSize(l->start, pl); | ||
Martin von Zweigbergk
|
r24295 | } | ||
Gregory Szorc
|
r30079 | #ifdef IS_PY3K | ||
#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT | ||||
#else | ||||
#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \ | ||||
| Py_TPFLAGS_HAVE_ITER | ||||
#endif | ||||
Martin von Zweigbergk
|
r24295 | static PyTypeObject lazymanifestKeysIterator = { | ||
Gregory Szorc
|
r30168 | PyVarObject_HEAD_INIT(NULL, 0) | ||
Martin von Zweigbergk
|
r24295 | "parsers.lazymanifest.keysiterator", /*tp_name */ | ||
sizeof(lmIter), /*tp_basicsize */ | ||||
0, /*tp_itemsize */ | ||||
lmiter_dealloc, /*tp_dealloc */ | ||||
0, /*tp_print */ | ||||
0, /*tp_getattr */ | ||||
0, /*tp_setattr */ | ||||
0, /*tp_compare */ | ||||
0, /*tp_repr */ | ||||
0, /*tp_as_number */ | ||||
0, /*tp_as_sequence */ | ||||
0, /*tp_as_mapping */ | ||||
0, /*tp_hash */ | ||||
0, /*tp_call */ | ||||
0, /*tp_str */ | ||||
0, /*tp_getattro */ | ||||
0, /*tp_setattro */ | ||||
0, /*tp_as_buffer */ | ||||
Gregory Szorc
|
r30079 | LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */ | ||
Martin von Zweigbergk
|
r24295 | "Keys iterator for a lazymanifest.", /* tp_doc */ | ||
0, /* tp_traverse */ | ||||
0, /* tp_clear */ | ||||
0, /* tp_richcompare */ | ||||
0, /* tp_weaklistoffset */ | ||||
PyObject_SelfIter, /* tp_iter: __iter__() method */ | ||||
lmiter_iterkeysnext, /* tp_iternext: next() method */ | ||||
}; | ||||
Augie Fackler
|
r24214 | static lazymanifest *lazymanifest_copy(lazymanifest *self); | ||
Martin von Zweigbergk
|
r24298 | static PyObject *lazymanifest_getentriesiter(lazymanifest *self) | ||
Augie Fackler
|
r24214 | { | ||
lmIter *i = NULL; | ||||
lazymanifest *t = lazymanifest_copy(self); | ||||
if (!t) { | ||||
PyErr_NoMemory(); | ||||
return NULL; | ||||
} | ||||
Martin von Zweigbergk
|
r24298 | i = PyObject_New(lmIter, &lazymanifestEntriesIterator); | ||
Augie Fackler
|
r24214 | if (i) { | ||
i->m = t; | ||||
i->pos = -1; | ||||
} else { | ||||
Py_DECREF(t); | ||||
PyErr_NoMemory(); | ||||
} | ||||
return (PyObject *)i; | ||||
} | ||||
Martin von Zweigbergk
|
r24295 | static PyObject *lazymanifest_getkeysiter(lazymanifest *self) | ||
{ | ||||
lmIter *i = NULL; | ||||
lazymanifest *t = lazymanifest_copy(self); | ||||
if (!t) { | ||||
PyErr_NoMemory(); | ||||
return NULL; | ||||
} | ||||
i = PyObject_New(lmIter, &lazymanifestKeysIterator); | ||||
if (i) { | ||||
i->m = t; | ||||
i->pos = -1; | ||||
} else { | ||||
Py_DECREF(t); | ||||
PyErr_NoMemory(); | ||||
} | ||||
return (PyObject *)i; | ||||
} | ||||
Augie Fackler
|
r24214 | /* __getitem__ and __setitem__ support */ | ||
static Py_ssize_t lazymanifest_size(lazymanifest *self) | ||||
{ | ||||
return self->livelines; | ||||
} | ||||
static int linecmp(const void *left, const void *right) | ||||
{ | ||||
return strcmp(((const line *)left)->start, | ||||
((const line *)right)->start); | ||||
} | ||||
static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key) | ||||
{ | ||||
line needle; | ||||
line *hit; | ||||
Gregory Szorc
|
r30097 | if (!PyBytes_Check(key)) { | ||
Augie Fackler
|
r24214 | PyErr_Format(PyExc_TypeError, | ||
"getitem: manifest keys must be a string."); | ||||
return NULL; | ||||
} | ||||
Gregory Szorc
|
r30097 | needle.start = PyBytes_AsString(key); | ||
Augie Fackler
|
r24214 | hit = bsearch(&needle, self->lines, self->numlines, sizeof(line), | ||
&linecmp); | ||||
if (!hit || hit->deleted) { | ||||
PyErr_Format(PyExc_KeyError, "No such manifest entry."); | ||||
return NULL; | ||||
} | ||||
return hashflags(hit); | ||||
} | ||||
static int lazymanifest_delitem(lazymanifest *self, PyObject *key) | ||||
{ | ||||
line needle; | ||||
line *hit; | ||||
Gregory Szorc
|
r30097 | if (!PyBytes_Check(key)) { | ||
Augie Fackler
|
r24214 | PyErr_Format(PyExc_TypeError, | ||
"delitem: manifest keys must be a string."); | ||||
return -1; | ||||
} | ||||
Gregory Szorc
|
r30097 | needle.start = PyBytes_AsString(key); | ||
Augie Fackler
|
r24214 | hit = bsearch(&needle, self->lines, self->numlines, sizeof(line), | ||
&linecmp); | ||||
if (!hit || hit->deleted) { | ||||
PyErr_Format(PyExc_KeyError, | ||||
"Tried to delete nonexistent manifest entry."); | ||||
return -1; | ||||
} | ||||
self->dirty = true; | ||||
hit->deleted = true; | ||||
self->livelines--; | ||||
return 0; | ||||
} | ||||
Augie Fackler
|
r24228 | /* Do a binary search for the insertion point for new, creating the | ||
* new entry if needed. */ | ||||
static int internalsetitem(lazymanifest *self, line *new) { | ||||
int start = 0, end = self->numlines; | ||||
while (start < end) { | ||||
int pos = start + (end - start) / 2; | ||||
int c = linecmp(new, self->lines + pos); | ||||
if (c < 0) | ||||
end = pos; | ||||
else if (c > 0) | ||||
start = pos + 1; | ||||
else { | ||||
if (self->lines[pos].deleted) | ||||
self->livelines++; | ||||
Augie Fackler
|
r24710 | if (self->lines[pos].from_malloc) | ||
free(self->lines[pos].start); | ||||
Augie Fackler
|
r24228 | start = pos; | ||
goto finish; | ||||
} | ||||
} | ||||
/* being here means we need to do an insert */ | ||||
if (!realloc_if_full(self)) { | ||||
PyErr_NoMemory(); | ||||
return -1; | ||||
} | ||||
memmove(self->lines + start + 1, self->lines + start, | ||||
(self->numlines - start) * sizeof(line)); | ||||
self->numlines++; | ||||
self->livelines++; | ||||
finish: | ||||
self->lines[start] = *new; | ||||
self->dirty = true; | ||||
return 0; | ||||
} | ||||
Augie Fackler
|
r24214 | static int lazymanifest_setitem( | ||
lazymanifest *self, PyObject *key, PyObject *value) | ||||
{ | ||||
char *path; | ||||
Py_ssize_t plen; | ||||
PyObject *pyhash; | ||||
Py_ssize_t hlen; | ||||
char *hash; | ||||
PyObject *pyflags; | ||||
char *flags; | ||||
Py_ssize_t flen; | ||||
size_t dlen; | ||||
char *dest; | ||||
int i; | ||||
line new; | ||||
Gregory Szorc
|
r30097 | if (!PyBytes_Check(key)) { | ||
Augie Fackler
|
r24214 | PyErr_Format(PyExc_TypeError, | ||
"setitem: manifest keys must be a string."); | ||||
return -1; | ||||
} | ||||
if (!value) { | ||||
return lazymanifest_delitem(self, key); | ||||
} | ||||
if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) { | ||||
PyErr_Format(PyExc_TypeError, | ||||
"Manifest values must be a tuple of (node, flags)."); | ||||
return -1; | ||||
} | ||||
Gregory Szorc
|
r30097 | if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) { | ||
Augie Fackler
|
r24214 | return -1; | ||
} | ||||
pyhash = PyTuple_GetItem(value, 0); | ||||
Gregory Szorc
|
r30097 | if (!PyBytes_Check(pyhash)) { | ||
Augie Fackler
|
r24214 | PyErr_Format(PyExc_TypeError, | ||
"node must be a 20-byte string"); | ||||
return -1; | ||||
} | ||||
Gregory Szorc
|
r30097 | hlen = PyBytes_Size(pyhash); | ||
Augie Fackler
|
r24214 | /* Some parts of the codebase try and set 21 or 22 | ||
* byte "hash" values in order to perturb things for | ||||
* status. We have to preserve at least the 21st | ||||
* byte. Sigh. If there's a 22nd byte, we drop it on | ||||
* the floor, which works fine. | ||||
*/ | ||||
if (hlen != 20 && hlen != 21 && hlen != 22) { | ||||
PyErr_Format(PyExc_TypeError, | ||||
"node must be a 20-byte string"); | ||||
return -1; | ||||
} | ||||
Gregory Szorc
|
r30097 | hash = PyBytes_AsString(pyhash); | ||
Augie Fackler
|
r24214 | |||
pyflags = PyTuple_GetItem(value, 1); | ||||
Gregory Szorc
|
r30097 | if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) { | ||
Augie Fackler
|
r24214 | PyErr_Format(PyExc_TypeError, | ||
"flags must a 0 or 1 byte string"); | ||||
return -1; | ||||
} | ||||
Gregory Szorc
|
r30097 | if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) { | ||
Augie Fackler
|
r24214 | return -1; | ||
} | ||||
/* one null byte and one newline */ | ||||
dlen = plen + 41 + flen + 1; | ||||
dest = malloc(dlen); | ||||
if (!dest) { | ||||
PyErr_NoMemory(); | ||||
return -1; | ||||
} | ||||
memcpy(dest, path, plen + 1); | ||||
for (i = 0; i < 20; i++) { | ||||
Martin von Zweigbergk
|
r24286 | /* Cast to unsigned, so it will not get sign-extended when promoted | ||
* to int (as is done when passing to a variadic function) | ||||
*/ | ||||
sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]); | ||||
Augie Fackler
|
r24214 | } | ||
memcpy(dest + plen + 41, flags, flen); | ||||
dest[plen + 41 + flen] = '\n'; | ||||
new.start = dest; | ||||
new.len = dlen; | ||||
new.hash_suffix = '\0'; | ||||
if (hlen > 20) { | ||||
new.hash_suffix = hash[20]; | ||||
} | ||||
new.from_malloc = true; /* is `start` a pointer we allocated? */ | ||||
new.deleted = false; /* is this entry deleted? */ | ||||
Augie Fackler
|
r24228 | if (internalsetitem(self, &new)) { | ||
return -1; | ||||
Augie Fackler
|
r24214 | } | ||
return 0; | ||||
} | ||||
static PyMappingMethods lazymanifest_mapping_methods = { | ||||
(lenfunc)lazymanifest_size, /* mp_length */ | ||||
(binaryfunc)lazymanifest_getitem, /* mp_subscript */ | ||||
(objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */ | ||||
}; | ||||
Augie Fackler
|
r27608 | /* sequence methods (important or __contains__ builds an iterator) */ | ||
Augie Fackler
|
r24214 | |||
static int lazymanifest_contains(lazymanifest *self, PyObject *key) | ||||
{ | ||||
line needle; | ||||
line *hit; | ||||
Gregory Szorc
|
r30097 | if (!PyBytes_Check(key)) { | ||
Augie Fackler
|
r24214 | /* Our keys are always strings, so if the contains | ||
* check is for a non-string, just return false. */ | ||||
return 0; | ||||
} | ||||
Gregory Szorc
|
r30097 | needle.start = PyBytes_AsString(key); | ||
Augie Fackler
|
r24214 | hit = bsearch(&needle, self->lines, self->numlines, sizeof(line), | ||
&linecmp); | ||||
if (!hit || hit->deleted) { | ||||
return 0; | ||||
} | ||||
return 1; | ||||
} | ||||
static PySequenceMethods lazymanifest_seq_meths = { | ||||
(lenfunc)lazymanifest_size, /* sq_length */ | ||||
0, /* sq_concat */ | ||||
0, /* sq_repeat */ | ||||
0, /* sq_item */ | ||||
0, /* sq_slice */ | ||||
0, /* sq_ass_item */ | ||||
0, /* sq_ass_slice */ | ||||
(objobjproc)lazymanifest_contains, /* sq_contains */ | ||||
0, /* sq_inplace_concat */ | ||||
0, /* sq_inplace_repeat */ | ||||
}; | ||||
/* Other methods (copy, diff, etc) */ | ||||
static PyTypeObject lazymanifestType; | ||||
/* If the manifest has changes, build the new manifest text and reindex it. */ | ||||
static int compact(lazymanifest *self) { | ||||
int i; | ||||
ssize_t need = 0; | ||||
char *data; | ||||
line *src, *dst; | ||||
PyObject *pydata; | ||||
if (!self->dirty) | ||||
return 0; | ||||
for (i = 0; i < self->numlines; i++) { | ||||
if (!self->lines[i].deleted) { | ||||
need += self->lines[i].len; | ||||
} | ||||
} | ||||
Gregory Szorc
|
r30097 | pydata = PyBytes_FromStringAndSize(NULL, need); | ||
Augie Fackler
|
r24214 | if (!pydata) | ||
return -1; | ||||
Gregory Szorc
|
r30097 | data = PyBytes_AsString(pydata); | ||
Augie Fackler
|
r24214 | if (!data) { | ||
return -1; | ||||
} | ||||
src = self->lines; | ||||
dst = self->lines; | ||||
for (i = 0; i < self->numlines; i++, src++) { | ||||
char *tofree = NULL; | ||||
if (src->from_malloc) { | ||||
tofree = src->start; | ||||
} | ||||
if (!src->deleted) { | ||||
memcpy(data, src->start, src->len); | ||||
*dst = *src; | ||||
dst->start = data; | ||||
dst->from_malloc = false; | ||||
data += dst->len; | ||||
dst++; | ||||
} | ||||
free(tofree); | ||||
} | ||||
Py_DECREF(self->pydata); | ||||
self->pydata = pydata; | ||||
self->numlines = self->livelines; | ||||
self->dirty = false; | ||||
return 0; | ||||
} | ||||
static PyObject *lazymanifest_text(lazymanifest *self) | ||||
{ | ||||
if (compact(self) != 0) { | ||||
PyErr_NoMemory(); | ||||
return NULL; | ||||
} | ||||
Py_INCREF(self->pydata); | ||||
return self->pydata; | ||||
} | ||||
static lazymanifest *lazymanifest_copy(lazymanifest *self) | ||||
{ | ||||
lazymanifest *copy = NULL; | ||||
if (compact(self) != 0) { | ||||
goto nomem; | ||||
} | ||||
copy = PyObject_New(lazymanifest, &lazymanifestType); | ||||
if (!copy) { | ||||
goto nomem; | ||||
} | ||||
copy->numlines = self->numlines; | ||||
copy->livelines = self->livelines; | ||||
copy->dirty = false; | ||||
copy->lines = malloc(self->maxlines *sizeof(line)); | ||||
if (!copy->lines) { | ||||
goto nomem; | ||||
} | ||||
memcpy(copy->lines, self->lines, self->numlines * sizeof(line)); | ||||
copy->maxlines = self->maxlines; | ||||
copy->pydata = self->pydata; | ||||
Py_INCREF(copy->pydata); | ||||
return copy; | ||||
Martin von Zweigbergk
|
r24975 | nomem: | ||
Augie Fackler
|
r24214 | PyErr_NoMemory(); | ||
Py_XDECREF(copy); | ||||
return NULL; | ||||
} | ||||
static lazymanifest *lazymanifest_filtercopy( | ||||
lazymanifest *self, PyObject *matchfn) | ||||
{ | ||||
lazymanifest *copy = NULL; | ||||
int i; | ||||
if (!PyCallable_Check(matchfn)) { | ||||
PyErr_SetString(PyExc_TypeError, "matchfn must be callable"); | ||||
return NULL; | ||||
} | ||||
/* compact ourselves first to avoid double-frees later when we | ||||
* compact tmp so that it doesn't have random pointers to our | ||||
* underlying from_malloc-data (self->pydata is safe) */ | ||||
if (compact(self) != 0) { | ||||
goto nomem; | ||||
} | ||||
copy = PyObject_New(lazymanifest, &lazymanifestType); | ||||
Augie Fackler
|
r27609 | if (!copy) { | ||
goto nomem; | ||||
} | ||||
Augie Fackler
|
r24214 | copy->dirty = true; | ||
copy->lines = malloc(self->maxlines * sizeof(line)); | ||||
if (!copy->lines) { | ||||
goto nomem; | ||||
} | ||||
copy->maxlines = self->maxlines; | ||||
copy->numlines = 0; | ||||
copy->pydata = self->pydata; | ||||
Py_INCREF(self->pydata); | ||||
for (i = 0; i < self->numlines; i++) { | ||||
Augie Fackler
|
r27661 | PyObject *arglist = NULL, *result = NULL; | ||
arglist = Py_BuildValue("(s)", self->lines[i].start); | ||||
if (!arglist) { | ||||
return NULL; | ||||
} | ||||
result = PyObject_CallObject(matchfn, arglist); | ||||
Augie Fackler
|
r24214 | Py_DECREF(arglist); | ||
/* if the callback raised an exception, just let it | ||||
* through and give up */ | ||||
if (!result) { | ||||
free(copy->lines); | ||||
Py_DECREF(self->pydata); | ||||
return NULL; | ||||
} | ||||
if (PyObject_IsTrue(result)) { | ||||
assert(!(self->lines[i].from_malloc)); | ||||
copy->lines[copy->numlines++] = self->lines[i]; | ||||
} | ||||
Py_DECREF(result); | ||||
} | ||||
copy->livelines = copy->numlines; | ||||
return copy; | ||||
Martin von Zweigbergk
|
r24975 | nomem: | ||
Augie Fackler
|
r24214 | PyErr_NoMemory(); | ||
Py_XDECREF(copy); | ||||
return NULL; | ||||
} | ||||
static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args) | ||||
{ | ||||
lazymanifest *other; | ||||
PyObject *pyclean = NULL; | ||||
bool listclean; | ||||
PyObject *emptyTup = NULL, *ret = NULL; | ||||
PyObject *es; | ||||
int sneedle = 0, oneedle = 0; | ||||
if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) { | ||||
return NULL; | ||||
} | ||||
listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean); | ||||
Gregory Szorc
|
r30097 | es = PyBytes_FromString(""); | ||
Augie Fackler
|
r24214 | if (!es) { | ||
goto nomem; | ||||
} | ||||
emptyTup = PyTuple_Pack(2, Py_None, es); | ||||
Py_DECREF(es); | ||||
if (!emptyTup) { | ||||
goto nomem; | ||||
} | ||||
ret = PyDict_New(); | ||||
if (!ret) { | ||||
goto nomem; | ||||
} | ||||
while (sneedle != self->numlines || oneedle != other->numlines) { | ||||
line *left = self->lines + sneedle; | ||||
line *right = other->lines + oneedle; | ||||
int result; | ||||
PyObject *key; | ||||
PyObject *outer; | ||||
/* If we're looking at a deleted entry and it's not | ||||
* the end of the manifest, just skip it. */ | ||||
if (left->deleted && sneedle < self->numlines) { | ||||
sneedle++; | ||||
continue; | ||||
} | ||||
if (right->deleted && oneedle < other->numlines) { | ||||
oneedle++; | ||||
continue; | ||||
} | ||||
/* if we're at the end of either manifest, then we | ||||
* know the remaining items are adds so we can skip | ||||
* the strcmp. */ | ||||
if (sneedle == self->numlines) { | ||||
result = 1; | ||||
} else if (oneedle == other->numlines) { | ||||
result = -1; | ||||
} else { | ||||
result = linecmp(left, right); | ||||
} | ||||
key = result <= 0 ? | ||||
Gregory Szorc
|
r30097 | PyBytes_FromString(left->start) : | ||
PyBytes_FromString(right->start); | ||||
Augie Fackler
|
r24214 | if (!key) | ||
goto nomem; | ||||
if (result < 0) { | ||||
PyObject *l = hashflags(left); | ||||
if (!l) { | ||||
goto nomem; | ||||
} | ||||
outer = PyTuple_Pack(2, l, emptyTup); | ||||
Py_DECREF(l); | ||||
if (!outer) { | ||||
goto nomem; | ||||
} | ||||
PyDict_SetItem(ret, key, outer); | ||||
Py_DECREF(outer); | ||||
sneedle++; | ||||
} else if (result > 0) { | ||||
PyObject *r = hashflags(right); | ||||
if (!r) { | ||||
goto nomem; | ||||
} | ||||
outer = PyTuple_Pack(2, emptyTup, r); | ||||
Py_DECREF(r); | ||||
if (!outer) { | ||||
goto nomem; | ||||
} | ||||
PyDict_SetItem(ret, key, outer); | ||||
Py_DECREF(outer); | ||||
oneedle++; | ||||
} else { | ||||
/* file exists in both manifests */ | ||||
if (left->len != right->len | ||||
|| memcmp(left->start, right->start, left->len) | ||||
|| left->hash_suffix != right->hash_suffix) { | ||||
PyObject *l = hashflags(left); | ||||
PyObject *r; | ||||
if (!l) { | ||||
goto nomem; | ||||
} | ||||
r = hashflags(right); | ||||
if (!r) { | ||||
Py_DECREF(l); | ||||
goto nomem; | ||||
} | ||||
outer = PyTuple_Pack(2, l, r); | ||||
Py_DECREF(l); | ||||
Py_DECREF(r); | ||||
if (!outer) { | ||||
goto nomem; | ||||
} | ||||
PyDict_SetItem(ret, key, outer); | ||||
Py_DECREF(outer); | ||||
} else if (listclean) { | ||||
PyDict_SetItem(ret, key, Py_None); | ||||
} | ||||
sneedle++; | ||||
oneedle++; | ||||
} | ||||
Py_DECREF(key); | ||||
} | ||||
Py_DECREF(emptyTup); | ||||
return ret; | ||||
Martin von Zweigbergk
|
r24975 | nomem: | ||
Augie Fackler
|
r24214 | PyErr_NoMemory(); | ||
Py_XDECREF(ret); | ||||
Py_XDECREF(emptyTup); | ||||
return NULL; | ||||
} | ||||
static PyMethodDef lazymanifest_methods[] = { | ||||
Martin von Zweigbergk
|
r24295 | {"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS, | ||
"Iterate over file names in this lazymanifest."}, | ||||
Martin von Zweigbergk
|
r24298 | {"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS, | ||
Javi Merino
|
r29254 | "Iterate over (path, nodeid, flags) tuples in this lazymanifest."}, | ||
Augie Fackler
|
r24214 | {"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS, | ||
"Make a copy of this lazymanifest."}, | ||||
{"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O, | ||||
"Make a copy of this manifest filtered by matchfn."}, | ||||
{"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS, | ||||
"Compare this lazymanifest to another one."}, | ||||
{"text", (PyCFunction)lazymanifest_text, METH_NOARGS, | ||||
"Encode this manifest to text."}, | ||||
{NULL}, | ||||
}; | ||||
Gregory Szorc
|
r30079 | #ifdef IS_PY3K | ||
#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | ||||
#else | ||||
#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN | ||||
#endif | ||||
Augie Fackler
|
r24214 | static PyTypeObject lazymanifestType = { | ||
Gregory Szorc
|
r30168 | PyVarObject_HEAD_INIT(NULL, 0) | ||
Augie Fackler
|
r24214 | "parsers.lazymanifest", /* tp_name */ | ||
sizeof(lazymanifest), /* tp_basicsize */ | ||||
0, /* tp_itemsize */ | ||||
(destructor)lazymanifest_dealloc, /* tp_dealloc */ | ||||
0, /* tp_print */ | ||||
0, /* tp_getattr */ | ||||
0, /* tp_setattr */ | ||||
0, /* tp_compare */ | ||||
0, /* tp_repr */ | ||||
0, /* tp_as_number */ | ||||
&lazymanifest_seq_meths, /* tp_as_sequence */ | ||||
&lazymanifest_mapping_methods, /* tp_as_mapping */ | ||||
0, /* tp_hash */ | ||||
0, /* tp_call */ | ||||
0, /* tp_str */ | ||||
0, /* tp_getattro */ | ||||
0, /* tp_setattro */ | ||||
0, /* tp_as_buffer */ | ||||
Gregory Szorc
|
r30079 | LAZYMANIFEST_TPFLAGS, /* tp_flags */ | ||
Augie Fackler
|
r24214 | "TODO(augie)", /* tp_doc */ | ||
0, /* tp_traverse */ | ||||
0, /* tp_clear */ | ||||
0, /* tp_richcompare */ | ||||
0, /* tp_weaklistoffset */ | ||||
Martin von Zweigbergk
|
r24298 | (getiterfunc)lazymanifest_getkeysiter, /* tp_iter */ | ||
Augie Fackler
|
r24214 | 0, /* tp_iternext */ | ||
lazymanifest_methods, /* tp_methods */ | ||||
0, /* tp_members */ | ||||
0, /* tp_getset */ | ||||
0, /* tp_base */ | ||||
0, /* tp_dict */ | ||||
0, /* tp_descr_get */ | ||||
0, /* tp_descr_set */ | ||||
0, /* tp_dictoffset */ | ||||
(initproc)lazymanifest_init, /* tp_init */ | ||||
0, /* tp_alloc */ | ||||
}; | ||||
void manifest_module_init(PyObject * mod) | ||||
{ | ||||
lazymanifestType.tp_new = PyType_GenericNew; | ||||
if (PyType_Ready(&lazymanifestType) < 0) | ||||
return; | ||||
Py_INCREF(&lazymanifestType); | ||||
PyModule_AddObject(mod, "lazymanifest", | ||||
(PyObject *)&lazymanifestType); | ||||
} | ||||