##// END OF EJS Templates
bdiff: don't check border condition in loop...
bdiff: don't check border condition in loop `plast = a + len - 1`. So, this "for" loop iterates from "a" to "plast", inclusive. So, `p == plast` can only be true on the final iteration of the loop. So checking for it on every loop iteration is wasteful. This patch simply decreases the upper bound of the loop by 1 and adds an explicit check after iteration for the `p == plast` case. We can't simply add 1 to the initial value for "i" because that doesn't do the correct thing on empty input strings. `perfbdiff -m 3041e4d59df2` on the Firefox repo becomes significantly faster: ! wall 0.072763 comb 0.070000 user 0.070000 sys 0.000000 (best of 100) ! wall 0.053221 comb 0.060000 user 0.060000 sys 0.000000 (best of 100) For the curious, this code has its origins in 8b067bde6679, which is the changeset that introduced bdiff.c in 2005. Also, GNU diffutils is able to perform a similar line-based diff in under 20ms. So there's likely more perf wins to be found in this code. One of them is the hashing algorithm. But it looks like mpm spent some time testing hash collisions in d0c48891dd4a. I'd like to do the same before switching away from lyhash, just to be on the safe side.

File last commit:

r30168:1a327889 default
r30308:d500ddae default
Show More
manifest.c
939 lines | 24.5 KiB | text/x-c | CLexer
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 /*
* manifest.c - manifest type that does on-demand parsing.
*
* Copyright 2015, Google Inc.
*
* This software may be used and distributed according to the terms of
* the GNU General Public License, incorporated herein by reference.
*/
Drew Gottlieb
manifest: include Python.h before standard headers...
r24352 #include <Python.h>
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 #include <assert.h>
#include <string.h>
#include <stdlib.h>
Matt Mackall
manifest: use util.h to get Py_ssize_t
r24441 #include "util.h"
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 #define DEFAULT_LINES 100000
typedef struct {
char *start;
Py_ssize_t len; /* length of line including terminal newline */
char hash_suffix;
bool from_malloc;
bool deleted;
} line;
typedef struct {
PyObject_HEAD
PyObject *pydata;
line *lines;
int numlines; /* number of line entries */
int livelines; /* number of non-deleted lines */
int maxlines; /* allocated number of lines */
bool dirty;
} lazymanifest;
#define MANIFEST_OOM -1
#define MANIFEST_NOT_SORTED -2
#define MANIFEST_MALFORMED -3
/* defined in parsers.c */
PyObject *unhexlify(const char *str, int len);
/* get the length of the path for a line */
static size_t pathlen(line *l) {
return strlen(l->start);
}
/* get the node value of a single line */
static PyObject *nodeof(line *l) {
char *s = l->start;
ssize_t llen = pathlen(l);
PyObject *hash = unhexlify(s + llen + 1, 40);
if (!hash) {
return NULL;
}
if (l->hash_suffix != '\0') {
char newhash[21];
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 memcpy(newhash, PyBytes_AsString(hash), 20);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 Py_DECREF(hash);
newhash[20] = l->hash_suffix;
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 hash = PyBytes_FromStringAndSize(newhash, 21);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 }
return hash;
}
/* get the node hash and flags of a line as a tuple */
static PyObject *hashflags(line *l)
{
char *s = l->start;
size_t plen = pathlen(l);
PyObject *hash = nodeof(l);
/* 40 for hash, 1 for null byte, 1 for newline */
size_t hplen = plen + 42;
Py_ssize_t flen = l->len - hplen;
PyObject *flags;
PyObject *tup;
if (!hash)
return NULL;
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 if (!flags) {
Py_DECREF(hash);
return NULL;
}
tup = PyTuple_Pack(2, hash, flags);
Py_DECREF(flags);
Py_DECREF(hash);
return tup;
}
/* if we're about to run out of space in the line index, add more */
static bool realloc_if_full(lazymanifest *self)
{
if (self->numlines == self->maxlines) {
self->maxlines *= 2;
self->lines = realloc(self->lines, self->maxlines * sizeof(line));
}
Matt Harbison
manifest.c: ensure realloc_if_full() returns 1 or 0...
r24312 return !!self->lines;
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 }
/*
* Find the line boundaries in the manifest that 'data' points to and store
* information about each line in 'self'.
*/
static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
{
char *prev = NULL;
while (len > 0) {
line *l;
char *next = memchr(data, '\n', len);
if (!next) {
return MANIFEST_MALFORMED;
}
next++; /* advance past newline */
if (!realloc_if_full(self)) {
return MANIFEST_OOM; /* no memory */
}
if (prev && strcmp(prev, data) > -1) {
/* This data isn't sorted, so we have to abort. */
return MANIFEST_NOT_SORTED;
}
l = self->lines + ((self->numlines)++);
l->start = data;
l->len = next - data;
l->hash_suffix = '\0';
l->from_malloc = false;
l->deleted = false;
len = len - l->len;
prev = data;
data = next;
}
self->livelines = self->numlines;
return 0;
}
static int lazymanifest_init(lazymanifest *self, PyObject *args)
{
char *data;
Py_ssize_t len;
int err, ret;
PyObject *pydata;
if (!PyArg_ParseTuple(args, "S", &pydata)) {
return -1;
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 err = PyBytes_AsStringAndSize(pydata, &data, &len);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214
self->dirty = false;
if (err == -1)
return -1;
self->pydata = pydata;
Py_INCREF(self->pydata);
Py_BEGIN_ALLOW_THREADS
self->lines = malloc(DEFAULT_LINES * sizeof(line));
self->maxlines = DEFAULT_LINES;
self->numlines = 0;
if (!self->lines)
ret = MANIFEST_OOM;
else
ret = find_lines(self, data, len);
Py_END_ALLOW_THREADS
switch (ret) {
case 0:
break;
case MANIFEST_OOM:
PyErr_NoMemory();
break;
case MANIFEST_NOT_SORTED:
PyErr_Format(PyExc_ValueError,
"Manifest lines not in sorted order.");
break;
case MANIFEST_MALFORMED:
PyErr_Format(PyExc_ValueError,
"Manifest did not end in a newline.");
break;
default:
PyErr_Format(PyExc_ValueError,
"Unknown problem parsing manifest.");
}
return ret == 0 ? 0 : -1;
}
static void lazymanifest_dealloc(lazymanifest *self)
{
/* free any extra lines we had to allocate */
int i;
for (i = 0; i < self->numlines; i++) {
if (self->lines[i].from_malloc) {
free(self->lines[i].start);
}
}
if (self->lines) {
free(self->lines);
self->lines = NULL;
}
if (self->pydata) {
Py_DECREF(self->pydata);
self->pydata = NULL;
}
PyObject_Del(self);
}
/* iteration support */
typedef struct {
PyObject_HEAD lazymanifest *m;
Py_ssize_t pos;
} lmIter;
static void lmiter_dealloc(PyObject *o)
{
lmIter *self = (lmIter *)o;
Py_DECREF(self->m);
PyObject_Del(self);
}
Martin von Zweigbergk
lazymanifest: extract function for iterating to next line...
r24294 static line *lmiter_nextline(lmIter *self)
{
do {
self->pos++;
if (self->pos >= self->m->numlines) {
return NULL;
}
/* skip over deleted manifest entries */
} while (self->m->lines[self->pos].deleted);
return self->m->lines + self->pos;
}
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 static PyObject *lmiter_iterentriesnext(PyObject *o)
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 {
size_t pl;
line *l;
Py_ssize_t consumed;
Mike Hommey
lazymanifest: fix memory leak in lmiter_iterentriesnext() after 3d485727e45e
r24699 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
Martin von Zweigbergk
lazymanifest: extract function for iterating to next line...
r24294 l = lmiter_nextline((lmIter *)o);
if (!l) {
Martin von Zweigbergk
lazymanifest: avoid 'bail' label when used on success path...
r24974 goto done;
Martin von Zweigbergk
lazymanifest: extract function for iterating to next line...
r24294 }
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 pl = pathlen(l);
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 path = PyBytes_FromStringAndSize(l->start, pl);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 hash = nodeof(l);
consumed = pl + 41;
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 flags = PyBytes_FromStringAndSize(l->start + consumed,
Bryan O'Sullivan
manifest: fix formatting...
r27340 l->len - consumed - 1);
Martin von Zweigbergk
lazymanifest: fail if path or hash strings cannot be created...
r24293 if (!path || !hash || !flags) {
Martin von Zweigbergk
lazymanifest: avoid 'bail' label when used on success path...
r24974 goto done;
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 }
Mike Hommey
lazymanifest: fix memory leak in lmiter_iterentriesnext() after 3d485727e45e
r24699 ret = PyTuple_Pack(3, path, hash, flags);
Martin von Zweigbergk
lazymanifest: drop SP before some labels...
r24975 done:
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 Py_XDECREF(path);
Py_XDECREF(hash);
Py_XDECREF(flags);
Mike Hommey
lazymanifest: fix memory leak in lmiter_iterentriesnext() after 3d485727e45e
r24699 return ret;
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 }
Gregory Szorc
manifest: drop Py_TPFLAGS_HAVE_SEQUENCE_IN from tp_flags in Python 3...
r30079 #ifdef IS_PY3K
#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
| Py_TPFLAGS_HAVE_ITER
#endif
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 static PyTypeObject lazymanifestEntriesIterator = {
Gregory Szorc
manifest: use PyVarObject_HEAD_INIT...
r30168 PyVarObject_HEAD_INIT(NULL, 0)
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 "parsers.lazymanifest.entriesiterator", /*tp_name */
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 sizeof(lmIter), /*tp_basicsize */
0, /*tp_itemsize */
lmiter_dealloc, /*tp_dealloc */
0, /*tp_print */
0, /*tp_getattr */
0, /*tp_setattr */
0, /*tp_compare */
0, /*tp_repr */
0, /*tp_as_number */
0, /*tp_as_sequence */
0, /*tp_as_mapping */
0, /*tp_hash */
0, /*tp_call */
0, /*tp_str */
0, /*tp_getattro */
0, /*tp_setattro */
0, /*tp_as_buffer */
Gregory Szorc
manifest: drop Py_TPFLAGS_HAVE_SEQUENCE_IN from tp_flags in Python 3...
r30079 LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 "Iterator for 3-tuples in a lazymanifest.", /* tp_doc */
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter: __iter__() method */
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 lmiter_iterentriesnext, /* tp_iternext: next() method */
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 };
Martin von Zweigbergk
lazymanifest: add iterkeys() method...
r24295 static PyObject *lmiter_iterkeysnext(PyObject *o)
{
size_t pl;
line *l = lmiter_nextline((lmIter *)o);
if (!l) {
return NULL;
}
pl = pathlen(l);
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 return PyBytes_FromStringAndSize(l->start, pl);
Martin von Zweigbergk
lazymanifest: add iterkeys() method...
r24295 }
Gregory Szorc
manifest: drop Py_TPFLAGS_HAVE_SEQUENCE_IN from tp_flags in Python 3...
r30079 #ifdef IS_PY3K
#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
| Py_TPFLAGS_HAVE_ITER
#endif
Martin von Zweigbergk
lazymanifest: add iterkeys() method...
r24295 static PyTypeObject lazymanifestKeysIterator = {
Gregory Szorc
manifest: use PyVarObject_HEAD_INIT...
r30168 PyVarObject_HEAD_INIT(NULL, 0)
Martin von Zweigbergk
lazymanifest: add iterkeys() method...
r24295 "parsers.lazymanifest.keysiterator", /*tp_name */
sizeof(lmIter), /*tp_basicsize */
0, /*tp_itemsize */
lmiter_dealloc, /*tp_dealloc */
0, /*tp_print */
0, /*tp_getattr */
0, /*tp_setattr */
0, /*tp_compare */
0, /*tp_repr */
0, /*tp_as_number */
0, /*tp_as_sequence */
0, /*tp_as_mapping */
0, /*tp_hash */
0, /*tp_call */
0, /*tp_str */
0, /*tp_getattro */
0, /*tp_setattro */
0, /*tp_as_buffer */
Gregory Szorc
manifest: drop Py_TPFLAGS_HAVE_SEQUENCE_IN from tp_flags in Python 3...
r30079 LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */
Martin von Zweigbergk
lazymanifest: add iterkeys() method...
r24295 "Keys iterator for a lazymanifest.", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter: __iter__() method */
lmiter_iterkeysnext, /* tp_iternext: next() method */
};
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 static lazymanifest *lazymanifest_copy(lazymanifest *self);
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 static PyObject *lazymanifest_getentriesiter(lazymanifest *self)
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 {
lmIter *i = NULL;
lazymanifest *t = lazymanifest_copy(self);
if (!t) {
PyErr_NoMemory();
return NULL;
}
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 i = PyObject_New(lmIter, &lazymanifestEntriesIterator);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 if (i) {
i->m = t;
i->pos = -1;
} else {
Py_DECREF(t);
PyErr_NoMemory();
}
return (PyObject *)i;
}
Martin von Zweigbergk
lazymanifest: add iterkeys() method...
r24295 static PyObject *lazymanifest_getkeysiter(lazymanifest *self)
{
lmIter *i = NULL;
lazymanifest *t = lazymanifest_copy(self);
if (!t) {
PyErr_NoMemory();
return NULL;
}
i = PyObject_New(lmIter, &lazymanifestKeysIterator);
if (i) {
i->m = t;
i->pos = -1;
} else {
Py_DECREF(t);
PyErr_NoMemory();
}
return (PyObject *)i;
}
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 /* __getitem__ and __setitem__ support */
static Py_ssize_t lazymanifest_size(lazymanifest *self)
{
return self->livelines;
}
static int linecmp(const void *left, const void *right)
{
return strcmp(((const line *)left)->start,
((const line *)right)->start);
}
static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
{
line needle;
line *hit;
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 if (!PyBytes_Check(key)) {
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyErr_Format(PyExc_TypeError,
"getitem: manifest keys must be a string.");
return NULL;
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 needle.start = PyBytes_AsString(key);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
&linecmp);
if (!hit || hit->deleted) {
PyErr_Format(PyExc_KeyError, "No such manifest entry.");
return NULL;
}
return hashflags(hit);
}
static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
{
line needle;
line *hit;
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 if (!PyBytes_Check(key)) {
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyErr_Format(PyExc_TypeError,
"delitem: manifest keys must be a string.");
return -1;
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 needle.start = PyBytes_AsString(key);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
&linecmp);
if (!hit || hit->deleted) {
PyErr_Format(PyExc_KeyError,
"Tried to delete nonexistent manifest entry.");
return -1;
}
self->dirty = true;
hit->deleted = true;
self->livelines--;
return 0;
}
Augie Fackler
lazymanifest: use a binary search to do an insertion...
r24228 /* Do a binary search for the insertion point for new, creating the
* new entry if needed. */
static int internalsetitem(lazymanifest *self, line *new) {
int start = 0, end = self->numlines;
while (start < end) {
int pos = start + (end - start) / 2;
int c = linecmp(new, self->lines + pos);
if (c < 0)
end = pos;
else if (c > 0)
start = pos + 1;
else {
if (self->lines[pos].deleted)
self->livelines++;
Augie Fackler
lazymanifest: prevent leak when updating an entry more than once...
r24710 if (self->lines[pos].from_malloc)
free(self->lines[pos].start);
Augie Fackler
lazymanifest: use a binary search to do an insertion...
r24228 start = pos;
goto finish;
}
}
/* being here means we need to do an insert */
if (!realloc_if_full(self)) {
PyErr_NoMemory();
return -1;
}
memmove(self->lines + start + 1, self->lines + start,
(self->numlines - start) * sizeof(line));
self->numlines++;
self->livelines++;
finish:
self->lines[start] = *new;
self->dirty = true;
return 0;
}
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 static int lazymanifest_setitem(
lazymanifest *self, PyObject *key, PyObject *value)
{
char *path;
Py_ssize_t plen;
PyObject *pyhash;
Py_ssize_t hlen;
char *hash;
PyObject *pyflags;
char *flags;
Py_ssize_t flen;
size_t dlen;
char *dest;
int i;
line new;
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 if (!PyBytes_Check(key)) {
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyErr_Format(PyExc_TypeError,
"setitem: manifest keys must be a string.");
return -1;
}
if (!value) {
return lazymanifest_delitem(self, key);
}
if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
PyErr_Format(PyExc_TypeError,
"Manifest values must be a tuple of (node, flags).");
return -1;
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) {
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 return -1;
}
pyhash = PyTuple_GetItem(value, 0);
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 if (!PyBytes_Check(pyhash)) {
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyErr_Format(PyExc_TypeError,
"node must be a 20-byte string");
return -1;
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 hlen = PyBytes_Size(pyhash);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 /* Some parts of the codebase try and set 21 or 22
* byte "hash" values in order to perturb things for
* status. We have to preserve at least the 21st
* byte. Sigh. If there's a 22nd byte, we drop it on
* the floor, which works fine.
*/
if (hlen != 20 && hlen != 21 && hlen != 22) {
PyErr_Format(PyExc_TypeError,
"node must be a 20-byte string");
return -1;
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 hash = PyBytes_AsString(pyhash);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214
pyflags = PyTuple_GetItem(value, 1);
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyErr_Format(PyExc_TypeError,
"flags must a 0 or 1 byte string");
return -1;
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 return -1;
}
/* one null byte and one newline */
dlen = plen + 41 + flen + 1;
dest = malloc(dlen);
if (!dest) {
PyErr_NoMemory();
return -1;
}
memcpy(dest, path, plen + 1);
for (i = 0; i < 20; i++) {
Martin von Zweigbergk
lazymanifest: don't depend on printf's 'hh' format to work...
r24286 /* Cast to unsigned, so it will not get sign-extended when promoted
* to int (as is done when passing to a variadic function)
*/
sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 }
memcpy(dest + plen + 41, flags, flen);
dest[plen + 41 + flen] = '\n';
new.start = dest;
new.len = dlen;
new.hash_suffix = '\0';
if (hlen > 20) {
new.hash_suffix = hash[20];
}
new.from_malloc = true; /* is `start` a pointer we allocated? */
new.deleted = false; /* is this entry deleted? */
Augie Fackler
lazymanifest: use a binary search to do an insertion...
r24228 if (internalsetitem(self, &new)) {
return -1;
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 }
return 0;
}
static PyMappingMethods lazymanifest_mapping_methods = {
(lenfunc)lazymanifest_size, /* mp_length */
(binaryfunc)lazymanifest_getitem, /* mp_subscript */
(objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */
};
Augie Fackler
lazymanifest: add missing closing parenthesis in comment
r27608 /* sequence methods (important or __contains__ builds an iterator) */
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214
static int lazymanifest_contains(lazymanifest *self, PyObject *key)
{
line needle;
line *hit;
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 if (!PyBytes_Check(key)) {
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 /* Our keys are always strings, so if the contains
* check is for a non-string, just return false. */
return 0;
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 needle.start = PyBytes_AsString(key);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
&linecmp);
if (!hit || hit->deleted) {
return 0;
}
return 1;
}
static PySequenceMethods lazymanifest_seq_meths = {
(lenfunc)lazymanifest_size, /* sq_length */
0, /* sq_concat */
0, /* sq_repeat */
0, /* sq_item */
0, /* sq_slice */
0, /* sq_ass_item */
0, /* sq_ass_slice */
(objobjproc)lazymanifest_contains, /* sq_contains */
0, /* sq_inplace_concat */
0, /* sq_inplace_repeat */
};
/* Other methods (copy, diff, etc) */
static PyTypeObject lazymanifestType;
/* If the manifest has changes, build the new manifest text and reindex it. */
static int compact(lazymanifest *self) {
int i;
ssize_t need = 0;
char *data;
line *src, *dst;
PyObject *pydata;
if (!self->dirty)
return 0;
for (i = 0; i < self->numlines; i++) {
if (!self->lines[i].deleted) {
need += self->lines[i].len;
}
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 pydata = PyBytes_FromStringAndSize(NULL, need);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 if (!pydata)
return -1;
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 data = PyBytes_AsString(pydata);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 if (!data) {
return -1;
}
src = self->lines;
dst = self->lines;
for (i = 0; i < self->numlines; i++, src++) {
char *tofree = NULL;
if (src->from_malloc) {
tofree = src->start;
}
if (!src->deleted) {
memcpy(data, src->start, src->len);
*dst = *src;
dst->start = data;
dst->from_malloc = false;
data += dst->len;
dst++;
}
free(tofree);
}
Py_DECREF(self->pydata);
self->pydata = pydata;
self->numlines = self->livelines;
self->dirty = false;
return 0;
}
static PyObject *lazymanifest_text(lazymanifest *self)
{
if (compact(self) != 0) {
PyErr_NoMemory();
return NULL;
}
Py_INCREF(self->pydata);
return self->pydata;
}
static lazymanifest *lazymanifest_copy(lazymanifest *self)
{
lazymanifest *copy = NULL;
if (compact(self) != 0) {
goto nomem;
}
copy = PyObject_New(lazymanifest, &lazymanifestType);
if (!copy) {
goto nomem;
}
copy->numlines = self->numlines;
copy->livelines = self->livelines;
copy->dirty = false;
copy->lines = malloc(self->maxlines *sizeof(line));
if (!copy->lines) {
goto nomem;
}
memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
copy->maxlines = self->maxlines;
copy->pydata = self->pydata;
Py_INCREF(copy->pydata);
return copy;
Martin von Zweigbergk
lazymanifest: drop SP before some labels...
r24975 nomem:
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyErr_NoMemory();
Py_XDECREF(copy);
return NULL;
}
static lazymanifest *lazymanifest_filtercopy(
lazymanifest *self, PyObject *matchfn)
{
lazymanifest *copy = NULL;
int i;
if (!PyCallable_Check(matchfn)) {
PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
return NULL;
}
/* compact ourselves first to avoid double-frees later when we
* compact tmp so that it doesn't have random pointers to our
* underlying from_malloc-data (self->pydata is safe) */
if (compact(self) != 0) {
goto nomem;
}
copy = PyObject_New(lazymanifest, &lazymanifestType);
Augie Fackler
lazymanifest: check error return in filter
r27609 if (!copy) {
goto nomem;
}
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 copy->dirty = true;
copy->lines = malloc(self->maxlines * sizeof(line));
if (!copy->lines) {
goto nomem;
}
copy->maxlines = self->maxlines;
copy->numlines = 0;
copy->pydata = self->pydata;
Py_INCREF(self->pydata);
for (i = 0; i < self->numlines; i++) {
Augie Fackler
lazymanifest: check more return values in filtercopy...
r27661 PyObject *arglist = NULL, *result = NULL;
arglist = Py_BuildValue("(s)", self->lines[i].start);
if (!arglist) {
return NULL;
}
result = PyObject_CallObject(matchfn, arglist);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 Py_DECREF(arglist);
/* if the callback raised an exception, just let it
* through and give up */
if (!result) {
free(copy->lines);
Py_DECREF(self->pydata);
return NULL;
}
if (PyObject_IsTrue(result)) {
assert(!(self->lines[i].from_malloc));
copy->lines[copy->numlines++] = self->lines[i];
}
Py_DECREF(result);
}
copy->livelines = copy->numlines;
return copy;
Martin von Zweigbergk
lazymanifest: drop SP before some labels...
r24975 nomem:
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyErr_NoMemory();
Py_XDECREF(copy);
return NULL;
}
static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
{
lazymanifest *other;
PyObject *pyclean = NULL;
bool listclean;
PyObject *emptyTup = NULL, *ret = NULL;
PyObject *es;
int sneedle = 0, oneedle = 0;
if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
return NULL;
}
listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 es = PyBytes_FromString("");
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 if (!es) {
goto nomem;
}
emptyTup = PyTuple_Pack(2, Py_None, es);
Py_DECREF(es);
if (!emptyTup) {
goto nomem;
}
ret = PyDict_New();
if (!ret) {
goto nomem;
}
while (sneedle != self->numlines || oneedle != other->numlines) {
line *left = self->lines + sneedle;
line *right = other->lines + oneedle;
int result;
PyObject *key;
PyObject *outer;
/* If we're looking at a deleted entry and it's not
* the end of the manifest, just skip it. */
if (left->deleted && sneedle < self->numlines) {
sneedle++;
continue;
}
if (right->deleted && oneedle < other->numlines) {
oneedle++;
continue;
}
/* if we're at the end of either manifest, then we
* know the remaining items are adds so we can skip
* the strcmp. */
if (sneedle == self->numlines) {
result = 1;
} else if (oneedle == other->numlines) {
result = -1;
} else {
result = linecmp(left, right);
}
key = result <= 0 ?
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 PyBytes_FromString(left->start) :
PyBytes_FromString(right->start);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 if (!key)
goto nomem;
if (result < 0) {
PyObject *l = hashflags(left);
if (!l) {
goto nomem;
}
outer = PyTuple_Pack(2, l, emptyTup);
Py_DECREF(l);
if (!outer) {
goto nomem;
}
PyDict_SetItem(ret, key, outer);
Py_DECREF(outer);
sneedle++;
} else if (result > 0) {
PyObject *r = hashflags(right);
if (!r) {
goto nomem;
}
outer = PyTuple_Pack(2, emptyTup, r);
Py_DECREF(r);
if (!outer) {
goto nomem;
}
PyDict_SetItem(ret, key, outer);
Py_DECREF(outer);
oneedle++;
} else {
/* file exists in both manifests */
if (left->len != right->len
|| memcmp(left->start, right->start, left->len)
|| left->hash_suffix != right->hash_suffix) {
PyObject *l = hashflags(left);
PyObject *r;
if (!l) {
goto nomem;
}
r = hashflags(right);
if (!r) {
Py_DECREF(l);
goto nomem;
}
outer = PyTuple_Pack(2, l, r);
Py_DECREF(l);
Py_DECREF(r);
if (!outer) {
goto nomem;
}
PyDict_SetItem(ret, key, outer);
Py_DECREF(outer);
} else if (listclean) {
PyDict_SetItem(ret, key, Py_None);
}
sneedle++;
oneedle++;
}
Py_DECREF(key);
}
Py_DECREF(emptyTup);
return ret;
Martin von Zweigbergk
lazymanifest: drop SP before some labels...
r24975 nomem:
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyErr_NoMemory();
Py_XDECREF(ret);
Py_XDECREF(emptyTup);
return NULL;
}
static PyMethodDef lazymanifest_methods[] = {
Martin von Zweigbergk
lazymanifest: add iterkeys() method...
r24295 {"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS,
"Iterate over file names in this lazymanifest."},
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 {"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS,
Javi Merino
lazymanifest: fix typo s/typles/tuples/
r29254 "Iterate over (path, nodeid, flags) tuples in this lazymanifest."},
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 {"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
"Make a copy of this lazymanifest."},
{"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
"Make a copy of this manifest filtered by matchfn."},
{"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
"Compare this lazymanifest to another one."},
{"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
"Encode this manifest to text."},
{NULL},
};
Gregory Szorc
manifest: drop Py_TPFLAGS_HAVE_SEQUENCE_IN from tp_flags in Python 3...
r30079 #ifdef IS_PY3K
#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN
#endif
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 static PyTypeObject lazymanifestType = {
Gregory Szorc
manifest: use PyVarObject_HEAD_INIT...
r30168 PyVarObject_HEAD_INIT(NULL, 0)
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 "parsers.lazymanifest", /* tp_name */
sizeof(lazymanifest), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)lazymanifest_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
&lazymanifest_seq_meths, /* tp_as_sequence */
&lazymanifest_mapping_methods, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Gregory Szorc
manifest: drop Py_TPFLAGS_HAVE_SEQUENCE_IN from tp_flags in Python 3...
r30079 LAZYMANIFEST_TPFLAGS, /* tp_flags */
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 "TODO(augie)", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 (getiterfunc)lazymanifest_getkeysiter, /* tp_iter */
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 0, /* tp_iternext */
lazymanifest_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)lazymanifest_init, /* tp_init */
0, /* tp_alloc */
};
void manifest_module_init(PyObject * mod)
{
lazymanifestType.tp_new = PyType_GenericNew;
if (PyType_Ready(&lazymanifestType) < 0)
return;
Py_INCREF(&lazymanifestType);
PyModule_AddObject(mod, "lazymanifest",
(PyObject *)&lazymanifestType);
}