##// END OF EJS Templates
win32mbcs: wrap underlying pycompat.bytestr to use checkwinfilename safely...
win32mbcs: wrap underlying pycompat.bytestr to use checkwinfilename safely win32mbcs wraps some functions, to prevent them from unintentionally treating backslash (0x5c), which is used as the second or later byte of multi bytes characters by problematic encodings, as a path component delimiter on Windows platform. This wrapping assumes that wrapped functions can safely accept unicode string arguments. Unfortunately, d1937bdcee8c broke this assumption by introducing pycompat.bytestr() into util.checkwinfilename() for py3 support. After that, wrapped checkwinfilename() always fails for non-ASCII filename at pycompat.bytestr() invocation. This patch wraps underlying pycompat.bytestr() function to use util.checkwinfilename() safely. To avoid similar regression in the future, another patch series will add smoke testing on default branch.

File last commit:

r30168:1a327889 default
r32244:176ed32d stable
Show More
manifest.c
939 lines | 24.5 KiB | text/x-c | CLexer
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 /*
* manifest.c - manifest type that does on-demand parsing.
*
* Copyright 2015, Google Inc.
*
* This software may be used and distributed according to the terms of
* the GNU General Public License, incorporated herein by reference.
*/
Drew Gottlieb
manifest: include Python.h before standard headers...
r24352 #include <Python.h>
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 #include <assert.h>
#include <string.h>
#include <stdlib.h>
Matt Mackall
manifest: use util.h to get Py_ssize_t
r24441 #include "util.h"
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 #define DEFAULT_LINES 100000
typedef struct {
char *start;
Py_ssize_t len; /* length of line including terminal newline */
char hash_suffix;
bool from_malloc;
bool deleted;
} line;
typedef struct {
PyObject_HEAD
PyObject *pydata;
line *lines;
int numlines; /* number of line entries */
int livelines; /* number of non-deleted lines */
int maxlines; /* allocated number of lines */
bool dirty;
} lazymanifest;
#define MANIFEST_OOM -1
#define MANIFEST_NOT_SORTED -2
#define MANIFEST_MALFORMED -3
/* defined in parsers.c */
PyObject *unhexlify(const char *str, int len);
/* get the length of the path for a line */
static size_t pathlen(line *l) {
return strlen(l->start);
}
/* get the node value of a single line */
static PyObject *nodeof(line *l) {
char *s = l->start;
ssize_t llen = pathlen(l);
PyObject *hash = unhexlify(s + llen + 1, 40);
if (!hash) {
return NULL;
}
if (l->hash_suffix != '\0') {
char newhash[21];
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 memcpy(newhash, PyBytes_AsString(hash), 20);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 Py_DECREF(hash);
newhash[20] = l->hash_suffix;
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 hash = PyBytes_FromStringAndSize(newhash, 21);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 }
return hash;
}
/* get the node hash and flags of a line as a tuple */
static PyObject *hashflags(line *l)
{
char *s = l->start;
size_t plen = pathlen(l);
PyObject *hash = nodeof(l);
/* 40 for hash, 1 for null byte, 1 for newline */
size_t hplen = plen + 42;
Py_ssize_t flen = l->len - hplen;
PyObject *flags;
PyObject *tup;
if (!hash)
return NULL;
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 if (!flags) {
Py_DECREF(hash);
return NULL;
}
tup = PyTuple_Pack(2, hash, flags);
Py_DECREF(flags);
Py_DECREF(hash);
return tup;
}
/* if we're about to run out of space in the line index, add more */
static bool realloc_if_full(lazymanifest *self)
{
if (self->numlines == self->maxlines) {
self->maxlines *= 2;
self->lines = realloc(self->lines, self->maxlines * sizeof(line));
}
Matt Harbison
manifest.c: ensure realloc_if_full() returns 1 or 0...
r24312 return !!self->lines;
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 }
/*
* Find the line boundaries in the manifest that 'data' points to and store
* information about each line in 'self'.
*/
static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
{
char *prev = NULL;
while (len > 0) {
line *l;
char *next = memchr(data, '\n', len);
if (!next) {
return MANIFEST_MALFORMED;
}
next++; /* advance past newline */
if (!realloc_if_full(self)) {
return MANIFEST_OOM; /* no memory */
}
if (prev && strcmp(prev, data) > -1) {
/* This data isn't sorted, so we have to abort. */
return MANIFEST_NOT_SORTED;
}
l = self->lines + ((self->numlines)++);
l->start = data;
l->len = next - data;
l->hash_suffix = '\0';
l->from_malloc = false;
l->deleted = false;
len = len - l->len;
prev = data;
data = next;
}
self->livelines = self->numlines;
return 0;
}
static int lazymanifest_init(lazymanifest *self, PyObject *args)
{
char *data;
Py_ssize_t len;
int err, ret;
PyObject *pydata;
if (!PyArg_ParseTuple(args, "S", &pydata)) {
return -1;
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 err = PyBytes_AsStringAndSize(pydata, &data, &len);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214
self->dirty = false;
if (err == -1)
return -1;
self->pydata = pydata;
Py_INCREF(self->pydata);
Py_BEGIN_ALLOW_THREADS
self->lines = malloc(DEFAULT_LINES * sizeof(line));
self->maxlines = DEFAULT_LINES;
self->numlines = 0;
if (!self->lines)
ret = MANIFEST_OOM;
else
ret = find_lines(self, data, len);
Py_END_ALLOW_THREADS
switch (ret) {
case 0:
break;
case MANIFEST_OOM:
PyErr_NoMemory();
break;
case MANIFEST_NOT_SORTED:
PyErr_Format(PyExc_ValueError,
"Manifest lines not in sorted order.");
break;
case MANIFEST_MALFORMED:
PyErr_Format(PyExc_ValueError,
"Manifest did not end in a newline.");
break;
default:
PyErr_Format(PyExc_ValueError,
"Unknown problem parsing manifest.");
}
return ret == 0 ? 0 : -1;
}
static void lazymanifest_dealloc(lazymanifest *self)
{
/* free any extra lines we had to allocate */
int i;
for (i = 0; i < self->numlines; i++) {
if (self->lines[i].from_malloc) {
free(self->lines[i].start);
}
}
if (self->lines) {
free(self->lines);
self->lines = NULL;
}
if (self->pydata) {
Py_DECREF(self->pydata);
self->pydata = NULL;
}
PyObject_Del(self);
}
/* iteration support */
typedef struct {
PyObject_HEAD lazymanifest *m;
Py_ssize_t pos;
} lmIter;
static void lmiter_dealloc(PyObject *o)
{
lmIter *self = (lmIter *)o;
Py_DECREF(self->m);
PyObject_Del(self);
}
Martin von Zweigbergk
lazymanifest: extract function for iterating to next line...
r24294 static line *lmiter_nextline(lmIter *self)
{
do {
self->pos++;
if (self->pos >= self->m->numlines) {
return NULL;
}
/* skip over deleted manifest entries */
} while (self->m->lines[self->pos].deleted);
return self->m->lines + self->pos;
}
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 static PyObject *lmiter_iterentriesnext(PyObject *o)
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 {
size_t pl;
line *l;
Py_ssize_t consumed;
Mike Hommey
lazymanifest: fix memory leak in lmiter_iterentriesnext() after 3d485727e45e
r24699 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
Martin von Zweigbergk
lazymanifest: extract function for iterating to next line...
r24294 l = lmiter_nextline((lmIter *)o);
if (!l) {
Martin von Zweigbergk
lazymanifest: avoid 'bail' label when used on success path...
r24974 goto done;
Martin von Zweigbergk
lazymanifest: extract function for iterating to next line...
r24294 }
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 pl = pathlen(l);
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 path = PyBytes_FromStringAndSize(l->start, pl);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 hash = nodeof(l);
consumed = pl + 41;
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 flags = PyBytes_FromStringAndSize(l->start + consumed,
Bryan O'Sullivan
manifest: fix formatting...
r27340 l->len - consumed - 1);
Martin von Zweigbergk
lazymanifest: fail if path or hash strings cannot be created...
r24293 if (!path || !hash || !flags) {
Martin von Zweigbergk
lazymanifest: avoid 'bail' label when used on success path...
r24974 goto done;
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 }
Mike Hommey
lazymanifest: fix memory leak in lmiter_iterentriesnext() after 3d485727e45e
r24699 ret = PyTuple_Pack(3, path, hash, flags);
Martin von Zweigbergk
lazymanifest: drop SP before some labels...
r24975 done:
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 Py_XDECREF(path);
Py_XDECREF(hash);
Py_XDECREF(flags);
Mike Hommey
lazymanifest: fix memory leak in lmiter_iterentriesnext() after 3d485727e45e
r24699 return ret;
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 }
Gregory Szorc
manifest: drop Py_TPFLAGS_HAVE_SEQUENCE_IN from tp_flags in Python 3...
r30079 #ifdef IS_PY3K
#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
| Py_TPFLAGS_HAVE_ITER
#endif
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 static PyTypeObject lazymanifestEntriesIterator = {
Gregory Szorc
manifest: use PyVarObject_HEAD_INIT...
r30168 PyVarObject_HEAD_INIT(NULL, 0)
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 "parsers.lazymanifest.entriesiterator", /*tp_name */
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 sizeof(lmIter), /*tp_basicsize */
0, /*tp_itemsize */
lmiter_dealloc, /*tp_dealloc */
0, /*tp_print */
0, /*tp_getattr */
0, /*tp_setattr */
0, /*tp_compare */
0, /*tp_repr */
0, /*tp_as_number */
0, /*tp_as_sequence */
0, /*tp_as_mapping */
0, /*tp_hash */
0, /*tp_call */
0, /*tp_str */
0, /*tp_getattro */
0, /*tp_setattro */
0, /*tp_as_buffer */
Gregory Szorc
manifest: drop Py_TPFLAGS_HAVE_SEQUENCE_IN from tp_flags in Python 3...
r30079 LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 "Iterator for 3-tuples in a lazymanifest.", /* tp_doc */
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter: __iter__() method */
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 lmiter_iterentriesnext, /* tp_iternext: next() method */
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 };
Martin von Zweigbergk
lazymanifest: add iterkeys() method...
r24295 static PyObject *lmiter_iterkeysnext(PyObject *o)
{
size_t pl;
line *l = lmiter_nextline((lmIter *)o);
if (!l) {
return NULL;
}
pl = pathlen(l);
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 return PyBytes_FromStringAndSize(l->start, pl);
Martin von Zweigbergk
lazymanifest: add iterkeys() method...
r24295 }
Gregory Szorc
manifest: drop Py_TPFLAGS_HAVE_SEQUENCE_IN from tp_flags in Python 3...
r30079 #ifdef IS_PY3K
#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
| Py_TPFLAGS_HAVE_ITER
#endif
Martin von Zweigbergk
lazymanifest: add iterkeys() method...
r24295 static PyTypeObject lazymanifestKeysIterator = {
Gregory Szorc
manifest: use PyVarObject_HEAD_INIT...
r30168 PyVarObject_HEAD_INIT(NULL, 0)
Martin von Zweigbergk
lazymanifest: add iterkeys() method...
r24295 "parsers.lazymanifest.keysiterator", /*tp_name */
sizeof(lmIter), /*tp_basicsize */
0, /*tp_itemsize */
lmiter_dealloc, /*tp_dealloc */
0, /*tp_print */
0, /*tp_getattr */
0, /*tp_setattr */
0, /*tp_compare */
0, /*tp_repr */
0, /*tp_as_number */
0, /*tp_as_sequence */
0, /*tp_as_mapping */
0, /*tp_hash */
0, /*tp_call */
0, /*tp_str */
0, /*tp_getattro */
0, /*tp_setattro */
0, /*tp_as_buffer */
Gregory Szorc
manifest: drop Py_TPFLAGS_HAVE_SEQUENCE_IN from tp_flags in Python 3...
r30079 LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */
Martin von Zweigbergk
lazymanifest: add iterkeys() method...
r24295 "Keys iterator for a lazymanifest.", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter: __iter__() method */
lmiter_iterkeysnext, /* tp_iternext: next() method */
};
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 static lazymanifest *lazymanifest_copy(lazymanifest *self);
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 static PyObject *lazymanifest_getentriesiter(lazymanifest *self)
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 {
lmIter *i = NULL;
lazymanifest *t = lazymanifest_copy(self);
if (!t) {
PyErr_NoMemory();
return NULL;
}
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 i = PyObject_New(lmIter, &lazymanifestEntriesIterator);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 if (i) {
i->m = t;
i->pos = -1;
} else {
Py_DECREF(t);
PyErr_NoMemory();
}
return (PyObject *)i;
}
Martin von Zweigbergk
lazymanifest: add iterkeys() method...
r24295 static PyObject *lazymanifest_getkeysiter(lazymanifest *self)
{
lmIter *i = NULL;
lazymanifest *t = lazymanifest_copy(self);
if (!t) {
PyErr_NoMemory();
return NULL;
}
i = PyObject_New(lmIter, &lazymanifestKeysIterator);
if (i) {
i->m = t;
i->pos = -1;
} else {
Py_DECREF(t);
PyErr_NoMemory();
}
return (PyObject *)i;
}
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 /* __getitem__ and __setitem__ support */
static Py_ssize_t lazymanifest_size(lazymanifest *self)
{
return self->livelines;
}
static int linecmp(const void *left, const void *right)
{
return strcmp(((const line *)left)->start,
((const line *)right)->start);
}
static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
{
line needle;
line *hit;
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 if (!PyBytes_Check(key)) {
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyErr_Format(PyExc_TypeError,
"getitem: manifest keys must be a string.");
return NULL;
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 needle.start = PyBytes_AsString(key);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
&linecmp);
if (!hit || hit->deleted) {
PyErr_Format(PyExc_KeyError, "No such manifest entry.");
return NULL;
}
return hashflags(hit);
}
static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
{
line needle;
line *hit;
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 if (!PyBytes_Check(key)) {
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyErr_Format(PyExc_TypeError,
"delitem: manifest keys must be a string.");
return -1;
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 needle.start = PyBytes_AsString(key);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
&linecmp);
if (!hit || hit->deleted) {
PyErr_Format(PyExc_KeyError,
"Tried to delete nonexistent manifest entry.");
return -1;
}
self->dirty = true;
hit->deleted = true;
self->livelines--;
return 0;
}
Augie Fackler
lazymanifest: use a binary search to do an insertion...
r24228 /* Do a binary search for the insertion point for new, creating the
* new entry if needed. */
static int internalsetitem(lazymanifest *self, line *new) {
int start = 0, end = self->numlines;
while (start < end) {
int pos = start + (end - start) / 2;
int c = linecmp(new, self->lines + pos);
if (c < 0)
end = pos;
else if (c > 0)
start = pos + 1;
else {
if (self->lines[pos].deleted)
self->livelines++;
Augie Fackler
lazymanifest: prevent leak when updating an entry more than once...
r24710 if (self->lines[pos].from_malloc)
free(self->lines[pos].start);
Augie Fackler
lazymanifest: use a binary search to do an insertion...
r24228 start = pos;
goto finish;
}
}
/* being here means we need to do an insert */
if (!realloc_if_full(self)) {
PyErr_NoMemory();
return -1;
}
memmove(self->lines + start + 1, self->lines + start,
(self->numlines - start) * sizeof(line));
self->numlines++;
self->livelines++;
finish:
self->lines[start] = *new;
self->dirty = true;
return 0;
}
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 static int lazymanifest_setitem(
lazymanifest *self, PyObject *key, PyObject *value)
{
char *path;
Py_ssize_t plen;
PyObject *pyhash;
Py_ssize_t hlen;
char *hash;
PyObject *pyflags;
char *flags;
Py_ssize_t flen;
size_t dlen;
char *dest;
int i;
line new;
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 if (!PyBytes_Check(key)) {
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyErr_Format(PyExc_TypeError,
"setitem: manifest keys must be a string.");
return -1;
}
if (!value) {
return lazymanifest_delitem(self, key);
}
if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
PyErr_Format(PyExc_TypeError,
"Manifest values must be a tuple of (node, flags).");
return -1;
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) {
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 return -1;
}
pyhash = PyTuple_GetItem(value, 0);
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 if (!PyBytes_Check(pyhash)) {
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyErr_Format(PyExc_TypeError,
"node must be a 20-byte string");
return -1;
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 hlen = PyBytes_Size(pyhash);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 /* Some parts of the codebase try and set 21 or 22
* byte "hash" values in order to perturb things for
* status. We have to preserve at least the 21st
* byte. Sigh. If there's a 22nd byte, we drop it on
* the floor, which works fine.
*/
if (hlen != 20 && hlen != 21 && hlen != 22) {
PyErr_Format(PyExc_TypeError,
"node must be a 20-byte string");
return -1;
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 hash = PyBytes_AsString(pyhash);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214
pyflags = PyTuple_GetItem(value, 1);
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyErr_Format(PyExc_TypeError,
"flags must a 0 or 1 byte string");
return -1;
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 return -1;
}
/* one null byte and one newline */
dlen = plen + 41 + flen + 1;
dest = malloc(dlen);
if (!dest) {
PyErr_NoMemory();
return -1;
}
memcpy(dest, path, plen + 1);
for (i = 0; i < 20; i++) {
Martin von Zweigbergk
lazymanifest: don't depend on printf's 'hh' format to work...
r24286 /* Cast to unsigned, so it will not get sign-extended when promoted
* to int (as is done when passing to a variadic function)
*/
sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 }
memcpy(dest + plen + 41, flags, flen);
dest[plen + 41 + flen] = '\n';
new.start = dest;
new.len = dlen;
new.hash_suffix = '\0';
if (hlen > 20) {
new.hash_suffix = hash[20];
}
new.from_malloc = true; /* is `start` a pointer we allocated? */
new.deleted = false; /* is this entry deleted? */
Augie Fackler
lazymanifest: use a binary search to do an insertion...
r24228 if (internalsetitem(self, &new)) {
return -1;
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 }
return 0;
}
static PyMappingMethods lazymanifest_mapping_methods = {
(lenfunc)lazymanifest_size, /* mp_length */
(binaryfunc)lazymanifest_getitem, /* mp_subscript */
(objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */
};
Augie Fackler
lazymanifest: add missing closing parenthesis in comment
r27608 /* sequence methods (important or __contains__ builds an iterator) */
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214
static int lazymanifest_contains(lazymanifest *self, PyObject *key)
{
line needle;
line *hit;
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 if (!PyBytes_Check(key)) {
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 /* Our keys are always strings, so if the contains
* check is for a non-string, just return false. */
return 0;
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 needle.start = PyBytes_AsString(key);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
&linecmp);
if (!hit || hit->deleted) {
return 0;
}
return 1;
}
static PySequenceMethods lazymanifest_seq_meths = {
(lenfunc)lazymanifest_size, /* sq_length */
0, /* sq_concat */
0, /* sq_repeat */
0, /* sq_item */
0, /* sq_slice */
0, /* sq_ass_item */
0, /* sq_ass_slice */
(objobjproc)lazymanifest_contains, /* sq_contains */
0, /* sq_inplace_concat */
0, /* sq_inplace_repeat */
};
/* Other methods (copy, diff, etc) */
static PyTypeObject lazymanifestType;
/* If the manifest has changes, build the new manifest text and reindex it. */
static int compact(lazymanifest *self) {
int i;
ssize_t need = 0;
char *data;
line *src, *dst;
PyObject *pydata;
if (!self->dirty)
return 0;
for (i = 0; i < self->numlines; i++) {
if (!self->lines[i].deleted) {
need += self->lines[i].len;
}
}
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 pydata = PyBytes_FromStringAndSize(NULL, need);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 if (!pydata)
return -1;
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 data = PyBytes_AsString(pydata);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 if (!data) {
return -1;
}
src = self->lines;
dst = self->lines;
for (i = 0; i < self->numlines; i++, src++) {
char *tofree = NULL;
if (src->from_malloc) {
tofree = src->start;
}
if (!src->deleted) {
memcpy(data, src->start, src->len);
*dst = *src;
dst->start = data;
dst->from_malloc = false;
data += dst->len;
dst++;
}
free(tofree);
}
Py_DECREF(self->pydata);
self->pydata = pydata;
self->numlines = self->livelines;
self->dirty = false;
return 0;
}
static PyObject *lazymanifest_text(lazymanifest *self)
{
if (compact(self) != 0) {
PyErr_NoMemory();
return NULL;
}
Py_INCREF(self->pydata);
return self->pydata;
}
static lazymanifest *lazymanifest_copy(lazymanifest *self)
{
lazymanifest *copy = NULL;
if (compact(self) != 0) {
goto nomem;
}
copy = PyObject_New(lazymanifest, &lazymanifestType);
if (!copy) {
goto nomem;
}
copy->numlines = self->numlines;
copy->livelines = self->livelines;
copy->dirty = false;
copy->lines = malloc(self->maxlines *sizeof(line));
if (!copy->lines) {
goto nomem;
}
memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
copy->maxlines = self->maxlines;
copy->pydata = self->pydata;
Py_INCREF(copy->pydata);
return copy;
Martin von Zweigbergk
lazymanifest: drop SP before some labels...
r24975 nomem:
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyErr_NoMemory();
Py_XDECREF(copy);
return NULL;
}
static lazymanifest *lazymanifest_filtercopy(
lazymanifest *self, PyObject *matchfn)
{
lazymanifest *copy = NULL;
int i;
if (!PyCallable_Check(matchfn)) {
PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
return NULL;
}
/* compact ourselves first to avoid double-frees later when we
* compact tmp so that it doesn't have random pointers to our
* underlying from_malloc-data (self->pydata is safe) */
if (compact(self) != 0) {
goto nomem;
}
copy = PyObject_New(lazymanifest, &lazymanifestType);
Augie Fackler
lazymanifest: check error return in filter
r27609 if (!copy) {
goto nomem;
}
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 copy->dirty = true;
copy->lines = malloc(self->maxlines * sizeof(line));
if (!copy->lines) {
goto nomem;
}
copy->maxlines = self->maxlines;
copy->numlines = 0;
copy->pydata = self->pydata;
Py_INCREF(self->pydata);
for (i = 0; i < self->numlines; i++) {
Augie Fackler
lazymanifest: check more return values in filtercopy...
r27661 PyObject *arglist = NULL, *result = NULL;
arglist = Py_BuildValue("(s)", self->lines[i].start);
if (!arglist) {
return NULL;
}
result = PyObject_CallObject(matchfn, arglist);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 Py_DECREF(arglist);
/* if the callback raised an exception, just let it
* through and give up */
if (!result) {
free(copy->lines);
Py_DECREF(self->pydata);
return NULL;
}
if (PyObject_IsTrue(result)) {
assert(!(self->lines[i].from_malloc));
copy->lines[copy->numlines++] = self->lines[i];
}
Py_DECREF(result);
}
copy->livelines = copy->numlines;
return copy;
Martin von Zweigbergk
lazymanifest: drop SP before some labels...
r24975 nomem:
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyErr_NoMemory();
Py_XDECREF(copy);
return NULL;
}
static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
{
lazymanifest *other;
PyObject *pyclean = NULL;
bool listclean;
PyObject *emptyTup = NULL, *ret = NULL;
PyObject *es;
int sneedle = 0, oneedle = 0;
if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
return NULL;
}
listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 es = PyBytes_FromString("");
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 if (!es) {
goto nomem;
}
emptyTup = PyTuple_Pack(2, Py_None, es);
Py_DECREF(es);
if (!emptyTup) {
goto nomem;
}
ret = PyDict_New();
if (!ret) {
goto nomem;
}
while (sneedle != self->numlines || oneedle != other->numlines) {
line *left = self->lines + sneedle;
line *right = other->lines + oneedle;
int result;
PyObject *key;
PyObject *outer;
/* If we're looking at a deleted entry and it's not
* the end of the manifest, just skip it. */
if (left->deleted && sneedle < self->numlines) {
sneedle++;
continue;
}
if (right->deleted && oneedle < other->numlines) {
oneedle++;
continue;
}
/* if we're at the end of either manifest, then we
* know the remaining items are adds so we can skip
* the strcmp. */
if (sneedle == self->numlines) {
result = 1;
} else if (oneedle == other->numlines) {
result = -1;
} else {
result = linecmp(left, right);
}
key = result <= 0 ?
Gregory Szorc
manifest: convert PyString* to PyBytes*...
r30097 PyBytes_FromString(left->start) :
PyBytes_FromString(right->start);
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 if (!key)
goto nomem;
if (result < 0) {
PyObject *l = hashflags(left);
if (!l) {
goto nomem;
}
outer = PyTuple_Pack(2, l, emptyTup);
Py_DECREF(l);
if (!outer) {
goto nomem;
}
PyDict_SetItem(ret, key, outer);
Py_DECREF(outer);
sneedle++;
} else if (result > 0) {
PyObject *r = hashflags(right);
if (!r) {
goto nomem;
}
outer = PyTuple_Pack(2, emptyTup, r);
Py_DECREF(r);
if (!outer) {
goto nomem;
}
PyDict_SetItem(ret, key, outer);
Py_DECREF(outer);
oneedle++;
} else {
/* file exists in both manifests */
if (left->len != right->len
|| memcmp(left->start, right->start, left->len)
|| left->hash_suffix != right->hash_suffix) {
PyObject *l = hashflags(left);
PyObject *r;
if (!l) {
goto nomem;
}
r = hashflags(right);
if (!r) {
Py_DECREF(l);
goto nomem;
}
outer = PyTuple_Pack(2, l, r);
Py_DECREF(l);
Py_DECREF(r);
if (!outer) {
goto nomem;
}
PyDict_SetItem(ret, key, outer);
Py_DECREF(outer);
} else if (listclean) {
PyDict_SetItem(ret, key, Py_None);
}
sneedle++;
oneedle++;
}
Py_DECREF(key);
}
Py_DECREF(emptyTup);
return ret;
Martin von Zweigbergk
lazymanifest: drop SP before some labels...
r24975 nomem:
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 PyErr_NoMemory();
Py_XDECREF(ret);
Py_XDECREF(emptyTup);
return NULL;
}
static PyMethodDef lazymanifest_methods[] = {
Martin von Zweigbergk
lazymanifest: add iterkeys() method...
r24295 {"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS,
"Iterate over file names in this lazymanifest."},
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 {"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS,
Javi Merino
lazymanifest: fix typo s/typles/tuples/
r29254 "Iterate over (path, nodeid, flags) tuples in this lazymanifest."},
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 {"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
"Make a copy of this lazymanifest."},
{"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
"Make a copy of this manifest filtered by matchfn."},
{"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
"Compare this lazymanifest to another one."},
{"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
"Encode this manifest to text."},
{NULL},
};
Gregory Szorc
manifest: drop Py_TPFLAGS_HAVE_SEQUENCE_IN from tp_flags in Python 3...
r30079 #ifdef IS_PY3K
#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN
#endif
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 static PyTypeObject lazymanifestType = {
Gregory Szorc
manifest: use PyVarObject_HEAD_INIT...
r30168 PyVarObject_HEAD_INIT(NULL, 0)
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 "parsers.lazymanifest", /* tp_name */
sizeof(lazymanifest), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)lazymanifest_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
&lazymanifest_seq_meths, /* tp_as_sequence */
&lazymanifest_mapping_methods, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Gregory Szorc
manifest: drop Py_TPFLAGS_HAVE_SEQUENCE_IN from tp_flags in Python 3...
r30079 LAZYMANIFEST_TPFLAGS, /* tp_flags */
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 "TODO(augie)", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
Martin von Zweigbergk
lazymanifest: make __iter__ generate filenames, not 3-tuples...
r24298 (getiterfunc)lazymanifest_getkeysiter, /* tp_iter */
Augie Fackler
manifest.c: new extension code to lazily parse manifests...
r24214 0, /* tp_iternext */
lazymanifest_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)lazymanifest_init, /* tp_init */
0, /* tp_alloc */
};
void manifest_module_init(PyObject * mod)
{
lazymanifestType.tp_new = PyType_GenericNew;
if (PyType_Ready(&lazymanifestType) < 0)
return;
Py_INCREF(&lazymanifestType);
PyModule_AddObject(mod, "lazymanifest",
(PyObject *)&lazymanifestType);
}