##// END OF EJS Templates
dirstate-map: factor out the change to _dirs and _alldirs on dropping...
dirstate-map: factor out the change to _dirs and _alldirs on dropping This logic is complicated enough to deserves its own function. So it now does. This will make it easier to reuse that logic in later changeset. Differential Revision: https://phab.mercurial-scm.org/D11130

File last commit:

r47817:12450fbe default
r48488:b37ab6b5 default
Show More
manifest.c
1013 lines | 26.3 KiB | text/x-c | CLexer
Yuya Nishihara
parsers: switch to policy importer...
r32372 /*
* manifest.c - manifest type that does on-demand parsing.
*
* Copyright 2015, Google Inc.
*
* This software may be used and distributed according to the terms of
* the GNU General Public License, incorporated herein by reference.
*/
#include <Python.h>
#include <assert.h>
Gregory Szorc
cext: reorder #include...
r34439 #include <stdlib.h>
Yuya Nishihara
parsers: switch to policy importer...
r32372 #include <string.h>
Yuya Nishihara
cext: factor out header for charencode.c...
r33753 #include "charencode.h"
Yuya Nishihara
parsers: switch to policy importer...
r32372 #include "util.h"
#define DEFAULT_LINES 100000
typedef struct {
char *start;
Py_ssize_t len; /* length of line including terminal newline */
char hash_suffix;
bool from_malloc;
bool deleted;
} line;
typedef struct {
PyObject_HEAD
PyObject *pydata;
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 Py_ssize_t nodelen;
Yuya Nishihara
parsers: switch to policy importer...
r32372 line *lines;
int numlines; /* number of line entries */
int livelines; /* number of non-deleted lines */
int maxlines; /* allocated number of lines */
bool dirty;
} lazymanifest;
#define MANIFEST_OOM -1
#define MANIFEST_NOT_SORTED -2
#define MANIFEST_MALFORMED -3
Augie Fackler
manifest: make sure there's a filename before bothering to look for newline...
r40635 #define MANIFEST_BOGUS_FILENAME -4
Augie Fackler
manifest: also reject obviously-too-short lines when parsing lines...
r40636 #define MANIFEST_TOO_SHORT_LINE -5
Yuya Nishihara
parsers: switch to policy importer...
r32372
/* get the length of the path for a line */
Kyle Lippincott
cext: fix compiler warning about sign changing...
r44559 static Py_ssize_t pathlen(line *l)
Gregory Szorc
cext: wrap before brace for functions...
r34441 {
Yuya Nishihara
manifest: rewrite pathlen() to not cross entry boundary...
r39975 const char *end = memchr(l->start, '\0', l->len);
Kyle Lippincott
cext: fix compiler warning about sign changing...
r44559 return (end) ? (Py_ssize_t)(end - l->start) : l->len;
Yuya Nishihara
parsers: switch to policy importer...
r32372 }
/* get the node value of a single line */
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 static PyObject *nodeof(Py_ssize_t nodelen, line *l, char *flag)
Gregory Szorc
cext: wrap before brace for functions...
r34441 {
Yuya Nishihara
parsers: switch to policy importer...
r32372 char *s = l->start;
Kyle Lippincott
cext: fix compiler warning about sign changing...
r44559 Py_ssize_t llen = pathlen(l);
Augie Fackler
manifest: start removing 40-byte hash restrictions from C code...
r45192 Py_ssize_t hlen = l->len - llen - 2;
Yuya Nishihara
manifest: fix out-of-bounds read of corrupted manifest entry...
r39976 PyObject *hash;
if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */
PyErr_SetString(PyExc_ValueError, "manifest line too short");
return NULL;
}
Joerg Sonnenberger
manifest: tigher manifest parsing and flag use...
r45678 /* Detect flags after the hash first. */
switch (s[llen + hlen]) {
case 'l':
case 't':
case 'x':
*flag = s[llen + hlen];
--hlen;
break;
default:
*flag = '\0';
break;
}
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 if (hlen != 2 * nodelen) {
Augie Fackler
manifest: start removing 40-byte hash restrictions from C code...
r45192 PyErr_SetString(PyExc_ValueError, "invalid node length in manifest");
return NULL;
}
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 hash = unhexlify(s + llen + 1, nodelen * 2);
Yuya Nishihara
parsers: switch to policy importer...
r32372 if (!hash) {
return NULL;
}
if (l->hash_suffix != '\0') {
Augie Fackler
manifest: start removing 40-byte hash restrictions from C code...
r45192 char newhash[33];
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 memcpy(newhash, PyBytes_AsString(hash), nodelen);
Yuya Nishihara
parsers: switch to policy importer...
r32372 Py_DECREF(hash);
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 newhash[nodelen] = l->hash_suffix;
hash = PyBytes_FromStringAndSize(newhash, nodelen + 1);
Yuya Nishihara
parsers: switch to policy importer...
r32372 }
return hash;
}
/* get the node hash and flags of a line as a tuple */
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 static PyObject *hashflags(Py_ssize_t nodelen, line *l)
Yuya Nishihara
parsers: switch to policy importer...
r32372 {
Joerg Sonnenberger
manifest: tigher manifest parsing and flag use...
r45678 char flag;
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 PyObject *hash = nodeof(nodelen, l, &flag);
Yuya Nishihara
parsers: switch to policy importer...
r32372 PyObject *flags;
PyObject *tup;
if (!hash)
return NULL;
Joerg Sonnenberger
manifest: tigher manifest parsing and flag use...
r45678 flags = PyBytes_FromStringAndSize(&flag, flag ? 1 : 0);
Yuya Nishihara
parsers: switch to policy importer...
r32372 if (!flags) {
Py_DECREF(hash);
return NULL;
}
tup = PyTuple_Pack(2, hash, flags);
Py_DECREF(flags);
Py_DECREF(hash);
return tup;
}
/* if we're about to run out of space in the line index, add more */
static bool realloc_if_full(lazymanifest *self)
{
if (self->numlines == self->maxlines) {
self->maxlines *= 2;
self->lines = realloc(self->lines, self->maxlines * sizeof(line));
}
return !!self->lines;
}
/*
* Find the line boundaries in the manifest that 'data' points to and store
* information about each line in 'self'.
*/
static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
{
char *prev = NULL;
while (len > 0) {
line *l;
Augie Fackler
manifest: make sure there's a filename before bothering to look for newline...
r40635 char *next;
if (*data == '\0') {
/* It's implausible there's no filename, don't
* even bother looking for the newline. */
return MANIFEST_BOGUS_FILENAME;
}
next = memchr(data, '\n', len);
Yuya Nishihara
parsers: switch to policy importer...
r32372 if (!next) {
return MANIFEST_MALFORMED;
}
Augie Fackler
manifest: reject lines shorter than 42 bytes, not 22...
r40801 if ((next - data) < 42) {
/* We should have at least 42 bytes in a line:
Augie Fackler
manifest: also reject obviously-too-short lines when parsing lines...
r40636 1 byte filename
1 NUL
Augie Fackler
manifest: reject lines shorter than 42 bytes, not 22...
r40801 40 bytes of hash
Augie Fackler
manifest: also reject obviously-too-short lines when parsing lines...
r40636 so we can give up here.
*/
return MANIFEST_TOO_SHORT_LINE;
}
Yuya Nishihara
parsers: switch to policy importer...
r32372 next++; /* advance past newline */
if (prev && strcmp(prev, data) > -1) {
/* This data isn't sorted, so we have to abort. */
return MANIFEST_NOT_SORTED;
}
Augie Fackler
manifest: perform cheap checks before potentially allocating memory...
r40637 if (!realloc_if_full(self)) {
return MANIFEST_OOM; /* no memory */
}
Yuya Nishihara
parsers: switch to policy importer...
r32372 l = self->lines + ((self->numlines)++);
l->start = data;
l->len = next - data;
l->hash_suffix = '\0';
l->from_malloc = false;
l->deleted = false;
len = len - l->len;
prev = data;
data = next;
}
self->livelines = self->numlines;
return 0;
}
Yuya Nishihara
manifest: fix possible SEGV caused by uninitialized lazymanifest fields...
r38346 static void lazymanifest_init_early(lazymanifest *self)
{
self->pydata = NULL;
self->lines = NULL;
self->numlines = 0;
self->maxlines = 0;
}
Yuya Nishihara
parsers: switch to policy importer...
r32372 static int lazymanifest_init(lazymanifest *self, PyObject *args)
{
char *data;
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 Py_ssize_t nodelen, len;
Yuya Nishihara
parsers: switch to policy importer...
r32372 int err, ret;
PyObject *pydata;
Yuya Nishihara
manifest: fix possible SEGV caused by uninitialized lazymanifest fields...
r38346
lazymanifest_init_early(self);
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 if (!PyArg_ParseTuple(args, "nS", &nodelen, &pydata)) {
Yuya Nishihara
parsers: switch to policy importer...
r32372 return -1;
}
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 if (nodelen != 20 && nodelen != 32) {
/* See fixed buffer in nodeof */
PyErr_Format(PyExc_ValueError, "Unsupported node length");
return -1;
}
self->nodelen = nodelen;
self->dirty = false;
Yuya Nishihara
parsers: switch to policy importer...
r32372
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 err = PyBytes_AsStringAndSize(pydata, &data, &len);
Yuya Nishihara
parsers: switch to policy importer...
r32372 if (err == -1)
return -1;
self->pydata = pydata;
Py_INCREF(self->pydata);
Py_BEGIN_ALLOW_THREADS
self->lines = malloc(DEFAULT_LINES * sizeof(line));
self->maxlines = DEFAULT_LINES;
self->numlines = 0;
if (!self->lines)
ret = MANIFEST_OOM;
else
ret = find_lines(self, data, len);
Py_END_ALLOW_THREADS
switch (ret) {
case 0:
break;
case MANIFEST_OOM:
PyErr_NoMemory();
break;
case MANIFEST_NOT_SORTED:
PyErr_Format(PyExc_ValueError,
"Manifest lines not in sorted order.");
break;
case MANIFEST_MALFORMED:
PyErr_Format(PyExc_ValueError,
"Manifest did not end in a newline.");
break;
Augie Fackler
manifest: make sure there's a filename before bothering to look for newline...
r40635 case MANIFEST_BOGUS_FILENAME:
PyErr_Format(
PyExc_ValueError,
"Manifest had an entry with a zero-length filename.");
break;
Augie Fackler
manifest: also reject obviously-too-short lines when parsing lines...
r40636 case MANIFEST_TOO_SHORT_LINE:
PyErr_Format(
PyExc_ValueError,
"Manifest had implausibly-short line.");
break;
Yuya Nishihara
parsers: switch to policy importer...
r32372 default:
PyErr_Format(PyExc_ValueError,
"Unknown problem parsing manifest.");
}
return ret == 0 ? 0 : -1;
}
static void lazymanifest_dealloc(lazymanifest *self)
{
/* free any extra lines we had to allocate */
int i;
Josef 'Jeff' Sipek
lazymanifest: don't crash when out of memory (issue5916)...
r38321 for (i = 0; self->lines && (i < self->numlines); i++) {
Yuya Nishihara
parsers: switch to policy importer...
r32372 if (self->lines[i].from_malloc) {
free(self->lines[i].start);
}
}
Josef 'Jeff' Sipek
cext: stop worrying and love the free(NULL)...
r38320 free(self->lines);
self->lines = NULL;
Yuya Nishihara
parsers: switch to policy importer...
r32372 if (self->pydata) {
Py_DECREF(self->pydata);
self->pydata = NULL;
}
PyObject_Del(self);
}
/* iteration support */
typedef struct {
PyObject_HEAD lazymanifest *m;
Py_ssize_t pos;
} lmIter;
static void lmiter_dealloc(PyObject *o)
{
lmIter *self = (lmIter *)o;
Py_DECREF(self->m);
PyObject_Del(self);
}
static line *lmiter_nextline(lmIter *self)
{
do {
self->pos++;
if (self->pos >= self->m->numlines) {
return NULL;
}
/* skip over deleted manifest entries */
} while (self->m->lines[self->pos].deleted);
return self->m->lines + self->pos;
}
static PyObject *lmiter_iterentriesnext(PyObject *o)
{
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 lmIter *self = (lmIter *)o;
Kyle Lippincott
cext: fix compiler warning about sign changing...
r44559 Py_ssize_t pl;
Yuya Nishihara
parsers: switch to policy importer...
r32372 line *l;
Joerg Sonnenberger
manifest: tigher manifest parsing and flag use...
r45678 char flag;
Yuya Nishihara
parsers: switch to policy importer...
r32372 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 l = lmiter_nextline(self);
Yuya Nishihara
parsers: switch to policy importer...
r32372 if (!l) {
goto done;
}
pl = pathlen(l);
path = PyBytes_FromStringAndSize(l->start, pl);
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 hash = nodeof(self->m->nodelen, l, &flag);
Yuya Nishihara
manifest: fix out-of-bounds read of corrupted manifest entry...
r39976 if (!path || !hash) {
goto done;
}
Joerg Sonnenberger
manifest: tigher manifest parsing and flag use...
r45678 flags = PyBytes_FromStringAndSize(&flag, flag ? 1 : 0);
Yuya Nishihara
manifest: fix out-of-bounds read of corrupted manifest entry...
r39976 if (!flags) {
Yuya Nishihara
parsers: switch to policy importer...
r32372 goto done;
}
ret = PyTuple_Pack(3, path, hash, flags);
done:
Py_XDECREF(path);
Py_XDECREF(hash);
Py_XDECREF(flags);
return ret;
}
#ifdef IS_PY3K
#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
| Py_TPFLAGS_HAVE_ITER
#endif
static PyTypeObject lazymanifestEntriesIterator = {
Augie Fackler
cext: add /* header */ comment to all PyVarObject_HEAD_INIT() calls...
r34862 PyVarObject_HEAD_INIT(NULL, 0) /* header */
Yuya Nishihara
parsers: switch to policy importer...
r32372 "parsers.lazymanifest.entriesiterator", /*tp_name */
sizeof(lmIter), /*tp_basicsize */
0, /*tp_itemsize */
lmiter_dealloc, /*tp_dealloc */
0, /*tp_print */
0, /*tp_getattr */
0, /*tp_setattr */
0, /*tp_compare */
0, /*tp_repr */
0, /*tp_as_number */
0, /*tp_as_sequence */
0, /*tp_as_mapping */
0, /*tp_hash */
0, /*tp_call */
0, /*tp_str */
0, /*tp_getattro */
0, /*tp_setattro */
0, /*tp_as_buffer */
LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */
"Iterator for 3-tuples in a lazymanifest.", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter: __iter__() method */
lmiter_iterentriesnext, /* tp_iternext: next() method */
};
static PyObject *lmiter_iterkeysnext(PyObject *o)
{
Kyle Lippincott
cext: fix compiler warning about sign changing...
r44559 Py_ssize_t pl;
Yuya Nishihara
parsers: switch to policy importer...
r32372 line *l = lmiter_nextline((lmIter *)o);
if (!l) {
return NULL;
}
pl = pathlen(l);
return PyBytes_FromStringAndSize(l->start, pl);
}
#ifdef IS_PY3K
#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
| Py_TPFLAGS_HAVE_ITER
#endif
static PyTypeObject lazymanifestKeysIterator = {
Augie Fackler
cext: add /* header */ comment to all PyVarObject_HEAD_INIT() calls...
r34862 PyVarObject_HEAD_INIT(NULL, 0) /* header */
Yuya Nishihara
parsers: switch to policy importer...
r32372 "parsers.lazymanifest.keysiterator", /*tp_name */
sizeof(lmIter), /*tp_basicsize */
0, /*tp_itemsize */
lmiter_dealloc, /*tp_dealloc */
0, /*tp_print */
0, /*tp_getattr */
0, /*tp_setattr */
0, /*tp_compare */
0, /*tp_repr */
0, /*tp_as_number */
0, /*tp_as_sequence */
0, /*tp_as_mapping */
0, /*tp_hash */
0, /*tp_call */
0, /*tp_str */
0, /*tp_getattro */
0, /*tp_setattro */
0, /*tp_as_buffer */
LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */
"Keys iterator for a lazymanifest.", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter: __iter__() method */
lmiter_iterkeysnext, /* tp_iternext: next() method */
};
static lazymanifest *lazymanifest_copy(lazymanifest *self);
static PyObject *lazymanifest_getentriesiter(lazymanifest *self)
{
lmIter *i = NULL;
lazymanifest *t = lazymanifest_copy(self);
if (!t) {
PyErr_NoMemory();
return NULL;
}
i = PyObject_New(lmIter, &lazymanifestEntriesIterator);
if (i) {
i->m = t;
i->pos = -1;
} else {
Py_DECREF(t);
PyErr_NoMemory();
}
return (PyObject *)i;
}
static PyObject *lazymanifest_getkeysiter(lazymanifest *self)
{
lmIter *i = NULL;
lazymanifest *t = lazymanifest_copy(self);
if (!t) {
PyErr_NoMemory();
return NULL;
}
i = PyObject_New(lmIter, &lazymanifestKeysIterator);
if (i) {
i->m = t;
i->pos = -1;
} else {
Py_DECREF(t);
PyErr_NoMemory();
}
return (PyObject *)i;
}
/* __getitem__ and __setitem__ support */
static Py_ssize_t lazymanifest_size(lazymanifest *self)
{
return self->livelines;
}
static int linecmp(const void *left, const void *right)
{
return strcmp(((const line *)left)->start,
((const line *)right)->start);
}
static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
{
line needle;
line *hit;
if (!PyBytes_Check(key)) {
PyErr_Format(PyExc_TypeError,
"getitem: manifest keys must be a string.");
return NULL;
}
needle.start = PyBytes_AsString(key);
hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
&linecmp);
if (!hit || hit->deleted) {
PyErr_Format(PyExc_KeyError, "No such manifest entry.");
return NULL;
}
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 return hashflags(self->nodelen, hit);
Yuya Nishihara
parsers: switch to policy importer...
r32372 }
static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
{
line needle;
line *hit;
if (!PyBytes_Check(key)) {
PyErr_Format(PyExc_TypeError,
"delitem: manifest keys must be a string.");
return -1;
}
needle.start = PyBytes_AsString(key);
hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
&linecmp);
if (!hit || hit->deleted) {
PyErr_Format(PyExc_KeyError,
"Tried to delete nonexistent manifest entry.");
return -1;
}
self->dirty = true;
hit->deleted = true;
self->livelines--;
return 0;
}
/* Do a binary search for the insertion point for new, creating the
* new entry if needed. */
Gregory Szorc
cext: wrap before brace for functions...
r34441 static int internalsetitem(lazymanifest *self, line *new)
{
Yuya Nishihara
parsers: switch to policy importer...
r32372 int start = 0, end = self->numlines;
while (start < end) {
int pos = start + (end - start) / 2;
int c = linecmp(new, self->lines + pos);
if (c < 0)
end = pos;
else if (c > 0)
start = pos + 1;
else {
if (self->lines[pos].deleted)
self->livelines++;
if (self->lines[pos].from_malloc)
free(self->lines[pos].start);
start = pos;
goto finish;
}
}
/* being here means we need to do an insert */
if (!realloc_if_full(self)) {
PyErr_NoMemory();
return -1;
}
memmove(self->lines + start + 1, self->lines + start,
(self->numlines - start) * sizeof(line));
self->numlines++;
self->livelines++;
finish:
self->lines[start] = *new;
self->dirty = true;
return 0;
}
static int lazymanifest_setitem(
lazymanifest *self, PyObject *key, PyObject *value)
{
char *path;
Py_ssize_t plen;
PyObject *pyhash;
Py_ssize_t hlen;
char *hash;
PyObject *pyflags;
char *flags;
Py_ssize_t flen;
Kyle Lippincott
cext: change two more vars to Py_ssize_t in manifest.c...
r44594 Py_ssize_t dlen;
Yuya Nishihara
parsers: switch to policy importer...
r32372 char *dest;
int i;
line new;
if (!PyBytes_Check(key)) {
PyErr_Format(PyExc_TypeError,
"setitem: manifest keys must be a string.");
return -1;
}
if (!value) {
return lazymanifest_delitem(self, key);
}
if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
PyErr_Format(PyExc_TypeError,
"Manifest values must be a tuple of (node, flags).");
return -1;
}
if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) {
return -1;
}
pyhash = PyTuple_GetItem(value, 0);
if (!PyBytes_Check(pyhash)) {
PyErr_Format(PyExc_TypeError,
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 "node must be a %zi bytes string", self->nodelen);
Yuya Nishihara
parsers: switch to policy importer...
r32372 return -1;
}
hlen = PyBytes_Size(pyhash);
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 if (hlen != self->nodelen) {
Yuya Nishihara
parsers: switch to policy importer...
r32372 PyErr_Format(PyExc_TypeError,
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 "node must be a %zi bytes string", self->nodelen);
Yuya Nishihara
parsers: switch to policy importer...
r32372 return -1;
}
hash = PyBytes_AsString(pyhash);
pyflags = PyTuple_GetItem(value, 1);
if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
PyErr_Format(PyExc_TypeError,
Joerg Sonnenberger
manifest: tigher manifest parsing and flag use...
r45678 "flags must a 0 or 1 bytes string");
Yuya Nishihara
parsers: switch to policy importer...
r32372 return -1;
}
if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
return -1;
}
Joerg Sonnenberger
manifest: tigher manifest parsing and flag use...
r45678 if (flen == 1) {
switch (*flags) {
case 'l':
case 't':
case 'x':
break;
default:
PyErr_Format(PyExc_TypeError, "invalid manifest flag");
return -1;
}
}
Yuya Nishihara
parsers: switch to policy importer...
r32372 /* one null byte and one newline */
Joerg Sonnenberger
manifest: tigher manifest parsing and flag use...
r45678 dlen = plen + hlen * 2 + 1 + flen + 1;
Yuya Nishihara
parsers: switch to policy importer...
r32372 dest = malloc(dlen);
if (!dest) {
PyErr_NoMemory();
return -1;
}
memcpy(dest, path, plen + 1);
Joerg Sonnenberger
manifest: tigher manifest parsing and flag use...
r45678 for (i = 0; i < hlen; i++) {
Yuya Nishihara
parsers: switch to policy importer...
r32372 /* Cast to unsigned, so it will not get sign-extended when promoted
* to int (as is done when passing to a variadic function)
*/
sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
}
Joerg Sonnenberger
manifest: tigher manifest parsing and flag use...
r45678 memcpy(dest + plen + 2 * hlen + 1, flags, flen);
dest[plen + 2 * hlen + 1 + flen] = '\n';
Yuya Nishihara
parsers: switch to policy importer...
r32372 new.start = dest;
new.len = dlen;
new.hash_suffix = '\0';
if (hlen > 20) {
new.hash_suffix = hash[20];
}
new.from_malloc = true; /* is `start` a pointer we allocated? */
new.deleted = false; /* is this entry deleted? */
if (internalsetitem(self, &new)) {
return -1;
}
return 0;
}
static PyMappingMethods lazymanifest_mapping_methods = {
(lenfunc)lazymanifest_size, /* mp_length */
(binaryfunc)lazymanifest_getitem, /* mp_subscript */
(objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */
};
/* sequence methods (important or __contains__ builds an iterator) */
static int lazymanifest_contains(lazymanifest *self, PyObject *key)
{
line needle;
line *hit;
if (!PyBytes_Check(key)) {
/* Our keys are always strings, so if the contains
* check is for a non-string, just return false. */
return 0;
}
needle.start = PyBytes_AsString(key);
hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
&linecmp);
if (!hit || hit->deleted) {
return 0;
}
return 1;
}
static PySequenceMethods lazymanifest_seq_meths = {
(lenfunc)lazymanifest_size, /* sq_length */
0, /* sq_concat */
0, /* sq_repeat */
0, /* sq_item */
0, /* sq_slice */
0, /* sq_ass_item */
0, /* sq_ass_slice */
(objobjproc)lazymanifest_contains, /* sq_contains */
0, /* sq_inplace_concat */
0, /* sq_inplace_repeat */
};
/* Other methods (copy, diff, etc) */
static PyTypeObject lazymanifestType;
/* If the manifest has changes, build the new manifest text and reindex it. */
Gregory Szorc
cext: wrap before brace for functions...
r34441 static int compact(lazymanifest *self)
{
Yuya Nishihara
parsers: switch to policy importer...
r32372 int i;
ssize_t need = 0;
char *data;
line *src, *dst;
PyObject *pydata;
if (!self->dirty)
return 0;
for (i = 0; i < self->numlines; i++) {
if (!self->lines[i].deleted) {
need += self->lines[i].len;
}
}
pydata = PyBytes_FromStringAndSize(NULL, need);
if (!pydata)
return -1;
data = PyBytes_AsString(pydata);
if (!data) {
return -1;
}
src = self->lines;
dst = self->lines;
for (i = 0; i < self->numlines; i++, src++) {
char *tofree = NULL;
if (src->from_malloc) {
tofree = src->start;
}
if (!src->deleted) {
memcpy(data, src->start, src->len);
*dst = *src;
dst->start = data;
dst->from_malloc = false;
data += dst->len;
dst++;
}
free(tofree);
}
Py_DECREF(self->pydata);
self->pydata = pydata;
self->numlines = self->livelines;
self->dirty = false;
return 0;
}
static PyObject *lazymanifest_text(lazymanifest *self)
{
if (compact(self) != 0) {
PyErr_NoMemory();
return NULL;
}
Py_INCREF(self->pydata);
return self->pydata;
}
static lazymanifest *lazymanifest_copy(lazymanifest *self)
{
lazymanifest *copy = NULL;
if (compact(self) != 0) {
goto nomem;
}
copy = PyObject_New(lazymanifest, &lazymanifestType);
if (!copy) {
goto nomem;
}
Yuya Nishihara
manifest: fix possible SEGV caused by uninitialized lazymanifest fields...
r38346 lazymanifest_init_early(copy);
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 copy->nodelen = self->nodelen;
Yuya Nishihara
parsers: switch to policy importer...
r32372 copy->numlines = self->numlines;
copy->livelines = self->livelines;
copy->dirty = false;
copy->lines = malloc(self->maxlines *sizeof(line));
if (!copy->lines) {
goto nomem;
}
memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
copy->maxlines = self->maxlines;
copy->pydata = self->pydata;
Py_INCREF(copy->pydata);
return copy;
nomem:
PyErr_NoMemory();
Py_XDECREF(copy);
return NULL;
}
static lazymanifest *lazymanifest_filtercopy(
lazymanifest *self, PyObject *matchfn)
{
lazymanifest *copy = NULL;
int i;
if (!PyCallable_Check(matchfn)) {
PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
return NULL;
}
/* compact ourselves first to avoid double-frees later when we
* compact tmp so that it doesn't have random pointers to our
* underlying from_malloc-data (self->pydata is safe) */
if (compact(self) != 0) {
goto nomem;
}
copy = PyObject_New(lazymanifest, &lazymanifestType);
if (!copy) {
goto nomem;
}
Yuya Nishihara
manifest: fix possible SEGV caused by uninitialized lazymanifest fields...
r38346 lazymanifest_init_early(copy);
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 copy->nodelen = self->nodelen;
Yuya Nishihara
parsers: switch to policy importer...
r32372 copy->dirty = true;
copy->lines = malloc(self->maxlines * sizeof(line));
if (!copy->lines) {
goto nomem;
}
copy->maxlines = self->maxlines;
copy->numlines = 0;
copy->pydata = self->pydata;
Yuya Nishihara
manifest: incref/decref copy->pydata to clarify 'copy' holds a reference
r39486 Py_INCREF(copy->pydata);
Yuya Nishihara
parsers: switch to policy importer...
r32372 for (i = 0; i < self->numlines; i++) {
PyObject *arglist = NULL, *result = NULL;
Yuya Nishihara
py3: bulk-replace bytes format specifier passed to Py_BuildValue()...
r36637 arglist = Py_BuildValue(PY23("(s)", "(y)"),
self->lines[i].start);
Yuya Nishihara
parsers: switch to policy importer...
r32372 if (!arglist) {
Yuya Nishihara
manifest: fix leak on error return from lazymanifest_filtercopy()...
r39487 goto bail;
Yuya Nishihara
parsers: switch to policy importer...
r32372 }
result = PyObject_CallObject(matchfn, arglist);
Py_DECREF(arglist);
/* if the callback raised an exception, just let it
* through and give up */
if (!result) {
Yuya Nishihara
manifest: fix leak on error return from lazymanifest_filtercopy()...
r39487 goto bail;
Yuya Nishihara
parsers: switch to policy importer...
r32372 }
if (PyObject_IsTrue(result)) {
assert(!(self->lines[i].from_malloc));
copy->lines[copy->numlines++] = self->lines[i];
}
Py_DECREF(result);
}
copy->livelines = copy->numlines;
return copy;
nomem:
PyErr_NoMemory();
Yuya Nishihara
manifest: fix leak on error return from lazymanifest_filtercopy()...
r39487 bail:
Yuya Nishihara
parsers: switch to policy importer...
r32372 Py_XDECREF(copy);
return NULL;
}
static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
{
lazymanifest *other;
PyObject *pyclean = NULL;
bool listclean;
PyObject *emptyTup = NULL, *ret = NULL;
PyObject *es;
int sneedle = 0, oneedle = 0;
if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
return NULL;
}
listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
es = PyBytes_FromString("");
if (!es) {
goto nomem;
}
emptyTup = PyTuple_Pack(2, Py_None, es);
Py_DECREF(es);
if (!emptyTup) {
goto nomem;
}
ret = PyDict_New();
if (!ret) {
goto nomem;
}
while (sneedle != self->numlines || oneedle != other->numlines) {
line *left = self->lines + sneedle;
line *right = other->lines + oneedle;
int result;
PyObject *key;
PyObject *outer;
/* If we're looking at a deleted entry and it's not
* the end of the manifest, just skip it. */
Jun Wu
lazymanifest: avoid reading uninitialized memory...
r35828 if (sneedle < self->numlines && left->deleted) {
Yuya Nishihara
parsers: switch to policy importer...
r32372 sneedle++;
continue;
}
Jun Wu
lazymanifest: avoid reading uninitialized memory...
r35828 if (oneedle < other->numlines && right->deleted) {
Yuya Nishihara
parsers: switch to policy importer...
r32372 oneedle++;
continue;
}
/* if we're at the end of either manifest, then we
* know the remaining items are adds so we can skip
* the strcmp. */
if (sneedle == self->numlines) {
result = 1;
} else if (oneedle == other->numlines) {
result = -1;
} else {
result = linecmp(left, right);
}
key = result <= 0 ?
PyBytes_FromString(left->start) :
PyBytes_FromString(right->start);
if (!key)
goto nomem;
if (result < 0) {
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 PyObject *l = hashflags(self->nodelen, left);
Yuya Nishihara
parsers: switch to policy importer...
r32372 if (!l) {
goto nomem;
}
outer = PyTuple_Pack(2, l, emptyTup);
Py_DECREF(l);
if (!outer) {
goto nomem;
}
PyDict_SetItem(ret, key, outer);
Py_DECREF(outer);
sneedle++;
} else if (result > 0) {
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 PyObject *r = hashflags(self->nodelen, right);
Yuya Nishihara
parsers: switch to policy importer...
r32372 if (!r) {
goto nomem;
}
outer = PyTuple_Pack(2, emptyTup, r);
Py_DECREF(r);
if (!outer) {
goto nomem;
}
PyDict_SetItem(ret, key, outer);
Py_DECREF(outer);
oneedle++;
} else {
/* file exists in both manifests */
if (left->len != right->len
|| memcmp(left->start, right->start, left->len)
|| left->hash_suffix != right->hash_suffix) {
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 PyObject *l = hashflags(self->nodelen, left);
Yuya Nishihara
parsers: switch to policy importer...
r32372 PyObject *r;
if (!l) {
goto nomem;
}
Joerg Sonnenberger
manifests: push down expected node length into the parser...
r47817 r = hashflags(self->nodelen, right);
Yuya Nishihara
parsers: switch to policy importer...
r32372 if (!r) {
Py_DECREF(l);
goto nomem;
}
outer = PyTuple_Pack(2, l, r);
Py_DECREF(l);
Py_DECREF(r);
if (!outer) {
goto nomem;
}
PyDict_SetItem(ret, key, outer);
Py_DECREF(outer);
} else if (listclean) {
PyDict_SetItem(ret, key, Py_None);
}
sneedle++;
oneedle++;
}
Py_DECREF(key);
}
Py_DECREF(emptyTup);
return ret;
nomem:
PyErr_NoMemory();
Py_XDECREF(ret);
Py_XDECREF(emptyTup);
return NULL;
}
static PyMethodDef lazymanifest_methods[] = {
{"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS,
"Iterate over file names in this lazymanifest."},
{"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS,
"Iterate over (path, nodeid, flags) tuples in this lazymanifest."},
{"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
"Make a copy of this lazymanifest."},
{"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
"Make a copy of this manifest filtered by matchfn."},
{"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
"Compare this lazymanifest to another one."},
{"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
"Encode this manifest to text."},
{NULL},
};
#ifdef IS_PY3K
#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN
#endif
static PyTypeObject lazymanifestType = {
Augie Fackler
cext: add /* header */ comment to all PyVarObject_HEAD_INIT() calls...
r34862 PyVarObject_HEAD_INIT(NULL, 0) /* header */
Yuya Nishihara
parsers: switch to policy importer...
r32372 "parsers.lazymanifest", /* tp_name */
sizeof(lazymanifest), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)lazymanifest_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
&lazymanifest_seq_meths, /* tp_as_sequence */
&lazymanifest_mapping_methods, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
LAZYMANIFEST_TPFLAGS, /* tp_flags */
"TODO(augie)", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
(getiterfunc)lazymanifest_getkeysiter, /* tp_iter */
0, /* tp_iternext */
lazymanifest_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)lazymanifest_init, /* tp_init */
0, /* tp_alloc */
};
void manifest_module_init(PyObject * mod)
{
lazymanifestType.tp_new = PyType_GenericNew;
if (PyType_Ready(&lazymanifestType) < 0)
return;
Py_INCREF(&lazymanifestType);
PyModule_AddObject(mod, "lazymanifest",
(PyObject *)&lazymanifestType);
}