##// END OF EJS Templates
exchange: obtain compression engines from the registrar...
exchange: obtain compression engines from the registrar. util.compengines has knowledge of all registered compression engines and the metadata that associates them with various bundle types. This patch removes the now-redundant declaration of this metadata from exchange.py and obtains it from the new source. The effect of this patch is that once a new compression engine is registered with util.compengines, `hg bundle -t <engine>` will just work.

File last commit:

r30435:b86a448a default
r30440:c3944ab1 default
Show More
compressiondict.c
247 lines | 7.7 KiB | text/x-c | CLexer
/**
* Copyright (c) 2016-present, Gregory Szorc
* All rights reserved.
*
* This software may be modified and distributed under the terms
* of the BSD license. See the LICENSE file for details.
*/
#include "python-zstandard.h"
extern PyObject* ZstdError;
/**
 * Train a Zstandard compression dictionary from a list of samples.
 *
 * Python-level arguments (positional or keyword):
 *   dict_size  -- capacity, in bytes, of the buffer the dictionary is
 *                 trained into.
 *   samples    -- list whose items must all be bytes objects.
 *   parameters -- optional DictParametersType instance; items 0..3 are read
 *                 as selectivityLevel, compressionLevel, notificationLevel
 *                 and dictID respectively.
 *
 * Returns a new ZstdCompressionDict that owns the trained dictionary data,
 * or NULL with a Python exception set (ValueError for non-bytes samples,
 * MemoryError on allocation failure, ZstdError if training fails).
 *
 * All temporary buffers are released through the single `finally` label so
 * that no path (success or failure) leaks memory.
 */
ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
	static char *kwlist[] = { "dict_size", "samples", "parameters", NULL };
	size_t capacity;
	PyObject* samples;
	Py_ssize_t samplesLen;
	PyObject* parameters = NULL;
	ZDICT_params_t zparams;
	Py_ssize_t sampleIndex;
	Py_ssize_t sampleSize;
	PyObject* sampleItem;
	size_t zresult;
	void* sampleBuffer = NULL;
	void* sampleOffset;
	size_t samplesSize = 0;
	size_t* sampleSizes = NULL;
	void* dict = NULL;
	ZstdCompressionDict* result = NULL;

	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!", kwlist,
		&capacity,
		&PyList_Type, &samples,
		(PyObject*)&DictParametersType, &parameters)) {
		return NULL;
	}

	/* Validate parameters first since it is easiest. */
	zparams.selectivityLevel = 0;
	zparams.compressionLevel = 0;
	zparams.notificationLevel = 0;
	zparams.dictID = 0;
	zparams.reserved[0] = 0;
	zparams.reserved[1] = 0;

	if (parameters) {
		/* TODO validate data ranges */
		zparams.selectivityLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 0));
		zparams.compressionLevel = PyLong_AsLong(PyTuple_GetItem(parameters, 1));
		zparams.notificationLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 2));
		zparams.dictID = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 3));
	}

	/* Figure out the size of the raw samples */
	samplesLen = PyList_Size(samples);
	for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
		sampleItem = PyList_GetItem(samples, sampleIndex);
		if (!PyBytes_Check(sampleItem)) {
			PyErr_SetString(PyExc_ValueError, "samples must be bytes");
			/* PyList_GetItem() returns a borrowed reference and nothing has
			   been allocated yet, so there is nothing to release here. */
			return NULL;
		}
		samplesSize += PyBytes_GET_SIZE(sampleItem);
	}

	/* Now that we know the total size of the raw samples, we can allocate
	   a buffer for the raw data */
	sampleBuffer = malloc(samplesSize);
	if (!sampleBuffer) {
		PyErr_NoMemory();
		goto finally;
	}

	sampleSizes = malloc(samplesLen * sizeof(size_t));
	if (!sampleSizes) {
		PyErr_NoMemory();
		goto finally;
	}

	sampleOffset = sampleBuffer;
	/* Now iterate again and assemble the samples in the buffer */
	for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
		sampleItem = PyList_GetItem(samples, sampleIndex);
		sampleSize = PyBytes_GET_SIZE(sampleItem);
		sampleSizes[sampleIndex] = sampleSize;
		memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
		sampleOffset = (char*)sampleOffset + sampleSize;
	}

	dict = malloc(capacity);
	if (!dict) {
		PyErr_NoMemory();
		goto finally;
	}

	zresult = ZDICT_trainFromBuffer_advanced(dict, capacity,
		sampleBuffer, sampleSizes, (unsigned int)samplesLen,
		zparams);
	if (ZDICT_isError(zresult)) {
		PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult));
		goto finally;
	}

	result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
	if (!result) {
		goto finally;
	}

	result->dictData = dict;
	result->dictSize = zresult;
	/* Ownership of the dictionary buffer moved to the result object; make
	   sure the cleanup below does not free it. */
	dict = NULL;

finally:
	/* free(NULL) is a no-op, so this is safe from every jump site. */
	free(dict);
	free(sampleSizes);
	free(sampleBuffer);
	return result;
}
/* Docstring for the ZstdCompressionDict type; installed as tp_doc in
   ZstdCompressionDictType. */
PyDoc_STRVAR(ZstdCompressionDict__doc__,
"ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
"\n"
"This type holds the results of a computed Zstandard compression dictionary.\n"
"Instances are obtained by calling ``train_dictionary()`` or by passing bytes\n"
"obtained from another source into the constructor.\n"
);
/**
 * tp_init for ZstdCompressionDict: copy the bytes passed to the constructor
 * into a privately owned buffer.
 *
 * Accepts one positional argument parsed with "y#" on Python 3 and "s#" on
 * Python 2. Returns 0 on success, or -1 with an exception set if argument
 * parsing or the allocation fails; in the failure case the instance is left
 * holding no dictionary data.
 */
static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args) {
	const char* source;
	Py_ssize_t sourceSize;
#if PY_MAJOR_VERSION >= 3
	const char* format = "y#";
#else
	const char* format = "s#";
#endif

	/* __init__ may be invoked more than once on the same instance. Release
	   any buffer from a previous initialization instead of leaking it.
	   Instances created through tp_new (PyType_GenericNew) start out
	   zero-filled, so dictData is NULL on the first call. */
	free(self->dictData);
	self->dictData = NULL;
	self->dictSize = 0;

	if (!PyArg_ParseTuple(args, format, &source, &sourceSize)) {
		return -1;
	}

	/* malloc(0) may legitimately return NULL; request at least one byte so
	   that an empty input is not misreported as an out-of-memory error. */
	self->dictData = malloc(sourceSize ? (size_t)sourceSize : 1);
	if (!self->dictData) {
		PyErr_NoMemory();
		return -1;
	}

	memcpy(self->dictData, source, sourceSize);
	self->dictSize = sourceSize;

	return 0;
}
/**
 * tp_dealloc for ZstdCompressionDict: release the owned dictionary buffer
 * and then the object itself.
 *
 * The type is declared with Py_TPFLAGS_BASETYPE, so the instance may belong
 * to a subclass whose memory came from a different allocator. The object is
 * therefore released through the type's tp_free slot rather than by calling
 * PyObject_Del() directly.
 */
static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
	/* free(NULL) is a no-op, so no guard is needed. */
	free(self->dictData);
	self->dictData = NULL;

	Py_TYPE(self)->tp_free((PyObject*)self);
}
/**
 * dict_id() -- return the dictionary ID reported by ZDICT_getDictID() for
 * the stored dictionary data, as a Python integer.
 *
 * The ID is an unsigned value; it is converted with
 * PyLong_FromUnsignedLong() so that IDs with the top bit set are not turned
 * into negative numbers on platforms where long is 32 bits wide.
 */
static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
	unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);

	return PyLong_FromUnsignedLong(dictID);
}
/**
 * as_bytes() -- build a new bytes object from the stored dictionary data
 * (dictData, dictSize) and return it. Returns whatever
 * PyBytes_FromStringAndSize() returns.
 */
static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) {
	PyObject* rawBytes;

	rawBytes = PyBytes_FromStringAndSize(self->dictData, self->dictSize);
	return rawBytes;
}
/* Method table for ZstdCompressionDict instances. Both methods take no
   arguments (METH_NOARGS). The all-zero entry terminates the table. */
static PyMethodDef ZstdCompressionDict_methods[] = {
	{
		"dict_id",
		(PyCFunction)ZstdCompressionDict_dict_id,
		METH_NOARGS,
		PyDoc_STR("dict_id() -- obtain the numeric dictionary ID")
	},
	{
		"as_bytes",
		(PyCFunction)ZstdCompressionDict_as_bytes,
		METH_NOARGS,
		PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data")
	},
	{ NULL, NULL, 0, NULL }
};
/**
 * sq_length implementation: len(dict) is the stored dictionary size
 * (dictSize).
 */
static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) {
	const Py_ssize_t length = self->dictSize;

	return length;
}
/* Sequence protocol table: only sq_length is implemented, which makes
   len() work on ZstdCompressionDict instances.
   The initializer is positional. PySequenceMethods has ten slots, including
   the legacy slice slots (sq_slice / sq_ass_slice on Python 2, was_sq_slice /
   was_sq_ass_slice on Python 3); the eight values below fill the first
   eight and the trailing in-place slots are implicitly zero. */
static PySequenceMethods ZstdCompressionDict_sq = {
(lenfunc)ZstdCompressionDict_length, /* sq_length */
0, /* sq_concat */
0, /* sq_repeat */
0, /* sq_item */
0, /* sq_slice (was_sq_slice on Python 3) */
0, /* sq_ass_item */
0, /* sq_ass_slice (was_sq_ass_slice on Python 3) */
0 /* sq_contains; sq_inplace_concat and sq_inplace_repeat default to 0 */
};
/* Type object for zstd.ZstdCompressionDict.
   - Instances are created with PyType_GenericNew and initialized by
     ZstdCompressionDict_init; ZstdCompressionDict_dealloc tears them down.
   - Py_TPFLAGS_BASETYPE allows the type to be subclassed.
   - len() support comes from ZstdCompressionDict_sq; dict_id() and
     as_bytes() come from ZstdCompressionDict_methods.
   The type's ob_type is passed as NULL here and assigned in
   compressiondict_module_init() before PyType_Ready() is called. */
PyTypeObject ZstdCompressionDictType = {
PyVarObject_HEAD_INIT(NULL, 0)
"zstd.ZstdCompressionDict", /* tp_name */
sizeof(ZstdCompressionDict), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
&ZstdCompressionDict_sq, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
ZstdCompressionDict__doc__, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
ZstdCompressionDict_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)ZstdCompressionDict_init, /* tp_init */
0, /* tp_alloc */
PyType_GenericNew, /* tp_new */
};
/**
 * Finalize ZstdCompressionDictType and expose it on `mod` under the name
 * "ZstdCompressionDict".
 *
 * Returns nothing; if PyType_Ready() fails the function returns early and
 * the type is not added to the module.
 */
void compressiondict_module_init(PyObject* mod) {
	/* Py_TYPE() stopped being assignable in newer CPython releases;
	   Py_SET_TYPE() is the supported setter from Python 3.9 onwards. */
#if PY_VERSION_HEX >= 0x03090000
	Py_SET_TYPE((PyObject*)&ZstdCompressionDictType, &PyType_Type);
#else
	Py_TYPE(&ZstdCompressionDictType) = &PyType_Type;
#endif
	if (PyType_Ready(&ZstdCompressionDictType) < 0) {
		return;
	}

	/* PyModule_AddObject() steals a reference only on success, so take one
	   for it and give it back if the call fails. */
	Py_INCREF((PyObject*)&ZstdCompressionDictType);
	if (PyModule_AddObject(mod, "ZstdCompressionDict",
		(PyObject*)&ZstdCompressionDictType) < 0) {
		Py_DECREF((PyObject*)&ZstdCompressionDictType);
	}
}