/** * Copyright (c) 2016-present, Gregory Szorc * All rights reserved. * * This software may be modified and distributed under the terms * of the BSD license. See the LICENSE file for details. */ /* A Python C extension for Zstandard. */ #if defined(_WIN32) #define WIN32_LEAN_AND_MEAN #include <Windows.h> #elif defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) #include <sys/types.h> #include <sys/sysctl.h> #endif #include "python-zstandard.h" PyObject *ZstdError; PyDoc_STRVAR(estimate_decompression_context_size__doc__, "estimate_decompression_context_size()\n" "\n" "Estimate the amount of memory allocated to a decompression context.\n" ); static PyObject* estimate_decompression_context_size(PyObject* self) { return PyLong_FromSize_t(ZSTD_estimateDCtxSize()); } PyDoc_STRVAR(frame_content_size__doc__, "frame_content_size(data)\n" "\n" "Obtain the decompressed size of a frame." ); static PyObject* frame_content_size(PyObject* self, PyObject* args, PyObject* kwargs) { static char* kwlist[] = { "source", NULL }; Py_buffer source; PyObject* result = NULL; unsigned long long size; #if PY_MAJOR_VERSION >= 3 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_content_size", #else if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_content_size", #endif kwlist, &source)) { return NULL; } if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { PyErr_SetString(PyExc_ValueError, "data buffer should be contiguous and have at most one dimension"); goto finally; } size = ZSTD_getFrameContentSize(source.buf, source.len); if (size == ZSTD_CONTENTSIZE_ERROR) { PyErr_SetString(ZstdError, "error when determining content size"); } else if (size == ZSTD_CONTENTSIZE_UNKNOWN) { result = PyLong_FromLong(-1); } else { result = PyLong_FromUnsignedLongLong(size); } finally: PyBuffer_Release(&source); return result; } PyDoc_STRVAR(frame_header_size__doc__, "frame_header_size(data)\n" "\n" "Obtain the size of a frame header.\n" ); static PyObject* frame_header_size(PyObject* self, PyObject* args, PyObject* kwargs) { static char* kwlist[] = { "source", NULL }; Py_buffer source; PyObject* result = NULL; size_t zresult; #if PY_MAJOR_VERSION >= 3 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_header_size", #else if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_header_size", #endif kwlist, &source)) { return NULL; } if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { PyErr_SetString(PyExc_ValueError, "data buffer should be contiguous and have at most one dimension"); goto finally; } zresult = ZSTD_frameHeaderSize(source.buf, source.len); if (ZSTD_isError(zresult)) { PyErr_Format(ZstdError, "could not determine frame header size: %s", ZSTD_getErrorName(zresult)); } else { result = PyLong_FromSize_t(zresult); } finally: PyBuffer_Release(&source); return result; } PyDoc_STRVAR(get_frame_parameters__doc__, "get_frame_parameters(data)\n" "\n" "Obtains a ``FrameParameters`` instance by parsing data.\n"); PyDoc_STRVAR(train_dictionary__doc__, "train_dictionary(dict_size, samples, k=None, d=None, steps=None,\n" " threads=None,notifications=0, dict_id=0, level=0)\n" "\n" "Train a dictionary from sample data using the COVER algorithm.\n" "\n" "A compression dictionary of size ``dict_size`` will be created from the\n" "iterable of ``samples``. The raw dictionary bytes will be returned.\n" "\n" "The COVER algorithm has 2 parameters: ``k`` and ``d``. These control the\n" "*segment size* and *dmer size*. A reasonable range for ``k`` is\n" "``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n" "``d`` must be less than or equal to ``k``.\n" "\n" "``steps`` can be specified to control the number of steps through potential\n" "values of ``k`` and ``d`` to try. ``k`` and ``d`` will only be varied if\n" "those arguments are not defined. i.e. if ``d`` is ``8``, then only ``k``\n" "will be varied in this mode.\n" "\n" "``threads`` can specify how many threads to use to test various ``k`` and\n" "``d`` values. ``-1`` will use as many threads as available CPUs. By default,\n" "a single thread is used.\n" "\n" "When ``k`` and ``d`` are not defined, default values are used and the\n" "algorithm will perform multiple iterations - or steps - to try to find\n" "ideal parameters. If both ``k`` and ``d`` are specified, then those values\n" "will be used. ``steps`` or ``threads`` triggers optimization mode to test\n" "multiple ``k`` and ``d`` variations.\n" ); static char zstd_doc[] = "Interface to zstandard"; static PyMethodDef zstd_methods[] = { { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size, METH_NOARGS, estimate_decompression_context_size__doc__ }, { "frame_content_size", (PyCFunction)frame_content_size, METH_VARARGS | METH_KEYWORDS, frame_content_size__doc__ }, { "frame_header_size", (PyCFunction)frame_header_size, METH_VARARGS | METH_KEYWORDS, frame_header_size__doc__ }, { "get_frame_parameters", (PyCFunction)get_frame_parameters, METH_VARARGS | METH_KEYWORDS, get_frame_parameters__doc__ }, { "train_dictionary", (PyCFunction)train_dictionary, METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ }, { NULL, NULL } }; void bufferutil_module_init(PyObject* mod); void compressobj_module_init(PyObject* mod); void compressor_module_init(PyObject* mod); void compressionparams_module_init(PyObject* mod); void constants_module_init(PyObject* mod); void compressionchunker_module_init(PyObject* mod); void compressiondict_module_init(PyObject* mod); void compressionreader_module_init(PyObject* mod); void compressionwriter_module_init(PyObject* mod); void compressoriterator_module_init(PyObject* mod); void decompressor_module_init(PyObject* mod); void decompressobj_module_init(PyObject* mod); void decompressionreader_module_init(PyObject *mod); void decompressionwriter_module_init(PyObject* mod); void decompressoriterator_module_init(PyObject* mod); void frameparams_module_init(PyObject* mod); void zstd_module_init(PyObject* m) { /* python-zstandard relies on unstable zstd C API features. This means that changes in zstd may break expectations in python-zstandard. python-zstandard is distributed with a copy of the zstd sources. python-zstandard is only guaranteed to work with the bundled version of zstd. However, downstream redistributors or packagers may unbundle zstd from python-zstandard. This can result in a mismatch between zstd versions and API semantics. This essentially "voids the warranty" of python-zstandard and may cause undefined behavior. We detect this mismatch here and refuse to load the module if this scenario is detected. */ if (ZSTD_VERSION_NUMBER != 10404 || ZSTD_versionNumber() != 10404) { PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version"); return; } bufferutil_module_init(m); compressionparams_module_init(m); compressiondict_module_init(m); compressobj_module_init(m); compressor_module_init(m); compressionchunker_module_init(m); compressionreader_module_init(m); compressionwriter_module_init(m); compressoriterator_module_init(m); constants_module_init(m); decompressor_module_init(m); decompressobj_module_init(m); decompressionreader_module_init(m); decompressionwriter_module_init(m); decompressoriterator_module_init(m); frameparams_module_init(m); } #if defined(__GNUC__) && (__GNUC__ >= 4) # define PYTHON_ZSTD_VISIBILITY __attribute__ ((visibility ("default"))) #else # define PYTHON_ZSTD_VISIBILITY #endif #if PY_MAJOR_VERSION >= 3 static struct PyModuleDef zstd_module = { PyModuleDef_HEAD_INIT, "zstd", zstd_doc, -1, zstd_methods }; PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC PyInit_zstd(void) { PyObject *m = PyModule_Create(&zstd_module); if (m) { zstd_module_init(m); if (PyErr_Occurred()) { Py_DECREF(m); m = NULL; } } return m; } #else PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC initzstd(void) { PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc); if (m) { zstd_module_init(m); } } #endif /* Attempt to resolve the number of CPUs in the system. */ int cpu_count() { int count = 0; #if defined(_WIN32) SYSTEM_INFO si; si.dwNumberOfProcessors = 0; GetSystemInfo(&si); count = si.dwNumberOfProcessors; #elif defined(__APPLE__) int num; size_t size = sizeof(int); if (0 == sysctlbyname("hw.logicalcpu", &num, &size, NULL, 0)) { count = num; } #elif defined(__linux__) count = sysconf(_SC_NPROCESSORS_ONLN); #elif defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) int mib[2]; size_t len = sizeof(count); mib[0] = CTL_HW; mib[1] = HW_NCPU; if (0 != sysctl(mib, 2, &count, &len, NULL, 0)) { count = 0; } #elif defined(__hpux) count = mpctl(MPC_GETNUMSPUS, NULL, NULL); #endif return count; } size_t roundpow2(size_t i) { i--; i |= i >> 1; i |= i >> 2; i |= i >> 4; i |= i >> 8; i |= i >> 16; i++; return i; } /* Safer version of _PyBytes_Resize(). * * _PyBytes_Resize() only works if the refcount is 1. In some scenarios, * we can get an object with a refcount > 1, even if it was just created * with PyBytes_FromStringAndSize()! That's because (at least) CPython * pre-allocates PyBytes instances of size 1 for every possible byte value. * * If non-0 is returned, obj may or may not be NULL. */ int safe_pybytes_resize(PyObject** obj, Py_ssize_t size) { PyObject* tmp; if ((*obj)->ob_refcnt == 1) { return _PyBytes_Resize(obj, size); } tmp = PyBytes_FromStringAndSize(NULL, size); if (!tmp) { return -1; } memcpy(PyBytes_AS_STRING(tmp), PyBytes_AS_STRING(*obj), PyBytes_GET_SIZE(*obj)); Py_DECREF(*obj); *obj = tmp; return 0; }