##// END OF EJS Templates
zstd: vendor python-zstandard 0.7.0...
zstd: vendor python-zstandard 0.7.0 Commit 3054ae3a66112970a091d3939fee32c2d0c1a23e from https://github.com/indygreg/python-zstandard is imported without modifications (other than removing unwanted files). The vendored zstd library within has been upgraded from 1.1.2 to 1.1.3. This version introduced new APIs for threads, thread pools, multi-threaded compression, and a new dictionary builder (COVER). These features are not yet used by python-zstandard (or Mercurial for that matter). However, that will likely change in the next python-zstandard release (and I think there are opportunities for Mercurial to take advantage of the multi-threaded APIs). Relevant to Mercurial, the CFFI bindings are now fully implemented. This means zstd should "just work" with PyPy (although I haven't tried). The python-zstandard test suite also runs all tests against both the C extension and CFFI bindings to ensure feature parity. There is also a "decompress_content_dict_chain()" API. This was derived from discussions with Yann Collet on list about alternate ways of encoding delta chains. The change most relevant to Mercurial is a performance enhancement in the simple decompression API to reuse a data structure across operations. This makes decompression of multiple inputs significantly faster. (This scenario occurs when reading revlog delta chains, for example.) Using python-zstandard's bench.py to measure the performance difference... On changelog chunks in the mozilla-unified repo: decompress discrete decompress() reuse zctx 1.262243 wall; 1.260000 CPU; 1.260000 user; 0.000000 sys 170.43 MB/s (best of 3) 0.949106 wall; 0.950000 CPU; 0.950000 user; 0.000000 sys 226.66 MB/s (best of 4) decompress discrete dict decompress() reuse zctx 0.692170 wall; 0.690000 CPU; 0.690000 user; 0.000000 sys 310.80 MB/s (best of 5) 0.437088 wall; 0.440000 CPU; 0.440000 user; 0.000000 sys 492.17 MB/s (best of 7) On manifest chunks in the mozilla-unified repo: decompress discrete decompress() reuse zctx 1.367284 wall; 1.370000 CPU; 1.370000 user; 0.000000 sys 274.01 MB/s (best of 3) 1.086831 wall; 1.080000 CPU; 1.080000 user; 0.000000 sys 344.72 MB/s (best of 3) decompress discrete dict decompress() reuse zctx 0.993272 wall; 0.990000 CPU; 0.990000 user; 0.000000 sys 377.19 MB/s (best of 3) 0.678651 wall; 0.680000 CPU; 0.680000 user; 0.000000 sys 552.06 MB/s (best of 5) That should make reads on zstd revlogs a bit faster ;) # no-check-commit

File last commit:

r30895:c32454d6 default
r30895:c32454d6 default
Show More
decompressor.c
845 lines | 22.6 KiB | text/x-c | CLexer
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 /**
* Copyright (c) 2016-present, Gregory Szorc
* All rights reserved.
*
* This software may be modified and distributed under the terms
* of the BSD license. See the LICENSE file for details.
*/
#include "python-zstandard.h"
extern PyObject* ZstdError;
ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor) {
ZSTD_DStream* dstream;
void* dictData = NULL;
size_t dictSize = 0;
size_t zresult;
dstream = ZSTD_createDStream();
if (!dstream) {
PyErr_SetString(ZstdError, "could not create DStream");
return NULL;
}
if (decompressor->dict) {
dictData = decompressor->dict->dictData;
dictSize = decompressor->dict->dictSize;
}
if (dictData) {
zresult = ZSTD_initDStream_usingDict(dstream, dictData, dictSize);
}
else {
zresult = ZSTD_initDStream(dstream);
}
if (ZSTD_isError(zresult)) {
PyErr_Format(ZstdError, "could not initialize DStream: %s",
ZSTD_getErrorName(zresult));
return NULL;
}
return dstream;
}
PyDoc_STRVAR(Decompressor__doc__,
"ZstdDecompressor(dict_data=None)\n"
"\n"
"Create an object used to perform Zstandard decompression.\n"
"\n"
"An instance can perform multiple decompression operations."
);
static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
static char* kwlist[] = {
"dict_data",
NULL
};
ZstdCompressionDict* dict = NULL;
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 self->dctx = NULL;
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 self->dict = NULL;
self->ddict = NULL;
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist,
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 &ZstdCompressionDictType, &dict)) {
return -1;
}
/* TODO lazily initialize the reference ZSTD_DCtx on first use since
not instances of ZstdDecompressor will use a ZSTD_DCtx. */
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 self->dctx = ZSTD_createDCtx();
if (!self->dctx) {
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 PyErr_NoMemory();
goto except;
}
if (dict) {
self->dict = dict;
Py_INCREF(dict);
}
return 0;
except:
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 if (self->dctx) {
ZSTD_freeDCtx(self->dctx);
self->dctx = NULL;
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 }
return -1;
}
static void Decompressor_dealloc(ZstdDecompressor* self) {
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 if (self->dctx) {
ZSTD_freeDCtx(self->dctx);
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 }
Py_XDECREF(self->dict);
if (self->ddict) {
ZSTD_freeDDict(self->ddict);
self->ddict = NULL;
}
PyObject_Del(self);
}
PyDoc_STRVAR(Decompressor_copy_stream__doc__,
"copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n"
"\n"
"Compressed data will be read from ``ifh``, decompressed, and written to\n"
"``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n"
"``write(data)`` method.\n"
"\n"
"The optional ``read_size`` and ``write_size`` arguments control the chunk\n"
"size of data that is ``read()`` and ``write()`` between streams. They default\n"
"to the default input and output sizes of zstd decompressor streams.\n"
);
static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
static char* kwlist[] = {
"ifh",
"ofh",
"read_size",
"write_size",
NULL
};
PyObject* source;
PyObject* dest;
size_t inSize = ZSTD_DStreamInSize();
size_t outSize = ZSTD_DStreamOutSize();
ZSTD_DStream* dstream;
ZSTD_inBuffer input;
ZSTD_outBuffer output;
Py_ssize_t totalRead = 0;
Py_ssize_t totalWrite = 0;
char* readBuffer;
Py_ssize_t readSize;
PyObject* readResult;
PyObject* res = NULL;
size_t zresult = 0;
PyObject* writeResult;
PyObject* totalReadPy;
PyObject* totalWritePy;
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
&source, &dest, &inSize, &outSize)) {
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 return NULL;
}
if (!PyObject_HasAttrString(source, "read")) {
PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
return NULL;
}
if (!PyObject_HasAttrString(dest, "write")) {
PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
return NULL;
}
Gregory Szorc
zstd: prevent potential free() of uninitialized memory...
r30830 /* Prevent free on uninitialized memory in finally. */
output.dst = NULL;
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 dstream = DStream_from_ZstdDecompressor(self);
if (!dstream) {
res = NULL;
goto finally;
}
output.dst = PyMem_Malloc(outSize);
if (!output.dst) {
PyErr_NoMemory();
res = NULL;
goto finally;
}
output.size = outSize;
output.pos = 0;
/* Read source stream until EOF */
while (1) {
readResult = PyObject_CallMethod(source, "read", "n", inSize);
if (!readResult) {
PyErr_SetString(ZstdError, "could not read() from source");
goto finally;
}
PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
/* If no data was read, we're at EOF. */
if (0 == readSize) {
break;
}
totalRead += readSize;
/* Send data to decompressor */
input.src = readBuffer;
input.size = readSize;
input.pos = 0;
while (input.pos < input.size) {
Py_BEGIN_ALLOW_THREADS
zresult = ZSTD_decompressStream(dstream, &output, &input);
Py_END_ALLOW_THREADS
if (ZSTD_isError(zresult)) {
PyErr_Format(ZstdError, "zstd decompressor error: %s",
ZSTD_getErrorName(zresult));
res = NULL;
goto finally;
}
if (output.pos) {
#if PY_MAJOR_VERSION >= 3
writeResult = PyObject_CallMethod(dest, "write", "y#",
#else
writeResult = PyObject_CallMethod(dest, "write", "s#",
#endif
output.dst, output.pos);
Py_XDECREF(writeResult);
totalWrite += output.pos;
output.pos = 0;
}
}
}
/* Source stream is exhausted. Finish up. */
ZSTD_freeDStream(dstream);
dstream = NULL;
totalReadPy = PyLong_FromSsize_t(totalRead);
totalWritePy = PyLong_FromSsize_t(totalWrite);
res = PyTuple_Pack(2, totalReadPy, totalWritePy);
Py_DecRef(totalReadPy);
Py_DecRef(totalWritePy);
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 finally:
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 if (output.dst) {
PyMem_Free(output.dst);
}
if (dstream) {
ZSTD_freeDStream(dstream);
}
return res;
}
PyDoc_STRVAR(Decompressor_decompress__doc__,
"decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n"
"\n"
"This method will decompress the entirety of the argument and return the\n"
"result.\n"
"\n"
"The input bytes are expected to contain a full Zstandard frame (something\n"
"compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n"
"not contain a full frame, an exception will be raised.\n"
"\n"
"If the frame header of the compressed data does not contain the content size\n"
"``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n"
"allocation of size ``max_output_size`` will be performed and an attempt will\n"
"be made to perform decompression into that buffer. If the buffer is too\n"
"small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n"
"be resized if it is too large.\n"
"\n"
"Uncompressed data could be much larger than compressed data. As a result,\n"
"calling this function could result in a very large memory allocation being\n"
"performed to hold the uncompressed data. Therefore it is **highly**\n"
"recommended to use a streaming decompression method instead of this one.\n"
);
PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
static char* kwlist[] = {
"data",
"max_output_size",
NULL
};
const char* source;
Py_ssize_t sourceSize;
Py_ssize_t maxOutputSize = 0;
unsigned long long decompressedSize;
size_t destCapacity;
PyObject* result = NULL;
void* dictData = NULL;
size_t dictSize = 0;
size_t zresult;
#if PY_MAJOR_VERSION >= 3
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n:decompress",
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 #else
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n:decompress",
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 #endif
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 kwlist, &source, &sourceSize, &maxOutputSize)) {
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 return NULL;
}
if (self->dict) {
dictData = self->dict->dictData;
dictSize = self->dict->dictSize;
}
if (dictData && !self->ddict) {
Py_BEGIN_ALLOW_THREADS
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 self->ddict = ZSTD_createDDict_byReference(dictData, dictSize);
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 Py_END_ALLOW_THREADS
if (!self->ddict) {
PyErr_SetString(ZstdError, "could not create decompression dict");
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 return NULL;
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 }
}
decompressedSize = ZSTD_getDecompressedSize(source, sourceSize);
/* 0 returned if content size not in the zstd frame header */
if (0 == decompressedSize) {
if (0 == maxOutputSize) {
PyErr_SetString(ZstdError, "input data invalid or missing content size "
"in frame header");
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 return NULL;
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 }
else {
result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
destCapacity = maxOutputSize;
}
}
else {
result = PyBytes_FromStringAndSize(NULL, decompressedSize);
destCapacity = decompressedSize;
}
if (!result) {
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 return NULL;
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 }
Py_BEGIN_ALLOW_THREADS
if (self->ddict) {
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 zresult = ZSTD_decompress_usingDDict(self->dctx,
PyBytes_AsString(result), destCapacity,
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 source, sourceSize, self->ddict);
}
else {
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 zresult = ZSTD_decompressDCtx(self->dctx,
PyBytes_AsString(result), destCapacity, source, sourceSize);
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 }
Py_END_ALLOW_THREADS
if (ZSTD_isError(zresult)) {
PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 Py_DecRef(result);
return NULL;
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 }
else if (decompressedSize && zresult != decompressedSize) {
PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
zresult, decompressedSize);
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 Py_DecRef(result);
return NULL;
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 }
else if (zresult < destCapacity) {
if (_PyBytes_Resize(&result, zresult)) {
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 Py_DecRef(result);
return NULL;
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 }
}
return result;
}
PyDoc_STRVAR(Decompressor_decompressobj__doc__,
"decompressobj()\n"
"\n"
"Incrementally feed data into a decompressor.\n"
"\n"
"The returned object exposes a ``decompress(data)`` method. This makes it\n"
"compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n"
"callers can swap in the zstd decompressor while using the same API.\n"
);
static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self) {
ZstdDecompressionObj* result = PyObject_New(ZstdDecompressionObj, &ZstdDecompressionObjType);
if (!result) {
return NULL;
}
result->dstream = DStream_from_ZstdDecompressor(self);
if (!result->dstream) {
Py_DecRef((PyObject*)result);
return NULL;
}
result->decompressor = self;
Py_INCREF(result->decompressor);
result->finished = 0;
return result;
}
PyDoc_STRVAR(Decompressor_read_from__doc__,
"read_from(reader[, read_size=default, write_size=default, skip_bytes=0])\n"
"Read compressed data and return an iterator\n"
"\n"
"Returns an iterator of decompressed data chunks produced from reading from\n"
"the ``reader``.\n"
"\n"
"Compressed data will be obtained from ``reader`` by calling the\n"
"``read(size)`` method of it. The source data will be streamed into a\n"
"decompressor. As decompressed data is available, it will be exposed to the\n"
"returned iterator.\n"
"\n"
"Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n"
"iterator in chunks of size ``write_size``. The default values are the input\n"
"and output sizes for a zstd streaming decompressor.\n"
"\n"
"There is also support for skipping the first ``skip_bytes`` of data from\n"
"the source.\n"
);
static ZstdDecompressorIterator* Decompressor_read_from(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
static char* kwlist[] = {
"reader",
"read_size",
"write_size",
"skip_bytes",
NULL
};
PyObject* reader;
size_t inSize = ZSTD_DStreamInSize();
size_t outSize = ZSTD_DStreamOutSize();
ZstdDecompressorIterator* result;
size_t skipBytes = 0;
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_from", kwlist,
&reader, &inSize, &outSize, &skipBytes)) {
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 return NULL;
}
if (skipBytes >= inSize) {
PyErr_SetString(PyExc_ValueError,
"skip_bytes must be smaller than read_size");
return NULL;
}
result = PyObject_New(ZstdDecompressorIterator, &ZstdDecompressorIteratorType);
if (!result) {
return NULL;
}
result->decompressor = NULL;
result->reader = NULL;
result->buffer = NULL;
result->dstream = NULL;
result->input.src = NULL;
result->output.dst = NULL;
if (PyObject_HasAttrString(reader, "read")) {
result->reader = reader;
Py_INCREF(result->reader);
}
else if (1 == PyObject_CheckBuffer(reader)) {
/* Object claims it is a buffer. Try to get a handle to it. */
result->buffer = PyMem_Malloc(sizeof(Py_buffer));
if (!result->buffer) {
goto except;
}
memset(result->buffer, 0, sizeof(Py_buffer));
if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
goto except;
}
result->bufferOffset = 0;
}
else {
PyErr_SetString(PyExc_ValueError,
"must pass an object with a read() method or conforms to buffer protocol");
goto except;
}
result->decompressor = self;
Py_INCREF(result->decompressor);
result->inSize = inSize;
result->outSize = outSize;
result->skipBytes = skipBytes;
result->dstream = DStream_from_ZstdDecompressor(self);
if (!result->dstream) {
goto except;
}
result->input.src = PyMem_Malloc(inSize);
if (!result->input.src) {
PyErr_NoMemory();
goto except;
}
result->input.size = 0;
result->input.pos = 0;
result->output.dst = NULL;
result->output.size = 0;
result->output.pos = 0;
result->readCount = 0;
result->finishedInput = 0;
result->finishedOutput = 0;
goto finally;
except:
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 Py_CLEAR(result->reader);
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435
if (result->buffer) {
PyBuffer_Release(result->buffer);
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 Py_CLEAR(result->buffer);
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 }
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 Py_CLEAR(result);
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435
finally:
return result;
}
PyDoc_STRVAR(Decompressor_write_to__doc__,
"Create a context manager to write decompressed data to an object.\n"
"\n"
"The passed object must have a ``write()`` method.\n"
"\n"
"The caller feeds intput data to the object by calling ``write(data)``.\n"
"Decompressed data is written to the argument given as it is decompressed.\n"
"\n"
"An optional ``write_size`` argument defines the size of chunks to\n"
"``write()`` to the writer. It defaults to the default output size for a zstd\n"
"streaming decompressor.\n"
);
static ZstdDecompressionWriter* Decompressor_write_to(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
static char* kwlist[] = {
"writer",
"write_size",
NULL
};
PyObject* writer;
size_t outSize = ZSTD_DStreamOutSize();
ZstdDecompressionWriter* result;
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:write_to", kwlist,
&writer, &outSize)) {
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 return NULL;
}
if (!PyObject_HasAttrString(writer, "write")) {
PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
return NULL;
}
result = PyObject_New(ZstdDecompressionWriter, &ZstdDecompressionWriterType);
if (!result) {
return NULL;
}
result->decompressor = self;
Py_INCREF(result->decompressor);
result->writer = writer;
Py_INCREF(result->writer);
result->outSize = outSize;
result->entered = 0;
result->dstream = NULL;
return result;
}
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
"Decompress a series of chunks using the content dictionary chaining technique\n"
);
static PyObject* Decompressor_decompress_content_dict_chain(PyObject* self, PyObject* args, PyObject* kwargs) {
static char* kwlist[] = {
"frames",
NULL
};
PyObject* chunks;
Py_ssize_t chunksLen;
Py_ssize_t chunkIndex;
char parity = 0;
PyObject* chunk;
char* chunkData;
Py_ssize_t chunkSize;
ZSTD_DCtx* dctx = NULL;
size_t zresult;
ZSTD_frameParams frameParams;
void* buffer1 = NULL;
size_t buffer1Size = 0;
size_t buffer1ContentSize = 0;
void* buffer2 = NULL;
size_t buffer2Size = 0;
size_t buffer2ContentSize = 0;
void* destBuffer = NULL;
PyObject* result = NULL;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
kwlist, &PyList_Type, &chunks)) {
return NULL;
}
chunksLen = PyList_Size(chunks);
if (!chunksLen) {
PyErr_SetString(PyExc_ValueError, "empty input chain");
return NULL;
}
/* The first chunk should not be using a dictionary. We handle it specially. */
chunk = PyList_GetItem(chunks, 0);
if (!PyBytes_Check(chunk)) {
PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes");
return NULL;
}
/* We require that all chunks be zstd frames and that they have content size set. */
PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
if (ZSTD_isError(zresult)) {
PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
return NULL;
}
else if (zresult) {
PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame");
return NULL;
}
if (0 == frameParams.frameContentSize) {
PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
return NULL;
}
dctx = ZSTD_createDCtx();
if (!dctx) {
PyErr_NoMemory();
goto finally;
}
buffer1Size = frameParams.frameContentSize;
buffer1 = PyMem_Malloc(buffer1Size);
if (!buffer1) {
goto finally;
}
Py_BEGIN_ALLOW_THREADS
zresult = ZSTD_decompressDCtx(dctx, buffer1, buffer1Size, chunkData, chunkSize);
Py_END_ALLOW_THREADS
if (ZSTD_isError(zresult)) {
PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
goto finally;
}
buffer1ContentSize = zresult;
/* Special case of a simple chain. */
if (1 == chunksLen) {
result = PyBytes_FromStringAndSize(buffer1, buffer1Size);
goto finally;
}
/* This should ideally look at next chunk. But this is slightly simpler. */
buffer2Size = frameParams.frameContentSize;
buffer2 = PyMem_Malloc(buffer2Size);
if (!buffer2) {
goto finally;
}
/* For each subsequent chunk, use the previous fulltext as a content dictionary.
Our strategy is to have 2 buffers. One holds the previous fulltext (to be
used as a content dictionary) and the other holds the new fulltext. The
buffers grow when needed but never decrease in size. This limits the
memory allocator overhead.
*/
for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
chunk = PyList_GetItem(chunks, chunkIndex);
if (!PyBytes_Check(chunk)) {
PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
goto finally;
}
PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
if (ZSTD_isError(zresult)) {
PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
goto finally;
}
else if (zresult) {
PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
goto finally;
}
if (0 == frameParams.frameContentSize) {
PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
goto finally;
}
parity = chunkIndex % 2;
/* This could definitely be abstracted to reduce code duplication. */
if (parity) {
/* Resize destination buffer to hold larger content. */
if (buffer2Size < frameParams.frameContentSize) {
buffer2Size = frameParams.frameContentSize;
destBuffer = PyMem_Realloc(buffer2, buffer2Size);
if (!destBuffer) {
goto finally;
}
buffer2 = destBuffer;
}
Py_BEGIN_ALLOW_THREADS
zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size,
chunkData, chunkSize, buffer1, buffer1ContentSize);
Py_END_ALLOW_THREADS
if (ZSTD_isError(zresult)) {
PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
chunkIndex, ZSTD_getErrorName(zresult));
goto finally;
}
buffer2ContentSize = zresult;
}
else {
if (buffer1Size < frameParams.frameContentSize) {
buffer1Size = frameParams.frameContentSize;
destBuffer = PyMem_Realloc(buffer1, buffer1Size);
if (!destBuffer) {
goto finally;
}
buffer1 = destBuffer;
}
Py_BEGIN_ALLOW_THREADS
zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size,
chunkData, chunkSize, buffer2, buffer2ContentSize);
Py_END_ALLOW_THREADS
if (ZSTD_isError(zresult)) {
PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
chunkIndex, ZSTD_getErrorName(zresult));
goto finally;
}
buffer1ContentSize = zresult;
}
}
result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
parity ? buffer2ContentSize : buffer1ContentSize);
finally:
if (buffer2) {
PyMem_Free(buffer2);
}
if (buffer1) {
PyMem_Free(buffer1);
}
if (dctx) {
ZSTD_freeDCtx(dctx);
}
return result;
}
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 static PyMethodDef Decompressor_methods[] = {
{ "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
Decompressor_copy_stream__doc__ },
{ "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS,
Decompressor_decompress__doc__ },
{ "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_NOARGS,
Decompressor_decompressobj__doc__ },
{ "read_from", (PyCFunction)Decompressor_read_from, METH_VARARGS | METH_KEYWORDS,
Decompressor_read_from__doc__ },
{ "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS,
Decompressor_write_to__doc__ },
Gregory Szorc
zstd: vendor python-zstandard 0.7.0...
r30895 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
Gregory Szorc
zstd: vendor python-zstandard 0.5.0...
r30435 { NULL, NULL }
};
PyTypeObject ZstdDecompressorType = {
PyVarObject_HEAD_INIT(NULL, 0)
"zstd.ZstdDecompressor", /* tp_name */
sizeof(ZstdDecompressor), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)Decompressor_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Decompressor__doc__, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
Decompressor_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)Decompressor_init, /* tp_init */
0, /* tp_alloc */
PyType_GenericNew, /* tp_new */
};
void decompressor_module_init(PyObject* mod) {
Py_TYPE(&ZstdDecompressorType) = &PyType_Type;
if (PyType_Ready(&ZstdDecompressorType) < 0) {
return;
}
Py_INCREF((PyObject*)&ZstdDecompressorType);
PyModule_AddObject(mod, "ZstdDecompressor",
(PyObject*)&ZstdDecompressorType);
}