decompressor.c
845 lines
| 22.6 KiB
| text/x-c
|
CLexer
Gregory Szorc
|
/**
 * Copyright (c) 2016-present, Gregory Szorc
 * All rights reserved.
 *
 * This software may be modified and distributed under the terms
 * of the BSD license. See the LICENSE file for details.
 */
#include "python-zstandard.h" | ||||
extern PyObject* ZstdError; | ||||
/**
 * Create and initialize a streaming decompression context for a decompressor.
 *
 * When the decompressor has a dictionary attached, the stream is initialized
 * with that dictionary's data; otherwise a plain stream is initialized.
 *
 * Returns a new ZSTD_DStream that the caller must release with
 * ZSTD_freeDStream(), or NULL with a ZstdError set on failure.
 */
ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor) {
    ZSTD_DStream* dstream;
    void* dictData = NULL;
    size_t dictSize = 0;
    size_t zresult;

    dstream = ZSTD_createDStream();
    if (!dstream) {
        PyErr_SetString(ZstdError, "could not create DStream");
        return NULL;
    }

    if (decompressor->dict) {
        dictData = decompressor->dict->dictData;
        dictSize = decompressor->dict->dictSize;
    }

    if (dictData) {
        zresult = ZSTD_initDStream_usingDict(dstream, dictData, dictSize);
    }
    else {
        zresult = ZSTD_initDStream(dstream);
    }

    if (ZSTD_isError(zresult)) {
        PyErr_Format(ZstdError, "could not initialize DStream: %s",
            ZSTD_getErrorName(zresult));
        /* The stream was allocated above and is not handed to the caller on
           this path, so it must be released here to avoid leaking it. */
        ZSTD_freeDStream(dstream);
        return NULL;
    }

    return dstream;
}
PyDoc_STRVAR(Decompressor__doc__, | ||||
"ZstdDecompressor(dict_data=None)\n" | ||||
"\n" | ||||
"Create an object used to perform Zstandard decompression.\n" | ||||
"\n" | ||||
"An instance can perform multiple decompression operations." | ||||
); | ||||
static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | ||||
static char* kwlist[] = { | ||||
"dict_data", | ||||
NULL | ||||
}; | ||||
ZstdCompressionDict* dict = NULL; | ||||
Gregory Szorc
|
r30895 | self->dctx = NULL; | ||
Gregory Szorc
|
r30435 | self->dict = NULL; | ||
self->ddict = NULL; | ||||
Gregory Szorc
|
r30895 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist, | ||
Gregory Szorc
|
r30435 | &ZstdCompressionDictType, &dict)) { | ||
return -1; | ||||
} | ||||
/* TODO lazily initialize the reference ZSTD_DCtx on first use since | ||||
not instances of ZstdDecompressor will use a ZSTD_DCtx. */ | ||||
Gregory Szorc
|
r30895 | self->dctx = ZSTD_createDCtx(); | ||
if (!self->dctx) { | ||||
Gregory Szorc
|
r30435 | PyErr_NoMemory(); | ||
goto except; | ||||
} | ||||
if (dict) { | ||||
self->dict = dict; | ||||
Py_INCREF(dict); | ||||
} | ||||
return 0; | ||||
except: | ||||
Gregory Szorc
|
r30895 | if (self->dctx) { | ||
ZSTD_freeDCtx(self->dctx); | ||||
self->dctx = NULL; | ||||
Gregory Szorc
|
r30435 | } | ||
return -1; | ||||
} | ||||
/**
 * tp_dealloc for ZstdDecompressor.
 *
 * Frees the decompression context, drops the reference to the dictionary
 * object, frees the derived ZSTD_DDict, and finally releases the object
 * itself.
 */
static void Decompressor_dealloc(ZstdDecompressor* self) {
    if (NULL != self->dctx) {
        ZSTD_freeDCtx(self->dctx);
    }

    Py_XDECREF(self->dict);

    if (NULL != self->ddict) {
        ZSTD_freeDDict(self->ddict);
        self->ddict = NULL;
    }

    PyObject_Del(self);
}
PyDoc_STRVAR(Decompressor_copy_stream__doc__, | ||||
"copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n" | ||||
"\n" | ||||
"Compressed data will be read from ``ifh``, decompressed, and written to\n" | ||||
"``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n" | ||||
"``write(data)`` method.\n" | ||||
"\n" | ||||
"The optional ``read_size`` and ``write_size`` arguments control the chunk\n" | ||||
"size of data that is ``read()`` and ``write()`` between streams. They default\n" | ||||
"to the default input and output sizes of zstd decompressor streams.\n" | ||||
); | ||||
static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | ||||
static char* kwlist[] = { | ||||
"ifh", | ||||
"ofh", | ||||
"read_size", | ||||
"write_size", | ||||
NULL | ||||
}; | ||||
PyObject* source; | ||||
PyObject* dest; | ||||
size_t inSize = ZSTD_DStreamInSize(); | ||||
size_t outSize = ZSTD_DStreamOutSize(); | ||||
ZSTD_DStream* dstream; | ||||
ZSTD_inBuffer input; | ||||
ZSTD_outBuffer output; | ||||
Py_ssize_t totalRead = 0; | ||||
Py_ssize_t totalWrite = 0; | ||||
char* readBuffer; | ||||
Py_ssize_t readSize; | ||||
PyObject* readResult; | ||||
PyObject* res = NULL; | ||||
size_t zresult = 0; | ||||
PyObject* writeResult; | ||||
PyObject* totalReadPy; | ||||
PyObject* totalWritePy; | ||||
Gregory Szorc
|
r30895 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist, | ||
&source, &dest, &inSize, &outSize)) { | ||||
Gregory Szorc
|
r30435 | return NULL; | ||
} | ||||
if (!PyObject_HasAttrString(source, "read")) { | ||||
PyErr_SetString(PyExc_ValueError, "first argument must have a read() method"); | ||||
return NULL; | ||||
} | ||||
if (!PyObject_HasAttrString(dest, "write")) { | ||||
PyErr_SetString(PyExc_ValueError, "second argument must have a write() method"); | ||||
return NULL; | ||||
} | ||||
Gregory Szorc
|
r30830 | /* Prevent free on uninitialized memory in finally. */ | ||
output.dst = NULL; | ||||
Gregory Szorc
|
r30435 | dstream = DStream_from_ZstdDecompressor(self); | ||
if (!dstream) { | ||||
res = NULL; | ||||
goto finally; | ||||
} | ||||
output.dst = PyMem_Malloc(outSize); | ||||
if (!output.dst) { | ||||
PyErr_NoMemory(); | ||||
res = NULL; | ||||
goto finally; | ||||
} | ||||
output.size = outSize; | ||||
output.pos = 0; | ||||
/* Read source stream until EOF */ | ||||
while (1) { | ||||
readResult = PyObject_CallMethod(source, "read", "n", inSize); | ||||
if (!readResult) { | ||||
PyErr_SetString(ZstdError, "could not read() from source"); | ||||
goto finally; | ||||
} | ||||
PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); | ||||
/* If no data was read, we're at EOF. */ | ||||
if (0 == readSize) { | ||||
break; | ||||
} | ||||
totalRead += readSize; | ||||
/* Send data to decompressor */ | ||||
input.src = readBuffer; | ||||
input.size = readSize; | ||||
input.pos = 0; | ||||
while (input.pos < input.size) { | ||||
Py_BEGIN_ALLOW_THREADS | ||||
zresult = ZSTD_decompressStream(dstream, &output, &input); | ||||
Py_END_ALLOW_THREADS | ||||
if (ZSTD_isError(zresult)) { | ||||
PyErr_Format(ZstdError, "zstd decompressor error: %s", | ||||
ZSTD_getErrorName(zresult)); | ||||
res = NULL; | ||||
goto finally; | ||||
} | ||||
if (output.pos) { | ||||
#if PY_MAJOR_VERSION >= 3 | ||||
writeResult = PyObject_CallMethod(dest, "write", "y#", | ||||
#else | ||||
writeResult = PyObject_CallMethod(dest, "write", "s#", | ||||
#endif | ||||
output.dst, output.pos); | ||||
Py_XDECREF(writeResult); | ||||
totalWrite += output.pos; | ||||
output.pos = 0; | ||||
} | ||||
} | ||||
} | ||||
/* Source stream is exhausted. Finish up. */ | ||||
ZSTD_freeDStream(dstream); | ||||
dstream = NULL; | ||||
totalReadPy = PyLong_FromSsize_t(totalRead); | ||||
totalWritePy = PyLong_FromSsize_t(totalWrite); | ||||
res = PyTuple_Pack(2, totalReadPy, totalWritePy); | ||||
Py_DecRef(totalReadPy); | ||||
Py_DecRef(totalWritePy); | ||||
Gregory Szorc
|
r30895 | finally: | ||
Gregory Szorc
|
r30435 | if (output.dst) { | ||
PyMem_Free(output.dst); | ||||
} | ||||
if (dstream) { | ||||
ZSTD_freeDStream(dstream); | ||||
} | ||||
return res; | ||||
} | ||||
PyDoc_STRVAR(Decompressor_decompress__doc__, | ||||
"decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n" | ||||
"\n" | ||||
"This method will decompress the entirety of the argument and return the\n" | ||||
"result.\n" | ||||
"\n" | ||||
"The input bytes are expected to contain a full Zstandard frame (something\n" | ||||
"compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n" | ||||
"not contain a full frame, an exception will be raised.\n" | ||||
"\n" | ||||
"If the frame header of the compressed data does not contain the content size\n" | ||||
"``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n" | ||||
"allocation of size ``max_output_size`` will be performed and an attempt will\n" | ||||
"be made to perform decompression into that buffer. If the buffer is too\n" | ||||
"small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n" | ||||
"be resized if it is too large.\n" | ||||
"\n" | ||||
"Uncompressed data could be much larger than compressed data. As a result,\n" | ||||
"calling this function could result in a very large memory allocation being\n" | ||||
"performed to hold the uncompressed data. Therefore it is **highly**\n" | ||||
"recommended to use a streaming decompression method instead of this one.\n" | ||||
); | ||||
PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | ||||
static char* kwlist[] = { | ||||
"data", | ||||
"max_output_size", | ||||
NULL | ||||
}; | ||||
const char* source; | ||||
Py_ssize_t sourceSize; | ||||
Py_ssize_t maxOutputSize = 0; | ||||
unsigned long long decompressedSize; | ||||
size_t destCapacity; | ||||
PyObject* result = NULL; | ||||
void* dictData = NULL; | ||||
size_t dictSize = 0; | ||||
size_t zresult; | ||||
#if PY_MAJOR_VERSION >= 3 | ||||
Gregory Szorc
|
r30895 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n:decompress", | ||
Gregory Szorc
|
r30435 | #else | ||
Gregory Szorc
|
r30895 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n:decompress", | ||
Gregory Szorc
|
r30435 | #endif | ||
Gregory Szorc
|
r30895 | kwlist, &source, &sourceSize, &maxOutputSize)) { | ||
Gregory Szorc
|
r30435 | return NULL; | ||
} | ||||
if (self->dict) { | ||||
dictData = self->dict->dictData; | ||||
dictSize = self->dict->dictSize; | ||||
} | ||||
if (dictData && !self->ddict) { | ||||
Py_BEGIN_ALLOW_THREADS | ||||
Gregory Szorc
|
r30895 | self->ddict = ZSTD_createDDict_byReference(dictData, dictSize); | ||
Gregory Szorc
|
r30435 | Py_END_ALLOW_THREADS | ||
if (!self->ddict) { | ||||
PyErr_SetString(ZstdError, "could not create decompression dict"); | ||||
Gregory Szorc
|
r30895 | return NULL; | ||
Gregory Szorc
|
r30435 | } | ||
} | ||||
decompressedSize = ZSTD_getDecompressedSize(source, sourceSize); | ||||
/* 0 returned if content size not in the zstd frame header */ | ||||
if (0 == decompressedSize) { | ||||
if (0 == maxOutputSize) { | ||||
PyErr_SetString(ZstdError, "input data invalid or missing content size " | ||||
"in frame header"); | ||||
Gregory Szorc
|
r30895 | return NULL; | ||
Gregory Szorc
|
r30435 | } | ||
else { | ||||
result = PyBytes_FromStringAndSize(NULL, maxOutputSize); | ||||
destCapacity = maxOutputSize; | ||||
} | ||||
} | ||||
else { | ||||
result = PyBytes_FromStringAndSize(NULL, decompressedSize); | ||||
destCapacity = decompressedSize; | ||||
} | ||||
if (!result) { | ||||
Gregory Szorc
|
r30895 | return NULL; | ||
Gregory Szorc
|
r30435 | } | ||
Py_BEGIN_ALLOW_THREADS | ||||
if (self->ddict) { | ||||
Gregory Szorc
|
r30895 | zresult = ZSTD_decompress_usingDDict(self->dctx, | ||
PyBytes_AsString(result), destCapacity, | ||||
Gregory Szorc
|
r30435 | source, sourceSize, self->ddict); | ||
} | ||||
else { | ||||
Gregory Szorc
|
r30895 | zresult = ZSTD_decompressDCtx(self->dctx, | ||
PyBytes_AsString(result), destCapacity, source, sourceSize); | ||||
Gregory Szorc
|
r30435 | } | ||
Py_END_ALLOW_THREADS | ||||
if (ZSTD_isError(zresult)) { | ||||
PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult)); | ||||
Gregory Szorc
|
r30895 | Py_DecRef(result); | ||
return NULL; | ||||
Gregory Szorc
|
r30435 | } | ||
else if (decompressedSize && zresult != decompressedSize) { | ||||
PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu", | ||||
zresult, decompressedSize); | ||||
Gregory Szorc
|
r30895 | Py_DecRef(result); | ||
return NULL; | ||||
Gregory Szorc
|
r30435 | } | ||
else if (zresult < destCapacity) { | ||||
if (_PyBytes_Resize(&result, zresult)) { | ||||
Gregory Szorc
|
r30895 | Py_DecRef(result); | ||
return NULL; | ||||
Gregory Szorc
|
r30435 | } | ||
} | ||||
return result; | ||||
} | ||||
PyDoc_STRVAR(Decompressor_decompressobj__doc__, | ||||
"decompressobj()\n" | ||||
"\n" | ||||
"Incrementally feed data into a decompressor.\n" | ||||
"\n" | ||||
"The returned object exposes a ``decompress(data)`` method. This makes it\n" | ||||
"compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n" | ||||
"callers can swap in the zstd decompressor while using the same API.\n" | ||||
); | ||||
static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self) { | ||||
ZstdDecompressionObj* result = PyObject_New(ZstdDecompressionObj, &ZstdDecompressionObjType); | ||||
if (!result) { | ||||
return NULL; | ||||
} | ||||
result->dstream = DStream_from_ZstdDecompressor(self); | ||||
if (!result->dstream) { | ||||
Py_DecRef((PyObject*)result); | ||||
return NULL; | ||||
} | ||||
result->decompressor = self; | ||||
Py_INCREF(result->decompressor); | ||||
result->finished = 0; | ||||
return result; | ||||
} | ||||
PyDoc_STRVAR(Decompressor_read_from__doc__, | ||||
"read_from(reader[, read_size=default, write_size=default, skip_bytes=0])\n" | ||||
"Read compressed data and return an iterator\n" | ||||
"\n" | ||||
"Returns an iterator of decompressed data chunks produced from reading from\n" | ||||
"the ``reader``.\n" | ||||
"\n" | ||||
"Compressed data will be obtained from ``reader`` by calling the\n" | ||||
"``read(size)`` method of it. The source data will be streamed into a\n" | ||||
"decompressor. As decompressed data is available, it will be exposed to the\n" | ||||
"returned iterator.\n" | ||||
"\n" | ||||
"Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n" | ||||
"iterator in chunks of size ``write_size``. The default values are the input\n" | ||||
"and output sizes for a zstd streaming decompressor.\n" | ||||
"\n" | ||||
"There is also support for skipping the first ``skip_bytes`` of data from\n" | ||||
"the source.\n" | ||||
); | ||||
static ZstdDecompressorIterator* Decompressor_read_from(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | ||||
static char* kwlist[] = { | ||||
"reader", | ||||
"read_size", | ||||
"write_size", | ||||
"skip_bytes", | ||||
NULL | ||||
}; | ||||
PyObject* reader; | ||||
size_t inSize = ZSTD_DStreamInSize(); | ||||
size_t outSize = ZSTD_DStreamOutSize(); | ||||
ZstdDecompressorIterator* result; | ||||
size_t skipBytes = 0; | ||||
Gregory Szorc
|
r30895 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_from", kwlist, | ||
&reader, &inSize, &outSize, &skipBytes)) { | ||||
Gregory Szorc
|
r30435 | return NULL; | ||
} | ||||
if (skipBytes >= inSize) { | ||||
PyErr_SetString(PyExc_ValueError, | ||||
"skip_bytes must be smaller than read_size"); | ||||
return NULL; | ||||
} | ||||
result = PyObject_New(ZstdDecompressorIterator, &ZstdDecompressorIteratorType); | ||||
if (!result) { | ||||
return NULL; | ||||
} | ||||
result->decompressor = NULL; | ||||
result->reader = NULL; | ||||
result->buffer = NULL; | ||||
result->dstream = NULL; | ||||
result->input.src = NULL; | ||||
result->output.dst = NULL; | ||||
if (PyObject_HasAttrString(reader, "read")) { | ||||
result->reader = reader; | ||||
Py_INCREF(result->reader); | ||||
} | ||||
else if (1 == PyObject_CheckBuffer(reader)) { | ||||
/* Object claims it is a buffer. Try to get a handle to it. */ | ||||
result->buffer = PyMem_Malloc(sizeof(Py_buffer)); | ||||
if (!result->buffer) { | ||||
goto except; | ||||
} | ||||
memset(result->buffer, 0, sizeof(Py_buffer)); | ||||
if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) { | ||||
goto except; | ||||
} | ||||
result->bufferOffset = 0; | ||||
} | ||||
else { | ||||
PyErr_SetString(PyExc_ValueError, | ||||
"must pass an object with a read() method or conforms to buffer protocol"); | ||||
goto except; | ||||
} | ||||
result->decompressor = self; | ||||
Py_INCREF(result->decompressor); | ||||
result->inSize = inSize; | ||||
result->outSize = outSize; | ||||
result->skipBytes = skipBytes; | ||||
result->dstream = DStream_from_ZstdDecompressor(self); | ||||
if (!result->dstream) { | ||||
goto except; | ||||
} | ||||
result->input.src = PyMem_Malloc(inSize); | ||||
if (!result->input.src) { | ||||
PyErr_NoMemory(); | ||||
goto except; | ||||
} | ||||
result->input.size = 0; | ||||
result->input.pos = 0; | ||||
result->output.dst = NULL; | ||||
result->output.size = 0; | ||||
result->output.pos = 0; | ||||
result->readCount = 0; | ||||
result->finishedInput = 0; | ||||
result->finishedOutput = 0; | ||||
goto finally; | ||||
except: | ||||
Gregory Szorc
|
r30895 | Py_CLEAR(result->reader); | ||
Gregory Szorc
|
r30435 | |||
if (result->buffer) { | ||||
PyBuffer_Release(result->buffer); | ||||
Gregory Szorc
|
r30895 | Py_CLEAR(result->buffer); | ||
Gregory Szorc
|
r30435 | } | ||
Gregory Szorc
|
r30895 | Py_CLEAR(result); | ||
Gregory Szorc
|
r30435 | |||
finally: | ||||
return result; | ||||
} | ||||
PyDoc_STRVAR(Decompressor_write_to__doc__, | ||||
"Create a context manager to write decompressed data to an object.\n" | ||||
"\n" | ||||
"The passed object must have a ``write()`` method.\n" | ||||
"\n" | ||||
"The caller feeds intput data to the object by calling ``write(data)``.\n" | ||||
"Decompressed data is written to the argument given as it is decompressed.\n" | ||||
"\n" | ||||
"An optional ``write_size`` argument defines the size of chunks to\n" | ||||
"``write()`` to the writer. It defaults to the default output size for a zstd\n" | ||||
"streaming decompressor.\n" | ||||
); | ||||
static ZstdDecompressionWriter* Decompressor_write_to(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | ||||
static char* kwlist[] = { | ||||
"writer", | ||||
"write_size", | ||||
NULL | ||||
}; | ||||
PyObject* writer; | ||||
size_t outSize = ZSTD_DStreamOutSize(); | ||||
ZstdDecompressionWriter* result; | ||||
Gregory Szorc
|
r30895 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:write_to", kwlist, | ||
&writer, &outSize)) { | ||||
Gregory Szorc
|
r30435 | return NULL; | ||
} | ||||
if (!PyObject_HasAttrString(writer, "write")) { | ||||
PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method"); | ||||
return NULL; | ||||
} | ||||
result = PyObject_New(ZstdDecompressionWriter, &ZstdDecompressionWriterType); | ||||
if (!result) { | ||||
return NULL; | ||||
} | ||||
result->decompressor = self; | ||||
Py_INCREF(result->decompressor); | ||||
result->writer = writer; | ||||
Py_INCREF(result->writer); | ||||
result->outSize = outSize; | ||||
result->entered = 0; | ||||
result->dstream = NULL; | ||||
return result; | ||||
} | ||||
Gregory Szorc
|
r30895 | PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__, | ||
"Decompress a series of chunks using the content dictionary chaining technique\n" | ||||
); | ||||
static PyObject* Decompressor_decompress_content_dict_chain(PyObject* self, PyObject* args, PyObject* kwargs) { | ||||
static char* kwlist[] = { | ||||
"frames", | ||||
NULL | ||||
}; | ||||
PyObject* chunks; | ||||
Py_ssize_t chunksLen; | ||||
Py_ssize_t chunkIndex; | ||||
char parity = 0; | ||||
PyObject* chunk; | ||||
char* chunkData; | ||||
Py_ssize_t chunkSize; | ||||
ZSTD_DCtx* dctx = NULL; | ||||
size_t zresult; | ||||
ZSTD_frameParams frameParams; | ||||
void* buffer1 = NULL; | ||||
size_t buffer1Size = 0; | ||||
size_t buffer1ContentSize = 0; | ||||
void* buffer2 = NULL; | ||||
size_t buffer2Size = 0; | ||||
size_t buffer2ContentSize = 0; | ||||
void* destBuffer = NULL; | ||||
PyObject* result = NULL; | ||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain", | ||||
kwlist, &PyList_Type, &chunks)) { | ||||
return NULL; | ||||
} | ||||
chunksLen = PyList_Size(chunks); | ||||
if (!chunksLen) { | ||||
PyErr_SetString(PyExc_ValueError, "empty input chain"); | ||||
return NULL; | ||||
} | ||||
/* The first chunk should not be using a dictionary. We handle it specially. */ | ||||
chunk = PyList_GetItem(chunks, 0); | ||||
if (!PyBytes_Check(chunk)) { | ||||
PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes"); | ||||
return NULL; | ||||
} | ||||
/* We require that all chunks be zstd frames and that they have content size set. */ | ||||
PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); | ||||
zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize); | ||||
if (ZSTD_isError(zresult)) { | ||||
PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame"); | ||||
return NULL; | ||||
} | ||||
else if (zresult) { | ||||
PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame"); | ||||
return NULL; | ||||
} | ||||
if (0 == frameParams.frameContentSize) { | ||||
PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame"); | ||||
return NULL; | ||||
} | ||||
dctx = ZSTD_createDCtx(); | ||||
if (!dctx) { | ||||
PyErr_NoMemory(); | ||||
goto finally; | ||||
} | ||||
buffer1Size = frameParams.frameContentSize; | ||||
buffer1 = PyMem_Malloc(buffer1Size); | ||||
if (!buffer1) { | ||||
goto finally; | ||||
} | ||||
Py_BEGIN_ALLOW_THREADS | ||||
zresult = ZSTD_decompressDCtx(dctx, buffer1, buffer1Size, chunkData, chunkSize); | ||||
Py_END_ALLOW_THREADS | ||||
if (ZSTD_isError(zresult)) { | ||||
PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult)); | ||||
goto finally; | ||||
} | ||||
buffer1ContentSize = zresult; | ||||
/* Special case of a simple chain. */ | ||||
if (1 == chunksLen) { | ||||
result = PyBytes_FromStringAndSize(buffer1, buffer1Size); | ||||
goto finally; | ||||
} | ||||
/* This should ideally look at next chunk. But this is slightly simpler. */ | ||||
buffer2Size = frameParams.frameContentSize; | ||||
buffer2 = PyMem_Malloc(buffer2Size); | ||||
if (!buffer2) { | ||||
goto finally; | ||||
} | ||||
/* For each subsequent chunk, use the previous fulltext as a content dictionary. | ||||
Our strategy is to have 2 buffers. One holds the previous fulltext (to be | ||||
used as a content dictionary) and the other holds the new fulltext. The | ||||
buffers grow when needed but never decrease in size. This limits the | ||||
memory allocator overhead. | ||||
*/ | ||||
for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) { | ||||
chunk = PyList_GetItem(chunks, chunkIndex); | ||||
if (!PyBytes_Check(chunk)) { | ||||
PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex); | ||||
goto finally; | ||||
} | ||||
PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); | ||||
zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize); | ||||
if (ZSTD_isError(zresult)) { | ||||
PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex); | ||||
goto finally; | ||||
} | ||||
else if (zresult) { | ||||
PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex); | ||||
goto finally; | ||||
} | ||||
if (0 == frameParams.frameContentSize) { | ||||
PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex); | ||||
goto finally; | ||||
} | ||||
parity = chunkIndex % 2; | ||||
/* This could definitely be abstracted to reduce code duplication. */ | ||||
if (parity) { | ||||
/* Resize destination buffer to hold larger content. */ | ||||
if (buffer2Size < frameParams.frameContentSize) { | ||||
buffer2Size = frameParams.frameContentSize; | ||||
destBuffer = PyMem_Realloc(buffer2, buffer2Size); | ||||
if (!destBuffer) { | ||||
goto finally; | ||||
} | ||||
buffer2 = destBuffer; | ||||
} | ||||
Py_BEGIN_ALLOW_THREADS | ||||
zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size, | ||||
chunkData, chunkSize, buffer1, buffer1ContentSize); | ||||
Py_END_ALLOW_THREADS | ||||
if (ZSTD_isError(zresult)) { | ||||
PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", | ||||
chunkIndex, ZSTD_getErrorName(zresult)); | ||||
goto finally; | ||||
} | ||||
buffer2ContentSize = zresult; | ||||
} | ||||
else { | ||||
if (buffer1Size < frameParams.frameContentSize) { | ||||
buffer1Size = frameParams.frameContentSize; | ||||
destBuffer = PyMem_Realloc(buffer1, buffer1Size); | ||||
if (!destBuffer) { | ||||
goto finally; | ||||
} | ||||
buffer1 = destBuffer; | ||||
} | ||||
Py_BEGIN_ALLOW_THREADS | ||||
zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size, | ||||
chunkData, chunkSize, buffer2, buffer2ContentSize); | ||||
Py_END_ALLOW_THREADS | ||||
if (ZSTD_isError(zresult)) { | ||||
PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", | ||||
chunkIndex, ZSTD_getErrorName(zresult)); | ||||
goto finally; | ||||
} | ||||
buffer1ContentSize = zresult; | ||||
} | ||||
} | ||||
result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1, | ||||
parity ? buffer2ContentSize : buffer1ContentSize); | ||||
finally: | ||||
if (buffer2) { | ||||
PyMem_Free(buffer2); | ||||
} | ||||
if (buffer1) { | ||||
PyMem_Free(buffer1); | ||||
} | ||||
if (dctx) { | ||||
ZSTD_freeDCtx(dctx); | ||||
} | ||||
return result; | ||||
} | ||||
Gregory Szorc
|
r30435 | static PyMethodDef Decompressor_methods[] = { | ||
{ "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS, | ||||
Decompressor_copy_stream__doc__ }, | ||||
{ "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS, | ||||
Decompressor_decompress__doc__ }, | ||||
{ "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_NOARGS, | ||||
Decompressor_decompressobj__doc__ }, | ||||
{ "read_from", (PyCFunction)Decompressor_read_from, METH_VARARGS | METH_KEYWORDS, | ||||
Decompressor_read_from__doc__ }, | ||||
{ "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS, | ||||
Decompressor_write_to__doc__ }, | ||||
Gregory Szorc
|
r30895 | { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain, | ||
METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ }, | ||||
Gregory Szorc
|
r30435 | { NULL, NULL } | ||
}; | ||||
PyTypeObject ZstdDecompressorType = { | ||||
PyVarObject_HEAD_INIT(NULL, 0) | ||||
"zstd.ZstdDecompressor", /* tp_name */ | ||||
sizeof(ZstdDecompressor), /* tp_basicsize */ | ||||
0, /* tp_itemsize */ | ||||
(destructor)Decompressor_dealloc, /* tp_dealloc */ | ||||
0, /* tp_print */ | ||||
0, /* tp_getattr */ | ||||
0, /* tp_setattr */ | ||||
0, /* tp_compare */ | ||||
0, /* tp_repr */ | ||||
0, /* tp_as_number */ | ||||
0, /* tp_as_sequence */ | ||||
0, /* tp_as_mapping */ | ||||
0, /* tp_hash */ | ||||
0, /* tp_call */ | ||||
0, /* tp_str */ | ||||
0, /* tp_getattro */ | ||||
0, /* tp_setattro */ | ||||
0, /* tp_as_buffer */ | ||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | ||||
Decompressor__doc__, /* tp_doc */ | ||||
0, /* tp_traverse */ | ||||
0, /* tp_clear */ | ||||
0, /* tp_richcompare */ | ||||
0, /* tp_weaklistoffset */ | ||||
0, /* tp_iter */ | ||||
0, /* tp_iternext */ | ||||
Decompressor_methods, /* tp_methods */ | ||||
0, /* tp_members */ | ||||
0, /* tp_getset */ | ||||
0, /* tp_base */ | ||||
0, /* tp_dict */ | ||||
0, /* tp_descr_get */ | ||||
0, /* tp_descr_set */ | ||||
0, /* tp_dictoffset */ | ||||
(initproc)Decompressor_init, /* tp_init */ | ||||
0, /* tp_alloc */ | ||||
PyType_GenericNew, /* tp_new */ | ||||
}; | ||||
/**
 * Finalize ZstdDecompressorType and publish it on ``mod`` under the name
 * "ZstdDecompressor".
 *
 * Returns nothing; if PyType_Ready() or PyModule_AddObject() fails, the
 * exception they set is left pending for the caller to detect.
 */
void decompressor_module_init(PyObject* mod) {
    /* Py_TYPE() is not assignable on newer CPython releases; Py_SET_TYPE()
       is the supported spelling where it exists (3.9+). */
#if PY_VERSION_HEX >= 0x03090000
    Py_SET_TYPE(&ZstdDecompressorType, &PyType_Type);
#else
    Py_TYPE(&ZstdDecompressorType) = &PyType_Type;
#endif
    if (PyType_Ready(&ZstdDecompressorType) < 0) {
        return;
    }

    Py_INCREF((PyObject*)&ZstdDecompressorType);
    if (PyModule_AddObject(mod, "ZstdDecompressor",
        (PyObject*)&ZstdDecompressorType) < 0) {
        /* PyModule_AddObject() only steals the reference on success. */
        Py_DECREF((PyObject*)&ZstdDecompressorType);
    }
}