##// END OF EJS Templates
python-zstandard: apply big-endian fix (issue6188)...
Gregory Szorc -
r43083:a4e32fd5 5.1.1 stable
parent child Browse files
Show More
@@ -1,1822 +1,1822 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10 #include "pool.h"
10 #include "pool.h"
11
11
12 extern PyObject* ZstdError;
12 extern PyObject* ZstdError;
13
13
14 /**
14 /**
15 * Ensure the ZSTD_DCtx on a decompressor is initiated and ready for a new operation.
15 * Ensure the ZSTD_DCtx on a decompressor is initiated and ready for a new operation.
16 */
16 */
17 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict) {
17 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict) {
18 size_t zresult;
18 size_t zresult;
19
19
20 ZSTD_DCtx_reset(decompressor->dctx, ZSTD_reset_session_only);
20 ZSTD_DCtx_reset(decompressor->dctx, ZSTD_reset_session_only);
21
21
22 if (decompressor->maxWindowSize) {
22 if (decompressor->maxWindowSize) {
23 zresult = ZSTD_DCtx_setMaxWindowSize(decompressor->dctx, decompressor->maxWindowSize);
23 zresult = ZSTD_DCtx_setMaxWindowSize(decompressor->dctx, decompressor->maxWindowSize);
24 if (ZSTD_isError(zresult)) {
24 if (ZSTD_isError(zresult)) {
25 PyErr_Format(ZstdError, "unable to set max window size: %s",
25 PyErr_Format(ZstdError, "unable to set max window size: %s",
26 ZSTD_getErrorName(zresult));
26 ZSTD_getErrorName(zresult));
27 return 1;
27 return 1;
28 }
28 }
29 }
29 }
30
30
31 zresult = ZSTD_DCtx_setFormat(decompressor->dctx, decompressor->format);
31 zresult = ZSTD_DCtx_setFormat(decompressor->dctx, decompressor->format);
32 if (ZSTD_isError(zresult)) {
32 if (ZSTD_isError(zresult)) {
33 PyErr_Format(ZstdError, "unable to set decoding format: %s",
33 PyErr_Format(ZstdError, "unable to set decoding format: %s",
34 ZSTD_getErrorName(zresult));
34 ZSTD_getErrorName(zresult));
35 return 1;
35 return 1;
36 }
36 }
37
37
38 if (loadDict && decompressor->dict) {
38 if (loadDict && decompressor->dict) {
39 if (ensure_ddict(decompressor->dict)) {
39 if (ensure_ddict(decompressor->dict)) {
40 return 1;
40 return 1;
41 }
41 }
42
42
43 zresult = ZSTD_DCtx_refDDict(decompressor->dctx, decompressor->dict->ddict);
43 zresult = ZSTD_DCtx_refDDict(decompressor->dctx, decompressor->dict->ddict);
44 if (ZSTD_isError(zresult)) {
44 if (ZSTD_isError(zresult)) {
45 PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
45 PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
46 ZSTD_getErrorName(zresult));
46 ZSTD_getErrorName(zresult));
47 return 1;
47 return 1;
48 }
48 }
49 }
49 }
50
50
51 return 0;
51 return 0;
52 }
52 }
53
53
54 PyDoc_STRVAR(Decompressor__doc__,
54 PyDoc_STRVAR(Decompressor__doc__,
55 "ZstdDecompressor(dict_data=None)\n"
55 "ZstdDecompressor(dict_data=None)\n"
56 "\n"
56 "\n"
57 "Create an object used to perform Zstandard decompression.\n"
57 "Create an object used to perform Zstandard decompression.\n"
58 "\n"
58 "\n"
59 "An instance can perform multiple decompression operations."
59 "An instance can perform multiple decompression operations."
60 );
60 );
61
61
62 static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
62 static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
63 static char* kwlist[] = {
63 static char* kwlist[] = {
64 "dict_data",
64 "dict_data",
65 "max_window_size",
65 "max_window_size",
66 "format",
66 "format",
67 NULL
67 NULL
68 };
68 };
69
69
70 ZstdCompressionDict* dict = NULL;
70 ZstdCompressionDict* dict = NULL;
71 size_t maxWindowSize = 0;
71 Py_ssize_t maxWindowSize = 0;
72 ZSTD_format_e format = ZSTD_f_zstd1;
72 ZSTD_format_e format = ZSTD_f_zstd1;
73
73
74 self->dctx = NULL;
74 self->dctx = NULL;
75 self->dict = NULL;
75 self->dict = NULL;
76
76
77 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!II:ZstdDecompressor", kwlist,
77 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!nI:ZstdDecompressor", kwlist,
78 &ZstdCompressionDictType, &dict, &maxWindowSize, &format)) {
78 &ZstdCompressionDictType, &dict, &maxWindowSize, &format)) {
79 return -1;
79 return -1;
80 }
80 }
81
81
82 self->dctx = ZSTD_createDCtx();
82 self->dctx = ZSTD_createDCtx();
83 if (!self->dctx) {
83 if (!self->dctx) {
84 PyErr_NoMemory();
84 PyErr_NoMemory();
85 goto except;
85 goto except;
86 }
86 }
87
87
88 self->maxWindowSize = maxWindowSize;
88 self->maxWindowSize = maxWindowSize;
89 self->format = format;
89 self->format = format;
90
90
91 if (dict) {
91 if (dict) {
92 self->dict = dict;
92 self->dict = dict;
93 Py_INCREF(dict);
93 Py_INCREF(dict);
94 }
94 }
95
95
96 if (ensure_dctx(self, 1)) {
96 if (ensure_dctx(self, 1)) {
97 goto except;
97 goto except;
98 }
98 }
99
99
100 return 0;
100 return 0;
101
101
102 except:
102 except:
103 Py_CLEAR(self->dict);
103 Py_CLEAR(self->dict);
104
104
105 if (self->dctx) {
105 if (self->dctx) {
106 ZSTD_freeDCtx(self->dctx);
106 ZSTD_freeDCtx(self->dctx);
107 self->dctx = NULL;
107 self->dctx = NULL;
108 }
108 }
109
109
110 return -1;
110 return -1;
111 }
111 }
112
112
113 static void Decompressor_dealloc(ZstdDecompressor* self) {
113 static void Decompressor_dealloc(ZstdDecompressor* self) {
114 Py_CLEAR(self->dict);
114 Py_CLEAR(self->dict);
115
115
116 if (self->dctx) {
116 if (self->dctx) {
117 ZSTD_freeDCtx(self->dctx);
117 ZSTD_freeDCtx(self->dctx);
118 self->dctx = NULL;
118 self->dctx = NULL;
119 }
119 }
120
120
121 PyObject_Del(self);
121 PyObject_Del(self);
122 }
122 }
123
123
124 PyDoc_STRVAR(Decompressor_memory_size__doc__,
124 PyDoc_STRVAR(Decompressor_memory_size__doc__,
125 "memory_size() -- Size of decompression context, in bytes\n"
125 "memory_size() -- Size of decompression context, in bytes\n"
126 );
126 );
127
127
128 static PyObject* Decompressor_memory_size(ZstdDecompressor* self) {
128 static PyObject* Decompressor_memory_size(ZstdDecompressor* self) {
129 if (self->dctx) {
129 if (self->dctx) {
130 return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->dctx));
130 return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->dctx));
131 }
131 }
132 else {
132 else {
133 PyErr_SetString(ZstdError, "no decompressor context found; this should never happen");
133 PyErr_SetString(ZstdError, "no decompressor context found; this should never happen");
134 return NULL;
134 return NULL;
135 }
135 }
136 }
136 }
137
137
138 PyDoc_STRVAR(Decompressor_copy_stream__doc__,
138 PyDoc_STRVAR(Decompressor_copy_stream__doc__,
139 "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n"
139 "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n"
140 "\n"
140 "\n"
141 "Compressed data will be read from ``ifh``, decompressed, and written to\n"
141 "Compressed data will be read from ``ifh``, decompressed, and written to\n"
142 "``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n"
142 "``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n"
143 "``write(data)`` method.\n"
143 "``write(data)`` method.\n"
144 "\n"
144 "\n"
145 "The optional ``read_size`` and ``write_size`` arguments control the chunk\n"
145 "The optional ``read_size`` and ``write_size`` arguments control the chunk\n"
146 "size of data that is ``read()`` and ``write()`` between streams. They default\n"
146 "size of data that is ``read()`` and ``write()`` between streams. They default\n"
147 "to the default input and output sizes of zstd decompressor streams.\n"
147 "to the default input and output sizes of zstd decompressor streams.\n"
148 );
148 );
149
149
150 static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
150 static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
151 static char* kwlist[] = {
151 static char* kwlist[] = {
152 "ifh",
152 "ifh",
153 "ofh",
153 "ofh",
154 "read_size",
154 "read_size",
155 "write_size",
155 "write_size",
156 NULL
156 NULL
157 };
157 };
158
158
159 PyObject* source;
159 PyObject* source;
160 PyObject* dest;
160 PyObject* dest;
161 size_t inSize = ZSTD_DStreamInSize();
161 size_t inSize = ZSTD_DStreamInSize();
162 size_t outSize = ZSTD_DStreamOutSize();
162 size_t outSize = ZSTD_DStreamOutSize();
163 ZSTD_inBuffer input;
163 ZSTD_inBuffer input;
164 ZSTD_outBuffer output;
164 ZSTD_outBuffer output;
165 Py_ssize_t totalRead = 0;
165 Py_ssize_t totalRead = 0;
166 Py_ssize_t totalWrite = 0;
166 Py_ssize_t totalWrite = 0;
167 char* readBuffer;
167 char* readBuffer;
168 Py_ssize_t readSize;
168 Py_ssize_t readSize;
169 PyObject* readResult = NULL;
169 PyObject* readResult = NULL;
170 PyObject* res = NULL;
170 PyObject* res = NULL;
171 size_t zresult = 0;
171 size_t zresult = 0;
172 PyObject* writeResult;
172 PyObject* writeResult;
173 PyObject* totalReadPy;
173 PyObject* totalReadPy;
174 PyObject* totalWritePy;
174 PyObject* totalWritePy;
175
175
176 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
176 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
177 &source, &dest, &inSize, &outSize)) {
177 &source, &dest, &inSize, &outSize)) {
178 return NULL;
178 return NULL;
179 }
179 }
180
180
181 if (!PyObject_HasAttrString(source, "read")) {
181 if (!PyObject_HasAttrString(source, "read")) {
182 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
182 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
183 return NULL;
183 return NULL;
184 }
184 }
185
185
186 if (!PyObject_HasAttrString(dest, "write")) {
186 if (!PyObject_HasAttrString(dest, "write")) {
187 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
187 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
188 return NULL;
188 return NULL;
189 }
189 }
190
190
191 /* Prevent free on uninitialized memory in finally. */
191 /* Prevent free on uninitialized memory in finally. */
192 output.dst = NULL;
192 output.dst = NULL;
193
193
194 if (ensure_dctx(self, 1)) {
194 if (ensure_dctx(self, 1)) {
195 res = NULL;
195 res = NULL;
196 goto finally;
196 goto finally;
197 }
197 }
198
198
199 output.dst = PyMem_Malloc(outSize);
199 output.dst = PyMem_Malloc(outSize);
200 if (!output.dst) {
200 if (!output.dst) {
201 PyErr_NoMemory();
201 PyErr_NoMemory();
202 res = NULL;
202 res = NULL;
203 goto finally;
203 goto finally;
204 }
204 }
205 output.size = outSize;
205 output.size = outSize;
206 output.pos = 0;
206 output.pos = 0;
207
207
208 /* Read source stream until EOF */
208 /* Read source stream until EOF */
209 while (1) {
209 while (1) {
210 readResult = PyObject_CallMethod(source, "read", "n", inSize);
210 readResult = PyObject_CallMethod(source, "read", "n", inSize);
211 if (!readResult) {
211 if (!readResult) {
212 PyErr_SetString(ZstdError, "could not read() from source");
212 PyErr_SetString(ZstdError, "could not read() from source");
213 goto finally;
213 goto finally;
214 }
214 }
215
215
216 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
216 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
217
217
218 /* If no data was read, we're at EOF. */
218 /* If no data was read, we're at EOF. */
219 if (0 == readSize) {
219 if (0 == readSize) {
220 break;
220 break;
221 }
221 }
222
222
223 totalRead += readSize;
223 totalRead += readSize;
224
224
225 /* Send data to decompressor */
225 /* Send data to decompressor */
226 input.src = readBuffer;
226 input.src = readBuffer;
227 input.size = readSize;
227 input.size = readSize;
228 input.pos = 0;
228 input.pos = 0;
229
229
230 while (input.pos < input.size) {
230 while (input.pos < input.size) {
231 Py_BEGIN_ALLOW_THREADS
231 Py_BEGIN_ALLOW_THREADS
232 zresult = ZSTD_decompressStream(self->dctx, &output, &input);
232 zresult = ZSTD_decompressStream(self->dctx, &output, &input);
233 Py_END_ALLOW_THREADS
233 Py_END_ALLOW_THREADS
234
234
235 if (ZSTD_isError(zresult)) {
235 if (ZSTD_isError(zresult)) {
236 PyErr_Format(ZstdError, "zstd decompressor error: %s",
236 PyErr_Format(ZstdError, "zstd decompressor error: %s",
237 ZSTD_getErrorName(zresult));
237 ZSTD_getErrorName(zresult));
238 res = NULL;
238 res = NULL;
239 goto finally;
239 goto finally;
240 }
240 }
241
241
242 if (output.pos) {
242 if (output.pos) {
243 #if PY_MAJOR_VERSION >= 3
243 #if PY_MAJOR_VERSION >= 3
244 writeResult = PyObject_CallMethod(dest, "write", "y#",
244 writeResult = PyObject_CallMethod(dest, "write", "y#",
245 #else
245 #else
246 writeResult = PyObject_CallMethod(dest, "write", "s#",
246 writeResult = PyObject_CallMethod(dest, "write", "s#",
247 #endif
247 #endif
248 output.dst, output.pos);
248 output.dst, output.pos);
249
249
250 Py_XDECREF(writeResult);
250 Py_XDECREF(writeResult);
251 totalWrite += output.pos;
251 totalWrite += output.pos;
252 output.pos = 0;
252 output.pos = 0;
253 }
253 }
254 }
254 }
255
255
256 Py_CLEAR(readResult);
256 Py_CLEAR(readResult);
257 }
257 }
258
258
259 /* Source stream is exhausted. Finish up. */
259 /* Source stream is exhausted. Finish up. */
260
260
261 totalReadPy = PyLong_FromSsize_t(totalRead);
261 totalReadPy = PyLong_FromSsize_t(totalRead);
262 totalWritePy = PyLong_FromSsize_t(totalWrite);
262 totalWritePy = PyLong_FromSsize_t(totalWrite);
263 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
263 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
264 Py_DECREF(totalReadPy);
264 Py_DECREF(totalReadPy);
265 Py_DECREF(totalWritePy);
265 Py_DECREF(totalWritePy);
266
266
267 finally:
267 finally:
268 if (output.dst) {
268 if (output.dst) {
269 PyMem_Free(output.dst);
269 PyMem_Free(output.dst);
270 }
270 }
271
271
272 Py_XDECREF(readResult);
272 Py_XDECREF(readResult);
273
273
274 return res;
274 return res;
275 }
275 }
276
276
277 PyDoc_STRVAR(Decompressor_decompress__doc__,
277 PyDoc_STRVAR(Decompressor_decompress__doc__,
278 "decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n"
278 "decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n"
279 "\n"
279 "\n"
280 "This method will decompress the entirety of the argument and return the\n"
280 "This method will decompress the entirety of the argument and return the\n"
281 "result.\n"
281 "result.\n"
282 "\n"
282 "\n"
283 "The input bytes are expected to contain a full Zstandard frame (something\n"
283 "The input bytes are expected to contain a full Zstandard frame (something\n"
284 "compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n"
284 "compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n"
285 "not contain a full frame, an exception will be raised.\n"
285 "not contain a full frame, an exception will be raised.\n"
286 "\n"
286 "\n"
287 "If the frame header of the compressed data does not contain the content size\n"
287 "If the frame header of the compressed data does not contain the content size\n"
288 "``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n"
288 "``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n"
289 "allocation of size ``max_output_size`` will be performed and an attempt will\n"
289 "allocation of size ``max_output_size`` will be performed and an attempt will\n"
290 "be made to perform decompression into that buffer. If the buffer is too\n"
290 "be made to perform decompression into that buffer. If the buffer is too\n"
291 "small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n"
291 "small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n"
292 "be resized if it is too large.\n"
292 "be resized if it is too large.\n"
293 "\n"
293 "\n"
294 "Uncompressed data could be much larger than compressed data. As a result,\n"
294 "Uncompressed data could be much larger than compressed data. As a result,\n"
295 "calling this function could result in a very large memory allocation being\n"
295 "calling this function could result in a very large memory allocation being\n"
296 "performed to hold the uncompressed data. Therefore it is **highly**\n"
296 "performed to hold the uncompressed data. Therefore it is **highly**\n"
297 "recommended to use a streaming decompression method instead of this one.\n"
297 "recommended to use a streaming decompression method instead of this one.\n"
298 );
298 );
299
299
300 PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
300 PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
301 static char* kwlist[] = {
301 static char* kwlist[] = {
302 "data",
302 "data",
303 "max_output_size",
303 "max_output_size",
304 NULL
304 NULL
305 };
305 };
306
306
307 Py_buffer source;
307 Py_buffer source;
308 Py_ssize_t maxOutputSize = 0;
308 Py_ssize_t maxOutputSize = 0;
309 unsigned long long decompressedSize;
309 unsigned long long decompressedSize;
310 size_t destCapacity;
310 size_t destCapacity;
311 PyObject* result = NULL;
311 PyObject* result = NULL;
312 size_t zresult;
312 size_t zresult;
313 ZSTD_outBuffer outBuffer;
313 ZSTD_outBuffer outBuffer;
314 ZSTD_inBuffer inBuffer;
314 ZSTD_inBuffer inBuffer;
315
315
316 #if PY_MAJOR_VERSION >= 3
316 #if PY_MAJOR_VERSION >= 3
317 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|n:decompress",
317 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|n:decompress",
318 #else
318 #else
319 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|n:decompress",
319 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|n:decompress",
320 #endif
320 #endif
321 kwlist, &source, &maxOutputSize)) {
321 kwlist, &source, &maxOutputSize)) {
322 return NULL;
322 return NULL;
323 }
323 }
324
324
325 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
325 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
326 PyErr_SetString(PyExc_ValueError,
326 PyErr_SetString(PyExc_ValueError,
327 "data buffer should be contiguous and have at most one dimension");
327 "data buffer should be contiguous and have at most one dimension");
328 goto finally;
328 goto finally;
329 }
329 }
330
330
331 if (ensure_dctx(self, 1)) {
331 if (ensure_dctx(self, 1)) {
332 goto finally;
332 goto finally;
333 }
333 }
334
334
335 decompressedSize = ZSTD_getFrameContentSize(source.buf, source.len);
335 decompressedSize = ZSTD_getFrameContentSize(source.buf, source.len);
336
336
337 if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
337 if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
338 PyErr_SetString(ZstdError, "error determining content size from frame header");
338 PyErr_SetString(ZstdError, "error determining content size from frame header");
339 goto finally;
339 goto finally;
340 }
340 }
341 /* Special case of empty frame. */
341 /* Special case of empty frame. */
342 else if (0 == decompressedSize) {
342 else if (0 == decompressedSize) {
343 result = PyBytes_FromStringAndSize("", 0);
343 result = PyBytes_FromStringAndSize("", 0);
344 goto finally;
344 goto finally;
345 }
345 }
346 /* Missing content size in frame header. */
346 /* Missing content size in frame header. */
347 if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) {
347 if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) {
348 if (0 == maxOutputSize) {
348 if (0 == maxOutputSize) {
349 PyErr_SetString(ZstdError, "could not determine content size in frame header");
349 PyErr_SetString(ZstdError, "could not determine content size in frame header");
350 goto finally;
350 goto finally;
351 }
351 }
352
352
353 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
353 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
354 destCapacity = maxOutputSize;
354 destCapacity = maxOutputSize;
355 decompressedSize = 0;
355 decompressedSize = 0;
356 }
356 }
357 /* Size is recorded in frame header. */
357 /* Size is recorded in frame header. */
358 else {
358 else {
359 assert(SIZE_MAX >= PY_SSIZE_T_MAX);
359 assert(SIZE_MAX >= PY_SSIZE_T_MAX);
360 if (decompressedSize > PY_SSIZE_T_MAX) {
360 if (decompressedSize > PY_SSIZE_T_MAX) {
361 PyErr_SetString(ZstdError, "frame is too large to decompress on this platform");
361 PyErr_SetString(ZstdError, "frame is too large to decompress on this platform");
362 goto finally;
362 goto finally;
363 }
363 }
364
364
365 result = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)decompressedSize);
365 result = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)decompressedSize);
366 destCapacity = (size_t)decompressedSize;
366 destCapacity = (size_t)decompressedSize;
367 }
367 }
368
368
369 if (!result) {
369 if (!result) {
370 goto finally;
370 goto finally;
371 }
371 }
372
372
373 outBuffer.dst = PyBytes_AsString(result);
373 outBuffer.dst = PyBytes_AsString(result);
374 outBuffer.size = destCapacity;
374 outBuffer.size = destCapacity;
375 outBuffer.pos = 0;
375 outBuffer.pos = 0;
376
376
377 inBuffer.src = source.buf;
377 inBuffer.src = source.buf;
378 inBuffer.size = source.len;
378 inBuffer.size = source.len;
379 inBuffer.pos = 0;
379 inBuffer.pos = 0;
380
380
381 Py_BEGIN_ALLOW_THREADS
381 Py_BEGIN_ALLOW_THREADS
382 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
382 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
383 Py_END_ALLOW_THREADS
383 Py_END_ALLOW_THREADS
384
384
385 if (ZSTD_isError(zresult)) {
385 if (ZSTD_isError(zresult)) {
386 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
386 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
387 Py_CLEAR(result);
387 Py_CLEAR(result);
388 goto finally;
388 goto finally;
389 }
389 }
390 else if (zresult) {
390 else if (zresult) {
391 PyErr_Format(ZstdError, "decompression error: did not decompress full frame");
391 PyErr_Format(ZstdError, "decompression error: did not decompress full frame");
392 Py_CLEAR(result);
392 Py_CLEAR(result);
393 goto finally;
393 goto finally;
394 }
394 }
395 else if (decompressedSize && outBuffer.pos != decompressedSize) {
395 else if (decompressedSize && outBuffer.pos != decompressedSize) {
396 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
396 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
397 zresult, decompressedSize);
397 zresult, decompressedSize);
398 Py_CLEAR(result);
398 Py_CLEAR(result);
399 goto finally;
399 goto finally;
400 }
400 }
401 else if (outBuffer.pos < destCapacity) {
401 else if (outBuffer.pos < destCapacity) {
402 if (safe_pybytes_resize(&result, outBuffer.pos)) {
402 if (safe_pybytes_resize(&result, outBuffer.pos)) {
403 Py_CLEAR(result);
403 Py_CLEAR(result);
404 goto finally;
404 goto finally;
405 }
405 }
406 }
406 }
407
407
408 finally:
408 finally:
409 PyBuffer_Release(&source);
409 PyBuffer_Release(&source);
410 return result;
410 return result;
411 }
411 }
412
412
413 PyDoc_STRVAR(Decompressor_decompressobj__doc__,
413 PyDoc_STRVAR(Decompressor_decompressobj__doc__,
414 "decompressobj([write_size=default])\n"
414 "decompressobj([write_size=default])\n"
415 "\n"
415 "\n"
416 "Incrementally feed data into a decompressor.\n"
416 "Incrementally feed data into a decompressor.\n"
417 "\n"
417 "\n"
418 "The returned object exposes a ``decompress(data)`` method. This makes it\n"
418 "The returned object exposes a ``decompress(data)`` method. This makes it\n"
419 "compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n"
419 "compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n"
420 "callers can swap in the zstd decompressor while using the same API.\n"
420 "callers can swap in the zstd decompressor while using the same API.\n"
421 );
421 );
422
422
423 static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
423 static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
424 static char* kwlist[] = {
424 static char* kwlist[] = {
425 "write_size",
425 "write_size",
426 NULL
426 NULL
427 };
427 };
428
428
429 ZstdDecompressionObj* result = NULL;
429 ZstdDecompressionObj* result = NULL;
430 size_t outSize = ZSTD_DStreamOutSize();
430 size_t outSize = ZSTD_DStreamOutSize();
431
431
432 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|k:decompressobj", kwlist, &outSize)) {
432 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|k:decompressobj", kwlist, &outSize)) {
433 return NULL;
433 return NULL;
434 }
434 }
435
435
436 if (!outSize) {
436 if (!outSize) {
437 PyErr_SetString(PyExc_ValueError, "write_size must be positive");
437 PyErr_SetString(PyExc_ValueError, "write_size must be positive");
438 return NULL;
438 return NULL;
439 }
439 }
440
440
441 result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL);
441 result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL);
442 if (!result) {
442 if (!result) {
443 return NULL;
443 return NULL;
444 }
444 }
445
445
446 if (ensure_dctx(self, 1)) {
446 if (ensure_dctx(self, 1)) {
447 Py_DECREF(result);
447 Py_DECREF(result);
448 return NULL;
448 return NULL;
449 }
449 }
450
450
451 result->decompressor = self;
451 result->decompressor = self;
452 Py_INCREF(result->decompressor);
452 Py_INCREF(result->decompressor);
453 result->outSize = outSize;
453 result->outSize = outSize;
454
454
455 return result;
455 return result;
456 }
456 }
457
457
458 PyDoc_STRVAR(Decompressor_read_to_iter__doc__,
458 PyDoc_STRVAR(Decompressor_read_to_iter__doc__,
459 "read_to_iter(reader[, read_size=default, write_size=default, skip_bytes=0])\n"
459 "read_to_iter(reader[, read_size=default, write_size=default, skip_bytes=0])\n"
460 "Read compressed data and return an iterator\n"
460 "Read compressed data and return an iterator\n"
461 "\n"
461 "\n"
462 "Returns an iterator of decompressed data chunks produced from reading from\n"
462 "Returns an iterator of decompressed data chunks produced from reading from\n"
463 "the ``reader``.\n"
463 "the ``reader``.\n"
464 "\n"
464 "\n"
465 "Compressed data will be obtained from ``reader`` by calling the\n"
465 "Compressed data will be obtained from ``reader`` by calling the\n"
466 "``read(size)`` method of it. The source data will be streamed into a\n"
466 "``read(size)`` method of it. The source data will be streamed into a\n"
467 "decompressor. As decompressed data is available, it will be exposed to the\n"
467 "decompressor. As decompressed data is available, it will be exposed to the\n"
468 "returned iterator.\n"
468 "returned iterator.\n"
469 "\n"
469 "\n"
470 "Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n"
470 "Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n"
471 "iterator in chunks of size ``write_size``. The default values are the input\n"
471 "iterator in chunks of size ``write_size``. The default values are the input\n"
472 "and output sizes for a zstd streaming decompressor.\n"
472 "and output sizes for a zstd streaming decompressor.\n"
473 "\n"
473 "\n"
474 "There is also support for skipping the first ``skip_bytes`` of data from\n"
474 "There is also support for skipping the first ``skip_bytes`` of data from\n"
475 "the source.\n"
475 "the source.\n"
476 );
476 );
477
477
478 static ZstdDecompressorIterator* Decompressor_read_to_iter(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
478 static ZstdDecompressorIterator* Decompressor_read_to_iter(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
479 static char* kwlist[] = {
479 static char* kwlist[] = {
480 "reader",
480 "reader",
481 "read_size",
481 "read_size",
482 "write_size",
482 "write_size",
483 "skip_bytes",
483 "skip_bytes",
484 NULL
484 NULL
485 };
485 };
486
486
487 PyObject* reader;
487 PyObject* reader;
488 size_t inSize = ZSTD_DStreamInSize();
488 size_t inSize = ZSTD_DStreamInSize();
489 size_t outSize = ZSTD_DStreamOutSize();
489 size_t outSize = ZSTD_DStreamOutSize();
490 ZstdDecompressorIterator* result;
490 ZstdDecompressorIterator* result;
491 size_t skipBytes = 0;
491 size_t skipBytes = 0;
492
492
493 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_to_iter", kwlist,
493 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_to_iter", kwlist,
494 &reader, &inSize, &outSize, &skipBytes)) {
494 &reader, &inSize, &outSize, &skipBytes)) {
495 return NULL;
495 return NULL;
496 }
496 }
497
497
498 if (skipBytes >= inSize) {
498 if (skipBytes >= inSize) {
499 PyErr_SetString(PyExc_ValueError,
499 PyErr_SetString(PyExc_ValueError,
500 "skip_bytes must be smaller than read_size");
500 "skip_bytes must be smaller than read_size");
501 return NULL;
501 return NULL;
502 }
502 }
503
503
504 result = (ZstdDecompressorIterator*)PyObject_CallObject((PyObject*)&ZstdDecompressorIteratorType, NULL);
504 result = (ZstdDecompressorIterator*)PyObject_CallObject((PyObject*)&ZstdDecompressorIteratorType, NULL);
505 if (!result) {
505 if (!result) {
506 return NULL;
506 return NULL;
507 }
507 }
508
508
509 if (PyObject_HasAttrString(reader, "read")) {
509 if (PyObject_HasAttrString(reader, "read")) {
510 result->reader = reader;
510 result->reader = reader;
511 Py_INCREF(result->reader);
511 Py_INCREF(result->reader);
512 }
512 }
513 else if (1 == PyObject_CheckBuffer(reader)) {
513 else if (1 == PyObject_CheckBuffer(reader)) {
514 /* Object claims it is a buffer. Try to get a handle to it. */
514 /* Object claims it is a buffer. Try to get a handle to it. */
515 if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
515 if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
516 goto except;
516 goto except;
517 }
517 }
518 }
518 }
519 else {
519 else {
520 PyErr_SetString(PyExc_ValueError,
520 PyErr_SetString(PyExc_ValueError,
521 "must pass an object with a read() method or conforms to buffer protocol");
521 "must pass an object with a read() method or conforms to buffer protocol");
522 goto except;
522 goto except;
523 }
523 }
524
524
525 result->decompressor = self;
525 result->decompressor = self;
526 Py_INCREF(result->decompressor);
526 Py_INCREF(result->decompressor);
527
527
528 result->inSize = inSize;
528 result->inSize = inSize;
529 result->outSize = outSize;
529 result->outSize = outSize;
530 result->skipBytes = skipBytes;
530 result->skipBytes = skipBytes;
531
531
532 if (ensure_dctx(self, 1)) {
532 if (ensure_dctx(self, 1)) {
533 goto except;
533 goto except;
534 }
534 }
535
535
536 result->input.src = PyMem_Malloc(inSize);
536 result->input.src = PyMem_Malloc(inSize);
537 if (!result->input.src) {
537 if (!result->input.src) {
538 PyErr_NoMemory();
538 PyErr_NoMemory();
539 goto except;
539 goto except;
540 }
540 }
541
541
542 goto finally;
542 goto finally;
543
543
544 except:
544 except:
545 Py_CLEAR(result);
545 Py_CLEAR(result);
546
546
547 finally:
547 finally:
548
548
549 return result;
549 return result;
550 }
550 }
551
551
552 PyDoc_STRVAR(Decompressor_stream_reader__doc__,
552 PyDoc_STRVAR(Decompressor_stream_reader__doc__,
553 "stream_reader(source, [read_size=default, [read_across_frames=False]])\n"
553 "stream_reader(source, [read_size=default, [read_across_frames=False]])\n"
554 "\n"
554 "\n"
555 "Obtain an object that behaves like an I/O stream that can be used for\n"
555 "Obtain an object that behaves like an I/O stream that can be used for\n"
556 "reading decompressed output from an object.\n"
556 "reading decompressed output from an object.\n"
557 "\n"
557 "\n"
558 "The source object can be any object with a ``read(size)`` method or that\n"
558 "The source object can be any object with a ``read(size)`` method or that\n"
559 "conforms to the buffer protocol.\n"
559 "conforms to the buffer protocol.\n"
560 "\n"
560 "\n"
561 "``read_across_frames`` controls the behavior of ``read()`` when the end\n"
561 "``read_across_frames`` controls the behavior of ``read()`` when the end\n"
562 "of a zstd frame is reached. When ``True``, ``read()`` can potentially\n"
562 "of a zstd frame is reached. When ``True``, ``read()`` can potentially\n"
563 "return data belonging to multiple zstd frames. When ``False``, ``read()``\n"
563 "return data belonging to multiple zstd frames. When ``False``, ``read()``\n"
564 "will return when the end of a frame is reached.\n"
564 "will return when the end of a frame is reached.\n"
565 );
565 );
566
566
567 static ZstdDecompressionReader* Decompressor_stream_reader(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
567 static ZstdDecompressionReader* Decompressor_stream_reader(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
568 static char* kwlist[] = {
568 static char* kwlist[] = {
569 "source",
569 "source",
570 "read_size",
570 "read_size",
571 "read_across_frames",
571 "read_across_frames",
572 NULL
572 NULL
573 };
573 };
574
574
575 PyObject* source;
575 PyObject* source;
576 size_t readSize = ZSTD_DStreamInSize();
576 size_t readSize = ZSTD_DStreamInSize();
577 PyObject* readAcrossFrames = NULL;
577 PyObject* readAcrossFrames = NULL;
578 ZstdDecompressionReader* result;
578 ZstdDecompressionReader* result;
579
579
580 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kO:stream_reader", kwlist,
580 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kO:stream_reader", kwlist,
581 &source, &readSize, &readAcrossFrames)) {
581 &source, &readSize, &readAcrossFrames)) {
582 return NULL;
582 return NULL;
583 }
583 }
584
584
585 if (ensure_dctx(self, 1)) {
585 if (ensure_dctx(self, 1)) {
586 return NULL;
586 return NULL;
587 }
587 }
588
588
589 result = (ZstdDecompressionReader*)PyObject_CallObject((PyObject*)&ZstdDecompressionReaderType, NULL);
589 result = (ZstdDecompressionReader*)PyObject_CallObject((PyObject*)&ZstdDecompressionReaderType, NULL);
590 if (NULL == result) {
590 if (NULL == result) {
591 return NULL;
591 return NULL;
592 }
592 }
593
593
594 if (PyObject_HasAttrString(source, "read")) {
594 if (PyObject_HasAttrString(source, "read")) {
595 result->reader = source;
595 result->reader = source;
596 Py_INCREF(source);
596 Py_INCREF(source);
597 result->readSize = readSize;
597 result->readSize = readSize;
598 }
598 }
599 else if (1 == PyObject_CheckBuffer(source)) {
599 else if (1 == PyObject_CheckBuffer(source)) {
600 if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
600 if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
601 Py_CLEAR(result);
601 Py_CLEAR(result);
602 return NULL;
602 return NULL;
603 }
603 }
604 }
604 }
605 else {
605 else {
606 PyErr_SetString(PyExc_TypeError,
606 PyErr_SetString(PyExc_TypeError,
607 "must pass an object with a read() method or that conforms to the buffer protocol");
607 "must pass an object with a read() method or that conforms to the buffer protocol");
608 Py_CLEAR(result);
608 Py_CLEAR(result);
609 return NULL;
609 return NULL;
610 }
610 }
611
611
612 result->decompressor = self;
612 result->decompressor = self;
613 Py_INCREF(self);
613 Py_INCREF(self);
614 result->readAcrossFrames = readAcrossFrames ? PyObject_IsTrue(readAcrossFrames) : 0;
614 result->readAcrossFrames = readAcrossFrames ? PyObject_IsTrue(readAcrossFrames) : 0;
615
615
616 return result;
616 return result;
617 }
617 }
618
618
619 PyDoc_STRVAR(Decompressor_stream_writer__doc__,
619 PyDoc_STRVAR(Decompressor_stream_writer__doc__,
620 "Create a context manager to write decompressed data to an object.\n"
620 "Create a context manager to write decompressed data to an object.\n"
621 "\n"
621 "\n"
622 "The passed object must have a ``write()`` method.\n"
622 "The passed object must have a ``write()`` method.\n"
623 "\n"
623 "\n"
624 "The caller feeds intput data to the object by calling ``write(data)``.\n"
624 "The caller feeds intput data to the object by calling ``write(data)``.\n"
625 "Decompressed data is written to the argument given as it is decompressed.\n"
625 "Decompressed data is written to the argument given as it is decompressed.\n"
626 "\n"
626 "\n"
627 "An optional ``write_size`` argument defines the size of chunks to\n"
627 "An optional ``write_size`` argument defines the size of chunks to\n"
628 "``write()`` to the writer. It defaults to the default output size for a zstd\n"
628 "``write()`` to the writer. It defaults to the default output size for a zstd\n"
629 "streaming decompressor.\n"
629 "streaming decompressor.\n"
630 );
630 );
631
631
632 static ZstdDecompressionWriter* Decompressor_stream_writer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
632 static ZstdDecompressionWriter* Decompressor_stream_writer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
633 static char* kwlist[] = {
633 static char* kwlist[] = {
634 "writer",
634 "writer",
635 "write_size",
635 "write_size",
636 "write_return_read",
636 "write_return_read",
637 NULL
637 NULL
638 };
638 };
639
639
640 PyObject* writer;
640 PyObject* writer;
641 size_t outSize = ZSTD_DStreamOutSize();
641 size_t outSize = ZSTD_DStreamOutSize();
642 PyObject* writeReturnRead = NULL;
642 PyObject* writeReturnRead = NULL;
643 ZstdDecompressionWriter* result;
643 ZstdDecompressionWriter* result;
644
644
645 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kO:stream_writer", kwlist,
645 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kO:stream_writer", kwlist,
646 &writer, &outSize, &writeReturnRead)) {
646 &writer, &outSize, &writeReturnRead)) {
647 return NULL;
647 return NULL;
648 }
648 }
649
649
650 if (!PyObject_HasAttrString(writer, "write")) {
650 if (!PyObject_HasAttrString(writer, "write")) {
651 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
651 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
652 return NULL;
652 return NULL;
653 }
653 }
654
654
655 if (ensure_dctx(self, 1)) {
655 if (ensure_dctx(self, 1)) {
656 return NULL;
656 return NULL;
657 }
657 }
658
658
659 result = (ZstdDecompressionWriter*)PyObject_CallObject((PyObject*)&ZstdDecompressionWriterType, NULL);
659 result = (ZstdDecompressionWriter*)PyObject_CallObject((PyObject*)&ZstdDecompressionWriterType, NULL);
660 if (!result) {
660 if (!result) {
661 return NULL;
661 return NULL;
662 }
662 }
663
663
664 result->decompressor = self;
664 result->decompressor = self;
665 Py_INCREF(result->decompressor);
665 Py_INCREF(result->decompressor);
666
666
667 result->writer = writer;
667 result->writer = writer;
668 Py_INCREF(result->writer);
668 Py_INCREF(result->writer);
669
669
670 result->outSize = outSize;
670 result->outSize = outSize;
671 result->writeReturnRead = writeReturnRead ? PyObject_IsTrue(writeReturnRead) : 0;
671 result->writeReturnRead = writeReturnRead ? PyObject_IsTrue(writeReturnRead) : 0;
672
672
673 return result;
673 return result;
674 }
674 }
675
675
676 PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
676 PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
677 "Decompress a series of chunks using the content dictionary chaining technique\n"
677 "Decompress a series of chunks using the content dictionary chaining technique\n"
678 );
678 );
679
679
680 static PyObject* Decompressor_decompress_content_dict_chain(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
680 static PyObject* Decompressor_decompress_content_dict_chain(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
681 static char* kwlist[] = {
681 static char* kwlist[] = {
682 "frames",
682 "frames",
683 NULL
683 NULL
684 };
684 };
685
685
686 PyObject* chunks;
686 PyObject* chunks;
687 Py_ssize_t chunksLen;
687 Py_ssize_t chunksLen;
688 Py_ssize_t chunkIndex;
688 Py_ssize_t chunkIndex;
689 char parity = 0;
689 char parity = 0;
690 PyObject* chunk;
690 PyObject* chunk;
691 char* chunkData;
691 char* chunkData;
692 Py_ssize_t chunkSize;
692 Py_ssize_t chunkSize;
693 size_t zresult;
693 size_t zresult;
694 ZSTD_frameHeader frameHeader;
694 ZSTD_frameHeader frameHeader;
695 void* buffer1 = NULL;
695 void* buffer1 = NULL;
696 size_t buffer1Size = 0;
696 size_t buffer1Size = 0;
697 size_t buffer1ContentSize = 0;
697 size_t buffer1ContentSize = 0;
698 void* buffer2 = NULL;
698 void* buffer2 = NULL;
699 size_t buffer2Size = 0;
699 size_t buffer2Size = 0;
700 size_t buffer2ContentSize = 0;
700 size_t buffer2ContentSize = 0;
701 void* destBuffer = NULL;
701 void* destBuffer = NULL;
702 PyObject* result = NULL;
702 PyObject* result = NULL;
703 ZSTD_outBuffer outBuffer;
703 ZSTD_outBuffer outBuffer;
704 ZSTD_inBuffer inBuffer;
704 ZSTD_inBuffer inBuffer;
705
705
706 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
706 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
707 kwlist, &PyList_Type, &chunks)) {
707 kwlist, &PyList_Type, &chunks)) {
708 return NULL;
708 return NULL;
709 }
709 }
710
710
711 chunksLen = PyList_Size(chunks);
711 chunksLen = PyList_Size(chunks);
712 if (!chunksLen) {
712 if (!chunksLen) {
713 PyErr_SetString(PyExc_ValueError, "empty input chain");
713 PyErr_SetString(PyExc_ValueError, "empty input chain");
714 return NULL;
714 return NULL;
715 }
715 }
716
716
717 /* The first chunk should not be using a dictionary. We handle it specially. */
717 /* The first chunk should not be using a dictionary. We handle it specially. */
718 chunk = PyList_GetItem(chunks, 0);
718 chunk = PyList_GetItem(chunks, 0);
719 if (!PyBytes_Check(chunk)) {
719 if (!PyBytes_Check(chunk)) {
720 PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes");
720 PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes");
721 return NULL;
721 return NULL;
722 }
722 }
723
723
724 /* We require that all chunks be zstd frames and that they have content size set. */
724 /* We require that all chunks be zstd frames and that they have content size set. */
725 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
725 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
726 zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize);
726 zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize);
727 if (ZSTD_isError(zresult)) {
727 if (ZSTD_isError(zresult)) {
728 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
728 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
729 return NULL;
729 return NULL;
730 }
730 }
731 else if (zresult) {
731 else if (zresult) {
732 PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame");
732 PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame");
733 return NULL;
733 return NULL;
734 }
734 }
735
735
736 if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) {
736 if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) {
737 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
737 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
738 return NULL;
738 return NULL;
739 }
739 }
740
740
741 assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize);
741 assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize);
742
742
743 /* We check against PY_SSIZE_T_MAX here because we ultimately cast the
743 /* We check against PY_SSIZE_T_MAX here because we ultimately cast the
744 * result to a Python object and it's length can be no greater than
744 * result to a Python object and it's length can be no greater than
745 * Py_ssize_t. In theory, we could have an intermediate frame that is
745 * Py_ssize_t. In theory, we could have an intermediate frame that is
746 * larger. But a) why would this API be used for frames that large b)
746 * larger. But a) why would this API be used for frames that large b)
747 * it isn't worth the complexity to support. */
747 * it isn't worth the complexity to support. */
748 assert(SIZE_MAX >= PY_SSIZE_T_MAX);
748 assert(SIZE_MAX >= PY_SSIZE_T_MAX);
749 if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) {
749 if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) {
750 PyErr_SetString(PyExc_ValueError,
750 PyErr_SetString(PyExc_ValueError,
751 "chunk 0 is too large to decompress on this platform");
751 "chunk 0 is too large to decompress on this platform");
752 return NULL;
752 return NULL;
753 }
753 }
754
754
755 if (ensure_dctx(self, 0)) {
755 if (ensure_dctx(self, 0)) {
756 goto finally;
756 goto finally;
757 }
757 }
758
758
759 buffer1Size = (size_t)frameHeader.frameContentSize;
759 buffer1Size = (size_t)frameHeader.frameContentSize;
760 buffer1 = PyMem_Malloc(buffer1Size);
760 buffer1 = PyMem_Malloc(buffer1Size);
761 if (!buffer1) {
761 if (!buffer1) {
762 goto finally;
762 goto finally;
763 }
763 }
764
764
765 outBuffer.dst = buffer1;
765 outBuffer.dst = buffer1;
766 outBuffer.size = buffer1Size;
766 outBuffer.size = buffer1Size;
767 outBuffer.pos = 0;
767 outBuffer.pos = 0;
768
768
769 inBuffer.src = chunkData;
769 inBuffer.src = chunkData;
770 inBuffer.size = chunkSize;
770 inBuffer.size = chunkSize;
771 inBuffer.pos = 0;
771 inBuffer.pos = 0;
772
772
773 Py_BEGIN_ALLOW_THREADS
773 Py_BEGIN_ALLOW_THREADS
774 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
774 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
775 Py_END_ALLOW_THREADS
775 Py_END_ALLOW_THREADS
776 if (ZSTD_isError(zresult)) {
776 if (ZSTD_isError(zresult)) {
777 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
777 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
778 goto finally;
778 goto finally;
779 }
779 }
780 else if (zresult) {
780 else if (zresult) {
781 PyErr_Format(ZstdError, "chunk 0 did not decompress full frame");
781 PyErr_Format(ZstdError, "chunk 0 did not decompress full frame");
782 goto finally;
782 goto finally;
783 }
783 }
784
784
785 buffer1ContentSize = outBuffer.pos;
785 buffer1ContentSize = outBuffer.pos;
786
786
787 /* Special case of a simple chain. */
787 /* Special case of a simple chain. */
788 if (1 == chunksLen) {
788 if (1 == chunksLen) {
789 result = PyBytes_FromStringAndSize(buffer1, buffer1Size);
789 result = PyBytes_FromStringAndSize(buffer1, buffer1Size);
790 goto finally;
790 goto finally;
791 }
791 }
792
792
793 /* This should ideally look at next chunk. But this is slightly simpler. */
793 /* This should ideally look at next chunk. But this is slightly simpler. */
794 buffer2Size = (size_t)frameHeader.frameContentSize;
794 buffer2Size = (size_t)frameHeader.frameContentSize;
795 buffer2 = PyMem_Malloc(buffer2Size);
795 buffer2 = PyMem_Malloc(buffer2Size);
796 if (!buffer2) {
796 if (!buffer2) {
797 goto finally;
797 goto finally;
798 }
798 }
799
799
800 /* For each subsequent chunk, use the previous fulltext as a content dictionary.
800 /* For each subsequent chunk, use the previous fulltext as a content dictionary.
801 Our strategy is to have 2 buffers. One holds the previous fulltext (to be
801 Our strategy is to have 2 buffers. One holds the previous fulltext (to be
802 used as a content dictionary) and the other holds the new fulltext. The
802 used as a content dictionary) and the other holds the new fulltext. The
803 buffers grow when needed but never decrease in size. This limits the
803 buffers grow when needed but never decrease in size. This limits the
804 memory allocator overhead.
804 memory allocator overhead.
805 */
805 */
806 for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
806 for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
807 chunk = PyList_GetItem(chunks, chunkIndex);
807 chunk = PyList_GetItem(chunks, chunkIndex);
808 if (!PyBytes_Check(chunk)) {
808 if (!PyBytes_Check(chunk)) {
809 PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
809 PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
810 goto finally;
810 goto finally;
811 }
811 }
812
812
813 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
813 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
814 zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize);
814 zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize);
815 if (ZSTD_isError(zresult)) {
815 if (ZSTD_isError(zresult)) {
816 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
816 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
817 goto finally;
817 goto finally;
818 }
818 }
819 else if (zresult) {
819 else if (zresult) {
820 PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
820 PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
821 goto finally;
821 goto finally;
822 }
822 }
823
823
824 if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) {
824 if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) {
825 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
825 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
826 goto finally;
826 goto finally;
827 }
827 }
828
828
829 assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize);
829 assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize);
830
830
831 if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) {
831 if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) {
832 PyErr_Format(PyExc_ValueError,
832 PyErr_Format(PyExc_ValueError,
833 "chunk %zd is too large to decompress on this platform", chunkIndex);
833 "chunk %zd is too large to decompress on this platform", chunkIndex);
834 goto finally;
834 goto finally;
835 }
835 }
836
836
837 inBuffer.src = chunkData;
837 inBuffer.src = chunkData;
838 inBuffer.size = chunkSize;
838 inBuffer.size = chunkSize;
839 inBuffer.pos = 0;
839 inBuffer.pos = 0;
840
840
841 parity = chunkIndex % 2;
841 parity = chunkIndex % 2;
842
842
843 /* This could definitely be abstracted to reduce code duplication. */
843 /* This could definitely be abstracted to reduce code duplication. */
844 if (parity) {
844 if (parity) {
845 /* Resize destination buffer to hold larger content. */
845 /* Resize destination buffer to hold larger content. */
846 if (buffer2Size < frameHeader.frameContentSize) {
846 if (buffer2Size < frameHeader.frameContentSize) {
847 buffer2Size = (size_t)frameHeader.frameContentSize;
847 buffer2Size = (size_t)frameHeader.frameContentSize;
848 destBuffer = PyMem_Realloc(buffer2, buffer2Size);
848 destBuffer = PyMem_Realloc(buffer2, buffer2Size);
849 if (!destBuffer) {
849 if (!destBuffer) {
850 goto finally;
850 goto finally;
851 }
851 }
852 buffer2 = destBuffer;
852 buffer2 = destBuffer;
853 }
853 }
854
854
855 Py_BEGIN_ALLOW_THREADS
855 Py_BEGIN_ALLOW_THREADS
856 zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx,
856 zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx,
857 buffer1, buffer1ContentSize, ZSTD_dct_rawContent);
857 buffer1, buffer1ContentSize, ZSTD_dct_rawContent);
858 Py_END_ALLOW_THREADS
858 Py_END_ALLOW_THREADS
859 if (ZSTD_isError(zresult)) {
859 if (ZSTD_isError(zresult)) {
860 PyErr_Format(ZstdError,
860 PyErr_Format(ZstdError,
861 "failed to load prefix dictionary at chunk %zd", chunkIndex);
861 "failed to load prefix dictionary at chunk %zd", chunkIndex);
862 goto finally;
862 goto finally;
863 }
863 }
864
864
865 outBuffer.dst = buffer2;
865 outBuffer.dst = buffer2;
866 outBuffer.size = buffer2Size;
866 outBuffer.size = buffer2Size;
867 outBuffer.pos = 0;
867 outBuffer.pos = 0;
868
868
869 Py_BEGIN_ALLOW_THREADS
869 Py_BEGIN_ALLOW_THREADS
870 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
870 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
871 Py_END_ALLOW_THREADS
871 Py_END_ALLOW_THREADS
872 if (ZSTD_isError(zresult)) {
872 if (ZSTD_isError(zresult)) {
873 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
873 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
874 chunkIndex, ZSTD_getErrorName(zresult));
874 chunkIndex, ZSTD_getErrorName(zresult));
875 goto finally;
875 goto finally;
876 }
876 }
877 else if (zresult) {
877 else if (zresult) {
878 PyErr_Format(ZstdError, "chunk %zd did not decompress full frame",
878 PyErr_Format(ZstdError, "chunk %zd did not decompress full frame",
879 chunkIndex);
879 chunkIndex);
880 goto finally;
880 goto finally;
881 }
881 }
882
882
883 buffer2ContentSize = outBuffer.pos;
883 buffer2ContentSize = outBuffer.pos;
884 }
884 }
885 else {
885 else {
886 if (buffer1Size < frameHeader.frameContentSize) {
886 if (buffer1Size < frameHeader.frameContentSize) {
887 buffer1Size = (size_t)frameHeader.frameContentSize;
887 buffer1Size = (size_t)frameHeader.frameContentSize;
888 destBuffer = PyMem_Realloc(buffer1, buffer1Size);
888 destBuffer = PyMem_Realloc(buffer1, buffer1Size);
889 if (!destBuffer) {
889 if (!destBuffer) {
890 goto finally;
890 goto finally;
891 }
891 }
892 buffer1 = destBuffer;
892 buffer1 = destBuffer;
893 }
893 }
894
894
895 Py_BEGIN_ALLOW_THREADS
895 Py_BEGIN_ALLOW_THREADS
896 zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx,
896 zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx,
897 buffer2, buffer2ContentSize, ZSTD_dct_rawContent);
897 buffer2, buffer2ContentSize, ZSTD_dct_rawContent);
898 Py_END_ALLOW_THREADS
898 Py_END_ALLOW_THREADS
899 if (ZSTD_isError(zresult)) {
899 if (ZSTD_isError(zresult)) {
900 PyErr_Format(ZstdError,
900 PyErr_Format(ZstdError,
901 "failed to load prefix dictionary at chunk %zd", chunkIndex);
901 "failed to load prefix dictionary at chunk %zd", chunkIndex);
902 goto finally;
902 goto finally;
903 }
903 }
904
904
905 outBuffer.dst = buffer1;
905 outBuffer.dst = buffer1;
906 outBuffer.size = buffer1Size;
906 outBuffer.size = buffer1Size;
907 outBuffer.pos = 0;
907 outBuffer.pos = 0;
908
908
909 Py_BEGIN_ALLOW_THREADS
909 Py_BEGIN_ALLOW_THREADS
910 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
910 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
911 Py_END_ALLOW_THREADS
911 Py_END_ALLOW_THREADS
912 if (ZSTD_isError(zresult)) {
912 if (ZSTD_isError(zresult)) {
913 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
913 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
914 chunkIndex, ZSTD_getErrorName(zresult));
914 chunkIndex, ZSTD_getErrorName(zresult));
915 goto finally;
915 goto finally;
916 }
916 }
917 else if (zresult) {
917 else if (zresult) {
918 PyErr_Format(ZstdError, "chunk %zd did not decompress full frame",
918 PyErr_Format(ZstdError, "chunk %zd did not decompress full frame",
919 chunkIndex);
919 chunkIndex);
920 goto finally;
920 goto finally;
921 }
921 }
922
922
923 buffer1ContentSize = outBuffer.pos;
923 buffer1ContentSize = outBuffer.pos;
924 }
924 }
925 }
925 }
926
926
927 result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
927 result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
928 parity ? buffer2ContentSize : buffer1ContentSize);
928 parity ? buffer2ContentSize : buffer1ContentSize);
929
929
930 finally:
930 finally:
931 if (buffer2) {
931 if (buffer2) {
932 PyMem_Free(buffer2);
932 PyMem_Free(buffer2);
933 }
933 }
934 if (buffer1) {
934 if (buffer1) {
935 PyMem_Free(buffer1);
935 PyMem_Free(buffer1);
936 }
936 }
937
937
938 return result;
938 return result;
939 }
939 }
940
940
941 typedef struct {
941 typedef struct {
942 void* sourceData;
942 void* sourceData;
943 size_t sourceSize;
943 size_t sourceSize;
944 size_t destSize;
944 size_t destSize;
945 } FramePointer;
945 } FramePointer;
946
946
947 typedef struct {
947 typedef struct {
948 FramePointer* frames;
948 FramePointer* frames;
949 Py_ssize_t framesSize;
949 Py_ssize_t framesSize;
950 unsigned long long compressedSize;
950 unsigned long long compressedSize;
951 } FrameSources;
951 } FrameSources;
952
952
953 typedef struct {
953 typedef struct {
954 void* dest;
954 void* dest;
955 Py_ssize_t destSize;
955 Py_ssize_t destSize;
956 BufferSegment* segments;
956 BufferSegment* segments;
957 Py_ssize_t segmentsSize;
957 Py_ssize_t segmentsSize;
958 } DestBuffer;
958 } DestBuffer;
959
959
960 typedef enum {
960 typedef enum {
961 WorkerError_none = 0,
961 WorkerError_none = 0,
962 WorkerError_zstd = 1,
962 WorkerError_zstd = 1,
963 WorkerError_memory = 2,
963 WorkerError_memory = 2,
964 WorkerError_sizeMismatch = 3,
964 WorkerError_sizeMismatch = 3,
965 WorkerError_unknownSize = 4,
965 WorkerError_unknownSize = 4,
966 } WorkerError;
966 } WorkerError;
967
967
968 typedef struct {
968 typedef struct {
969 /* Source records and length */
969 /* Source records and length */
970 FramePointer* framePointers;
970 FramePointer* framePointers;
971 /* Which records to process. */
971 /* Which records to process. */
972 Py_ssize_t startOffset;
972 Py_ssize_t startOffset;
973 Py_ssize_t endOffset;
973 Py_ssize_t endOffset;
974 unsigned long long totalSourceSize;
974 unsigned long long totalSourceSize;
975
975
976 /* Compression state and settings. */
976 /* Compression state and settings. */
977 ZSTD_DCtx* dctx;
977 ZSTD_DCtx* dctx;
978 int requireOutputSizes;
978 int requireOutputSizes;
979
979
980 /* Output storage. */
980 /* Output storage. */
981 DestBuffer* destBuffers;
981 DestBuffer* destBuffers;
982 Py_ssize_t destCount;
982 Py_ssize_t destCount;
983
983
984 /* Item that error occurred on. */
984 /* Item that error occurred on. */
985 Py_ssize_t errorOffset;
985 Py_ssize_t errorOffset;
986 /* If an error occurred. */
986 /* If an error occurred. */
987 WorkerError error;
987 WorkerError error;
988 /* result from zstd decompression operation */
988 /* result from zstd decompression operation */
989 size_t zresult;
989 size_t zresult;
990 } WorkerState;
990 } WorkerState;
991
991
992 static void decompress_worker(WorkerState* state) {
992 static void decompress_worker(WorkerState* state) {
993 size_t allocationSize;
993 size_t allocationSize;
994 DestBuffer* destBuffer;
994 DestBuffer* destBuffer;
995 Py_ssize_t frameIndex;
995 Py_ssize_t frameIndex;
996 Py_ssize_t localOffset = 0;
996 Py_ssize_t localOffset = 0;
997 Py_ssize_t currentBufferStartIndex = state->startOffset;
997 Py_ssize_t currentBufferStartIndex = state->startOffset;
998 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
998 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
999 void* tmpBuf;
999 void* tmpBuf;
1000 Py_ssize_t destOffset = 0;
1000 Py_ssize_t destOffset = 0;
1001 FramePointer* framePointers = state->framePointers;
1001 FramePointer* framePointers = state->framePointers;
1002 size_t zresult;
1002 size_t zresult;
1003 unsigned long long totalOutputSize = 0;
1003 unsigned long long totalOutputSize = 0;
1004
1004
1005 assert(NULL == state->destBuffers);
1005 assert(NULL == state->destBuffers);
1006 assert(0 == state->destCount);
1006 assert(0 == state->destCount);
1007 assert(state->endOffset - state->startOffset >= 0);
1007 assert(state->endOffset - state->startOffset >= 0);
1008
1008
1009 /* We could get here due to the way work is allocated. Ideally we wouldn't
1009 /* We could get here due to the way work is allocated. Ideally we wouldn't
1010 get here. But that would require a bit of a refactor in the caller. */
1010 get here. But that would require a bit of a refactor in the caller. */
1011 if (state->totalSourceSize > SIZE_MAX) {
1011 if (state->totalSourceSize > SIZE_MAX) {
1012 state->error = WorkerError_memory;
1012 state->error = WorkerError_memory;
1013 state->errorOffset = 0;
1013 state->errorOffset = 0;
1014 return;
1014 return;
1015 }
1015 }
1016
1016
1017 /*
1017 /*
1018 * We need to allocate a buffer to hold decompressed data. How we do this
1018 * We need to allocate a buffer to hold decompressed data. How we do this
1019 * depends on what we know about the output. The following scenarios are
1019 * depends on what we know about the output. The following scenarios are
1020 * possible:
1020 * possible:
1021 *
1021 *
1022 * 1. All structs defining frames declare the output size.
1022 * 1. All structs defining frames declare the output size.
1023 * 2. The decompressed size is embedded within the zstd frame.
1023 * 2. The decompressed size is embedded within the zstd frame.
1024 * 3. The decompressed size is not stored anywhere.
1024 * 3. The decompressed size is not stored anywhere.
1025 *
1025 *
1026 * For now, we only support #1 and #2.
1026 * For now, we only support #1 and #2.
1027 */
1027 */
1028
1028
1029 /* Resolve ouput segments. */
1029 /* Resolve ouput segments. */
1030 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
1030 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
1031 FramePointer* fp = &framePointers[frameIndex];
1031 FramePointer* fp = &framePointers[frameIndex];
1032 unsigned long long decompressedSize;
1032 unsigned long long decompressedSize;
1033
1033
1034 if (0 == fp->destSize) {
1034 if (0 == fp->destSize) {
1035 decompressedSize = ZSTD_getFrameContentSize(fp->sourceData, fp->sourceSize);
1035 decompressedSize = ZSTD_getFrameContentSize(fp->sourceData, fp->sourceSize);
1036
1036
1037 if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
1037 if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
1038 state->error = WorkerError_unknownSize;
1038 state->error = WorkerError_unknownSize;
1039 state->errorOffset = frameIndex;
1039 state->errorOffset = frameIndex;
1040 return;
1040 return;
1041 }
1041 }
1042 else if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) {
1042 else if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) {
1043 if (state->requireOutputSizes) {
1043 if (state->requireOutputSizes) {
1044 state->error = WorkerError_unknownSize;
1044 state->error = WorkerError_unknownSize;
1045 state->errorOffset = frameIndex;
1045 state->errorOffset = frameIndex;
1046 return;
1046 return;
1047 }
1047 }
1048
1048
1049 /* This will fail the assert for .destSize > 0 below. */
1049 /* This will fail the assert for .destSize > 0 below. */
1050 decompressedSize = 0;
1050 decompressedSize = 0;
1051 }
1051 }
1052
1052
1053 if (decompressedSize > SIZE_MAX) {
1053 if (decompressedSize > SIZE_MAX) {
1054 state->error = WorkerError_memory;
1054 state->error = WorkerError_memory;
1055 state->errorOffset = frameIndex;
1055 state->errorOffset = frameIndex;
1056 return;
1056 return;
1057 }
1057 }
1058
1058
1059 fp->destSize = (size_t)decompressedSize;
1059 fp->destSize = (size_t)decompressedSize;
1060 }
1060 }
1061
1061
1062 totalOutputSize += fp->destSize;
1062 totalOutputSize += fp->destSize;
1063 }
1063 }
1064
1064
1065 state->destBuffers = calloc(1, sizeof(DestBuffer));
1065 state->destBuffers = calloc(1, sizeof(DestBuffer));
1066 if (NULL == state->destBuffers) {
1066 if (NULL == state->destBuffers) {
1067 state->error = WorkerError_memory;
1067 state->error = WorkerError_memory;
1068 return;
1068 return;
1069 }
1069 }
1070
1070
1071 state->destCount = 1;
1071 state->destCount = 1;
1072
1072
1073 destBuffer = &state->destBuffers[state->destCount - 1];
1073 destBuffer = &state->destBuffers[state->destCount - 1];
1074
1074
1075 assert(framePointers[state->startOffset].destSize > 0); /* For now. */
1075 assert(framePointers[state->startOffset].destSize > 0); /* For now. */
1076
1076
1077 allocationSize = roundpow2((size_t)state->totalSourceSize);
1077 allocationSize = roundpow2((size_t)state->totalSourceSize);
1078
1078
1079 if (framePointers[state->startOffset].destSize > allocationSize) {
1079 if (framePointers[state->startOffset].destSize > allocationSize) {
1080 allocationSize = roundpow2(framePointers[state->startOffset].destSize);
1080 allocationSize = roundpow2(framePointers[state->startOffset].destSize);
1081 }
1081 }
1082
1082
1083 destBuffer->dest = malloc(allocationSize);
1083 destBuffer->dest = malloc(allocationSize);
1084 if (NULL == destBuffer->dest) {
1084 if (NULL == destBuffer->dest) {
1085 state->error = WorkerError_memory;
1085 state->error = WorkerError_memory;
1086 return;
1086 return;
1087 }
1087 }
1088
1088
1089 destBuffer->destSize = allocationSize;
1089 destBuffer->destSize = allocationSize;
1090
1090
1091 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1091 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1092 if (NULL == destBuffer->segments) {
1092 if (NULL == destBuffer->segments) {
1093 /* Caller will free state->dest as part of cleanup. */
1093 /* Caller will free state->dest as part of cleanup. */
1094 state->error = WorkerError_memory;
1094 state->error = WorkerError_memory;
1095 return;
1095 return;
1096 }
1096 }
1097
1097
1098 destBuffer->segmentsSize = remainingItems;
1098 destBuffer->segmentsSize = remainingItems;
1099
1099
1100 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
1100 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
1101 ZSTD_outBuffer outBuffer;
1101 ZSTD_outBuffer outBuffer;
1102 ZSTD_inBuffer inBuffer;
1102 ZSTD_inBuffer inBuffer;
1103 const void* source = framePointers[frameIndex].sourceData;
1103 const void* source = framePointers[frameIndex].sourceData;
1104 const size_t sourceSize = framePointers[frameIndex].sourceSize;
1104 const size_t sourceSize = framePointers[frameIndex].sourceSize;
1105 void* dest;
1105 void* dest;
1106 const size_t decompressedSize = framePointers[frameIndex].destSize;
1106 const size_t decompressedSize = framePointers[frameIndex].destSize;
1107 size_t destAvailable = destBuffer->destSize - destOffset;
1107 size_t destAvailable = destBuffer->destSize - destOffset;
1108
1108
1109 assert(decompressedSize > 0); /* For now. */
1109 assert(decompressedSize > 0); /* For now. */
1110
1110
1111 /*
1111 /*
1112 * Not enough space in current buffer. Finish current before and allocate and
1112 * Not enough space in current buffer. Finish current before and allocate and
1113 * switch to a new one.
1113 * switch to a new one.
1114 */
1114 */
1115 if (decompressedSize > destAvailable) {
1115 if (decompressedSize > destAvailable) {
1116 /*
1116 /*
1117 * Shrinking the destination buffer is optional. But it should be cheap,
1117 * Shrinking the destination buffer is optional. But it should be cheap,
1118 * so we just do it.
1118 * so we just do it.
1119 */
1119 */
1120 if (destAvailable) {
1120 if (destAvailable) {
1121 tmpBuf = realloc(destBuffer->dest, destOffset);
1121 tmpBuf = realloc(destBuffer->dest, destOffset);
1122 if (NULL == tmpBuf) {
1122 if (NULL == tmpBuf) {
1123 state->error = WorkerError_memory;
1123 state->error = WorkerError_memory;
1124 return;
1124 return;
1125 }
1125 }
1126
1126
1127 destBuffer->dest = tmpBuf;
1127 destBuffer->dest = tmpBuf;
1128 destBuffer->destSize = destOffset;
1128 destBuffer->destSize = destOffset;
1129 }
1129 }
1130
1130
1131 /* Truncate segments buffer. */
1131 /* Truncate segments buffer. */
1132 tmpBuf = realloc(destBuffer->segments,
1132 tmpBuf = realloc(destBuffer->segments,
1133 (frameIndex - currentBufferStartIndex) * sizeof(BufferSegment));
1133 (frameIndex - currentBufferStartIndex) * sizeof(BufferSegment));
1134 if (NULL == tmpBuf) {
1134 if (NULL == tmpBuf) {
1135 state->error = WorkerError_memory;
1135 state->error = WorkerError_memory;
1136 return;
1136 return;
1137 }
1137 }
1138
1138
1139 destBuffer->segments = tmpBuf;
1139 destBuffer->segments = tmpBuf;
1140 destBuffer->segmentsSize = frameIndex - currentBufferStartIndex;
1140 destBuffer->segmentsSize = frameIndex - currentBufferStartIndex;
1141
1141
1142 /* Grow space for new DestBuffer. */
1142 /* Grow space for new DestBuffer. */
1143 tmpBuf = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
1143 tmpBuf = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
1144 if (NULL == tmpBuf) {
1144 if (NULL == tmpBuf) {
1145 state->error = WorkerError_memory;
1145 state->error = WorkerError_memory;
1146 return;
1146 return;
1147 }
1147 }
1148
1148
1149 state->destBuffers = tmpBuf;
1149 state->destBuffers = tmpBuf;
1150 state->destCount++;
1150 state->destCount++;
1151
1151
1152 destBuffer = &state->destBuffers[state->destCount - 1];
1152 destBuffer = &state->destBuffers[state->destCount - 1];
1153
1153
1154 /* Don't take any chances will non-NULL pointers. */
1154 /* Don't take any chances will non-NULL pointers. */
1155 memset(destBuffer, 0, sizeof(DestBuffer));
1155 memset(destBuffer, 0, sizeof(DestBuffer));
1156
1156
1157 allocationSize = roundpow2((size_t)state->totalSourceSize);
1157 allocationSize = roundpow2((size_t)state->totalSourceSize);
1158
1158
1159 if (decompressedSize > allocationSize) {
1159 if (decompressedSize > allocationSize) {
1160 allocationSize = roundpow2(decompressedSize);
1160 allocationSize = roundpow2(decompressedSize);
1161 }
1161 }
1162
1162
1163 destBuffer->dest = malloc(allocationSize);
1163 destBuffer->dest = malloc(allocationSize);
1164 if (NULL == destBuffer->dest) {
1164 if (NULL == destBuffer->dest) {
1165 state->error = WorkerError_memory;
1165 state->error = WorkerError_memory;
1166 return;
1166 return;
1167 }
1167 }
1168
1168
1169 destBuffer->destSize = allocationSize;
1169 destBuffer->destSize = allocationSize;
1170 destAvailable = allocationSize;
1170 destAvailable = allocationSize;
1171 destOffset = 0;
1171 destOffset = 0;
1172 localOffset = 0;
1172 localOffset = 0;
1173
1173
1174 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1174 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1175 if (NULL == destBuffer->segments) {
1175 if (NULL == destBuffer->segments) {
1176 state->error = WorkerError_memory;
1176 state->error = WorkerError_memory;
1177 return;
1177 return;
1178 }
1178 }
1179
1179
1180 destBuffer->segmentsSize = remainingItems;
1180 destBuffer->segmentsSize = remainingItems;
1181 currentBufferStartIndex = frameIndex;
1181 currentBufferStartIndex = frameIndex;
1182 }
1182 }
1183
1183
1184 dest = (char*)destBuffer->dest + destOffset;
1184 dest = (char*)destBuffer->dest + destOffset;
1185
1185
1186 outBuffer.dst = dest;
1186 outBuffer.dst = dest;
1187 outBuffer.size = decompressedSize;
1187 outBuffer.size = decompressedSize;
1188 outBuffer.pos = 0;
1188 outBuffer.pos = 0;
1189
1189
1190 inBuffer.src = source;
1190 inBuffer.src = source;
1191 inBuffer.size = sourceSize;
1191 inBuffer.size = sourceSize;
1192 inBuffer.pos = 0;
1192 inBuffer.pos = 0;
1193
1193
1194 zresult = ZSTD_decompressStream(state->dctx, &outBuffer, &inBuffer);
1194 zresult = ZSTD_decompressStream(state->dctx, &outBuffer, &inBuffer);
1195 if (ZSTD_isError(zresult)) {
1195 if (ZSTD_isError(zresult)) {
1196 state->error = WorkerError_zstd;
1196 state->error = WorkerError_zstd;
1197 state->zresult = zresult;
1197 state->zresult = zresult;
1198 state->errorOffset = frameIndex;
1198 state->errorOffset = frameIndex;
1199 return;
1199 return;
1200 }
1200 }
1201 else if (zresult || outBuffer.pos != decompressedSize) {
1201 else if (zresult || outBuffer.pos != decompressedSize) {
1202 state->error = WorkerError_sizeMismatch;
1202 state->error = WorkerError_sizeMismatch;
1203 state->zresult = outBuffer.pos;
1203 state->zresult = outBuffer.pos;
1204 state->errorOffset = frameIndex;
1204 state->errorOffset = frameIndex;
1205 return;
1205 return;
1206 }
1206 }
1207
1207
1208 destBuffer->segments[localOffset].offset = destOffset;
1208 destBuffer->segments[localOffset].offset = destOffset;
1209 destBuffer->segments[localOffset].length = outBuffer.pos;
1209 destBuffer->segments[localOffset].length = outBuffer.pos;
1210 destOffset += outBuffer.pos;
1210 destOffset += outBuffer.pos;
1211 localOffset++;
1211 localOffset++;
1212 remainingItems--;
1212 remainingItems--;
1213 }
1213 }
1214
1214
1215 if (destBuffer->destSize > destOffset) {
1215 if (destBuffer->destSize > destOffset) {
1216 tmpBuf = realloc(destBuffer->dest, destOffset);
1216 tmpBuf = realloc(destBuffer->dest, destOffset);
1217 if (NULL == tmpBuf) {
1217 if (NULL == tmpBuf) {
1218 state->error = WorkerError_memory;
1218 state->error = WorkerError_memory;
1219 return;
1219 return;
1220 }
1220 }
1221
1221
1222 destBuffer->dest = tmpBuf;
1222 destBuffer->dest = tmpBuf;
1223 destBuffer->destSize = destOffset;
1223 destBuffer->destSize = destOffset;
1224 }
1224 }
1225 }
1225 }
1226
1226
1227 ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames,
1227 ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames,
1228 Py_ssize_t threadCount) {
1228 Py_ssize_t threadCount) {
1229 Py_ssize_t i = 0;
1229 Py_ssize_t i = 0;
1230 int errored = 0;
1230 int errored = 0;
1231 Py_ssize_t segmentsCount;
1231 Py_ssize_t segmentsCount;
1232 ZstdBufferWithSegments* bws = NULL;
1232 ZstdBufferWithSegments* bws = NULL;
1233 PyObject* resultArg = NULL;
1233 PyObject* resultArg = NULL;
1234 Py_ssize_t resultIndex;
1234 Py_ssize_t resultIndex;
1235 ZstdBufferWithSegmentsCollection* result = NULL;
1235 ZstdBufferWithSegmentsCollection* result = NULL;
1236 FramePointer* framePointers = frames->frames;
1236 FramePointer* framePointers = frames->frames;
1237 unsigned long long workerBytes = 0;
1237 unsigned long long workerBytes = 0;
1238 Py_ssize_t currentThread = 0;
1238 Py_ssize_t currentThread = 0;
1239 Py_ssize_t workerStartOffset = 0;
1239 Py_ssize_t workerStartOffset = 0;
1240 POOL_ctx* pool = NULL;
1240 POOL_ctx* pool = NULL;
1241 WorkerState* workerStates = NULL;
1241 WorkerState* workerStates = NULL;
1242 unsigned long long bytesPerWorker;
1242 unsigned long long bytesPerWorker;
1243
1243
1244 /* Caller should normalize 0 and negative values to 1 or larger. */
1244 /* Caller should normalize 0 and negative values to 1 or larger. */
1245 assert(threadCount >= 1);
1245 assert(threadCount >= 1);
1246
1246
1247 /* More threads than inputs makes no sense under any conditions. */
1247 /* More threads than inputs makes no sense under any conditions. */
1248 threadCount = frames->framesSize < threadCount ? frames->framesSize
1248 threadCount = frames->framesSize < threadCount ? frames->framesSize
1249 : threadCount;
1249 : threadCount;
1250
1250
1251 /* TODO lower thread count if input size is too small and threads would just
1251 /* TODO lower thread count if input size is too small and threads would just
1252 add overhead. */
1252 add overhead. */
1253
1253
1254 if (decompressor->dict) {
1254 if (decompressor->dict) {
1255 if (ensure_ddict(decompressor->dict)) {
1255 if (ensure_ddict(decompressor->dict)) {
1256 return NULL;
1256 return NULL;
1257 }
1257 }
1258 }
1258 }
1259
1259
1260 /* If threadCount==1, we don't start a thread pool. But we do leverage the
1260 /* If threadCount==1, we don't start a thread pool. But we do leverage the
1261 same API for dispatching work. */
1261 same API for dispatching work. */
1262 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1262 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1263 if (NULL == workerStates) {
1263 if (NULL == workerStates) {
1264 PyErr_NoMemory();
1264 PyErr_NoMemory();
1265 goto finally;
1265 goto finally;
1266 }
1266 }
1267
1267
1268 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1268 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1269
1269
1270 if (threadCount > 1) {
1270 if (threadCount > 1) {
1271 pool = POOL_create(threadCount, 1);
1271 pool = POOL_create(threadCount, 1);
1272 if (NULL == pool) {
1272 if (NULL == pool) {
1273 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1273 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1274 goto finally;
1274 goto finally;
1275 }
1275 }
1276 }
1276 }
1277
1277
1278 bytesPerWorker = frames->compressedSize / threadCount;
1278 bytesPerWorker = frames->compressedSize / threadCount;
1279
1279
1280 if (bytesPerWorker > SIZE_MAX) {
1280 if (bytesPerWorker > SIZE_MAX) {
1281 PyErr_SetString(ZstdError, "too much data per worker for this platform");
1281 PyErr_SetString(ZstdError, "too much data per worker for this platform");
1282 goto finally;
1282 goto finally;
1283 }
1283 }
1284
1284
1285 for (i = 0; i < threadCount; i++) {
1285 for (i = 0; i < threadCount; i++) {
1286 size_t zresult;
1286 size_t zresult;
1287
1287
1288 workerStates[i].dctx = ZSTD_createDCtx();
1288 workerStates[i].dctx = ZSTD_createDCtx();
1289 if (NULL == workerStates[i].dctx) {
1289 if (NULL == workerStates[i].dctx) {
1290 PyErr_NoMemory();
1290 PyErr_NoMemory();
1291 goto finally;
1291 goto finally;
1292 }
1292 }
1293
1293
1294 ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx);
1294 ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx);
1295
1295
1296 if (decompressor->dict) {
1296 if (decompressor->dict) {
1297 zresult = ZSTD_DCtx_refDDict(workerStates[i].dctx, decompressor->dict->ddict);
1297 zresult = ZSTD_DCtx_refDDict(workerStates[i].dctx, decompressor->dict->ddict);
1298 if (zresult) {
1298 if (zresult) {
1299 PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
1299 PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
1300 ZSTD_getErrorName(zresult));
1300 ZSTD_getErrorName(zresult));
1301 goto finally;
1301 goto finally;
1302 }
1302 }
1303 }
1303 }
1304
1304
1305 workerStates[i].framePointers = framePointers;
1305 workerStates[i].framePointers = framePointers;
1306 workerStates[i].requireOutputSizes = 1;
1306 workerStates[i].requireOutputSizes = 1;
1307 }
1307 }
1308
1308
1309 Py_BEGIN_ALLOW_THREADS
1309 Py_BEGIN_ALLOW_THREADS
1310 /* There are many ways to split work among workers.
1310 /* There are many ways to split work among workers.
1311
1311
1312 For now, we take a simple approach of splitting work so each worker
1312 For now, we take a simple approach of splitting work so each worker
1313 gets roughly the same number of input bytes. This will result in more
1313 gets roughly the same number of input bytes. This will result in more
1314 starvation than running N>threadCount jobs. But it avoids complications
1314 starvation than running N>threadCount jobs. But it avoids complications
1315 around state tracking, which could involve extra locking.
1315 around state tracking, which could involve extra locking.
1316 */
1316 */
1317 for (i = 0; i < frames->framesSize; i++) {
1317 for (i = 0; i < frames->framesSize; i++) {
1318 workerBytes += frames->frames[i].sourceSize;
1318 workerBytes += frames->frames[i].sourceSize;
1319
1319
1320 /*
1320 /*
1321 * The last worker/thread needs to handle all remaining work. Don't
1321 * The last worker/thread needs to handle all remaining work. Don't
1322 * trigger it prematurely. Defer to the block outside of the loop.
1322 * trigger it prematurely. Defer to the block outside of the loop.
1323 * (But still process this loop so workerBytes is correct.
1323 * (But still process this loop so workerBytes is correct.
1324 */
1324 */
1325 if (currentThread == threadCount - 1) {
1325 if (currentThread == threadCount - 1) {
1326 continue;
1326 continue;
1327 }
1327 }
1328
1328
1329 if (workerBytes >= bytesPerWorker) {
1329 if (workerBytes >= bytesPerWorker) {
1330 workerStates[currentThread].startOffset = workerStartOffset;
1330 workerStates[currentThread].startOffset = workerStartOffset;
1331 workerStates[currentThread].endOffset = i;
1331 workerStates[currentThread].endOffset = i;
1332 workerStates[currentThread].totalSourceSize = workerBytes;
1332 workerStates[currentThread].totalSourceSize = workerBytes;
1333
1333
1334 if (threadCount > 1) {
1334 if (threadCount > 1) {
1335 POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
1335 POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
1336 }
1336 }
1337 else {
1337 else {
1338 decompress_worker(&workerStates[currentThread]);
1338 decompress_worker(&workerStates[currentThread]);
1339 }
1339 }
1340 currentThread++;
1340 currentThread++;
1341 workerStartOffset = i + 1;
1341 workerStartOffset = i + 1;
1342 workerBytes = 0;
1342 workerBytes = 0;
1343 }
1343 }
1344 }
1344 }
1345
1345
1346 if (workerBytes) {
1346 if (workerBytes) {
1347 workerStates[currentThread].startOffset = workerStartOffset;
1347 workerStates[currentThread].startOffset = workerStartOffset;
1348 workerStates[currentThread].endOffset = frames->framesSize - 1;
1348 workerStates[currentThread].endOffset = frames->framesSize - 1;
1349 workerStates[currentThread].totalSourceSize = workerBytes;
1349 workerStates[currentThread].totalSourceSize = workerBytes;
1350
1350
1351 if (threadCount > 1) {
1351 if (threadCount > 1) {
1352 POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
1352 POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
1353 }
1353 }
1354 else {
1354 else {
1355 decompress_worker(&workerStates[currentThread]);
1355 decompress_worker(&workerStates[currentThread]);
1356 }
1356 }
1357 }
1357 }
1358
1358
1359 if (threadCount > 1) {
1359 if (threadCount > 1) {
1360 POOL_free(pool);
1360 POOL_free(pool);
1361 pool = NULL;
1361 pool = NULL;
1362 }
1362 }
1363 Py_END_ALLOW_THREADS
1363 Py_END_ALLOW_THREADS
1364
1364
1365 for (i = 0; i < threadCount; i++) {
1365 for (i = 0; i < threadCount; i++) {
1366 switch (workerStates[i].error) {
1366 switch (workerStates[i].error) {
1367 case WorkerError_none:
1367 case WorkerError_none:
1368 break;
1368 break;
1369
1369
1370 case WorkerError_zstd:
1370 case WorkerError_zstd:
1371 PyErr_Format(ZstdError, "error decompressing item %zd: %s",
1371 PyErr_Format(ZstdError, "error decompressing item %zd: %s",
1372 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1372 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1373 errored = 1;
1373 errored = 1;
1374 break;
1374 break;
1375
1375
1376 case WorkerError_memory:
1376 case WorkerError_memory:
1377 PyErr_NoMemory();
1377 PyErr_NoMemory();
1378 errored = 1;
1378 errored = 1;
1379 break;
1379 break;
1380
1380
1381 case WorkerError_sizeMismatch:
1381 case WorkerError_sizeMismatch:
1382 PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %zu",
1382 PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %zu",
1383 workerStates[i].errorOffset, workerStates[i].zresult,
1383 workerStates[i].errorOffset, workerStates[i].zresult,
1384 framePointers[workerStates[i].errorOffset].destSize);
1384 framePointers[workerStates[i].errorOffset].destSize);
1385 errored = 1;
1385 errored = 1;
1386 break;
1386 break;
1387
1387
1388 case WorkerError_unknownSize:
1388 case WorkerError_unknownSize:
1389 PyErr_Format(PyExc_ValueError, "could not determine decompressed size of item %zd",
1389 PyErr_Format(PyExc_ValueError, "could not determine decompressed size of item %zd",
1390 workerStates[i].errorOffset);
1390 workerStates[i].errorOffset);
1391 errored = 1;
1391 errored = 1;
1392 break;
1392 break;
1393
1393
1394 default:
1394 default:
1395 PyErr_Format(ZstdError, "unhandled error type: %d; this is a bug",
1395 PyErr_Format(ZstdError, "unhandled error type: %d; this is a bug",
1396 workerStates[i].error);
1396 workerStates[i].error);
1397 errored = 1;
1397 errored = 1;
1398 break;
1398 break;
1399 }
1399 }
1400
1400
1401 if (errored) {
1401 if (errored) {
1402 break;
1402 break;
1403 }
1403 }
1404 }
1404 }
1405
1405
1406 if (errored) {
1406 if (errored) {
1407 goto finally;
1407 goto finally;
1408 }
1408 }
1409
1409
1410 segmentsCount = 0;
1410 segmentsCount = 0;
1411 for (i = 0; i < threadCount; i++) {
1411 for (i = 0; i < threadCount; i++) {
1412 segmentsCount += workerStates[i].destCount;
1412 segmentsCount += workerStates[i].destCount;
1413 }
1413 }
1414
1414
1415 resultArg = PyTuple_New(segmentsCount);
1415 resultArg = PyTuple_New(segmentsCount);
1416 if (NULL == resultArg) {
1416 if (NULL == resultArg) {
1417 goto finally;
1417 goto finally;
1418 }
1418 }
1419
1419
1420 resultIndex = 0;
1420 resultIndex = 0;
1421
1421
1422 for (i = 0; i < threadCount; i++) {
1422 for (i = 0; i < threadCount; i++) {
1423 Py_ssize_t bufferIndex;
1423 Py_ssize_t bufferIndex;
1424 WorkerState* state = &workerStates[i];
1424 WorkerState* state = &workerStates[i];
1425
1425
1426 for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
1426 for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
1427 DestBuffer* destBuffer = &state->destBuffers[bufferIndex];
1427 DestBuffer* destBuffer = &state->destBuffers[bufferIndex];
1428
1428
1429 bws = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1429 bws = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1430 destBuffer->segments, destBuffer->segmentsSize);
1430 destBuffer->segments, destBuffer->segmentsSize);
1431 if (NULL == bws) {
1431 if (NULL == bws) {
1432 goto finally;
1432 goto finally;
1433 }
1433 }
1434
1434
1435 /*
1435 /*
1436 * Memory for buffer and segments was allocated using malloc() in worker
1436 * Memory for buffer and segments was allocated using malloc() in worker
1437 * and the memory is transferred to the BufferWithSegments instance. So
1437 * and the memory is transferred to the BufferWithSegments instance. So
1438 * tell instance to use free() and NULL the reference in the state struct
1438 * tell instance to use free() and NULL the reference in the state struct
1439 * so it isn't freed below.
1439 * so it isn't freed below.
1440 */
1440 */
1441 bws->useFree = 1;
1441 bws->useFree = 1;
1442 destBuffer->dest = NULL;
1442 destBuffer->dest = NULL;
1443 destBuffer->segments = NULL;
1443 destBuffer->segments = NULL;
1444
1444
1445 PyTuple_SET_ITEM(resultArg, resultIndex++, (PyObject*)bws);
1445 PyTuple_SET_ITEM(resultArg, resultIndex++, (PyObject*)bws);
1446 }
1446 }
1447 }
1447 }
1448
1448
1449 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1449 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1450 (PyObject*)&ZstdBufferWithSegmentsCollectionType, resultArg);
1450 (PyObject*)&ZstdBufferWithSegmentsCollectionType, resultArg);
1451
1451
1452 finally:
1452 finally:
1453 Py_CLEAR(resultArg);
1453 Py_CLEAR(resultArg);
1454
1454
1455 if (workerStates) {
1455 if (workerStates) {
1456 for (i = 0; i < threadCount; i++) {
1456 for (i = 0; i < threadCount; i++) {
1457 Py_ssize_t bufferIndex;
1457 Py_ssize_t bufferIndex;
1458 WorkerState* state = &workerStates[i];
1458 WorkerState* state = &workerStates[i];
1459
1459
1460 if (state->dctx) {
1460 if (state->dctx) {
1461 ZSTD_freeDCtx(state->dctx);
1461 ZSTD_freeDCtx(state->dctx);
1462 }
1462 }
1463
1463
1464 for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
1464 for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
1465 if (state->destBuffers) {
1465 if (state->destBuffers) {
1466 /*
1466 /*
1467 * Will be NULL if memory transfered to a BufferWithSegments.
1467 * Will be NULL if memory transfered to a BufferWithSegments.
1468 * Otherwise it is left over after an error occurred.
1468 * Otherwise it is left over after an error occurred.
1469 */
1469 */
1470 free(state->destBuffers[bufferIndex].dest);
1470 free(state->destBuffers[bufferIndex].dest);
1471 free(state->destBuffers[bufferIndex].segments);
1471 free(state->destBuffers[bufferIndex].segments);
1472 }
1472 }
1473 }
1473 }
1474
1474
1475 free(state->destBuffers);
1475 free(state->destBuffers);
1476 }
1476 }
1477
1477
1478 PyMem_Free(workerStates);
1478 PyMem_Free(workerStates);
1479 }
1479 }
1480
1480
1481 POOL_free(pool);
1481 POOL_free(pool);
1482
1482
1483 return result;
1483 return result;
1484 }
1484 }
1485
1485
1486 PyDoc_STRVAR(Decompressor_multi_decompress_to_buffer__doc__,
1486 PyDoc_STRVAR(Decompressor_multi_decompress_to_buffer__doc__,
1487 "Decompress multiple frames to output buffers\n"
1487 "Decompress multiple frames to output buffers\n"
1488 "\n"
1488 "\n"
1489 "Receives a ``BufferWithSegments``, a ``BufferWithSegmentsCollection`` or a\n"
1489 "Receives a ``BufferWithSegments``, a ``BufferWithSegmentsCollection`` or a\n"
1490 "list of bytes-like objects. Each item in the passed collection should be a\n"
1490 "list of bytes-like objects. Each item in the passed collection should be a\n"
1491 "compressed zstd frame.\n"
1491 "compressed zstd frame.\n"
1492 "\n"
1492 "\n"
1493 "Unless ``decompressed_sizes`` is specified, the content size *must* be\n"
1493 "Unless ``decompressed_sizes`` is specified, the content size *must* be\n"
1494 "written into the zstd frame header. If ``decompressed_sizes`` is specified,\n"
1494 "written into the zstd frame header. If ``decompressed_sizes`` is specified,\n"
1495 "it is an object conforming to the buffer protocol that represents an array\n"
1495 "it is an object conforming to the buffer protocol that represents an array\n"
1496 "of 64-bit unsigned integers in the machine's native format. Specifying\n"
1496 "of 64-bit unsigned integers in the machine's native format. Specifying\n"
1497 "``decompressed_sizes`` avoids a pre-scan of each frame to determine its\n"
1497 "``decompressed_sizes`` avoids a pre-scan of each frame to determine its\n"
1498 "output size.\n"
1498 "output size.\n"
1499 "\n"
1499 "\n"
1500 "Returns a ``BufferWithSegmentsCollection`` containing the decompressed\n"
1500 "Returns a ``BufferWithSegmentsCollection`` containing the decompressed\n"
1501 "data. All decompressed data is allocated in a single memory buffer. The\n"
1501 "data. All decompressed data is allocated in a single memory buffer. The\n"
1502 "``BufferWithSegments`` instance tracks which objects are at which offsets\n"
1502 "``BufferWithSegments`` instance tracks which objects are at which offsets\n"
1503 "and their respective lengths.\n"
1503 "and their respective lengths.\n"
1504 "\n"
1504 "\n"
1505 "The ``threads`` argument controls how many threads to use for operations.\n"
1505 "The ``threads`` argument controls how many threads to use for operations.\n"
1506 "Negative values will use the same number of threads as logical CPUs on the\n"
1506 "Negative values will use the same number of threads as logical CPUs on the\n"
1507 "machine.\n"
1507 "machine.\n"
1508 );
1508 );
1509
1509
1510 static ZstdBufferWithSegmentsCollection* Decompressor_multi_decompress_to_buffer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
1510 static ZstdBufferWithSegmentsCollection* Decompressor_multi_decompress_to_buffer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
1511 static char* kwlist[] = {
1511 static char* kwlist[] = {
1512 "frames",
1512 "frames",
1513 "decompressed_sizes",
1513 "decompressed_sizes",
1514 "threads",
1514 "threads",
1515 NULL
1515 NULL
1516 };
1516 };
1517
1517
1518 PyObject* frames;
1518 PyObject* frames;
1519 Py_buffer frameSizes;
1519 Py_buffer frameSizes;
1520 int threads = 0;
1520 int threads = 0;
1521 Py_ssize_t frameCount;
1521 Py_ssize_t frameCount;
1522 Py_buffer* frameBuffers = NULL;
1522 Py_buffer* frameBuffers = NULL;
1523 FramePointer* framePointers = NULL;
1523 FramePointer* framePointers = NULL;
1524 unsigned long long* frameSizesP = NULL;
1524 unsigned long long* frameSizesP = NULL;
1525 unsigned long long totalInputSize = 0;
1525 unsigned long long totalInputSize = 0;
1526 FrameSources frameSources;
1526 FrameSources frameSources;
1527 ZstdBufferWithSegmentsCollection* result = NULL;
1527 ZstdBufferWithSegmentsCollection* result = NULL;
1528 Py_ssize_t i;
1528 Py_ssize_t i;
1529
1529
1530 memset(&frameSizes, 0, sizeof(frameSizes));
1530 memset(&frameSizes, 0, sizeof(frameSizes));
1531
1531
1532 #if PY_MAJOR_VERSION >= 3
1532 #if PY_MAJOR_VERSION >= 3
1533 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|y*i:multi_decompress_to_buffer",
1533 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|y*i:multi_decompress_to_buffer",
1534 #else
1534 #else
1535 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s*i:multi_decompress_to_buffer",
1535 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s*i:multi_decompress_to_buffer",
1536 #endif
1536 #endif
1537 kwlist, &frames, &frameSizes, &threads)) {
1537 kwlist, &frames, &frameSizes, &threads)) {
1538 return NULL;
1538 return NULL;
1539 }
1539 }
1540
1540
1541 if (frameSizes.buf) {
1541 if (frameSizes.buf) {
1542 if (!PyBuffer_IsContiguous(&frameSizes, 'C') || frameSizes.ndim > 1) {
1542 if (!PyBuffer_IsContiguous(&frameSizes, 'C') || frameSizes.ndim > 1) {
1543 PyErr_SetString(PyExc_ValueError, "decompressed_sizes buffer should be contiguous and have a single dimension");
1543 PyErr_SetString(PyExc_ValueError, "decompressed_sizes buffer should be contiguous and have a single dimension");
1544 goto finally;
1544 goto finally;
1545 }
1545 }
1546
1546
1547 frameSizesP = (unsigned long long*)frameSizes.buf;
1547 frameSizesP = (unsigned long long*)frameSizes.buf;
1548 }
1548 }
1549
1549
1550 if (threads < 0) {
1550 if (threads < 0) {
1551 threads = cpu_count();
1551 threads = cpu_count();
1552 }
1552 }
1553
1553
1554 if (threads < 2) {
1554 if (threads < 2) {
1555 threads = 1;
1555 threads = 1;
1556 }
1556 }
1557
1557
1558 if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsType)) {
1558 if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsType)) {
1559 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)frames;
1559 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)frames;
1560 frameCount = buffer->segmentCount;
1560 frameCount = buffer->segmentCount;
1561
1561
1562 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1562 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1563 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1563 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1564 frameCount * sizeof(unsigned long long), frameSizes.len);
1564 frameCount * sizeof(unsigned long long), frameSizes.len);
1565 goto finally;
1565 goto finally;
1566 }
1566 }
1567
1567
1568 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1568 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1569 if (!framePointers) {
1569 if (!framePointers) {
1570 PyErr_NoMemory();
1570 PyErr_NoMemory();
1571 goto finally;
1571 goto finally;
1572 }
1572 }
1573
1573
1574 for (i = 0; i < frameCount; i++) {
1574 for (i = 0; i < frameCount; i++) {
1575 void* sourceData;
1575 void* sourceData;
1576 unsigned long long sourceSize;
1576 unsigned long long sourceSize;
1577 unsigned long long decompressedSize = 0;
1577 unsigned long long decompressedSize = 0;
1578
1578
1579 if (buffer->segments[i].offset + buffer->segments[i].length > buffer->dataSize) {
1579 if (buffer->segments[i].offset + buffer->segments[i].length > buffer->dataSize) {
1580 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", i);
1580 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", i);
1581 goto finally;
1581 goto finally;
1582 }
1582 }
1583
1583
1584 sourceData = (char*)buffer->data + buffer->segments[i].offset;
1584 sourceData = (char*)buffer->data + buffer->segments[i].offset;
1585 sourceSize = buffer->segments[i].length;
1585 sourceSize = buffer->segments[i].length;
1586 totalInputSize += sourceSize;
1586 totalInputSize += sourceSize;
1587
1587
1588 if (frameSizesP) {
1588 if (frameSizesP) {
1589 decompressedSize = frameSizesP[i];
1589 decompressedSize = frameSizesP[i];
1590 }
1590 }
1591
1591
1592 if (sourceSize > SIZE_MAX) {
1592 if (sourceSize > SIZE_MAX) {
1593 PyErr_Format(PyExc_ValueError,
1593 PyErr_Format(PyExc_ValueError,
1594 "item %zd is too large for this platform", i);
1594 "item %zd is too large for this platform", i);
1595 goto finally;
1595 goto finally;
1596 }
1596 }
1597
1597
1598 if (decompressedSize > SIZE_MAX) {
1598 if (decompressedSize > SIZE_MAX) {
1599 PyErr_Format(PyExc_ValueError,
1599 PyErr_Format(PyExc_ValueError,
1600 "decompressed size of item %zd is too large for this platform", i);
1600 "decompressed size of item %zd is too large for this platform", i);
1601 goto finally;
1601 goto finally;
1602 }
1602 }
1603
1603
1604 framePointers[i].sourceData = sourceData;
1604 framePointers[i].sourceData = sourceData;
1605 framePointers[i].sourceSize = (size_t)sourceSize;
1605 framePointers[i].sourceSize = (size_t)sourceSize;
1606 framePointers[i].destSize = (size_t)decompressedSize;
1606 framePointers[i].destSize = (size_t)decompressedSize;
1607 }
1607 }
1608 }
1608 }
1609 else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) {
1609 else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) {
1610 Py_ssize_t offset = 0;
1610 Py_ssize_t offset = 0;
1611 ZstdBufferWithSegments* buffer;
1611 ZstdBufferWithSegments* buffer;
1612 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)frames;
1612 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)frames;
1613
1613
1614 frameCount = BufferWithSegmentsCollection_length(collection);
1614 frameCount = BufferWithSegmentsCollection_length(collection);
1615
1615
1616 if (frameSizes.buf && frameSizes.len != frameCount) {
1616 if (frameSizes.buf && frameSizes.len != frameCount) {
1617 PyErr_Format(PyExc_ValueError,
1617 PyErr_Format(PyExc_ValueError,
1618 "decompressed_sizes size mismatch; expected %zd; got %zd",
1618 "decompressed_sizes size mismatch; expected %zd; got %zd",
1619 frameCount * sizeof(unsigned long long), frameSizes.len);
1619 frameCount * sizeof(unsigned long long), frameSizes.len);
1620 goto finally;
1620 goto finally;
1621 }
1621 }
1622
1622
1623 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1623 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1624 if (NULL == framePointers) {
1624 if (NULL == framePointers) {
1625 PyErr_NoMemory();
1625 PyErr_NoMemory();
1626 goto finally;
1626 goto finally;
1627 }
1627 }
1628
1628
1629 /* Iterate the data structure directly because it is faster. */
1629 /* Iterate the data structure directly because it is faster. */
1630 for (i = 0; i < collection->bufferCount; i++) {
1630 for (i = 0; i < collection->bufferCount; i++) {
1631 Py_ssize_t segmentIndex;
1631 Py_ssize_t segmentIndex;
1632 buffer = collection->buffers[i];
1632 buffer = collection->buffers[i];
1633
1633
1634 for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) {
1634 for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) {
1635 unsigned long long decompressedSize = frameSizesP ? frameSizesP[offset] : 0;
1635 unsigned long long decompressedSize = frameSizesP ? frameSizesP[offset] : 0;
1636
1636
1637 if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) {
1637 if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) {
1638 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area",
1638 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area",
1639 offset);
1639 offset);
1640 goto finally;
1640 goto finally;
1641 }
1641 }
1642
1642
1643 if (buffer->segments[segmentIndex].length > SIZE_MAX) {
1643 if (buffer->segments[segmentIndex].length > SIZE_MAX) {
1644 PyErr_Format(PyExc_ValueError,
1644 PyErr_Format(PyExc_ValueError,
1645 "item %zd in buffer %zd is too large for this platform",
1645 "item %zd in buffer %zd is too large for this platform",
1646 segmentIndex, i);
1646 segmentIndex, i);
1647 goto finally;
1647 goto finally;
1648 }
1648 }
1649
1649
1650 if (decompressedSize > SIZE_MAX) {
1650 if (decompressedSize > SIZE_MAX) {
1651 PyErr_Format(PyExc_ValueError,
1651 PyErr_Format(PyExc_ValueError,
1652 "decompressed size of item %zd in buffer %zd is too large for this platform",
1652 "decompressed size of item %zd in buffer %zd is too large for this platform",
1653 segmentIndex, i);
1653 segmentIndex, i);
1654 goto finally;
1654 goto finally;
1655 }
1655 }
1656
1656
1657 totalInputSize += buffer->segments[segmentIndex].length;
1657 totalInputSize += buffer->segments[segmentIndex].length;
1658
1658
1659 framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset;
1659 framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset;
1660 framePointers[offset].sourceSize = (size_t)buffer->segments[segmentIndex].length;
1660 framePointers[offset].sourceSize = (size_t)buffer->segments[segmentIndex].length;
1661 framePointers[offset].destSize = (size_t)decompressedSize;
1661 framePointers[offset].destSize = (size_t)decompressedSize;
1662
1662
1663 offset++;
1663 offset++;
1664 }
1664 }
1665 }
1665 }
1666 }
1666 }
1667 else if (PyList_Check(frames)) {
1667 else if (PyList_Check(frames)) {
1668 frameCount = PyList_GET_SIZE(frames);
1668 frameCount = PyList_GET_SIZE(frames);
1669
1669
1670 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1670 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1671 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1671 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1672 frameCount * sizeof(unsigned long long), frameSizes.len);
1672 frameCount * sizeof(unsigned long long), frameSizes.len);
1673 goto finally;
1673 goto finally;
1674 }
1674 }
1675
1675
1676 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1676 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1677 if (!framePointers) {
1677 if (!framePointers) {
1678 PyErr_NoMemory();
1678 PyErr_NoMemory();
1679 goto finally;
1679 goto finally;
1680 }
1680 }
1681
1681
1682 frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer));
1682 frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer));
1683 if (NULL == frameBuffers) {
1683 if (NULL == frameBuffers) {
1684 PyErr_NoMemory();
1684 PyErr_NoMemory();
1685 goto finally;
1685 goto finally;
1686 }
1686 }
1687
1687
1688 memset(frameBuffers, 0, frameCount * sizeof(Py_buffer));
1688 memset(frameBuffers, 0, frameCount * sizeof(Py_buffer));
1689
1689
1690 /* Do a pass to assemble info about our input buffers and output sizes. */
1690 /* Do a pass to assemble info about our input buffers and output sizes. */
1691 for (i = 0; i < frameCount; i++) {
1691 for (i = 0; i < frameCount; i++) {
1692 unsigned long long decompressedSize = frameSizesP ? frameSizesP[i] : 0;
1692 unsigned long long decompressedSize = frameSizesP ? frameSizesP[i] : 0;
1693
1693
1694 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i),
1694 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i),
1695 &frameBuffers[i], PyBUF_CONTIG_RO)) {
1695 &frameBuffers[i], PyBUF_CONTIG_RO)) {
1696 PyErr_Clear();
1696 PyErr_Clear();
1697 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1697 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1698 goto finally;
1698 goto finally;
1699 }
1699 }
1700
1700
1701 if (decompressedSize > SIZE_MAX) {
1701 if (decompressedSize > SIZE_MAX) {
1702 PyErr_Format(PyExc_ValueError,
1702 PyErr_Format(PyExc_ValueError,
1703 "decompressed size of item %zd is too large for this platform", i);
1703 "decompressed size of item %zd is too large for this platform", i);
1704 goto finally;
1704 goto finally;
1705 }
1705 }
1706
1706
1707 totalInputSize += frameBuffers[i].len;
1707 totalInputSize += frameBuffers[i].len;
1708
1708
1709 framePointers[i].sourceData = frameBuffers[i].buf;
1709 framePointers[i].sourceData = frameBuffers[i].buf;
1710 framePointers[i].sourceSize = frameBuffers[i].len;
1710 framePointers[i].sourceSize = frameBuffers[i].len;
1711 framePointers[i].destSize = (size_t)decompressedSize;
1711 framePointers[i].destSize = (size_t)decompressedSize;
1712 }
1712 }
1713 }
1713 }
1714 else {
1714 else {
1715 PyErr_SetString(PyExc_TypeError, "argument must be list or BufferWithSegments");
1715 PyErr_SetString(PyExc_TypeError, "argument must be list or BufferWithSegments");
1716 goto finally;
1716 goto finally;
1717 }
1717 }
1718
1718
1719 /* We now have an array with info about our inputs and outputs. Feed it into
1719 /* We now have an array with info about our inputs and outputs. Feed it into
1720 our generic decompression function. */
1720 our generic decompression function. */
1721 frameSources.frames = framePointers;
1721 frameSources.frames = framePointers;
1722 frameSources.framesSize = frameCount;
1722 frameSources.framesSize = frameCount;
1723 frameSources.compressedSize = totalInputSize;
1723 frameSources.compressedSize = totalInputSize;
1724
1724
1725 result = decompress_from_framesources(self, &frameSources, threads);
1725 result = decompress_from_framesources(self, &frameSources, threads);
1726
1726
1727 finally:
1727 finally:
1728 if (frameSizes.buf) {
1728 if (frameSizes.buf) {
1729 PyBuffer_Release(&frameSizes);
1729 PyBuffer_Release(&frameSizes);
1730 }
1730 }
1731 PyMem_Free(framePointers);
1731 PyMem_Free(framePointers);
1732
1732
1733 if (frameBuffers) {
1733 if (frameBuffers) {
1734 for (i = 0; i < frameCount; i++) {
1734 for (i = 0; i < frameCount; i++) {
1735 PyBuffer_Release(&frameBuffers[i]);
1735 PyBuffer_Release(&frameBuffers[i]);
1736 }
1736 }
1737
1737
1738 PyMem_Free(frameBuffers);
1738 PyMem_Free(frameBuffers);
1739 }
1739 }
1740
1740
1741 return result;
1741 return result;
1742 }
1742 }
1743
1743
1744 static PyMethodDef Decompressor_methods[] = {
1744 static PyMethodDef Decompressor_methods[] = {
1745 { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
1745 { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
1746 Decompressor_copy_stream__doc__ },
1746 Decompressor_copy_stream__doc__ },
1747 { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS,
1747 { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS,
1748 Decompressor_decompress__doc__ },
1748 Decompressor_decompress__doc__ },
1749 { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_VARARGS | METH_KEYWORDS,
1749 { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_VARARGS | METH_KEYWORDS,
1750 Decompressor_decompressobj__doc__ },
1750 Decompressor_decompressobj__doc__ },
1751 { "read_to_iter", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS,
1751 { "read_to_iter", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS,
1752 Decompressor_read_to_iter__doc__ },
1752 Decompressor_read_to_iter__doc__ },
1753 /* TODO Remove deprecated API */
1753 /* TODO Remove deprecated API */
1754 { "read_from", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS,
1754 { "read_from", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS,
1755 Decompressor_read_to_iter__doc__ },
1755 Decompressor_read_to_iter__doc__ },
1756 { "stream_reader", (PyCFunction)Decompressor_stream_reader,
1756 { "stream_reader", (PyCFunction)Decompressor_stream_reader,
1757 METH_VARARGS | METH_KEYWORDS, Decompressor_stream_reader__doc__ },
1757 METH_VARARGS | METH_KEYWORDS, Decompressor_stream_reader__doc__ },
1758 { "stream_writer", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS,
1758 { "stream_writer", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS,
1759 Decompressor_stream_writer__doc__ },
1759 Decompressor_stream_writer__doc__ },
1760 /* TODO remove deprecated API */
1760 /* TODO remove deprecated API */
1761 { "write_to", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS,
1761 { "write_to", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS,
1762 Decompressor_stream_writer__doc__ },
1762 Decompressor_stream_writer__doc__ },
1763 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
1763 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
1764 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
1764 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
1765 { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer,
1765 { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer,
1766 METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ },
1766 METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ },
1767 { "memory_size", (PyCFunction)Decompressor_memory_size, METH_NOARGS,
1767 { "memory_size", (PyCFunction)Decompressor_memory_size, METH_NOARGS,
1768 Decompressor_memory_size__doc__ },
1768 Decompressor_memory_size__doc__ },
1769 { NULL, NULL }
1769 { NULL, NULL }
1770 };
1770 };
1771
1771
1772 PyTypeObject ZstdDecompressorType = {
1772 PyTypeObject ZstdDecompressorType = {
1773 PyVarObject_HEAD_INIT(NULL, 0)
1773 PyVarObject_HEAD_INIT(NULL, 0)
1774 "zstd.ZstdDecompressor", /* tp_name */
1774 "zstd.ZstdDecompressor", /* tp_name */
1775 sizeof(ZstdDecompressor), /* tp_basicsize */
1775 sizeof(ZstdDecompressor), /* tp_basicsize */
1776 0, /* tp_itemsize */
1776 0, /* tp_itemsize */
1777 (destructor)Decompressor_dealloc, /* tp_dealloc */
1777 (destructor)Decompressor_dealloc, /* tp_dealloc */
1778 0, /* tp_print */
1778 0, /* tp_print */
1779 0, /* tp_getattr */
1779 0, /* tp_getattr */
1780 0, /* tp_setattr */
1780 0, /* tp_setattr */
1781 0, /* tp_compare */
1781 0, /* tp_compare */
1782 0, /* tp_repr */
1782 0, /* tp_repr */
1783 0, /* tp_as_number */
1783 0, /* tp_as_number */
1784 0, /* tp_as_sequence */
1784 0, /* tp_as_sequence */
1785 0, /* tp_as_mapping */
1785 0, /* tp_as_mapping */
1786 0, /* tp_hash */
1786 0, /* tp_hash */
1787 0, /* tp_call */
1787 0, /* tp_call */
1788 0, /* tp_str */
1788 0, /* tp_str */
1789 0, /* tp_getattro */
1789 0, /* tp_getattro */
1790 0, /* tp_setattro */
1790 0, /* tp_setattro */
1791 0, /* tp_as_buffer */
1791 0, /* tp_as_buffer */
1792 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1792 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1793 Decompressor__doc__, /* tp_doc */
1793 Decompressor__doc__, /* tp_doc */
1794 0, /* tp_traverse */
1794 0, /* tp_traverse */
1795 0, /* tp_clear */
1795 0, /* tp_clear */
1796 0, /* tp_richcompare */
1796 0, /* tp_richcompare */
1797 0, /* tp_weaklistoffset */
1797 0, /* tp_weaklistoffset */
1798 0, /* tp_iter */
1798 0, /* tp_iter */
1799 0, /* tp_iternext */
1799 0, /* tp_iternext */
1800 Decompressor_methods, /* tp_methods */
1800 Decompressor_methods, /* tp_methods */
1801 0, /* tp_members */
1801 0, /* tp_members */
1802 0, /* tp_getset */
1802 0, /* tp_getset */
1803 0, /* tp_base */
1803 0, /* tp_base */
1804 0, /* tp_dict */
1804 0, /* tp_dict */
1805 0, /* tp_descr_get */
1805 0, /* tp_descr_get */
1806 0, /* tp_descr_set */
1806 0, /* tp_descr_set */
1807 0, /* tp_dictoffset */
1807 0, /* tp_dictoffset */
1808 (initproc)Decompressor_init, /* tp_init */
1808 (initproc)Decompressor_init, /* tp_init */
1809 0, /* tp_alloc */
1809 0, /* tp_alloc */
1810 PyType_GenericNew, /* tp_new */
1810 PyType_GenericNew, /* tp_new */
1811 };
1811 };
1812
1812
1813 void decompressor_module_init(PyObject* mod) {
1813 void decompressor_module_init(PyObject* mod) {
1814 Py_TYPE(&ZstdDecompressorType) = &PyType_Type;
1814 Py_TYPE(&ZstdDecompressorType) = &PyType_Type;
1815 if (PyType_Ready(&ZstdDecompressorType) < 0) {
1815 if (PyType_Ready(&ZstdDecompressorType) < 0) {
1816 return;
1816 return;
1817 }
1817 }
1818
1818
1819 Py_INCREF((PyObject*)&ZstdDecompressorType);
1819 Py_INCREF((PyObject*)&ZstdDecompressorType);
1820 PyModule_AddObject(mod, "ZstdDecompressor",
1820 PyModule_AddObject(mod, "ZstdDecompressor",
1821 (PyObject*)&ZstdDecompressorType);
1821 (PyObject*)&ZstdDecompressorType);
1822 }
1822 }
General Comments 0
You need to be logged in to leave comments. Login now