Show More
The requested changes are too big and content was truncated. Show full diff
@@ -0,0 +1,360 | |||
|
1 | /** | |
|
2 | * Copyright (c) 2018-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
8 | ||
|
9 | #include "python-zstandard.h" | |
|
10 | ||
|
11 | extern PyObject* ZstdError; | |
|
12 | ||
|
13 | PyDoc_STRVAR(ZstdCompressionChunkerIterator__doc__, | |
|
14 | "Iterator of output chunks from ZstdCompressionChunker.\n" | |
|
15 | ); | |
|
16 | ||
|
17 | static void ZstdCompressionChunkerIterator_dealloc(ZstdCompressionChunkerIterator* self) { | |
|
18 | Py_XDECREF(self->chunker); | |
|
19 | ||
|
20 | PyObject_Del(self); | |
|
21 | } | |
|
22 | ||
|
23 | static PyObject* ZstdCompressionChunkerIterator_iter(PyObject* self) { | |
|
24 | Py_INCREF(self); | |
|
25 | return self; | |
|
26 | } | |
|
27 | ||
|
28 | static PyObject* ZstdCompressionChunkerIterator_iternext(ZstdCompressionChunkerIterator* self) { | |
|
29 | size_t zresult; | |
|
30 | PyObject* chunk; | |
|
31 | ZstdCompressionChunker* chunker = self->chunker; | |
|
32 | ZSTD_EndDirective zFlushMode; | |
|
33 | ||
|
34 | if (self->mode != compressionchunker_mode_normal && chunker->input.pos != chunker->input.size) { | |
|
35 | PyErr_SetString(ZstdError, "input should have been fully consumed before calling flush() or finish()"); | |
|
36 | return NULL; | |
|
37 | } | |
|
38 | ||
|
39 | if (chunker->finished) { | |
|
40 | return NULL; | |
|
41 | } | |
|
42 | ||
|
43 | /* If we have data left in the input, consume it. */ | |
|
44 | while (chunker->input.pos < chunker->input.size) { | |
|
45 | Py_BEGIN_ALLOW_THREADS | |
|
46 | zresult = ZSTD_compress_generic(chunker->compressor->cctx, &chunker->output, | |
|
47 | &chunker->input, ZSTD_e_continue); | |
|
48 | Py_END_ALLOW_THREADS | |
|
49 | ||
|
50 | /* Input is fully consumed. */ | |
|
51 | if (chunker->input.pos == chunker->input.size) { | |
|
52 | chunker->input.src = NULL; | |
|
53 | chunker->input.pos = 0; | |
|
54 | chunker->input.size = 0; | |
|
55 | PyBuffer_Release(&chunker->inBuffer); | |
|
56 | } | |
|
57 | ||
|
58 | if (ZSTD_isError(zresult)) { | |
|
59 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); | |
|
60 | return NULL; | |
|
61 | } | |
|
62 | ||
|
63 | /* If it produced a full output chunk, emit it. */ | |
|
64 | if (chunker->output.pos == chunker->output.size) { | |
|
65 | chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos); | |
|
66 | if (!chunk) { | |
|
67 | return NULL; | |
|
68 | } | |
|
69 | ||
|
70 | chunker->output.pos = 0; | |
|
71 | ||
|
72 | return chunk; | |
|
73 | } | |
|
74 | ||
|
75 | /* Else continue to compress available input data. */ | |
|
76 | } | |
|
77 | ||
|
78 | /* We also need this here for the special case of an empty input buffer. */ | |
|
79 | if (chunker->input.pos == chunker->input.size) { | |
|
80 | chunker->input.src = NULL; | |
|
81 | chunker->input.pos = 0; | |
|
82 | chunker->input.size = 0; | |
|
83 | PyBuffer_Release(&chunker->inBuffer); | |
|
84 | } | |
|
85 | ||
|
86 | /* No more input data. A partial chunk may be in chunker->output. | |
|
87 | * If we're in normal compression mode, we're done. Otherwise if we're in | |
|
88 | * flush or finish mode, we need to emit what data remains. | |
|
89 | */ | |
|
90 | if (self->mode == compressionchunker_mode_normal) { | |
|
91 | /* We don't need to set StopIteration. */ | |
|
92 | return NULL; | |
|
93 | } | |
|
94 | ||
|
95 | if (self->mode == compressionchunker_mode_flush) { | |
|
96 | zFlushMode = ZSTD_e_flush; | |
|
97 | } | |
|
98 | else if (self->mode == compressionchunker_mode_finish) { | |
|
99 | zFlushMode = ZSTD_e_end; | |
|
100 | } | |
|
101 | else { | |
|
102 | PyErr_SetString(ZstdError, "unhandled compression mode; this should never happen"); | |
|
103 | return NULL; | |
|
104 | } | |
|
105 | ||
|
106 | Py_BEGIN_ALLOW_THREADS | |
|
107 | zresult = ZSTD_compress_generic(chunker->compressor->cctx, &chunker->output, | |
|
108 | &chunker->input, zFlushMode); | |
|
109 | Py_END_ALLOW_THREADS | |
|
110 | ||
|
111 | if (ZSTD_isError(zresult)) { | |
|
112 | PyErr_Format(ZstdError, "zstd compress error: %s", | |
|
113 | ZSTD_getErrorName(zresult)); | |
|
114 | return NULL; | |
|
115 | } | |
|
116 | ||
|
117 | if (!zresult && chunker->output.pos == 0) { | |
|
118 | return NULL; | |
|
119 | } | |
|
120 | ||
|
121 | chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos); | |
|
122 | if (!chunk) { | |
|
123 | return NULL; | |
|
124 | } | |
|
125 | ||
|
126 | chunker->output.pos = 0; | |
|
127 | ||
|
128 | if (!zresult && self->mode == compressionchunker_mode_finish) { | |
|
129 | chunker->finished = 1; | |
|
130 | } | |
|
131 | ||
|
132 | return chunk; | |
|
133 | } | |
|
134 | ||
|
135 | PyTypeObject ZstdCompressionChunkerIteratorType = { | |
|
136 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
137 | "zstd.ZstdCompressionChunkerIterator", /* tp_name */ | |
|
138 | sizeof(ZstdCompressionChunkerIterator), /* tp_basicsize */ | |
|
139 | 0, /* tp_itemsize */ | |
|
140 | (destructor)ZstdCompressionChunkerIterator_dealloc, /* tp_dealloc */ | |
|
141 | 0, /* tp_print */ | |
|
142 | 0, /* tp_getattr */ | |
|
143 | 0, /* tp_setattr */ | |
|
144 | 0, /* tp_compare */ | |
|
145 | 0, /* tp_repr */ | |
|
146 | 0, /* tp_as_number */ | |
|
147 | 0, /* tp_as_sequence */ | |
|
148 | 0, /* tp_as_mapping */ | |
|
149 | 0, /* tp_hash */ | |
|
150 | 0, /* tp_call */ | |
|
151 | 0, /* tp_str */ | |
|
152 | 0, /* tp_getattro */ | |
|
153 | 0, /* tp_setattro */ | |
|
154 | 0, /* tp_as_buffer */ | |
|
155 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
|
156 | ZstdCompressionChunkerIterator__doc__, /* tp_doc */ | |
|
157 | 0, /* tp_traverse */ | |
|
158 | 0, /* tp_clear */ | |
|
159 | 0, /* tp_richcompare */ | |
|
160 | 0, /* tp_weaklistoffset */ | |
|
161 | ZstdCompressionChunkerIterator_iter, /* tp_iter */ | |
|
162 | (iternextfunc)ZstdCompressionChunkerIterator_iternext, /* tp_iternext */ | |
|
163 | 0, /* tp_methods */ | |
|
164 | 0, /* tp_members */ | |
|
165 | 0, /* tp_getset */ | |
|
166 | 0, /* tp_base */ | |
|
167 | 0, /* tp_dict */ | |
|
168 | 0, /* tp_descr_get */ | |
|
169 | 0, /* tp_descr_set */ | |
|
170 | 0, /* tp_dictoffset */ | |
|
171 | 0, /* tp_init */ | |
|
172 | 0, /* tp_alloc */ | |
|
173 | PyType_GenericNew, /* tp_new */ | |
|
174 | }; | |
|
175 | ||
|
176 | PyDoc_STRVAR(ZstdCompressionChunker__doc__, | |
|
177 | "Compress chunks iteratively into exact chunk sizes.\n" | |
|
178 | ); | |
|
179 | ||
|
180 | static void ZstdCompressionChunker_dealloc(ZstdCompressionChunker* self) { | |
|
181 | PyBuffer_Release(&self->inBuffer); | |
|
182 | self->input.src = NULL; | |
|
183 | ||
|
184 | PyMem_Free(self->output.dst); | |
|
185 | self->output.dst = NULL; | |
|
186 | ||
|
187 | Py_XDECREF(self->compressor); | |
|
188 | ||
|
189 | PyObject_Del(self); | |
|
190 | } | |
|
191 | ||
|
192 | static ZstdCompressionChunkerIterator* ZstdCompressionChunker_compress(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) { | |
|
193 | static char* kwlist[] = { | |
|
194 | "data", | |
|
195 | NULL | |
|
196 | }; | |
|
197 | ||
|
198 | ZstdCompressionChunkerIterator* result; | |
|
199 | ||
|
200 | if (self->finished) { | |
|
201 | PyErr_SetString(ZstdError, "cannot call compress() after compression finished"); | |
|
202 | return NULL; | |
|
203 | } | |
|
204 | ||
|
205 | if (self->inBuffer.obj) { | |
|
206 | PyErr_SetString(ZstdError, | |
|
207 | "cannot perform operation before consuming output from previous operation"); | |
|
208 | return NULL; | |
|
209 | } | |
|
210 | ||
|
211 | #if PY_MAJOR_VERSION >= 3 | |
|
212 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress", | |
|
213 | #else | |
|
214 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress", | |
|
215 | #endif | |
|
216 | kwlist, &self->inBuffer)) { | |
|
217 | return NULL; | |
|
218 | } | |
|
219 | ||
|
220 | if (!PyBuffer_IsContiguous(&self->inBuffer, 'C') || self->inBuffer.ndim > 1) { | |
|
221 | PyErr_SetString(PyExc_ValueError, | |
|
222 | "data buffer should be contiguous and have at most one dimension"); | |
|
223 | PyBuffer_Release(&self->inBuffer); | |
|
224 | return NULL; | |
|
225 | } | |
|
226 | ||
|
227 | result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL); | |
|
228 | if (!result) { | |
|
229 | PyBuffer_Release(&self->inBuffer); | |
|
230 | return NULL; | |
|
231 | } | |
|
232 | ||
|
233 | self->input.src = self->inBuffer.buf; | |
|
234 | self->input.size = self->inBuffer.len; | |
|
235 | self->input.pos = 0; | |
|
236 | ||
|
237 | result->chunker = self; | |
|
238 | Py_INCREF(result->chunker); | |
|
239 | ||
|
240 | result->mode = compressionchunker_mode_normal; | |
|
241 | ||
|
242 | return result; | |
|
243 | } | |
|
244 | ||
|
245 | static ZstdCompressionChunkerIterator* ZstdCompressionChunker_finish(ZstdCompressionChunker* self) { | |
|
246 | ZstdCompressionChunkerIterator* result; | |
|
247 | ||
|
248 | if (self->finished) { | |
|
249 | PyErr_SetString(ZstdError, "cannot call finish() after compression finished"); | |
|
250 | return NULL; | |
|
251 | } | |
|
252 | ||
|
253 | if (self->inBuffer.obj) { | |
|
254 | PyErr_SetString(ZstdError, | |
|
255 | "cannot call finish() before consuming output from previous operation"); | |
|
256 | return NULL; | |
|
257 | } | |
|
258 | ||
|
259 | result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL); | |
|
260 | if (!result) { | |
|
261 | return NULL; | |
|
262 | } | |
|
263 | ||
|
264 | result->chunker = self; | |
|
265 | Py_INCREF(result->chunker); | |
|
266 | ||
|
267 | result->mode = compressionchunker_mode_finish; | |
|
268 | ||
|
269 | return result; | |
|
270 | } | |
|
271 | ||
|
272 | static ZstdCompressionChunkerIterator* ZstdCompressionChunker_flush(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) { | |
|
273 | ZstdCompressionChunkerIterator* result; | |
|
274 | ||
|
275 | if (self->finished) { | |
|
276 | PyErr_SetString(ZstdError, "cannot call flush() after compression finished"); | |
|
277 | return NULL; | |
|
278 | } | |
|
279 | ||
|
280 | if (self->inBuffer.obj) { | |
|
281 | PyErr_SetString(ZstdError, | |
|
282 | "cannot call flush() before consuming output from previous operation"); | |
|
283 | return NULL; | |
|
284 | } | |
|
285 | ||
|
286 | result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL); | |
|
287 | if (!result) { | |
|
288 | return NULL; | |
|
289 | } | |
|
290 | ||
|
291 | result->chunker = self; | |
|
292 | Py_INCREF(result->chunker); | |
|
293 | ||
|
294 | result->mode = compressionchunker_mode_flush; | |
|
295 | ||
|
296 | return result; | |
|
297 | } | |
|
298 | ||
|
299 | static PyMethodDef ZstdCompressionChunker_methods[] = { | |
|
300 | { "compress", (PyCFunction)ZstdCompressionChunker_compress, METH_VARARGS | METH_KEYWORDS, | |
|
301 | PyDoc_STR("compress data") }, | |
|
302 | { "finish", (PyCFunction)ZstdCompressionChunker_finish, METH_NOARGS, | |
|
303 | PyDoc_STR("finish compression operation") }, | |
|
304 | { "flush", (PyCFunction)ZstdCompressionChunker_flush, METH_VARARGS | METH_KEYWORDS, | |
|
305 | PyDoc_STR("finish compression operation") }, | |
|
306 | { NULL, NULL } | |
|
307 | }; | |
|
308 | ||
|
309 | PyTypeObject ZstdCompressionChunkerType = { | |
|
310 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
311 | "zstd.ZstdCompressionChunkerType", /* tp_name */ | |
|
312 | sizeof(ZstdCompressionChunker), /* tp_basicsize */ | |
|
313 | 0, /* tp_itemsize */ | |
|
314 | (destructor)ZstdCompressionChunker_dealloc, /* tp_dealloc */ | |
|
315 | 0, /* tp_print */ | |
|
316 | 0, /* tp_getattr */ | |
|
317 | 0, /* tp_setattr */ | |
|
318 | 0, /* tp_compare */ | |
|
319 | 0, /* tp_repr */ | |
|
320 | 0, /* tp_as_number */ | |
|
321 | 0, /* tp_as_sequence */ | |
|
322 | 0, /* tp_as_mapping */ | |
|
323 | 0, /* tp_hash */ | |
|
324 | 0, /* tp_call */ | |
|
325 | 0, /* tp_str */ | |
|
326 | 0, /* tp_getattro */ | |
|
327 | 0, /* tp_setattro */ | |
|
328 | 0, /* tp_as_buffer */ | |
|
329 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
|
330 | ZstdCompressionChunker__doc__, /* tp_doc */ | |
|
331 | 0, /* tp_traverse */ | |
|
332 | 0, /* tp_clear */ | |
|
333 | 0, /* tp_richcompare */ | |
|
334 | 0, /* tp_weaklistoffset */ | |
|
335 | 0, /* tp_iter */ | |
|
336 | 0, /* tp_iternext */ | |
|
337 | ZstdCompressionChunker_methods, /* tp_methods */ | |
|
338 | 0, /* tp_members */ | |
|
339 | 0, /* tp_getset */ | |
|
340 | 0, /* tp_base */ | |
|
341 | 0, /* tp_dict */ | |
|
342 | 0, /* tp_descr_get */ | |
|
343 | 0, /* tp_descr_set */ | |
|
344 | 0, /* tp_dictoffset */ | |
|
345 | 0, /* tp_init */ | |
|
346 | 0, /* tp_alloc */ | |
|
347 | PyType_GenericNew, /* tp_new */ | |
|
348 | }; | |
|
349 | ||
|
350 | void compressionchunker_module_init(PyObject* module) { | |
|
351 | Py_TYPE(&ZstdCompressionChunkerIteratorType) = &PyType_Type; | |
|
352 | if (PyType_Ready(&ZstdCompressionChunkerIteratorType) < 0) { | |
|
353 | return; | |
|
354 | } | |
|
355 | ||
|
356 | Py_TYPE(&ZstdCompressionChunkerType) = &PyType_Type; | |
|
357 | if (PyType_Ready(&ZstdCompressionChunkerType) < 0) { | |
|
358 | return; | |
|
359 | } | |
|
360 | } |
@@ -0,0 +1,44 | |||
|
1 | /* ****************************************************************** | |
|
2 | debug | |
|
3 | Part of FSE library | |
|
4 | Copyright (C) 2013-present, Yann Collet. | |
|
5 | ||
|
6 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |
|
7 | ||
|
8 | Redistribution and use in source and binary forms, with or without | |
|
9 | modification, are permitted provided that the following conditions are | |
|
10 | met: | |
|
11 | ||
|
12 | * Redistributions of source code must retain the above copyright | |
|
13 | notice, this list of conditions and the following disclaimer. | |
|
14 | * Redistributions in binary form must reproduce the above | |
|
15 | copyright notice, this list of conditions and the following disclaimer | |
|
16 | in the documentation and/or other materials provided with the | |
|
17 | distribution. | |
|
18 | ||
|
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
|
20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
|
21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
|
22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
|
23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
|
24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
|
25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
|
26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
|
27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
|
28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
|
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
|
30 | ||
|
31 | You can contact the author at : | |
|
32 | - Source repository : https://github.com/Cyan4973/FiniteStateEntropy | |
|
33 | ****************************************************************** */ | |
|
34 | ||
|
35 | ||
|
36 | /* | |
|
37 | * This module only hosts one global variable | |
|
38 | * which can be used to dynamically influence the verbosity of traces, | |
|
39 | * such as DEBUGLOG and RAWLOG | |
|
40 | */ | |
|
41 | ||
|
42 | #include "debug.h" | |
|
43 | ||
|
44 | int g_debuglevel = DEBUGLEVEL; |
@@ -0,0 +1,123 | |||
|
1 | /* ****************************************************************** | |
|
2 | debug | |
|
3 | Part of FSE library | |
|
4 | Copyright (C) 2013-present, Yann Collet. | |
|
5 | ||
|
6 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |
|
7 | ||
|
8 | Redistribution and use in source and binary forms, with or without | |
|
9 | modification, are permitted provided that the following conditions are | |
|
10 | met: | |
|
11 | ||
|
12 | * Redistributions of source code must retain the above copyright | |
|
13 | notice, this list of conditions and the following disclaimer. | |
|
14 | * Redistributions in binary form must reproduce the above | |
|
15 | copyright notice, this list of conditions and the following disclaimer | |
|
16 | in the documentation and/or other materials provided with the | |
|
17 | distribution. | |
|
18 | ||
|
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
|
20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
|
21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
|
22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
|
23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
|
24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
|
25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
|
26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
|
27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
|
28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
|
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
|
30 | ||
|
31 | You can contact the author at : | |
|
32 | - Source repository : https://github.com/Cyan4973/FiniteStateEntropy | |
|
33 | ****************************************************************** */ | |
|
34 | ||
|
35 | ||
|
36 | /* | |
|
37 | * The purpose of this header is to enable debug functions. | |
|
38 | * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, | |
|
39 | * and DEBUG_STATIC_ASSERT() for compile-time. | |
|
40 | * | |
|
41 | * By default, DEBUGLEVEL==0, which means run-time debug is disabled. | |
|
42 | * | |
|
43 | * Level 1 enables assert() only. | |
|
44 | * Starting level 2, traces can be generated and pushed to stderr. | |
|
45 | * The higher the level, the more verbose the traces. | |
|
46 | * | |
|
47 | * It's possible to dynamically adjust level using variable g_debug_level, | |
|
48 | * which is only declared if DEBUGLEVEL>=2, | |
|
49 | * and is a global variable, not multi-thread protected (use with care) | |
|
50 | */ | |
|
51 | ||
|
52 | #ifndef DEBUG_H_12987983217 | |
|
53 | #define DEBUG_H_12987983217 | |
|
54 | ||
|
55 | #if defined (__cplusplus) | |
|
56 | extern "C" { | |
|
57 | #endif | |
|
58 | ||
|
59 | ||
|
60 | /* static assert is triggered at compile time, leaving no runtime artefact, | |
|
61 | * but can only work with compile-time constants. | |
|
62 | * This variant can only be used inside a function. */ | |
|
63 | #define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1]) | |
|
64 | ||
|
65 | ||
|
66 | /* DEBUGLEVEL is expected to be defined externally, | |
|
67 | * typically through compiler command line. | |
|
68 | * Value must be a number. */ | |
|
69 | #ifndef DEBUGLEVEL | |
|
70 | # define DEBUGLEVEL 0 | |
|
71 | #endif | |
|
72 | ||
|
73 | /* recommended values for DEBUGLEVEL : | |
|
74 | * 0 : no debug, all run-time functions disabled | |
|
75 | * 1 : no display, enables assert() only | |
|
76 | * 2 : reserved, for currently active debug path | |
|
77 | * 3 : events once per object lifetime (CCtx, CDict, etc.) | |
|
78 | * 4 : events once per frame | |
|
79 | * 5 : events once per block | |
|
80 | * 6 : events once per sequence (verbose) | |
|
81 | * 7+: events at every position (*very* verbose) | |
|
82 | * | |
|
83 | * It's generally inconvenient to output traces > 5. | |
|
84 | * In which case, it's possible to selectively enable higher verbosity levels | |
|
85 | * by modifying g_debug_level. | |
|
86 | */ | |
|
87 | ||
|
88 | #if (DEBUGLEVEL>=1) | |
|
89 | # include <assert.h> | |
|
90 | #else | |
|
91 | # ifndef assert /* assert may be already defined, due to prior #include <assert.h> */ | |
|
92 | # define assert(condition) ((void)0) /* disable assert (default) */ | |
|
93 | # endif | |
|
94 | #endif | |
|
95 | ||
|
96 | #if (DEBUGLEVEL>=2) | |
|
97 | # include <stdio.h> | |
|
98 | extern int g_debuglevel; /* here, this variable is only declared, | |
|
99 | it actually lives in debug.c, | |
|
100 | and is shared by the whole process. | |
|
101 | It's typically used to enable very verbose levels | |
|
102 | on selective conditions (such as position in src) */ | |
|
103 | ||
|
104 | # define RAWLOG(l, ...) { \ | |
|
105 | if (l<=g_debuglevel) { \ | |
|
106 | fprintf(stderr, __VA_ARGS__); \ | |
|
107 | } } | |
|
108 | # define DEBUGLOG(l, ...) { \ | |
|
109 | if (l<=g_debuglevel) { \ | |
|
110 | fprintf(stderr, __FILE__ ": " __VA_ARGS__); \ | |
|
111 | fprintf(stderr, " \n"); \ | |
|
112 | } } | |
|
113 | #else | |
|
114 | # define RAWLOG(l, ...) {} /* disabled */ | |
|
115 | # define DEBUGLOG(l, ...) {} /* disabled */ | |
|
116 | #endif | |
|
117 | ||
|
118 | ||
|
119 | #if defined (__cplusplus) | |
|
120 | } | |
|
121 | #endif | |
|
122 | ||
|
123 | #endif /* DEBUG_H_12987983217 */ |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
@@ -1,88 +1,95 | |||
|
1 | 1 | # Files that just need to be migrated to the formatter. |
|
2 | 2 | # Do not add new files here! |
|
3 | 3 | mercurial/cext/dirs.c |
|
4 | 4 | mercurial/cext/manifest.c |
|
5 | 5 | mercurial/cext/osutil.c |
|
6 | 6 | mercurial/cext/revlog.c |
|
7 | 7 | # Vendored code that we should never format: |
|
8 | 8 | contrib/python-zstandard/c-ext/bufferutil.c |
|
9 | contrib/python-zstandard/c-ext/compressionchunker.c | |
|
9 | 10 | contrib/python-zstandard/c-ext/compressiondict.c |
|
10 | 11 | contrib/python-zstandard/c-ext/compressionparams.c |
|
11 | 12 | contrib/python-zstandard/c-ext/compressionreader.c |
|
12 | 13 | contrib/python-zstandard/c-ext/compressionwriter.c |
|
13 | 14 | contrib/python-zstandard/c-ext/compressobj.c |
|
14 | 15 | contrib/python-zstandard/c-ext/compressor.c |
|
15 | 16 | contrib/python-zstandard/c-ext/compressoriterator.c |
|
16 | 17 | contrib/python-zstandard/c-ext/constants.c |
|
17 | 18 | contrib/python-zstandard/c-ext/decompressionreader.c |
|
18 | 19 | contrib/python-zstandard/c-ext/decompressionwriter.c |
|
19 | 20 | contrib/python-zstandard/c-ext/decompressobj.c |
|
20 | 21 | contrib/python-zstandard/c-ext/decompressor.c |
|
21 | 22 | contrib/python-zstandard/c-ext/decompressoriterator.c |
|
22 | 23 | contrib/python-zstandard/c-ext/frameparams.c |
|
23 | 24 | contrib/python-zstandard/c-ext/python-zstandard.h |
|
24 | 25 | contrib/python-zstandard/zstd.c |
|
25 | 26 | contrib/python-zstandard/zstd/common/bitstream.h |
|
26 | 27 | contrib/python-zstandard/zstd/common/compiler.h |
|
27 | 28 | contrib/python-zstandard/zstd/common/cpu.h |
|
29 | contrib/python-zstandard/zstd/common/debug.c | |
|
30 | contrib/python-zstandard/zstd/common/debug.h | |
|
28 | 31 | contrib/python-zstandard/zstd/common/entropy_common.c |
|
29 | 32 | contrib/python-zstandard/zstd/common/error_private.c |
|
30 | 33 | contrib/python-zstandard/zstd/common/error_private.h |
|
31 | 34 | contrib/python-zstandard/zstd/common/fse_decompress.c |
|
32 | 35 | contrib/python-zstandard/zstd/common/fse.h |
|
33 | 36 | contrib/python-zstandard/zstd/common/huf.h |
|
34 | 37 | contrib/python-zstandard/zstd/common/mem.h |
|
35 | 38 | contrib/python-zstandard/zstd/common/pool.c |
|
36 | 39 | contrib/python-zstandard/zstd/common/pool.h |
|
37 | 40 | contrib/python-zstandard/zstd/common/threading.c |
|
38 | 41 | contrib/python-zstandard/zstd/common/threading.h |
|
39 | 42 | contrib/python-zstandard/zstd/common/xxhash.c |
|
40 | 43 | contrib/python-zstandard/zstd/common/xxhash.h |
|
41 | 44 | contrib/python-zstandard/zstd/common/zstd_common.c |
|
42 | 45 | contrib/python-zstandard/zstd/common/zstd_errors.h |
|
43 | 46 | contrib/python-zstandard/zstd/common/zstd_internal.h |
|
44 | 47 | contrib/python-zstandard/zstd/compress/fse_compress.c |
|
48 | contrib/python-zstandard/zstd/compress/hist.c | |
|
49 | contrib/python-zstandard/zstd/compress/hist.h | |
|
45 | 50 | contrib/python-zstandard/zstd/compress/huf_compress.c |
|
46 | 51 | contrib/python-zstandard/zstd/compress/zstd_compress.c |
|
47 | 52 | contrib/python-zstandard/zstd/compress/zstd_compress_internal.h |
|
48 | 53 | contrib/python-zstandard/zstd/compress/zstd_double_fast.c |
|
49 | 54 | contrib/python-zstandard/zstd/compress/zstd_double_fast.h |
|
50 | 55 | contrib/python-zstandard/zstd/compress/zstd_fast.c |
|
51 | 56 | contrib/python-zstandard/zstd/compress/zstd_fast.h |
|
52 | 57 | contrib/python-zstandard/zstd/compress/zstd_lazy.c |
|
53 | 58 | contrib/python-zstandard/zstd/compress/zstd_lazy.h |
|
54 | 59 | contrib/python-zstandard/zstd/compress/zstd_ldm.c |
|
55 | 60 | contrib/python-zstandard/zstd/compress/zstd_ldm.h |
|
56 | 61 | contrib/python-zstandard/zstd/compress/zstdmt_compress.c |
|
57 | 62 | contrib/python-zstandard/zstd/compress/zstdmt_compress.h |
|
58 | 63 | contrib/python-zstandard/zstd/compress/zstd_opt.c |
|
59 | 64 | contrib/python-zstandard/zstd/compress/zstd_opt.h |
|
60 | 65 | contrib/python-zstandard/zstd/decompress/huf_decompress.c |
|
61 | 66 | contrib/python-zstandard/zstd/decompress/zstd_decompress.c |
|
62 | 67 | contrib/python-zstandard/zstd/deprecated/zbuff_common.c |
|
63 | 68 | contrib/python-zstandard/zstd/deprecated/zbuff_compress.c |
|
64 | 69 | contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c |
|
65 | 70 | contrib/python-zstandard/zstd/deprecated/zbuff.h |
|
66 | 71 | contrib/python-zstandard/zstd/dictBuilder/cover.c |
|
72 | contrib/python-zstandard/zstd/dictBuilder/cover.h | |
|
67 | 73 | contrib/python-zstandard/zstd/dictBuilder/divsufsort.c |
|
68 | 74 | contrib/python-zstandard/zstd/dictBuilder/divsufsort.h |
|
75 | contrib/python-zstandard/zstd/dictBuilder/fastcover.c | |
|
69 | 76 | contrib/python-zstandard/zstd/dictBuilder/zdict.c |
|
70 | 77 | contrib/python-zstandard/zstd/dictBuilder/zdict.h |
|
71 | 78 | contrib/python-zstandard/zstd/zstd.h |
|
72 | 79 | hgext/fsmonitor/pywatchman/bser.c |
|
73 | 80 | mercurial/thirdparty/xdiff/xdiff.h |
|
74 | 81 | mercurial/thirdparty/xdiff/xdiffi.c |
|
75 | 82 | mercurial/thirdparty/xdiff/xdiffi.h |
|
76 | 83 | mercurial/thirdparty/xdiff/xemit.c |
|
77 | 84 | mercurial/thirdparty/xdiff/xemit.h |
|
78 | 85 | mercurial/thirdparty/xdiff/xhistogram.c |
|
79 | 86 | mercurial/thirdparty/xdiff/xinclude.h |
|
80 | 87 | mercurial/thirdparty/xdiff/xmacros.h |
|
81 | 88 | mercurial/thirdparty/xdiff/xmerge.c |
|
82 | 89 | mercurial/thirdparty/xdiff/xpatience.c |
|
83 | 90 | mercurial/thirdparty/xdiff/xprepare.c |
|
84 | 91 | mercurial/thirdparty/xdiff/xprepare.h |
|
85 | 92 | mercurial/thirdparty/xdiff/xtypes.h |
|
86 | 93 | mercurial/thirdparty/xdiff/xutils.c |
|
87 | 94 | mercurial/thirdparty/xdiff/xutils.h |
|
88 | 95 | mercurial/thirdparty/zope/interface/_zope_interface_coptimizations.c |
@@ -1,7 +1,10 | |||
|
1 | 1 | graft c-ext |
|
2 | graft debian | |
|
2 | 3 | graft zstd |
|
3 | 4 | graft tests |
|
4 | 5 | include make_cffi.py |
|
5 | 6 | include setup_zstd.py |
|
6 | 7 | include zstd.c |
|
8 | include zstd_cffi.py | |
|
7 | 9 | include LICENSE |
|
10 | include NEWS.rst |
@@ -1,338 +1,456 | |||
|
1 | 1 | =============== |
|
2 | 2 | Version History |
|
3 | 3 | =============== |
|
4 | 4 | |
|
5 | 5 | 1.0.0 (not yet released) |
|
6 | 6 | ======================== |
|
7 | 7 | |
|
8 | 8 | Actions Blocking Release |
|
9 | 9 | ------------------------ |
|
10 | 10 | |
|
11 | 11 | * compression and decompression APIs that support ``io.rawIOBase`` interface |
|
12 | 12 | (#13). |
|
13 | 13 | * Refactor module names so C and CFFI extensions live under ``zstandard`` |
|
14 | 14 | package. |
|
15 | 15 | * Overall API design review. |
|
16 | 16 | * Use Python allocator where possible. |
|
17 | 17 | * Figure out what to do about experimental APIs not implemented by CFFI. |
|
18 | 18 | * APIs for auto adjusting compression parameters based on input size. e.g. |
|
19 | 19 | clamping the window log so it isn't too large for input. |
|
20 | 20 | * Consider allowing compressor and decompressor instances to be thread safe, |
|
21 | 21 | support concurrent operations. Or track when an operation is in progress and |
|
22 | 22 | refuse to let concurrent operations use the same instance. |
|
23 | 23 | * Support for magic-less frames for all decompression operations (``decompress()`` |
|
24 | 24 | doesn't work due to sniffing the content size and the lack of a ZSTD API to |
|
25 | 25 | sniff magic-less frames - this should be fixed in 1.3.5.). |
|
26 | 26 | * Audit for complete flushing when ending compression streams. |
|
27 | 27 | * Deprecate legacy APIs. |
|
28 | 28 | * Audit for ability to control read/write sizes on all APIs. |
|
29 | 29 | * Detect memory leaks via bench.py. |
|
30 | 30 | * Remove low-level compression parameters from ``ZstdCompressor.__init__`` and |
|
31 | 31 | require use of ``CompressionParameters``. |
|
32 | 32 | * Expose ``ZSTD_getFrameProgression()`` from more compressor types. |
|
33 | * Support modifying compression parameters mid operation when supported by | |
|
34 | zstd API. | |
|
35 | * Expose ``ZSTD_CLEVEL_DEFAULT`` constant. | |
|
36 | * Support ``ZSTD_p_forceAttachDict`` compression parameter. | |
|
37 | * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving | |
|
38 | compression parameters. | |
|
39 | * Consider exposing ``ZSTDMT_toFlushNow()``. | |
|
40 | * Expose ``ZDICT_trainFromBuffer_fastCover()``, | |
|
41 | ``ZDICT_optimizeTrainFromBuffer_fastCover``. | |
|
42 | * Expose and enforce ``ZSTD_minCLevel()`` for minimum compression level. | |
|
43 | * Consider a ``chunker()`` API for decompression. | |
|
44 | * Consider stats for ``chunker()`` API, including finding the last consumed | |
|
45 | offset of input data. | |
|
33 | 46 | |
|
34 | 47 | Other Actions Not Blocking Release |
|
35 | 48 | --------------------------------------- |
|
36 | 49 | |
|
37 | 50 | * Support for block compression APIs. |
|
38 | 51 | * API for ensuring max memory ceiling isn't exceeded. |
|
39 | 52 | * Move off nose for testing. |
|
40 | 53 | |
|
54 | 0.10.1 (released 2018-10-08) | |
|
55 | ============================ | |
|
56 | ||
|
57 | Backwards Compatibility Notes | |
|
58 | ----------------------------- | |
|
59 | ||
|
60 | * ``ZstdCompressor.stream_reader().closed`` is now a property instead of a | |
|
61 | method (#58). | |
|
62 | * ``ZstdDecompressor.stream_reader().closed`` is now a property instead of a | |
|
63 | method (#58). | |
|
64 | ||
|
65 | Changes | |
|
66 | ------- | |
|
67 | ||
|
68 | * Stop attempting to package Python 3.6 for Miniconda. The latest version of | |
|
69 | Miniconda is using Python 3.7. The Python 3.6 Miniconda packages were a lie | |
|
70 | since this were built against Python 3.7. | |
|
71 | * ``ZstdCompressor.stream_reader()``'s and ``ZstdDecompressor.stream_reader()``'s | |
|
72 | ``closed`` attribute is now a read-only property instead of a method. This now | |
|
73 | properly matches the ``IOBase`` API and allows instances to be used in more | |
|
74 | places that accept ``IOBase`` instances. | |
|
75 | ||
|
76 | 0.10.0 (released 2018-10-08) | |
|
77 | ============================ | |
|
78 | ||
|
79 | Backwards Compatibility Notes | |
|
80 | ----------------------------- | |
|
81 | ||
|
82 | * ``ZstdDecompressor.stream_reader().read()`` now consistently requires an | |
|
83 | argument in both the C and CFFI backends. Before, the CFFI implementation | |
|
84 | would assume a default value of ``-1``, which was later rejected. | |
|
85 | * The ``compress_literals`` argument and attribute has been removed from | |
|
86 | ``zstd.ZstdCompressionParameters`` because it was removed by the zstd 1.3.5 | |
|
87 | API. | |
|
88 | * ``ZSTD_CCtx_setParametersUsingCCtxParams()`` is no longer called on every | |
|
89 | operation performed against ``ZstdCompressor`` instances. The reason for this | |
|
90 | change is that the zstd 1.3.5 API no longer allows this without calling | |
|
91 | ``ZSTD_CCtx_resetParameters()`` first. But if we called | |
|
92 | ``ZSTD_CCtx_resetParameters()`` on every operation, we'd have to redo | |
|
93 | potentially expensive setup when using dictionaries. We now call | |
|
94 | ``ZSTD_CCtx_reset()`` on every operation and don't attempt to change | |
|
95 | compression parameters. | |
|
96 | * Objects returned by ``ZstdCompressor.stream_reader()`` no longer need to be | |
|
97 | used as a context manager. The context manager interface still exists and its | |
|
98 | behavior is unchanged. | |
|
99 | * Objects returned by ``ZstdDecompressor.stream_reader()`` no longer need to be | |
|
100 | used as a context manager. The context manager interface still exists and its | |
|
101 | behavior is unchanged. | |
|
102 | ||
|
103 | Bug Fixes | |
|
104 | --------- | |
|
105 | ||
|
106 | * ``ZstdDecompressor.decompressobj().decompress()`` should now return all data | |
|
107 | from internal buffers in more scenarios. Before, it was possible for data to | |
|
108 | remain in internal buffers. This data would be emitted on a subsequent call | |
|
109 | to ``decompress()``. The overall output stream would still be valid. But if | |
|
110 | callers were expecting input data to exactly map to output data (say the | |
|
111 | producer had used ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` and was attempting to | |
|
112 | map input chunks to output chunks), then the previous behavior would be | |
|
113 | wrong. The new behavior is such that output from | |
|
114 | ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` fed into ``decompressobj().decompress()`` | |
|
115 | should produce all available compressed input. | |
|
116 | * ``ZstdDecompressor.stream_reader().read()`` should no longer segfault after | |
|
117 | a previous context manager resulted in error (#56). | |
|
118 | * ``ZstdCompressor.compressobj().flush(COMPRESSOBJ_FLUSH_BLOCK)`` now returns | |
|
119 | all data necessary to flush a block. Before, it was possible for the | |
|
120 | ``flush()`` to not emit all data necessary to fully represent a block. This | |
|
121 | would mean decompressors wouldn't be able to decompress all data that had been | |
|
122 | fed into the compressor and ``flush()``ed. (#55). | |
|
123 | ||
|
124 | New Features | |
|
125 | ------------ | |
|
126 | ||
|
127 | * New module constants ``BLOCKSIZELOG_MAX``, ``BLOCKSIZE_MAX``, | |
|
128 | ``TARGETLENGTH_MAX`` that expose constants from libzstd. | |
|
129 | * New ``ZstdCompressor.chunker()`` API for manually feeding data into a | |
|
130 | compressor and emitting chunks of a fixed size. Like ``compressobj()``, the | |
|
131 | API doesn't impose restrictions on the input or output types for the | |
|
132 | data streams. Unlike ``compressobj()``, it ensures output chunks are of a | |
|
133 | fixed size. This makes this API useful when the compressed output is being | |
|
134 | fed into an I/O layer, where uniform write sizes are useful. | |
|
135 | * ``ZstdCompressor.stream_reader()`` no longer needs to be used as a context | |
|
136 | manager (#34). | |
|
137 | * ``ZstdDecompressor.stream_reader()`` no longer needs to be used as a context | |
|
138 | manager (#34). | |
|
139 | * Bundled zstandard library upgraded from 1.3.4 to 1.3.6. | |
|
140 | ||
|
141 | Changes | |
|
142 | ------- | |
|
143 | ||
|
144 | * Added ``zstd_cffi.py`` and ``NEWS.rst`` to ``MANIFEST.in``. | |
|
145 | * ``zstandard.__version__`` is now defined (#50). | |
|
146 | * Upgrade pip, setuptools, wheel, and cibuildwheel packages to latest versions. | |
|
147 | * Upgrade various packages used in CI to latest versions. Notably tox (in | |
|
148 | order to support Python 3.7). | |
|
149 | * Use relative paths in setup.py to appease Python 3.7 (#51). | |
|
150 | * Added CI for Python 3.7. | |
|
151 | ||
|
152 | 0.9.1 (released 2018-06-04) | |
|
153 | =========================== | |
|
154 | ||
|
155 | * Debian packaging support. | |
|
156 | * Fix typo in setup.py (#44). | |
|
157 | * Support building with mingw compiler (#46). | |
|
158 | ||
|
41 | 159 | 0.9.0 (released 2018-04-08) |
|
42 | 160 | =========================== |
|
43 | 161 | |
|
44 | 162 | Backwards Compatibility Notes |
|
45 | 163 | ----------------------------- |
|
46 | 164 | |
|
47 | 165 | * CFFI 1.11 or newer is now required (previous requirement was 1.8). |
|
48 | 166 | * The primary module is now ``zstandard``. Please change imports of ``zstd`` |
|
49 | 167 | and ``zstd_cffi`` to ``import zstandard``. See the README for more. Support |
|
50 | 168 | for importing the old names will be dropped in the next release. |
|
51 | 169 | * ``ZstdCompressor.read_from()`` and ``ZstdDecompressor.read_from()`` have |
|
52 | 170 | been renamed to ``read_to_iter()``. ``read_from()`` is aliased to the new |
|
53 | 171 | name and will be deleted in a future release. |
|
54 | 172 | * Support for Python 2.6 has been removed. |
|
55 | 173 | * Support for Python 3.3 has been removed. |
|
56 | 174 | * The ``selectivity`` argument to ``train_dictionary()`` has been removed, as |
|
57 | 175 | the feature disappeared from zstd 1.3. |
|
58 | 176 | * Support for legacy dictionaries has been removed. Cover dictionaries are now |
|
59 | 177 | the default. ``train_cover_dictionary()`` has effectively been renamed to |
|
60 | 178 | ``train_dictionary()``. |
|
61 | 179 | * The ``allow_empty`` argument from ``ZstdCompressor.compress()`` has been |
|
62 | 180 | deleted and the method now allows empty inputs to be compressed by default. |
|
63 | 181 | * ``estimate_compression_context_size()`` has been removed. Use |
|
64 | 182 | ``CompressionParameters.estimated_compression_context_size()`` instead. |
|
65 | 183 | * ``get_compression_parameters()`` has been removed. Use |
|
66 | 184 | ``CompressionParameters.from_level()`` instead. |
|
67 | 185 | * The arguments to ``CompressionParameters.__init__()`` have changed. If you |
|
68 | 186 | were using positional arguments before, the positions now map to different |
|
69 | 187 | arguments. It is recommended to use keyword arguments to construct |
|
70 | 188 | ``CompressionParameters`` instances. |
|
71 | 189 | * ``TARGETLENGTH_MAX`` constant has been removed (it disappeared from zstandard |
|
72 | 190 | 1.3.4). |
|
73 | 191 | * ``ZstdCompressor.write_to()`` and ``ZstdDecompressor.write_to()`` have been |
|
74 | 192 | renamed to ``ZstdCompressor.stream_writer()`` and |
|
75 | 193 | ``ZstdDecompressor.stream_writer()``, respectively. The old names are still |
|
76 | 194 | aliased, but will be removed in the next major release. |
|
77 | 195 | * Content sizes are written into frame headers by default |
|
78 | 196 | (``ZstdCompressor(write_content_size=True)`` is now the default). |
|
79 | 197 | * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters`` |
|
80 | 198 | for consistency with other types. The old name is an alias and will be removed |
|
81 | 199 | in the next major release. |
|
82 | 200 | |
|
83 | 201 | Bug Fixes |
|
84 | 202 | --------- |
|
85 | 203 | |
|
86 | 204 | * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40) (from 0.8.2). |
|
87 | 205 | * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35) (from 0.8.2). |
|
88 | 206 | * Fixed memory leak of ``ZSTD_DDict`` instances in CFFI's ``ZstdDecompressor``. |
|
89 | 207 | |
|
90 | 208 | New Features |
|
91 | 209 | ------------ |
|
92 | 210 | |
|
93 |
* Bundl |
|
|
211 | * Bundled zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various | |
|
94 | 212 | bug fixes and performance improvements. It also gives us access to newer |
|
95 | 213 | features. |
|
96 | 214 | * Support for negative compression levels. |
|
97 | 215 | * Support for *long distance matching* (facilitates compression ratios that approach |
|
98 | 216 | LZMA). |
|
99 | 217 | * Supporting for reading empty zstandard frames (with an embedded content size |
|
100 | 218 | of 0). |
|
101 | 219 | * Support for writing and partial support for reading zstandard frames without a |
|
102 | 220 | magic header. |
|
103 | 221 | * New ``stream_reader()`` API that exposes the ``io.RawIOBase`` interface (allows |
|
104 | 222 | you to ``.read()`` from a file-like object). |
|
105 | 223 | * Several minor features, bug fixes, and performance enhancements. |
|
106 | 224 | * Wheels for Linux and macOS are now provided with releases. |
|
107 | 225 | |
|
108 | 226 | Changes |
|
109 | 227 | ------- |
|
110 | 228 | |
|
111 | 229 | * Functions accepting bytes data now use the buffer protocol and can accept |
|
112 | 230 | more types (like ``memoryview`` and ``bytearray``) (#26). |
|
113 | 231 | * Add #includes so compilation on OS X and BSDs works (#20). |
|
114 | 232 | * New ``ZstdDecompressor.stream_reader()`` API to obtain a read-only i/o stream |
|
115 | 233 | of decompressed data for a source. |
|
116 | 234 | * New ``ZstdCompressor.stream_reader()`` API to obtain a read-only i/o stream of |
|
117 | 235 | compressed data for a source. |
|
118 | 236 | * Renamed ``ZstdDecompressor.read_from()`` to ``ZstdDecompressor.read_to_iter()``. |
|
119 | 237 | The old name is still available. |
|
120 | 238 | * Renamed ``ZstdCompressor.read_from()`` to ``ZstdCompressor.read_to_iter()``. |
|
121 | 239 | ``read_from()`` is still available at its old location. |
|
122 | 240 | * Introduce the ``zstandard`` module to import and re-export the C or CFFI |
|
123 | 241 | *backend* as appropriate. Behavior can be controlled via the |
|
124 | 242 | ``PYTHON_ZSTANDARD_IMPORT_POLICY`` environment variable. See README for |
|
125 | 243 | usage info. |
|
126 | 244 | * Vendored version of zstd upgraded to 1.3.4. |
|
127 | 245 | * Added module constants ``CONTENTSIZE_UNKNOWN`` and ``CONTENTSIZE_ERROR``. |
|
128 | 246 | * Add ``STRATEGY_BTULTRA`` compression strategy constant. |
|
129 | 247 | * Switch from deprecated ``ZSTD_getDecompressedSize()`` to |
|
130 | 248 | ``ZSTD_getFrameContentSize()`` replacement. |
|
131 | 249 | * ``ZstdCompressor.compress()`` can now compress empty inputs without requiring |
|
132 | 250 | special handling. |
|
133 | 251 | * ``ZstdCompressor`` and ``ZstdDecompressor`` now have a ``memory_size()`` |
|
134 | 252 | method for determining the current memory utilization of the underlying zstd |
|
135 | 253 | primitive. |
|
136 | 254 | * ``train_dictionary()`` has new arguments and functionality for trying multiple |
|
137 | 255 | variations of COVER parameters and selecting the best one. |
|
138 | 256 | * Added module constants ``LDM_MINMATCH_MIN``, ``LDM_MINMATCH_MAX``, and |
|
139 | 257 | ``LDM_BUCKETSIZELOG_MAX``. |
|
140 | 258 | * Converted all consumers to the zstandard *new advanced API*, which uses |
|
141 | 259 | ``ZSTD_compress_generic()`` |
|
142 | 260 | * ``CompressionParameters.__init__`` now accepts several more arguments, |
|
143 | 261 | including support for *long distance matching*. |
|
144 | 262 | * ``ZstdCompressionDict.__init__`` now accepts a ``dict_type`` argument that |
|
145 | 263 | controls how the dictionary should be interpreted. This can be used to |
|
146 | 264 | force the use of *content-only* dictionaries or to require the presence |
|
147 | 265 | of the dictionary magic header. |
|
148 | 266 | * ``ZstdCompressionDict.precompute_compress()`` can be used to precompute the |
|
149 | 267 | compression dictionary so it can efficiently be used with multiple |
|
150 | 268 | ``ZstdCompressor`` instances. |
|
151 | 269 | * Digested dictionaries are now stored in ``ZstdCompressionDict`` instances, |
|
152 | 270 | created automatically on first use, and automatically reused by all |
|
153 | 271 | ``ZstdDecompressor`` instances bound to that dictionary. |
|
154 | 272 | * All meaningful functions now accept keyword arguments. |
|
155 | 273 | * ``ZstdDecompressor.decompressobj()`` now accepts a ``write_size`` argument |
|
156 | 274 | to control how much work to perform on every decompressor invocation. |
|
157 | 275 | * ``ZstdCompressor.write_to()`` now exposes a ``tell()``, which exposes the |
|
158 | 276 | total number of bytes written so far. |
|
159 | 277 | * ``ZstdDecompressor.stream_reader()`` now supports ``seek()`` when moving |
|
160 | 278 | forward in the stream. |
|
161 | 279 | * Removed ``TARGETLENGTH_MAX`` constant. |
|
162 | 280 | * Added ``frame_header_size(data)`` function. |
|
163 | 281 | * Added ``frame_content_size(data)`` function. |
|
164 | 282 | * Consumers of ``ZSTD_decompress*`` have been switched to the new *advanced |
|
165 | 283 | decompression* API. |
|
166 | 284 | * ``ZstdCompressor`` and ``ZstdCompressionParams`` can now be constructed with |
|
167 | 285 | negative compression levels. |
|
168 | 286 | * ``ZstdDecompressor`` now accepts a ``max_window_size`` argument to limit the |
|
169 | 287 | amount of memory required for decompression operations. |
|
170 | 288 | * ``FORMAT_ZSTD1`` and ``FORMAT_ZSTD1_MAGICLESS`` constants to be used with |
|
171 | 289 | the ``format`` compression parameter to control whether the frame magic |
|
172 | 290 | header is written. |
|
173 | 291 | * ``ZstdDecompressor`` now accepts a ``format`` argument to control the |
|
174 | 292 | expected frame format. |
|
175 | 293 | * ``ZstdCompressor`` now has a ``frame_progression()`` method to return |
|
176 | 294 | information about the current compression operation. |
|
177 | 295 | * Error messages in CFFI no longer have ``b''`` literals. |
|
178 | 296 | * Compiler warnings and underlying overflow issues on 32-bit platforms have been |
|
179 | 297 | fixed. |
|
180 | 298 | * Builds in CI now build with compiler warnings as errors. This should hopefully |
|
181 | 299 | fix new compiler warnings from being introduced. |
|
182 | 300 | * Make ``ZstdCompressor(write_content_size=True)`` and |
|
183 | 301 | ``CompressionParameters(write_content_size=True)`` the default. |
|
184 | 302 | * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``. |
|
185 | 303 | |
|
186 | 304 | 0.8.2 (released 2018-02-22) |
|
187 | 305 | --------------------------- |
|
188 | 306 | |
|
189 | 307 | * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40). |
|
190 | 308 | * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35). |
|
191 | 309 | |
|
192 | 310 | 0.8.1 (released 2017-04-08) |
|
193 | 311 | --------------------------- |
|
194 | 312 | |
|
195 | 313 | * Add #includes so compilation on OS X and BSDs works (#20). |
|
196 | 314 | |
|
197 | 315 | 0.8.0 (released 2017-03-08) |
|
198 | 316 | =========================== |
|
199 | 317 | |
|
200 | 318 | * CompressionParameters now has a estimated_compression_context_size() method. |
|
201 | 319 | zstd.estimate_compression_context_size() is now deprecated and slated for |
|
202 | 320 | removal. |
|
203 | 321 | * Implemented a lot of fuzzing tests. |
|
204 | 322 | * CompressionParameters instances now perform extra validation by calling |
|
205 | 323 | ZSTD_checkCParams() at construction time. |
|
206 | 324 | * multi_compress_to_buffer() API for compressing multiple inputs as a |
|
207 | 325 | single operation, as efficiently as possible. |
|
208 | 326 | * ZSTD_CStream instances are now used across multiple operations on |
|
209 | 327 | ZstdCompressor instances, resulting in much better performance for |
|
210 | 328 | APIs that do streaming. |
|
211 | 329 | * ZSTD_DStream instances are now used across multiple operations on |
|
212 | 330 | ZstdDecompressor instances, resulting in much better performance for |
|
213 | 331 | APIs that do streaming. |
|
214 | 332 | * train_dictionary() now releases the GIL. |
|
215 | 333 | * Support for training dictionaries using the COVER algorithm. |
|
216 | 334 | * multi_decompress_to_buffer() API for decompressing multiple frames as a |
|
217 | 335 | single operation, as efficiently as possible. |
|
218 | 336 | * Support for multi-threaded compression. |
|
219 | 337 | * Disable deprecation warnings when compiling CFFI module. |
|
220 | 338 | * Fixed memory leak in train_dictionary(). |
|
221 | 339 | * Removed DictParameters type. |
|
222 | 340 | * train_dictionary() now accepts keyword arguments instead of a |
|
223 | 341 | DictParameters instance to control dictionary generation. |
|
224 | 342 | |
|
225 | 343 | 0.7.0 (released 2017-02-07) |
|
226 | 344 | =========================== |
|
227 | 345 | |
|
228 | 346 | * Added zstd.get_frame_parameters() to obtain info about a zstd frame. |
|
229 | 347 | * Added ZstdDecompressor.decompress_content_dict_chain() for efficient |
|
230 | 348 | decompression of *content-only dictionary chains*. |
|
231 | 349 | * CFFI module fully implemented; all tests run against both C extension and |
|
232 | 350 | CFFI implementation. |
|
233 | 351 | * Vendored version of zstd updated to 1.1.3. |
|
234 | 352 | * Use ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference() |
|
235 | 353 | to avoid extra memory allocation of dict data. |
|
236 | 354 | * Add function names to error messages (by using ":name" in PyArg_Parse* |
|
237 | 355 | functions). |
|
238 | 356 | * Reuse decompression context across operations. Previously, we created a |
|
239 | 357 | new ZSTD_DCtx for each decompress(). This was measured to slow down |
|
240 | 358 | decompression by 40-200MB/s. The API guarantees say ZstdDecompressor |
|
241 | 359 | is not thread safe. So we reuse the ZSTD_DCtx across operations and make |
|
242 | 360 | things faster in the process. |
|
243 | 361 | * ZstdCompressor.write_to()'s compress() and flush() methods now return number |
|
244 | 362 | of bytes written. |
|
245 | 363 | * ZstdDecompressor.write_to()'s write() method now returns the number of bytes |
|
246 | 364 | written to the underlying output object. |
|
247 | 365 | * CompressionParameters instances now expose their values as attributes. |
|
248 | 366 | * CompressionParameters instances no longer are subscriptable nor behave |
|
249 | 367 | as tuples (backwards incompatible). Use attributes to obtain values. |
|
250 | 368 | * DictParameters instances now expose their values as attributes. |
|
251 | 369 | |
|
252 | 370 | 0.6.0 (released 2017-01-14) |
|
253 | 371 | =========================== |
|
254 | 372 | |
|
255 | 373 | * Support for legacy zstd protocols (build time opt in feature). |
|
256 | 374 | * Automation improvements to test against Python 3.6, latest versions |
|
257 | 375 | of Tox, more deterministic AppVeyor behavior. |
|
258 | 376 | * CFFI "parser" improved to use a compiler preprocessor instead of rewriting |
|
259 | 377 | source code manually. |
|
260 | 378 | * Vendored version of zstd updated to 1.1.2. |
|
261 | 379 | * Documentation improvements. |
|
262 | 380 | * Introduce a bench.py script for performing (crude) benchmarks. |
|
263 | 381 | * ZSTD_CCtx instances are now reused across multiple compress() operations. |
|
264 | 382 | * ZstdCompressor.write_to() now has a flush() method. |
|
265 | 383 | * ZstdCompressor.compressobj()'s flush() method now accepts an argument to |
|
266 | 384 | flush a block (as opposed to ending the stream). |
|
267 | 385 | * Disallow compress(b'') when writing content sizes by default (issue #11). |
|
268 | 386 | |
|
269 | 387 | 0.5.2 (released 2016-11-12) |
|
270 | 388 | =========================== |
|
271 | 389 | |
|
272 | 390 | * more packaging fixes for source distribution |
|
273 | 391 | |
|
274 | 392 | 0.5.1 (released 2016-11-12) |
|
275 | 393 | =========================== |
|
276 | 394 | |
|
277 | 395 | * setup_zstd.py is included in the source distribution |
|
278 | 396 | |
|
279 | 397 | 0.5.0 (released 2016-11-10) |
|
280 | 398 | =========================== |
|
281 | 399 | |
|
282 | 400 | * Vendored version of zstd updated to 1.1.1. |
|
283 | 401 | * Continuous integration for Python 3.6 and 3.7 |
|
284 | 402 | * Continuous integration for Conda |
|
285 | 403 | * Added compression and decompression APIs providing similar interfaces |
|
286 | 404 | to the standard library ``zlib`` and ``bz2`` modules. This allows |
|
287 | 405 | coding to a common interface. |
|
288 | 406 | * ``zstd.__version__` is now defined. |
|
289 | 407 | * ``read_from()`` on various APIs now accepts objects implementing the buffer |
|
290 | 408 | protocol. |
|
291 | 409 | * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers |
|
292 | 410 | to pass in an existing buffer with a header without having to create a |
|
293 | 411 | slice or a new object. |
|
294 | 412 | * Implemented ``ZstdCompressionDict.as_bytes()``. |
|
295 | 413 | * Python's memory allocator is now used instead of ``malloc()``. |
|
296 | 414 | * Low-level zstd data structures are reused in more instances, cutting down |
|
297 | 415 | on overhead for certain operations. |
|
298 | 416 | * ``distutils`` boilerplate for obtaining an ``Extension`` instance |
|
299 | 417 | has now been refactored into a standalone ``setup_zstd.py`` file. This |
|
300 | 418 | allows other projects with ``setup.py`` files to reuse the |
|
301 | 419 | ``distutils`` code for this project without copying code. |
|
302 | 420 | * The monolithic ``zstd.c`` file has been split into a header file defining |
|
303 | 421 | types and separate ``.c`` source files for the implementation. |
|
304 | 422 | |
|
305 | 423 | Older History |
|
306 | 424 | ============= |
|
307 | 425 | |
|
308 | 426 | 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a |
|
309 | 427 | Python extension for use by the Mercurial project. A very hacky prototype |
|
310 | 428 | is sent to the mercurial-devel list for RFC. |
|
311 | 429 | |
|
312 | 430 | 2016-09-03 - Most functionality from Zstandard C API implemented. Source |
|
313 | 431 | code published on https://github.com/indygreg/python-zstandard. Travis-CI |
|
314 | 432 | automation configured. 0.0.1 release on PyPI. |
|
315 | 433 | |
|
316 | 434 | 2016-09-05 - After the API was rounded out a bit and support for Python |
|
317 | 435 | 2.6 and 2.7 was added, version 0.1 was released to PyPI. |
|
318 | 436 | |
|
319 | 437 | 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2 |
|
320 | 438 | was released to PyPI. |
|
321 | 439 | |
|
322 | 440 | 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor |
|
323 | 441 | now accepts arguments controlling frame parameters. The source size can now |
|
324 | 442 | be declared when performing streaming compression. ZstdDecompressor.decompress() |
|
325 | 443 | is implemented. Compression dictionaries are now cached when using the simple |
|
326 | 444 | compression and decompression APIs. Memory size APIs added. |
|
327 | 445 | ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been |
|
328 | 446 | implemented. This rounds out the major compression/decompression APIs planned |
|
329 | 447 | by the author. |
|
330 | 448 | |
|
331 | 449 | 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully |
|
332 | 450 | decoding a zstd frame (issue #2). |
|
333 | 451 | |
|
334 | 452 | 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and |
|
335 | 453 | write buffer sizes, and a few bug fixes involving failure to read/write |
|
336 | 454 | all data when buffer sizes were too small to hold remaining data. |
|
337 | 455 | |
|
338 | 456 | 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements. |
@@ -1,1420 +1,1495 | |||
|
1 | 1 | ================ |
|
2 | 2 | python-zstandard |
|
3 | 3 | ================ |
|
4 | 4 | |
|
5 | 5 | This project provides Python bindings for interfacing with the |
|
6 | 6 | `Zstandard <http://www.zstd.net>`_ compression library. A C extension |
|
7 | 7 | and CFFI interface are provided. |
|
8 | 8 | |
|
9 | 9 | The primary goal of the project is to provide a rich interface to the |
|
10 | 10 | underlying C API through a Pythonic interface while not sacrificing |
|
11 | 11 | performance. This means exposing most of the features and flexibility |
|
12 | 12 | of the C API while not sacrificing usability or safety that Python provides. |
|
13 | 13 | |
|
14 | 14 | The canonical home for this project lives in a Mercurial repository run by |
|
15 | 15 | the author. For convenience, that repository is frequently synchronized to |
|
16 | 16 | https://github.com/indygreg/python-zstandard. |
|
17 | 17 | |
|
18 | 18 | | |ci-status| |win-ci-status| |
|
19 | 19 | |
|
20 | 20 | Requirements |
|
21 | 21 | ============ |
|
22 | 22 | |
|
23 | 23 | This extension is designed to run with Python 2.7, 3.4, 3.5, and 3.6 |
|
24 | 24 | on common platforms (Linux, Windows, and OS X). x86 and x86_64 are well-tested |
|
25 | 25 | on Windows. Only x86_64 is well-tested on Linux and macOS. |
|
26 | 26 | |
|
27 | 27 | Installing |
|
28 | 28 | ========== |
|
29 | 29 | |
|
30 | 30 | This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard. |
|
31 | 31 | So, to install this package:: |
|
32 | 32 | |
|
33 | 33 | $ pip install zstandard |
|
34 | 34 | |
|
35 | 35 | Binary wheels are made available for some platforms. If you need to |
|
36 | 36 | install from a source distribution, all you should need is a working C |
|
37 | 37 | compiler and the Python development headers/libraries. On many Linux |
|
38 | 38 | distributions, you can install a ``python-dev`` or ``python-devel`` |
|
39 | 39 | package to provide these dependencies. |
|
40 | 40 | |
|
41 | 41 | Packages are also uploaded to Anaconda Cloud at |
|
42 | 42 | https://anaconda.org/indygreg/zstandard. See that URL for how to install |
|
43 | 43 | this package with ``conda``. |
|
44 | 44 | |
|
45 | 45 | Performance |
|
46 | 46 | =========== |
|
47 | 47 | |
|
48 | 48 | zstandard is a highly tunable compression algorithm. In its default settings |
|
49 | 49 | (compression level 3), it will be faster at compression and decompression and |
|
50 | 50 | will have better compression ratios than zlib on most data sets. When tuned |
|
51 | 51 | for speed, it approaches lz4's speed and ratios. When tuned for compression |
|
52 | 52 | ratio, it approaches lzma ratios and compression speed, but decompression |
|
53 | 53 | speed is much faster. See the official zstandard documentation for more. |
|
54 | 54 | |
|
55 | 55 | zstandard and this library support multi-threaded compression. There is a |
|
56 | 56 | mechanism to compress large inputs using multiple threads. |
|
57 | 57 | |
|
58 | 58 | The performance of this library is usually very similar to what the zstandard |
|
59 | 59 | C API can deliver. Overhead in this library is due to general Python overhead |
|
60 | 60 | and can't easily be avoided by *any* zstandard Python binding. This library |
|
61 | 61 | exposes multiple APIs for performing compression and decompression so callers |
|
62 | 62 | can pick an API suitable for their need. Contrast with the compression |
|
63 | 63 | modules in Python's standard library (like ``zlib``), which only offer limited |
|
64 | 64 | mechanisms for performing operations. The API flexibility means consumers can |
|
65 | 65 | choose to use APIs that facilitate zero copying or minimize Python object |
|
66 | 66 | creation and garbage collection overhead. |
|
67 | 67 | |
|
68 | 68 | This library is capable of single-threaded throughputs well over 1 GB/s. For |
|
69 | 69 | exact numbers, measure yourself. The source code repository has a ``bench.py`` |
|
70 | 70 | script that can be used to measure things. |
|
71 | 71 | |
|
72 | 72 | API |
|
73 | 73 | === |
|
74 | 74 | |
|
75 | 75 | To interface with Zstandard, simply import the ``zstandard`` module:: |
|
76 | 76 | |
|
77 | 77 | import zstandard |
|
78 | 78 | |
|
79 | 79 | It is a popular convention to alias the module as a different name for |
|
80 | 80 | brevity:: |
|
81 | 81 | |
|
82 | 82 | import zstandard as zstd |
|
83 | 83 | |
|
84 | 84 | This module attempts to import and use either the C extension or CFFI |
|
85 | 85 | implementation. On Python platforms known to support C extensions (like |
|
86 | 86 | CPython), it raises an ImportError if the C extension cannot be imported. |
|
87 | 87 | On Python platforms known to not support C extensions (like PyPy), it only |
|
88 | 88 | attempts to import the CFFI implementation and raises ImportError if that |
|
89 | 89 | can't be done. On other platforms, it first tries to import the C extension |
|
90 | 90 | then falls back to CFFI if that fails and raises ImportError if CFFI fails. |
|
91 | 91 | |
|
92 | 92 | To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY`` |
|
93 | 93 | environment variable can be set. The following values are accepted: |
|
94 | 94 | |
|
95 | 95 | default |
|
96 | 96 | The behavior described above. |
|
97 | 97 | cffi_fallback |
|
98 | 98 | Always try to import the C extension then fall back to CFFI if that |
|
99 | 99 | fails. |
|
100 | 100 | cext |
|
101 | 101 | Only attempt to import the C extension. |
|
102 | 102 | cffi |
|
103 | 103 | Only attempt to import the CFFI implementation. |
|
104 | 104 | |
|
105 | 105 | In addition, the ``zstandard`` module exports a ``backend`` attribute |
|
106 | 106 | containing the string name of the backend being used. It will be one |
|
107 | 107 | of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively). |
|
108 | 108 | |
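For example, a quick way to confirm which backend was selected at import time
(a minimal sketch; the printed value depends on the platform and import policy)::

    import zstandard as zstd

    # Prints 'cext' or 'cffi' depending on which implementation was loaded.
    print(zstd.backend)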
|
109 | 109 | The types, functions, and attributes exposed by the ``zstandard`` module |
|
110 | 110 | are documented in the sections below. |
|
111 | 111 | |
|
112 | 112 | .. note:: |
|
113 | 113 | |
|
114 | 114 | The documentation in this section makes references to various zstd |
|
115 | 115 | concepts and functionality. The source repository contains a |
|
116 | 116 | ``docs/concepts.rst`` file explaining these in more detail. |
|
117 | 117 | |
|
118 | 118 | ZstdCompressor |
|
119 | 119 | -------------- |
|
120 | 120 | |
|
121 | 121 | The ``ZstdCompressor`` class provides an interface for performing |
|
122 | 122 | compression operations. Each instance is essentially a wrapper around a |
|
123 | 123 | ``ZSTD_CCtx`` from the C API. |
|
124 | 124 | |
|
125 | 125 | Each instance is associated with parameters that control compression |
|
126 | 126 | behavior. These come from the following named arguments (all optional): |
|
127 | 127 | |
|
128 | 128 | level |
|
129 | 129 | Integer compression level. Valid values are between 1 and 22. |
|
130 | 130 | dict_data |
|
131 | 131 | Compression dictionary to use. |
|
132 | 132 | |
|
133 | 133 | Note: When using dictionary data and ``compress()`` is called multiple |
|
134 | 134 | times, the ``ZstdCompressionParameters`` derived from an integer |
|
135 | 135 | compression ``level`` and the first compressed data's size will be reused |
|
136 | 136 | for all subsequent operations. This may not be desirable if source data |
|
137 | 137 | size varies significantly. |
|
138 | 138 | compression_params |
|
139 | 139 | A ``ZstdCompressionParameters`` instance defining compression settings. |
|
140 | 140 | write_checksum |
|
141 | 141 | Whether a 4 byte checksum should be written with the compressed data. |
|
142 | 142 | Defaults to False. If True, the decompressor can verify that decompressed |
|
143 | 143 | data matches the original input data. |
|
144 | 144 | write_content_size |
|
145 | 145 | Whether the size of the uncompressed data will be written into the |
|
146 | 146 | header of compressed data. Defaults to True. The data will only be |
|
147 | 147 | written if the compressor knows the size of the input data. This is |
|
148 | 148 | often not true for streaming compression. |
|
149 | 149 | write_dict_id |
|
150 | 150 | Whether to write the dictionary ID into the compressed data. |
|
151 | 151 | Defaults to True. The dictionary ID is only written if a dictionary |
|
152 | 152 | is being used. |
|
153 | 153 | threads |
|
154 | 154 | Enables and sets the number of threads to use for multi-threaded compression |
|
155 | 155 | operations. Defaults to 0, which means to use single-threaded compression. |
|
156 | 156 | Negative values will resolve to the number of logical CPUs in the system. |
|
157 | 157 | Read below for more info on multi-threaded compression. This argument only |
|
158 | 158 | controls thread count for operations that operate on individual pieces of |
|
159 | 159 | data. APIs that spawn multiple threads for working on multiple pieces of |
|
160 | 160 | data have their own ``threads`` argument. |
|
161 | 161 | |
|
162 | 162 | ``compression_params`` is mutually exclusive with ``level``, ``write_checksum``, |
|
163 | 163 | ``write_content_size``, ``write_dict_id``, and ``threads``. |
|
164 | 164 | |
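For illustration only (the argument values here are arbitrary), a compressor
combining several of these named arguments might be constructed like this::

    import zstandard as zstd

    # Level 10, embed a checksum in each frame, and let individual
    # compression operations use all logical CPUs.
    cctx = zstd.ZstdCompressor(level=10, write_checksum=True, threads=-1)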
|
165 | 165 | Unless specified otherwise, assume that no two methods of ``ZstdCompressor`` |
|
166 | 166 | instances can be called from multiple Python threads simultaneously. In other |
|
167 | 167 | words, assume instances are not thread safe unless stated otherwise. |
|
168 | 168 | |
|
169 | 169 | Utility Methods |
|
170 | 170 | ^^^^^^^^^^^^^^^ |
|
171 | 171 | |
|
172 | 172 | ``frame_progression()`` returns a 3-tuple containing the number of bytes |
|
173 | 173 | ingested, consumed, and produced by the current compression operation. |
|
174 | 174 | |
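As a sketch of how this might be inspected mid-operation (the feeding API and
input used here are arbitrary)::

    cctx = zstd.ZstdCompressor()
    cobj = cctx.compressobj()
    cobj.compress(b'data to compress')

    ingested, consumed, produced = cctx.frame_progression()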
|
175 | 175 | ``memory_size()`` obtains the memory utilization of the underlying zstd |
|
176 | 176 | compression context, in bytes.:: |
|
177 | 177 | |
|
178 | 178 | cctx = zstd.ZstdCompressor() |
|
179 | 179 | memory = cctx.memory_size() |
|
180 | 180 | |
|
181 | 181 | Simple API |
|
182 | 182 | ^^^^^^^^^^ |
|
183 | 183 | |
|
184 | 184 | ``compress(data)`` compresses and returns data as a one-shot operation.:: |
|
185 | 185 | |
|
186 | 186 | cctx = zstd.ZstdCompressor() |
|
187 | 187 | compressed = cctx.compress(b'data to compress') |
|
188 | 188 | |
|
189 | 189 | The ``data`` argument can be any object that implements the *buffer protocol*. |
|
190 | 190 | |
|
191 | 191 | Stream Reader API |
|
192 | 192 | ^^^^^^^^^^^^^^^^^ |
|
193 | 193 | |
|
194 | 194 | ``stream_reader(source)`` can be used to obtain an object conforming to the |
|
195 | 195 | ``io.RawIOBase`` interface for reading compressed output as a stream:: |
|
196 | 196 | |
|
197 | 197 | with open(path, 'rb') as fh: |
|
198 | 198 | cctx = zstd.ZstdCompressor() |
|
199 | reader = cctx.stream_reader(fh) | |
|
200 | while True: | |
|
201 | chunk = reader.read(16384) | |
|
202 | if not chunk: | |
|
203 | break | |
|
204 | ||
|
205 | # Do something with compressed chunk. | |
|
206 | ||
|
207 | Instances can also be used as context managers:: | |
|
208 | ||
|
209 | with open(path, 'rb') as fh: | |
|
199 | 210 | with cctx.stream_reader(fh) as reader: |
|
200 | 211 | while True: |
|
201 | 212 | chunk = reader.read(16384) |
|
202 | 213 | if not chunk: |
|
203 | 214 | break |
|
204 | 215 | |
|
205 | 216 | # Do something with compressed chunk. |
|
206 | 217 | |
|
207 | The stream can only be read within a context manager. When the context | |
|
208 | manager exits, the stream is closed and the underlying resource is | |
|
209 | released and future operations against the compression stream stream will fail. | |
|
218 | When the context manager exits or ``close()`` is called, the stream is closed, | 
|
219 | underlying resources are released, and future operations against the compression | |
|
220 | stream will fail. | |
|
210 | 221 | |
|
211 | 222 | The ``source`` argument to ``stream_reader()`` can be any object with a |
|
212 | 223 | ``read(size)`` method or any object implementing the *buffer protocol*. |
|
213 | 224 | |
|
214 | 225 | ``stream_reader()`` accepts a ``size`` argument specifying how large the input |
|
215 | 226 | stream is. This is used to adjust compression parameters so they are |
|
216 | 227 | tailored to the source size.:: |
|
217 | 228 | |
|
218 | 229 | with open(path, 'rb') as fh: |
|
219 | 230 | cctx = zstd.ZstdCompressor() |
|
220 | 231 | with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader: |
|
221 | 232 | ... |
|
222 | 233 | |
|
223 | 234 | If the ``source`` is a stream, you can specify how large ``read()`` requests |
|
224 | 235 | to that stream should be via the ``read_size`` argument. It defaults to |
|
225 | 236 | ``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.:: |
|
226 | 237 | |
|
227 | 238 | with open(path, 'rb') as fh: |
|
228 | 239 | cctx = zstd.ZstdCompressor() |
|
229 | 240 | # Will perform fh.read(8192) when obtaining data to feed into the |
|
230 | 241 | # compressor. |
|
231 | 242 | with cctx.stream_reader(fh, read_size=8192) as reader: |
|
232 | 243 | ... |
|
233 | 244 | |
|
234 | 245 | The stream returned by ``stream_reader()`` is neither writable nor seekable |
|
235 | 246 | (even if the underlying source is seekable). ``readline()`` and |
|
236 | 247 | ``readlines()`` are not implemented because they don't make sense for |
|
237 | 248 | compressed data. ``tell()`` returns the number of compressed bytes |
|
238 | 249 | emitted so far. |
|
239 | 250 | |
|
240 | 251 | Streaming Input API |
|
241 | 252 | ^^^^^^^^^^^^^^^^^^^ |
|
242 | 253 | |
|
243 | 254 | ``stream_writer(fh)`` (which behaves as a context manager) allows you to *stream* |
|
244 | 255 | data into a compressor.:: |
|
245 | 256 | |
|
246 | 257 | cctx = zstd.ZstdCompressor(level=10) |
|
247 | 258 | with cctx.stream_writer(fh) as compressor: |
|
248 | 259 | compressor.write(b'chunk 0') |
|
249 | 260 | compressor.write(b'chunk 1') |
|
250 | 261 | ... |
|
251 | 262 | |
|
252 | 263 | The argument to ``stream_writer()`` must have a ``write(data)`` method. As |
|
253 | 264 | compressed data is available, ``write()`` will be called with the compressed |
|
254 | 265 | data as its argument. Many common Python types implement ``write()``, including |
|
255 | 266 | open file handles and ``io.BytesIO``. |
|
256 | 267 | |
|
257 | 268 | ``stream_writer()`` returns an object representing a streaming compressor |
|
258 | 269 | instance. It **must** be used as a context manager. That object's |
|
259 | 270 | ``write(data)`` method is used to feed data into the compressor. |
|
260 | 271 | |
|
261 | 272 | A ``flush()`` method can be called to evict whatever data remains within the |
|
262 | 273 | compressor's internal state into the output object. This may result in 0 or |
|
263 | 274 | more ``write()`` calls to the output object. |
|
264 | 275 | |
|
265 | 276 | Both ``write()`` and ``flush()`` return the number of bytes written to the |
|
266 | 277 | object's ``write()``. In many cases, small inputs do not accumulate enough |
|
267 | 278 | data to cause a write and ``write()`` will return ``0``. |
|
268 | 279 | |
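A sketch of how those return values might be observed (``fh`` and the input
bytes are placeholders)::

    cctx = zstd.ZstdCompressor()
    with cctx.stream_writer(fh) as compressor:
        written = compressor.write(b'data to compress')  # may be 0
        written += compressor.flush()  # evicts whatever is still buffered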
|
269 | 280 | If the size of the data being fed to this streaming compressor is known, |
|
270 | 281 | you can declare it before compression begins:: |
|
271 | 282 | |
|
272 | 283 | cctx = zstd.ZstdCompressor() |
|
273 | 284 | with cctx.stream_writer(fh, size=data_len) as compressor: |
|
274 | 285 | compressor.write(chunk0) |
|
275 | 286 | compressor.write(chunk1) |
|
276 | 287 | ... |
|
277 | 288 | |
|
278 | 289 | Declaring the size of the source data allows compression parameters to |
|
279 | 290 | be tuned. And if ``write_content_size`` is used, it also results in the |
|
280 | 291 | content size being written into the frame header of the output data. |
|
281 | 292 | |
|
282 | 293 | The size of chunks being ``write()`` to the destination can be specified:: |
|
283 | 294 | |
|
284 | 295 | cctx = zstd.ZstdCompressor() |
|
285 | 296 | with cctx.stream_writer(fh, write_size=32768) as compressor: |
|
286 | 297 | ... |
|
287 | 298 | |
|
288 | 299 | To see how much memory is being used by the streaming compressor:: |
|
289 | 300 | |
|
290 | 301 | cctx = zstd.ZstdCompressor() |
|
291 | 302 | with cctx.stream_writer(fh) as compressor: |
|
292 | 303 | ... |
|
293 | 304 | byte_size = compressor.memory_size() |
|
294 | 305 | |
|
295 | 306 | The total number of bytes written so far is exposed via ``tell()``:: |
|
296 | 307 | |
|
297 | 308 | cctx = zstd.ZstdCompressor() |
|
298 | 309 | with cctx.stream_writer(fh) as compressor: |
|
299 | 310 | ... |
|
300 | 311 | total_written = compressor.tell() |
|
301 | 312 | |
|
302 | 313 | Streaming Output API |
|
303 | 314 | ^^^^^^^^^^^^^^^^^^^^ |
|
304 | 315 | |
|
305 | 316 | ``read_to_iter(reader)`` provides a mechanism to stream data out of a |
|
306 | 317 | compressor as an iterator of data chunks.:: |
|
307 | 318 | |
|
308 | 319 | cctx = zstd.ZstdCompressor() |
|
309 | 320 | for chunk in cctx.read_to_iter(fh): |
|
310 | 321 | # Do something with emitted data. |
|
311 | 322 | |
|
312 | 323 | ``read_to_iter()`` accepts an object that has a ``read(size)`` method or |
|
313 | 324 | conforms to the buffer protocol. |
|
314 | 325 | |
|
315 | 326 | Uncompressed data is fetched from the source either by calling ``read(size)`` |
|
316 | 327 | or by fetching a slice of data from the object directly (in the case where |
|
317 | 328 | the buffer protocol is being used). The returned iterator consists of chunks |
|
318 | 329 | of compressed data. |
|
319 | 330 | |
|
320 | 331 | If reading from the source via ``read()``, ``read()`` will be called until |
|
321 | 332 | it raises or returns an empty bytes (``b''``). It is perfectly valid for |
|
322 | 333 | the source to deliver fewer bytes than were requested by ``read(size)``. |
|
323 | 334 | |
|
324 | 335 | Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument |
|
325 | 336 | declaring the size of the input stream:: |
|
326 | 337 | |
|
327 | 338 | cctx = zstd.ZstdCompressor() |
|
328 | 339 | for chunk in cctx.read_to_iter(fh, size=some_int): |
|
329 | 340 | pass |
|
330 | 341 | |
|
331 | 342 | You can also control the size that data is ``read()`` from the source and |
|
332 | 343 | the ideal size of output chunks:: |
|
333 | 344 | |
|
334 | 345 | cctx = zstd.ZstdCompressor() |
|
335 | 346 | for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192): |
|
336 | 347 | pass |
|
337 | 348 | |
|
338 | 349 | Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control |
|
339 | 350 | over the sizes of chunks fed into the compressor. Instead, chunk sizes will |
|
340 | 351 | be whatever the object being read from delivers. These will often be of a |
|
341 | 352 | uniform size. |
|
342 | 353 | |
|
343 | 354 | Stream Copying API |
|
344 | 355 | ^^^^^^^^^^^^^^^^^^ |
|
345 | 356 | |
|
346 | 357 | ``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while |
|
347 | 358 | compressing it.:: |
|
348 | 359 | |
|
349 | 360 | cctx = zstd.ZstdCompressor() |
|
350 | 361 | cctx.copy_stream(ifh, ofh) |
|
351 | 362 | |
|
352 | 363 | For example, say you wish to compress a file:: |
|
353 | 364 | |
|
354 | 365 | cctx = zstd.ZstdCompressor() |
|
355 | 366 | with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh: |
|
356 | 367 | cctx.copy_stream(ifh, ofh) |
|
357 | 368 | |
|
358 | 369 | It is also possible to declare the size of the source stream:: |
|
359 | 370 | |
|
360 | 371 | cctx = zstd.ZstdCompressor() |
|
361 | 372 | cctx.copy_stream(ifh, ofh, size=len_of_input) |
|
362 | 373 | |
|
363 | 374 | You can also specify how large the chunks ``read()`` from and ``write()`` to |

364 | 375 | the streams should be:: |
|
365 | 376 | |
|
366 | 377 | cctx = zstd.ZstdCompressor() |
|
367 | 378 | cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384) |
|
368 | 379 | |
|
369 | 380 | The stream copier returns a 2-tuple of bytes read and written:: |
|
370 | 381 | |
|
371 | 382 | cctx = zstd.ZstdCompressor() |
|
372 | 383 | read_count, write_count = cctx.copy_stream(ifh, ofh) |
|
373 | 384 | |
|
374 | 385 | Compressor API |
|
375 | 386 | ^^^^^^^^^^^^^^ |
|
376 | 387 | |
|
377 | 388 | ``compressobj()`` returns an object that exposes ``compress(data)`` and |
|
378 | 389 | ``flush()`` methods. Each returns compressed data or an empty bytes. |
|
379 | 390 | |
|
380 | 391 | The purpose of ``compressobj()`` is to provide an API-compatible interface |
|
381 | 392 | with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to |
|
382 | 393 | swap in different compressor objects while using the same API. |
|
383 | 394 | |
|
384 | 395 | ``flush()`` accepts an optional argument indicating how to end the stream. |
|
385 | 396 | ``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream. |
|
386 | 397 | Once this type of flush is performed, ``compress()`` and ``flush()`` can |
|
387 | 398 | no longer be called. This type of flush **must** be called to end the |
|
388 | 399 | compression context. If not called, returned data may be incomplete. |
|
389 | 400 | |
|
390 | 401 | A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a |
|
391 | 402 | zstd block. Flushes of this type can be performed multiple times. The next |
|
392 | 403 | call to ``compress()`` will begin a new zstd block. |
|
393 | 404 | |
|
394 | 405 | Here is how this API should be used:: |
|
395 | 406 | |
|
396 | 407 | cctx = zstd.ZstdCompressor() |
|
397 | 408 | cobj = cctx.compressobj() |
|
398 | 409 | data = cobj.compress(b'raw input 0') |
|
399 | 410 | data = cobj.compress(b'raw input 1') |
|
400 | 411 | data = cobj.flush() |
|
401 | 412 | |
|
402 | 413 | Or to flush blocks:: |
|
403 | 414 | |
|
404 | 415 | cctx = zstd.ZstdCompressor() |
|
405 | 416 | cobj = cctx.compressobj() |
|
406 | 417 | data = cobj.compress(b'chunk in first block') |
|
407 | 418 | data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) |
|
408 | 419 | data = cobj.compress(b'chunk in second block') |
|
409 | 420 | data = cobj.flush() |
|
410 | 421 | |
|
411 | 422 | For best performance results, keep input chunks under 256KB. This avoids |
|
412 | 423 | extra allocations for a large output object. |
|
413 | 424 | |
|
414 | 425 | It is possible to declare the input size of the data that will be fed into |
|
415 | 426 | the compressor:: |
|
416 | 427 | |
|
417 | 428 | cctx = zstd.ZstdCompressor() |
|
418 | 429 | cobj = cctx.compressobj(size=6) |
|
419 | 430 | data = cobj.compress(b'foobar') |
|
420 | 431 | data = cobj.flush() |
|
421 | 432 | |
|
433 | Chunker API | |
|
434 | ^^^^^^^^^^^ | |
|
435 | ||
|
436 | ``chunker(size=None, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE)`` returns | |
|
437 | an object that can be used to iteratively feed chunks of data into a compressor | |
|
438 | and produce output chunks of a uniform size. | |
|
439 | ||
|
440 | The object returned by ``chunker()`` exposes the following methods: | |
|
441 | ||
|
442 | ``compress(data)`` | |
|
443 | Feeds new input data into the compressor. | |
|
444 | ||
|
445 | ``flush()`` | |
|
446 | Flushes all data currently in the compressor. | |
|
447 | ||
|
448 | ``finish()`` | |
|
449 | Signals the end of input data. No new data can be compressed after this | |
|
450 | method is called. | |
|
451 | ||
|
452 | ``compress()``, ``flush()``, and ``finish()`` all return an iterator of | |
|
453 | ``bytes`` instances holding compressed data. The iterator may be empty. Callers | |
|
454 | MUST iterate through all elements of the returned iterator before performing | |
|
455 | another operation on the object. | |
|
456 | ||
|
457 | All chunks emitted by ``compress()`` will have a length of ``chunk_size``. | |
|
458 | ||
|
459 | ``flush()`` and ``finish()`` may return a final chunk smaller than | |
|
460 | ``chunk_size``. | |
|
461 | ||
|
462 | Here is how the API should be used:: | |
|
463 | ||
|
464 | cctx = zstd.ZstdCompressor() | |
|
465 | chunker = cctx.chunker(chunk_size=32768) | |
|
466 | ||
|
467 | with open(path, 'rb') as fh: | |
|
468 | while True: | |
|
469 | in_chunk = fh.read(32768) | |
|
470 | if not in_chunk: | |
|
471 | break | |
|
472 | ||
|
473 | for out_chunk in chunker.compress(in_chunk): | |
|
474 | # Do something with output chunk of size 32768. | |
|
475 | ||
|
476 | for out_chunk in chunker.finish(): | |
|
477 | # Do something with output chunks that finalize the zstd frame. | |
|
478 | ||
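If compressed output is needed before the input is exhausted, ``flush()`` can be
drained the same way, before ``finish()`` is called (a minimal sketch)::

    # Force out whatever the compressor has buffered so far without
    # ending the zstd frame.
    for out_chunk in chunker.flush():
        # Do something with the chunks that are flushed.
        pass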
|
479 | The ``chunker()`` API is often a better alternative to ``compressobj()``. | |
|
480 | ||
|
481 | ``compressobj()`` will emit output data as it is available. This results in a | |
|
482 | *stream* of output chunks of varying sizes. The consistency of the output chunk | |
|
483 | size with ``chunker()`` is more appropriate for many usages, such as sending | |
|
484 | compressed data to a socket. | |
|
485 | ||
|
486 | ``compressobj()`` may also perform extra memory reallocations in order to | |
|
487 | dynamically adjust the sizes of the output chunks. Since ``chunker()`` output | |
|
488 | chunks are all the same size (except for flushed or final chunks), there is | |
|
489 | less memory allocation overhead. | |
|
490 | ||
|
422 | 491 | Batch Compression API |
|
423 | 492 | ^^^^^^^^^^^^^^^^^^^^^ |
|
424 | 493 | |
|
425 | 494 | (Experimental. Not yet supported in CFFI bindings.) |
|
426 | 495 | |
|
427 | 496 | ``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple |
|
428 | 497 | inputs as a single operation. |
|
429 | 498 | |
|
430 | 499 | Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a |
|
431 | 500 | ``BufferWithSegments``, or a list containing byte like objects. Each element of |
|
432 | 501 | the container will be compressed individually using the configured parameters |
|
433 | 502 | on the ``ZstdCompressor`` instance. |
|
434 | 503 | |
|
435 | 504 | The ``threads`` argument controls how many threads to use for compression. The |
|
436 | 505 | default is ``0`` which means to use a single thread. Negative values use the |
|
437 | 506 | number of logical CPUs in the machine. |
|
438 | 507 | |
|
439 | 508 | The function returns a ``BufferWithSegmentsCollection``. This type represents |
|
440 | 509 | N discrete memory allocations, each holding 1 or more compressed frames. |
|
441 | 510 | |
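A minimal sketch of a call (the inputs here are arbitrary)::

    cctx = zstd.ZstdCompressor()
    results = cctx.multi_compress_to_buffer([b'input 0', b'input 1'], threads=-1)

    # ``results`` is a BufferWithSegmentsCollection holding one compressed
    # frame per input, in the same order as the inputs.
    frame_count = len(results)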
|
442 | 511 | Output data is written to shared memory buffers. This means that unlike |
|
443 | 512 | regular Python objects, a reference to *any* object within the collection |
|
444 | 513 | keeps the shared buffer and therefore memory backing it alive. This can have |
|
445 | 514 | undesirable effects on process memory usage. |
|
446 | 515 | |
|
447 | 516 | The API and behavior of this function is experimental and will likely change. |
|
448 | 517 | Known deficiencies include: |
|
449 | 518 | |
|
450 | 519 | * If asked to use multiple threads, it will always spawn that many threads, |
|
451 | 520 | even if the input is too small to use them. It should automatically lower |
|
452 | 521 | the thread count when the extra threads would just add overhead. |
|
453 | 522 | * The buffer allocation strategy is fixed. There is room to make it dynamic, |
|
454 | 523 | perhaps even to allow one output buffer per input, facilitating a variation |
|
455 | 524 | of the API to return a list without the adverse effects of shared memory |
|
456 | 525 | buffers. |
|
457 | 526 | |
|
458 | 527 | ZstdDecompressor |
|
459 | 528 | ---------------- |
|
460 | 529 | |
|
461 | 530 | The ``ZstdDecompressor`` class provides an interface for performing |
|
462 | 531 | decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from |
|
463 | 532 | the C API. |
|
464 | 533 | |
|
465 | 534 | Each instance is associated with parameters that control decompression. These |
|
466 | 535 | come from the following named arguments (all optional): |
|
467 | 536 | |
|
468 | 537 | dict_data |
|
469 | 538 | Compression dictionary to use. |
|
470 | 539 | max_window_size |
|
471 | 540 | Sets an upper limit on the window size for decompression operations in |
|
472 | 541 | kibibytes. This setting can be used to prevent large memory allocations |
|
473 | 542 | for inputs using large compression windows. |
|
474 | 543 | format |
|
475 | 544 | Set the format of data for the decoder. By default, this is |
|
476 | 545 | ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to |
|
477 | 546 | allow decoding frames without the 4 byte magic header. Not all decompression |
|
478 | 547 | APIs support this mode. |
|
479 | 548 | |
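For illustration only (the values are placeholders), a decompressor using these
arguments might be constructed like this::

    import zstandard as zstd

    # ``raw_dict_bytes`` is a placeholder for previously obtained dictionary data.
    dict_data = zstd.ZstdCompressionDict(raw_dict_bytes)
    dctx = zstd.ZstdDecompressor(dict_data=dict_data)

    # Or accept frames written without the 4 byte magic header.
    dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS)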
|
480 | 549 | The interface of this class is very similar to ``ZstdCompressor`` (by design). |
|
481 | 550 | |
|
482 | 551 | Unless specified otherwise, assume that no two methods of ``ZstdDecompressor`` |
|
483 | 552 | instances can be called from multiple Python threads simultaneously. In other |
|
484 | 553 | words, assume instances are not thread safe unless stated otherwise. |
|
485 | 554 | |
|
486 | 555 | Utility Methods |
|
487 | 556 | ^^^^^^^^^^^^^^^ |
|
488 | 557 | |
|
489 | 558 | ``memory_size()`` obtains the size of the underlying zstd decompression context, |
|
490 | 559 | in bytes.:: |
|
491 | 560 | |
|
492 | 561 | dctx = zstd.ZstdDecompressor() |
|
493 | 562 | size = dctx.memory_size() |
|
494 | 563 | |
|
495 | 564 | Simple API |
|
496 | 565 | ^^^^^^^^^^ |
|
497 | 566 | |
|
498 | 567 | ``decompress(data)`` can be used to decompress an entire compressed zstd |
|
499 | 568 | frame in a single operation.:: |
|
500 | 569 | |
|
501 | 570 | dctx = zstd.ZstdDecompressor() |
|
502 | 571 | decompressed = dctx.decompress(data) |
|
503 | 572 | |
|
504 | 573 | By default, ``decompress(data)`` will only work on data written with the content |
|
505 | 574 | size encoded in its header (this is the default behavior of |
|
506 | 575 | ``ZstdCompressor().compress()`` but may not be true for streaming compression). If |
|
507 | 576 | compressed data without an embedded content size is seen, ``zstd.ZstdError`` will |
|
508 | 577 | be raised. |
|
509 | 578 | |
|
510 | 579 | If the compressed data doesn't have its content size embedded within it, |
|
511 | 580 | decompression can be attempted by specifying the ``max_output_size`` |
|
512 | 581 | argument.:: |
|
513 | 582 | |
|
514 | 583 | dctx = zstd.ZstdDecompressor() |
|
515 | 584 | uncompressed = dctx.decompress(data, max_output_size=1048576) |
|
516 | 585 | |
|
517 | 586 | Ideally, ``max_output_size`` will be identical to the decompressed output |
|
518 | 587 | size. |
|
519 | 588 | |
|
520 | 589 | If ``max_output_size`` is too small to hold the decompressed data, |
|
521 | 590 | ``zstd.ZstdError`` will be raised. |
|
522 | 591 | |
|
523 | 592 | If ``max_output_size`` is larger than the decompressed data, the allocated |
|
524 | 593 | output buffer will be resized to only use the space required. |
|
525 | 594 | |
|
526 | 595 | Please note that an allocation of the requested ``max_output_size`` will be |
|
527 | 596 | performed every time the method is called. Setting to a very large value could |
|
528 | 597 | result in a lot of work for the memory allocator and may result in |
|
529 | 598 | ``MemoryError`` being raised if the allocation fails. |
|
530 | 599 | |
|
531 | 600 | .. important:: |
|
532 | 601 | |
|
533 | 602 | If the exact size of decompressed data is unknown (not passed in explicitly |
|
534 | 603 | and not stored in the zstandard frame), for performance reasons it is |
|
535 | 604 | encouraged to use a streaming API. |
|
536 | 605 | |
|
537 | 606 | Stream Reader API |
|
538 | 607 | ^^^^^^^^^^^^^^^^^ |
|
539 | 608 | |
|
540 | 609 | ``stream_reader(source)`` can be used to obtain an object conforming to the |
|
541 | 610 | ``io.RawIOBase`` interface for reading decompressed output as a stream:: |
|
542 | 611 | |
|
543 | 612 | with open(path, 'rb') as fh: |
|
544 | 613 | dctx = zstd.ZstdDecompressor() |
|
545 | with dctx.stream_reader(fh) as reader: | 

614 | reader = dctx.stream_reader(fh) | 

546 | 615 | while True: |

547 | 616 | chunk = reader.read(16384) |

548 | 617 | if not chunk: |

549 | 618 | break |

550 | 619 | |

551 | 620 | # Do something with decompressed chunk. |

552 | 621 | |

553 | The stream can only be read within a context manager. When the context | 

554 | manager exits, the stream is closed and the underlying resource is | |
|
555 | released and future operations against the stream will fail. | |
|
622 | The stream can also be used as a context manager:: | |
|
623 | ||
|
624 | with open(path, 'rb') as fh: | |
|
625 | dctx = zstd.ZstdDecompressor() | |
|
626 | with dctx.stream_reader(fh) as reader: | |
|
627 | ... | |
|
628 | ||
|
629 | When used as a context manager, the stream is closed and the underlying | |
|
630 | resources are released when the context manager exits. Future operations against | |
|
631 | the stream will fail. | |
|
556 | 632 | |
|
557 | 633 | The ``source`` argument to ``stream_reader()`` can be any object with a |
|
558 | 634 | ``read(size)`` method or any object implementing the *buffer protocol*. |
|
559 | 635 | |
|
560 | 636 | If the ``source`` is a stream, you can specify how large ``read()`` requests |
|
561 | 637 | to that stream should be via the ``read_size`` argument. It defaults to |
|
562 | 638 | ``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.:: |
|
563 | 639 | |
|
564 | 640 | with open(path, 'rb') as fh: |
|
565 | 641 | dctx = zstd.ZstdDecompressor() |
|
566 | 642 | # Will perform fh.read(8192) when obtaining data for the decompressor. |
|
567 | 643 | with dctx.stream_reader(fh, read_size=8192) as reader: |
|
568 | 644 | ... |
|
569 | 645 | |
|
570 | 646 | The stream returned by ``stream_reader()`` is not writable. |
|
571 | 647 | |
|
572 | 648 | The stream returned by ``stream_reader()`` is *partially* seekable. |
|
573 | 649 | Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward |
|
574 | 650 | of the current position are allowed. Offsets behind the current read |
|
575 | 651 | position and offsets relative to the end of stream are not allowed and |
|
576 | 652 | will raise ``ValueError`` if attempted. |
|
577 | 653 | |
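As a sketch of the permitted (forward-only) seek operations::

    import os

    with open(path, 'rb') as fh:
        dctx = zstd.ZstdDecompressor()
        with dctx.stream_reader(fh) as reader:
            reader.seek(4096)              # absolute position ahead of the start
            reader.seek(100, os.SEEK_CUR)  # forward of the current position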
|
578 | 654 | ``tell()`` returns the number of decompressed bytes read so far. |
|
579 | 655 | |
|
580 | 656 | Not all I/O methods are implemented. Notably missing is support for |
|
581 | 657 | ``readline()``, ``readlines()``, and linewise iteration. Support for |
|
582 | 658 | these is planned for a future release. |
|
583 | 659 | |
|
584 | 660 | Streaming Input API |
|
585 | 661 | ^^^^^^^^^^^^^^^^^^^ |
|
586 | 662 | |
|
587 | 663 | ``stream_writer(fh)`` can be used to incrementally send compressed data to a |
|
588 | 664 | decompressor.:: |
|
589 | 665 | |
|
590 | 666 | dctx = zstd.ZstdDecompressor() |
|
591 | 667 | with dctx.stream_writer(fh) as decompressor: |
|
592 | 668 | decompressor.write(compressed_data) |
|
593 | 669 | |
|
594 | 670 | This behaves similarly to ``zstd.ZstdCompressor``: compressed data is written to |
|
595 | 671 | the decompressor by calling ``write(data)`` and decompressed output is written |
|
596 | 672 | to the output object by calling its ``write(data)`` method. |
|
597 | 673 | |
|
598 | 674 | Calls to ``write()`` will return the number of bytes written to the output |
|
599 | 675 | object. Not all inputs will result in bytes being written, so return values |
|
600 | 676 | of ``0`` are possible. |
|
601 | 677 | |
|
602 | 678 | The size of chunks being ``write()`` to the destination can be specified:: |
|
603 | 679 | |
|
604 | 680 | dctx = zstd.ZstdDecompressor() |
|
605 | 681 | with dctx.stream_writer(fh, write_size=16384) as decompressor: |
|
606 | 682 | pass |
|
607 | 683 | |
|
608 | 684 | You can see how much memory is being used by the decompressor:: |
|
609 | 685 | |
|
610 | 686 | dctx = zstd.ZstdDecompressor() |
|
611 | 687 | with dctx.stream_writer(fh) as decompressor: |
|
612 | 688 | byte_size = decompressor.memory_size() |
|
613 | 689 | |
|
614 | 690 | Streaming Output API |
|
615 | 691 | ^^^^^^^^^^^^^^^^^^^^ |
|
616 | 692 | |
|
617 | 693 | ``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a |
|
618 | 694 | compressed source as an iterator of data chunks.:: |
|
619 | 695 | |
|
620 | 696 | dctx = zstd.ZstdDecompressor() |
|
621 | 697 | for chunk in dctx.read_to_iter(fh): |
|
622 | 698 | # Do something with original data. |
|
623 | 699 | |
|
624 | 700 | ``read_to_iter()`` accepts an object with a ``read(size)`` method that will |
|
625 | 701 | return compressed bytes or an object conforming to the buffer protocol that |
|
626 | 702 | can expose its data as a contiguous range of bytes. |
|
627 | 703 | |
|
628 | 704 | ``read_to_iter()`` returns an iterator whose elements are chunks of the |
|
629 | 705 | decompressed data. |
|
630 | 706 | |
|
631 | 707 | The size of requested ``read()`` from the source can be specified:: |
|
632 | 708 | |
|
633 | 709 | dctx = zstd.ZstdDecompressor() |
|
634 | 710 | for chunk in dctx.read_to_iter(fh, read_size=16384): |
|
635 | 711 | pass |
|
636 | 712 | |
|
637 | 713 | It is also possible to skip leading bytes in the input data:: |
|
638 | 714 | |
|
639 | 715 | dctx = zstd.ZstdDecompressor() |
|
640 | 716 | for chunk in dctx.read_to_iter(fh, skip_bytes=1): |
|
641 | 717 | pass |
|
642 | 718 | |
|
643 | 719 | .. tip:: |
|
644 | 720 | |
|
645 | 721 | Skipping leading bytes is useful if the source data contains extra |
|
646 | 722 | *header* data. Traditionally, you would need to create a slice or |
|
647 | 723 | ``memoryview`` of the data you want to decompress. This would create |
|
648 | 724 | overhead. It is more efficient to pass the offset into this API. |
|
649 | 725 | |
|
650 | 726 | Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator |
|
651 | 727 | controls when data is decompressed. If the iterator isn't consumed, |
|
652 | 728 | decompression is put on hold. |
|
653 | 729 | |
|
654 | 730 | When ``read_to_iter()`` is passed an object conforming to the buffer protocol, |
|
655 | 731 | the behavior may seem similar to what occurs when the simple decompression |
|
656 | 732 | API is used. However, this API works when the decompressed size is unknown. |
|
657 | 733 | Furthermore, if feeding large inputs, the decompressor will work in chunks |
|
658 | 734 | instead of performing a single operation. |
|
659 | 735 | |
|
660 | 736 | Stream Copying API |
|
661 | 737 | ^^^^^^^^^^^^^^^^^^ |
|
662 | 738 | |
|
663 | 739 | ``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while |
|
664 | 740 | performing decompression.:: |
|
665 | 741 | |
|
666 | 742 | dctx = zstd.ZstdDecompressor() |
|
667 | 743 | dctx.copy_stream(ifh, ofh) |
|
668 | 744 | |
|
669 | 745 | e.g. to decompress a file to another file:: |
|
670 | 746 | |
|
671 | 747 | dctx = zstd.ZstdDecompressor() |
|
672 | 748 | with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh: |
|
673 | 749 | dctx.copy_stream(ifh, ofh) |
|
674 | 750 | |
|
675 | 751 | The size of chunks being ``read()`` and ``write()`` from and to the streams |
|
676 | 752 | can be specified:: |
|
677 | 753 | |
|
678 | 754 | dctx = zstd.ZstdDecompressor() |
|
679 | 755 | dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384) |
|
680 | 756 | |
|
681 | 757 | Decompressor API |
|
682 | 758 | ^^^^^^^^^^^^^^^^ |
|
683 | 759 | |
|
684 | 760 | ``decompressobj()`` returns an object that exposes a ``decompress(data)`` |
|
685 | 761 | method. Compressed data chunks are fed into ``decompress(data)`` and |
|
686 | 762 | uncompressed output (or an empty bytes) is returned. Output from subsequent |
|
687 | 763 | calls needs to be concatenated to reassemble the full decompressed byte |
|
688 | 764 | sequence. |
|
689 | 765 | |
|
690 | 766 | The purpose of ``decompressobj()`` is to provide an API-compatible interface |
|
691 | 767 | with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers |
|
692 | 768 | to swap in different decompressor objects while using the same API. |
|
693 | 769 | |
|
694 | 770 | Each object is single use: once an input frame is decoded, ``decompress()`` |
|
695 | 771 | can no longer be called. |
|
696 | 772 | |
|
697 | 773 | Here is how this API should be used:: |
|
698 | 774 | |
|
699 | 775 | dctx = zstd.ZstdDecompressor() |
|
700 | 776 | dobj = dctx.decompressobj() |
|
701 | 777 | data = dobj.decompress(compressed_chunk_0) |
|
702 | 778 | data = dobj.decompress(compressed_chunk_1) |
|
703 | 779 | |
|
704 | 780 | By default, calls to ``decompress()`` write output data in chunks of size |
|
705 | 781 | ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated |
|
706 | 782 | before being returned to the caller. It is possible to define the size of |
|
707 | 783 | these temporary chunks by passing ``write_size`` to ``decompressobj()``:: |
|
708 | 784 | |
|
709 | 785 | dctx = zstd.ZstdDecompressor() |
|
710 | 786 | dobj = dctx.decompressobj(write_size=1048576) |
|
711 | 787 | |
|
712 | 788 | .. note:: |
|
713 | 789 | |
|
714 | 790 | Because calls to ``decompress()`` may need to perform multiple |
|
715 | 791 | memory (re)allocations, this streaming decompression API isn't as |
|
716 | 792 | efficient as other APIs. |
|
717 | 793 | |
|
718 | 794 | Batch Decompression API |
|
719 | 795 | ^^^^^^^^^^^^^^^^^^^^^^^ |
|
720 | 796 | |
|
721 | 797 | (Experimental. Not yet supported in CFFI bindings.) |
|
722 | 798 | |
|
723 | 799 | ``multi_decompress_to_buffer()`` performs decompression of multiple |
|
724 | 800 | frames as a single operation and returns a ``BufferWithSegmentsCollection`` |
|
725 | 801 | containing decompressed data for all inputs. |
|
726 | 802 | |
|
727 | 803 | Compressed frames can be passed to the function as a ``BufferWithSegments``, |
|
728 | 804 | a ``BufferWithSegmentsCollection``, or as a list containing objects that |
|
729 | 805 | conform to the buffer protocol. For best performance, pass a |
|
730 | 806 | ``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as |
|
731 | 807 | minimal input validation will be done for that type. If calling from |
|
732 | 808 | Python (as opposed to C), constructing one of these instances may add |
|
733 | 809 | overhead cancelling out the performance overhead of validation for list |
|
734 | 810 | inputs.:: |
|
735 | 811 | |
|
736 | 812 | dctx = zstd.ZstdDecompressor() |
|
737 | 813 | results = dctx.multi_decompress_to_buffer([b'...', b'...']) |
|
738 | 814 | |
|
739 | 815 | The decompressed size of each frame MUST be discoverable. It can either be |
|
740 | 816 | embedded within the zstd frame (``write_content_size=True`` argument to |
|
741 | 817 | ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument. |
|
742 | 818 | |
|
743 | 819 | The ``decompressed_sizes`` argument is an object conforming to the buffer |
|
744 | 820 | protocol which holds an array of 64-bit unsigned integers in the machine's |
|
745 | 821 | native format defining the decompressed sizes of each frame. If this argument |
|
746 | 822 | is passed, it avoids having to scan each frame for its decompressed size. |
|
747 | 823 | This frame scanning can add noticeable overhead in some scenarios.:: |
|
748 | 824 | |
|
749 | 825 | frames = [...] |
|
750 | 826 | sizes = struct.pack('=QQQQ', len0, len1, len2, len3) |
|
751 | 827 | |
|
752 | 828 | dctx = zstd.ZstdDecompressor() |
|
753 | 829 | results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes) |
|
754 | 830 | |
|
755 | 831 | The ``threads`` argument controls the number of threads to use to perform |
|
756 | 832 | decompression operations. The default (``0``) or the value ``1`` means to |
|
757 | 833 | use a single thread. Negative values use the number of logical CPUs in the |
|
758 | 834 | machine. |
|
759 | 835 | |
|
760 | 836 | .. note:: |
|
761 | 837 | |
|
762 | 838 | It is possible to pass a ``mmap.mmap()`` instance into this function by |
|
763 | 839 | wrapping it with a ``BufferWithSegments`` instance (which will define the |
|
764 | 840 | offsets of frames within the memory mapped region). |
|
765 | 841 | |
|
766 | 842 | This function is logically equivalent to performing ``dctx.decompress()`` |
|
767 | 843 | on each input frame and returning the result. |
|
768 | 844 | |
|
769 | 845 | This function exists to perform decompression on multiple frames as fast |
|
770 | 846 | as possible by having as little overhead as possible. Since decompression is |
|
771 | 847 | performed as a single operation and since the decompressed output is stored in |
|
772 | 848 | a single buffer, extra memory allocations, Python objects, and Python function |
|
773 | 849 | calls are avoided. This is ideal for scenarios where callers know up front that |
|
774 | 850 | they need to access data for multiple frames, such as when *delta chains* are |
|
775 | 851 | being used. |
|
776 | 852 | |
|
777 | 853 | Currently, the implementation always spawns multiple threads when requested, |
|
778 | 854 | even if the amount of work to do is small. In the future, it will be smarter |
|
779 | 855 | about avoiding threads and their associated overhead when the amount of |
|
780 | 856 | work to do is small. |
|
781 | 857 | |
|
782 | 858 | Prefix Dictionary Chain Decompression |
|
783 | 859 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
784 | 860 | |
|
785 | 861 | ``decompress_content_dict_chain(frames)`` performs decompression of a list of |
|
786 | 862 | zstd frames produced using chained *prefix* dictionary compression. Such |
|
787 | 863 | a list of frames is produced by compressing discrete inputs where each |
|
788 | 864 | non-initial input is compressed with a *prefix* dictionary consisting of the |
|
789 | 865 | content of the previous input. |
|
790 | 866 | |
|
791 | 867 | For example, say you have the following inputs:: |
|
792 | 868 | |
|
793 | 869 | inputs = [b'input 1', b'input 2', b'input 3'] |
|
794 | 870 | |
|
795 | 871 | The zstd frame chain consists of: |
|
796 | 872 | |
|
797 | 873 | 1. ``b'input 1'`` compressed in standalone/discrete mode |
|
798 | 874 | 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary |
|
799 | 875 | 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary |
|
800 | 876 | |
|
801 | 877 | Each zstd frame **must** have the content size written. |
|
802 | 878 | |
|
803 | 879 | The following Python code can be used to produce a *prefix dictionary chain*:: |
|
804 | 880 | |
|
805 | 881 | def make_chain(inputs): |
|
806 | 882 | frames = [] |
|
807 | 883 | |
|
808 | 884 | # First frame is compressed in standalone/discrete mode. |
|
809 | 885 | zctx = zstd.ZstdCompressor() |
|
810 | 886 | frames.append(zctx.compress(inputs[0])) |
|
811 | 887 | |
|
812 | 888 | # Subsequent frames use the previous fulltext as a prefix dictionary |
|
813 | 889 | for i, raw in enumerate(inputs[1:]): |
|
814 | 890 | dict_data = zstd.ZstdCompressionDict( |
|
815 | 891 | inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT) |
|
816 | 892 | zctx = zstd.ZstdCompressor(dict_data=dict_data) |
|
817 | 893 | frames.append(zctx.compress(raw)) |
|
818 | 894 | |
|
819 | 895 | return frames |
|
820 | 896 | |
|
821 | 897 | ``decompress_content_dict_chain()`` returns the uncompressed data of the last |
|
822 | 898 | element in the input chain. |
|
823 | 899 | |
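For example, decoding the chain produced by the ``make_chain()`` helper above
might look like this (a sketch)::

    frames = make_chain([b'input 1', b'input 2', b'input 3'])

    dctx = zstd.ZstdDecompressor()
    data = dctx.decompress_content_dict_chain(frames)
    # ``data`` is the fulltext of the final input, b'input 3'.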
|
824 | 900 | |
|
825 | 901 | .. note:: |
|
826 | 902 | |
|
827 | 903 | It is possible to implement *prefix dictionary chain* decompression |
|
828 | 904 | on top of other APIs. However, this function will likely be faster - |
|
829 | 905 | especially for long input chains - as it avoids the overhead of instantiating |
|
830 | 906 | and passing around intermediate objects between C and Python. |
|
831 | 907 | |
|
832 | 908 | Multi-Threaded Compression |
|
833 | 909 | -------------------------- |
|
834 | 910 | |
|
835 | 911 | ``ZstdCompressor`` accepts a ``threads`` argument that controls the number |
|
836 | 912 | of threads to use for compression. The way this works is that input is split |
|
837 | 913 | into segments and each segment is fed into a worker pool for compression. Once |
|
838 | 914 | a segment is compressed, it is flushed/appended to the output. |
|
839 | 915 | |
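For example (a sketch; the thread count and file paths are arbitrary),
multi-threaded compression is requested when constructing the compressor and
then used through the normal APIs::

    cctx = zstd.ZstdCompressor(threads=4)
    with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
        cctx.copy_stream(ifh, ofh)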
|
840 | 916 | .. note:: |
|
841 | 917 | |
|
842 | 918 | These threads are created at the C layer and are not Python threads. So they |
|
843 | 919 | work outside the GIL. It is therefore possible to CPU saturate multiple cores |
|
844 | 920 | from Python. |
|
845 | 921 | |
|
846 | 922 | The segment size for multi-threaded compression is chosen from the window size |
|
847 | 923 | of the compressor. This is derived from the ``window_log`` attribute of a |
|
848 | 924 | ``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB |
|
849 | 925 | range. |
|
850 | 926 | |
|
851 | 927 | If multi-threaded compression is requested and the input is smaller than the |
|
852 | 928 | configured segment size, only a single compression thread will be used. If the |
|
853 | 929 | input is smaller than the segment size multiplied by the thread pool size or |
|
854 | 930 | if data cannot be delivered to the compressor fast enough, not all requested |
|
855 | 931 | compressor threads may be active simultaneously. |
|
856 | 932 | |
|
857 | 933 | Compared to non-multi-threaded compression, multi-threaded compression has |
|
858 | 934 | higher per-operation overhead. This includes extra memory operations, |
|
859 | 935 | thread creation, lock acquisition, etc. |
|
860 | 936 | |
|
861 | 937 | Due to the nature of multi-threaded compression using *N* compression |
|
862 | 938 | *states*, the output from multi-threaded compression will likely be larger |
|
863 | 939 | than non-multi-threaded compression. The difference is usually small. But |
|
864 | 940 | there is a CPU/wall time versus size trade off that may warrant investigation. |
|
865 | 941 | |
|
866 | 942 | Output from multi-threaded compression does not require any special handling |
|
867 | 943 | on the decompression side. To the decompressor, data generated by a single- |
|
868 | 944 | threaded compressor looks the same as data generated by a multi-threaded |
|
869 | 945 | compressor and does not require any special handling or additional resource |
|
870 | 946 | requirements. |
|
871 | 947 | |
|
872 | 948 | Dictionary Creation and Management |
|
873 | 949 | ---------------------------------- |
|
874 | 950 | |
|
875 | 951 | Compression dictionaries are represented with the ``ZstdCompressionDict`` type. |
|
876 | 952 | |
|
877 | 953 | Instances can be constructed from bytes:: |
|
878 | 954 | |
|
879 | 955 | dict_data = zstd.ZstdCompressionDict(data) |
|
880 | 956 | |
|
881 | 957 | It is possible to construct a dictionary from *any* data. If the data doesn't |
|
882 | 958 | begin with a magic header, it will be treated as a *prefix* dictionary. |
|
883 | 959 | *Prefix* dictionaries allow compression operations to reference raw data |
|
884 | 960 | within the dictionary. |
|
885 | 961 | |
|
886 | 962 | It is possible to force the use of *prefix* dictionaries or to require a |
|
887 | 963 | dictionary header:: |
|
888 | 964 | |
|
889 | 965 | dict_data = zstd.ZstdCompressionDict(data, |
|
890 | 966 | dict_type=zstd.DICT_TYPE_RAWCONTENT) |
|
891 | 967 | |
|
892 | 968 | dict_data = zstd.ZstdCompressionDict(data, |
|
893 | 969 | dict_type=zstd.DICT_TYPE_FULLDICT) |
|
894 | 970 | |
|
895 | 971 | You can see how many bytes are in the dictionary by calling ``len()``:: |
|
896 | 972 | |
|
897 | 973 | dict_data = zstd.train_dictionary(size, samples) |
|
898 | 974 | dict_size = len(dict_data) # will not be larger than ``size`` |
|
899 | 975 | |
|
900 | 976 | Once you have a dictionary, you can pass it to the objects performing |
|
901 | 977 | compression and decompression:: |
|
902 | 978 | |
|
903 | 979 | dict_data = zstd.train_dictionary(131072, samples) |
|
904 | 980 | |
|
905 | 981 | cctx = zstd.ZstdCompressor(dict_data=dict_data) |
|
906 | 982 | for source_data in input_data: |
|
907 | 983 | compressed = cctx.compress(source_data) |
|
908 | 984 | # Do something with compressed data. |
|
909 | 985 | |
|
910 | 986 | dctx = zstd.ZstdDecompressor(dict_data=dict_data) |
|
911 | 987 | for compressed_data in input_data: |
|
912 | 988 | buffer = io.BytesIO() |
|
913 | 989 | with dctx.stream_writer(buffer) as decompressor: |
|
914 | 990 | decompressor.write(compressed_data) |
|
915 | 991 | # Do something with raw data in ``buffer``. |
|
916 | 992 | |
|
917 | 993 | Dictionaries have unique integer IDs. You can retrieve this ID via:: |
|
918 | 994 | |
|
919 | 995 | dict_id = zstd.dictionary_id(dict_data) |
|
920 | 996 | |
|
921 | 997 | You can obtain the raw data in the dict (useful for persisting and constructing |
|
922 | 998 | a ``ZstdCompressionDict`` later) via ``as_bytes()``:: |
|
923 | 999 | |
|
924 | 1000 | dict_data = zstd.train_dictionary(size, samples) |
|
925 | 1001 | raw_data = dict_data.as_bytes() |
|
926 | 1002 | |
|
927 | 1003 | By default, when a ``ZstdCompressionDict`` is *attached* to a |
|
928 | 1004 | ``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the |
|
929 | 1005 | dictionary for use. This is fine if only 1 compression operation is being |
|
930 | 1006 | performed or if the ``ZstdCompressor`` is being reused for multiple operations. |
|
931 | 1007 | But if multiple ``ZstdCompressor`` instances are being used with the dictionary, |
|
932 | 1008 | this can add overhead. |
|
933 | 1009 | |
|
934 | 1010 | It is possible to *precompute* the dictionary so it can readily be consumed |
|
935 | 1011 | by multiple ``ZstdCompressor`` instances:: |
|
936 | 1012 | |
|
937 | 1013 | d = zstd.ZstdCompressionDict(data) |
|
938 | 1014 | |
|
939 | 1015 | # Precompute for compression level 3. |
|
940 | 1016 | d.precompute_compress(level=3) |
|
941 | 1017 | |
|
942 | 1018 | # Precompute with specific compression parameters. |
|
943 | 1019 | params = zstd.ZstdCompressionParameters(...) |
|
944 | 1020 | d.precompute_compress(compression_params=params) |
|
945 | 1021 | |
|
946 | 1022 | .. note:: |
|
947 | 1023 | |
|
948 | 1024 | When a dictionary is precomputed, the compression parameters used to |
|
949 | 1025 | precompute the dictionary overwrite some of the compression parameters |
|
950 | 1026 | specified to ``ZstdCompressor.__init__``. |
|
951 | 1027 | |
|
952 | 1028 | Training Dictionaries |
|
953 | 1029 | ^^^^^^^^^^^^^^^^^^^^^ |
|
954 | 1030 | |
|
955 | 1031 | Unless using *prefix* dictionaries, dictionary data is produced by *training* |
|
956 | 1032 | on existing data:: |
|
957 | 1033 | |
|
958 | 1034 | dict_data = zstd.train_dictionary(size, samples) |
|
959 | 1035 | |
|
960 | 1036 | This takes a target dictionary size and list of bytes instances and creates and |
|
961 | 1037 | returns a ``ZstdCompressionDict``. |
|
962 | 1038 | |
|
963 | 1039 | The dictionary training mechanism is known as *cover*. More details about it are |
|
964 | 1040 | available in the paper *Effective Construction of Relative Lempel-Ziv |
|
965 | 1041 | Dictionaries* (authors: Liao, Petri, Moffat, Wirth). |
|
966 | 1042 | |
|
967 | 1043 | The cover algorithm takes parameters ``k`` and ``d``. These are the |
|
968 | 1044 | *segment size* and *dmer size*, respectively. The returned dictionary |
|
969 | 1045 | instance created by this function has ``k`` and ``d`` attributes |
|
970 | 1046 | containing the values for these parameters. If a ``ZstdCompressionDict`` |
|
971 | 1047 | is constructed from raw bytes data (a content-only dictionary), the |
|
972 | 1048 | ``k`` and ``d`` attributes will be ``0``. |
|
973 | 1049 | |
|
974 | 1050 | The segment and dmer size parameters to the cover algorithm can either be |
|
975 | 1051 | specified manually or ``train_dictionary()`` can try multiple values |
|
976 | 1052 | and pick the best one, where *best* means the smallest compressed data size. |
|
977 | 1053 | This latter mode is called *optimization* mode. |
|
978 | 1054 | |
|
979 | 1055 | If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``, |
|
980 | 1056 | or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t`` |
|
981 | 1057 | struct) are defined, *optimization* mode is used with default parameter |
|
982 | 1058 | values. |
|
983 | 1059 | |
|
984 | 1060 | If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged |
|
985 | 1061 | with explicit control over those parameters. Specifying ``threads=0`` or |
|
986 | 1062 | ``threads=1`` can be used to engage *optimization* mode if other parameters |
|
987 | 1063 | are not defined. |
|
988 | 1064 | |
|
989 | 1065 | Otherwise, non-*optimization* mode is used with the parameters specified. |
|
990 | 1066 | |
|
991 | 1067 | This function takes the following arguments: |
|
992 | 1068 | |
|
993 | 1069 | dict_size |
|
994 | 1070 | Target size in bytes of the dictionary to generate. |
|
995 | 1071 | samples |
|
996 | 1072 | A list of bytes holding samples the dictionary will be trained from. |
|
997 | 1073 | k |
|
998 | 1074 | Parameter to cover algorithm defining the segment size. A reasonable range |
|
999 | 1075 | is [16, 2048+]. |
|
1000 | 1076 | d |
|
1001 | 1077 | Parameter to cover algorithm defining the dmer size. A reasonable range is |
|
1002 | 1078 | [6, 16]. ``d`` must be less than or equal to ``k``. |
|
1003 | 1079 | dict_id |
|
1004 | 1080 | Integer dictionary ID for the produced dictionary. Default is 0, which uses |
|
1005 | 1081 | a random value. |
|
1006 | 1082 | steps |
|
1007 | 1083 | Number of steps through ``k`` values to perform when trying parameter |
|
1008 | 1084 | variations. |
|
1009 | 1085 | threads |
|
1010 | 1086 | Number of threads to use when trying parameter variations. Default is 0, |
|
1011 | 1087 | which means to use a single thread. A negative value can be specified to |
|
1012 | 1088 | use as many threads as there are detected logical CPUs. |
|
1013 | 1089 | level |
|
1014 | 1090 | Integer target compression level when trying parameter variations. |
|
1015 | 1091 | notifications |
|
1016 | 1092 | Controls writing of informational messages to ``stderr``. ``0`` (the |
|
1017 | 1093 | default) means to write nothing. ``1`` writes errors. ``2`` writes |
|
1018 | 1094 | progression info. ``3`` writes more details. And ``4`` writes all info. |
|
1019 | 1095 | |
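As a rough sketch of the above (the sample data here is toy and purely illustrative;
real training wants many representative samples from your corpus)::

    import zstandard as zstd

    samples = [('sample %d ' % i).encode('ascii') * 64 for i in range(1024)]

    # Omitting k/d/steps/threads engages *optimization* mode with defaults.
    dict_data = zstd.train_dictionary(8192, samples)
    print(dict_data.k, dict_data.d)

    # Or specify the cover parameters explicitly (non-optimization mode).
    dict_data = zstd.train_dictionary(8192, samples, k=64, d=8)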
|
1020 | 1096 | Explicit Compression Parameters |
|
1021 | 1097 | ------------------------------- |
|
1022 | 1098 | |
|
1023 | 1099 | Zstandard offers a high-level *compression level* that maps to lower-level |
|
1024 | 1100 | compression parameters. For many consumers, this numeric level is the only |
|
1025 | 1101 | compression setting you'll need to touch. |
|
1026 | 1102 | |
|
1027 | 1103 | But for advanced use cases, it might be desirable to tweak these lower-level |
|
1028 | 1104 | settings. |
|
1029 | 1105 | |
|
1030 | 1106 | The ``ZstdCompressionParameters`` type represents these low-level compression |
|
1031 | 1107 | settings. |
|
1032 | 1108 | |
|
1033 | 1109 | Instances of this type can be constructed from a myriad of keyword arguments |
|
1034 | 1110 | (defined below) for complete low-level control over each adjustable |
|
1035 | 1111 | compression setting. |
|
1036 | 1112 | |
|
1037 | 1113 | From a higher level, one can construct a ``ZstdCompressionParameters`` instance |
|
1038 | 1114 | given a desired compression level and target input and dictionary size |
|
1039 | 1115 | using ``ZstdCompressionParameters.from_level()``. e.g.:: |
|
1040 | 1116 | |
|
1041 | 1117 | # Derive compression settings for compression level 7. |
|
1042 | 1118 | params = zstd.ZstdCompressionParameters.from_level(7) |
|
1043 | 1119 | |
|
1044 | 1120 | # With an input size of 1MB |
|
1045 | 1121 | params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576) |
|
1046 | 1122 | |
|
1047 | 1123 | Using ``from_level()``, it is also possible to override individual compression |
|
1048 | 1124 | parameters or to define additional settings that aren't automatically derived. |
|
1049 | 1125 | e.g.:: |
|
1050 | 1126 | |
|
1051 | 1127 | params = zstd.ZstdCompressionParameters.from_level(4, window_log=10) |
|
1052 | 1128 | params = zstd.ZstdCompressionParameters.from_level(5, threads=4) |
|
1053 | 1129 | |
|
1054 | 1130 | Or you can define low-level compression settings directly:: |
|
1055 | 1131 | |
|
1056 | 1132 | params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True) |
|
1057 | 1133 | |
|
1058 | 1134 | Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to |
|
1059 | 1135 | configure a compressor:: |
|
1060 | 1136 | |
|
1061 | 1137 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
1062 | 1138 | |
|
1063 | 1139 | The named arguments and attributes of ``ZstdCompressionParameters`` are as |
|
1064 | 1140 | follows: |
|
1065 | 1141 | |
|
1066 | 1142 | * format |
|
1067 | 1143 | * compression_level |
|
1068 | 1144 | * window_log |
|
1069 | 1145 | * hash_log |
|
1070 | 1146 | * chain_log |
|
1071 | 1147 | * search_log |
|
1072 | 1148 | * min_match |
|
1073 | 1149 | * target_length |
|
1074 | 1150 | * compression_strategy |
|
1075 | 1151 | * write_content_size |
|
1076 | 1152 | * write_checksum |
|
1077 | 1153 | * write_dict_id |
|
1078 | 1154 | * job_size |
|
1079 | 1155 | * overlap_size_log |
|
1080 | * compress_literals | |
|
1081 | 1156 | * force_max_window |
|
1082 | 1157 | * enable_ldm |
|
1083 | 1158 | * ldm_hash_log |
|
1084 | 1159 | * ldm_min_match |
|
1085 | 1160 | * ldm_bucket_size_log |
|
1086 | 1161 | * ldm_hash_every_log |
|
1087 | 1162 | * threads |
|
1088 | 1163 | |
|
1089 | 1164 | Some of these are very low-level settings. It may help to consult the official |
|
1090 | 1165 | zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants |
|
1091 | 1166 | in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h). |
|
1092 | 1167 | |
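As an illustrative sketch (``data`` is a stand-in for real input), the derived
settings can be read back via the attributes above, and the
``estimated_compression_context_size()`` method reports how much memory a
compression context using these parameters is estimated to need::

    data = b'data that will eventually be compressed'

    params = zstd.ZstdCompressionParameters.from_level(3, source_size=len(data))
    print(params.window_log, params.chain_log, params.compression_strategy)
    print(params.estimated_compression_context_size())

    cctx = zstd.ZstdCompressor(compression_params=params)
    compressed = cctx.compress(data)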
|
1093 | 1168 | Frame Inspection |
|
1094 | 1169 | ---------------- |
|
1095 | 1170 | |
|
1096 | 1171 | Data emitted from zstd compression is encapsulated in a *frame*. This frame |
|
1097 | 1172 | begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing |
|
1098 | 1173 | the frame in more detail. For more info, see |
|
1099 | 1174 | https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md. |
|
1100 | 1175 | |
|
1101 | 1176 | ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes |
|
1102 | 1177 | instance and returns a ``FrameParameters`` object describing the frame. |
|
1103 | 1178 | |
|
1104 | 1179 | Depending on which fields are present in the frame and their values, the |
|
1105 | 1180 | length of the frame parameters varies. If insufficient bytes are passed |
|
1106 | 1181 | in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure |
|
1107 | 1182 | frame parameters can be parsed, pass in at least 18 bytes. |
|
1108 | 1183 | |
|
1109 | 1184 | ``FrameParameters`` instances have the following attributes: |
|
1110 | 1185 | |
|
1111 | 1186 | content_size |
|
1112 | 1187 | Integer size of original, uncompressed content. This will be ``0`` if the |
|
1113 | 1188 | original content size isn't written to the frame (controlled with the |
|
1114 | 1189 | ``write_content_size`` argument to ``ZstdCompressor``) or if the input |
|
1115 | 1190 | content size was ``0``. |
|
1116 | 1191 | |
|
1117 | 1192 | window_size |
|
1118 | 1193 | Integer size of maximum back-reference distance in compressed data. |
|
1119 | 1194 | |
|
1120 | 1195 | dict_id |
|
1121 | 1196 | Integer of dictionary ID used for compression. ``0`` if no dictionary |
|
1122 | 1197 | ID was used or if the dictionary ID was ``0``. |
|
1123 | 1198 | |
|
1124 | 1199 | has_checksum |
|
1125 | 1200 | Bool indicating whether a 4 byte content checksum is stored at the end |
|
1126 | 1201 | of the frame. |
|
1127 | 1202 | |
|
1128 | 1203 | ``zstd.frame_header_size(data)`` returns the size of the zstandard frame |
|
1129 | 1204 | header. |
|
1130 | 1205 | |
|
1131 | 1206 | ``zstd.frame_content_size(data)`` returns the content size as parsed from |
|
1132 | 1207 | the frame header. ``-1`` means the content size is unknown. ``0`` means |
|
1133 | 1208 | an empty frame. The reported content size is usually correct. However, it is |
|
1134 | 1209 | not guaranteed to be accurate. |
|
1135 | 1210 | |
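Putting the above together, a short sketch of inspecting a frame (the compressor
settings here are arbitrary)::

    cctx = zstd.ZstdCompressor(write_checksum=True)
    frame = cctx.compress(b'data to frame')

    params = zstd.get_frame_parameters(frame)
    print(params.content_size, params.window_size, params.dict_id,
          params.has_checksum)

    print(zstd.frame_header_size(frame))
    print(zstd.frame_content_size(frame))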
|
1136 | 1211 | Misc Functionality |
|
1137 | 1212 | ------------------ |
|
1138 | 1213 | |
|
1139 | 1214 | estimate_decompression_context_size() |
|
1140 | 1215 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
1141 | 1216 | |
|
1142 | 1217 | Estimate the memory size requirements for a decompressor instance. |
|
1143 | 1218 | |
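For example, to size a memory budget::

    # Rough number of bytes a ZstdDecompressor's context will require.
    print(zstd.estimate_decompression_context_size())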
|
1144 | 1219 | Constants |
|
1145 | 1220 | --------- |
|
1146 | 1221 | |
|
1147 | 1222 | The following module constants/attributes are exposed: |
|
1148 | 1223 | |
|
1149 | 1224 | ZSTD_VERSION |
|
1150 | 1225 | This module attribute exposes a 3-tuple of the Zstandard version. e.g. |
|
1151 | 1226 | ``(1, 0, 0)`` |
|
1152 | 1227 | MAX_COMPRESSION_LEVEL |
|
1153 | 1228 | Integer max compression level accepted by compression functions |
|
1154 | 1229 | COMPRESSION_RECOMMENDED_INPUT_SIZE |
|
1155 | 1230 | Recommended chunk size to feed to compressor functions |
|
1156 | 1231 | COMPRESSION_RECOMMENDED_OUTPUT_SIZE |
|
1157 | 1232 | Recommended chunk size for compression output |
|
1158 | 1233 | DECOMPRESSION_RECOMMENDED_INPUT_SIZE |
|
1159 | 1234 | Recommended chunk size to feed into decompressor functions |
|
1160 | 1235 | DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE |
|
1161 | 1236 | Recommended chunk size for decompression output |
|
1162 | 1237 | |
|
1163 | 1238 | FRAME_HEADER |
|
1164 | 1239 | bytes containing header of the Zstandard frame |
|
1165 | 1240 | MAGIC_NUMBER |
|
1166 | 1241 | Frame header as an integer |
|
1167 | 1242 | |
|
1168 | 1243 | CONTENTSIZE_UNKNOWN |
|
1169 | 1244 | Value for content size when the content size is unknown. |
|
1170 | 1245 | CONTENTSIZE_ERROR |
|
1171 | 1246 | Value for content size when content size couldn't be determined. |
|
1172 | 1247 | |
|
1173 | 1248 | WINDOWLOG_MIN |
|
1174 | 1249 | Minimum value for compression parameter |
|
1175 | 1250 | WINDOWLOG_MAX |
|
1176 | 1251 | Maximum value for compression parameter |
|
1177 | 1252 | CHAINLOG_MIN |
|
1178 | 1253 | Minimum value for compression parameter |
|
1179 | 1254 | CHAINLOG_MAX |
|
1180 | 1255 | Maximum value for compression parameter |
|
1181 | 1256 | HASHLOG_MIN |
|
1182 | 1257 | Minimum value for compression parameter |
|
1183 | 1258 | HASHLOG_MAX |
|
1184 | 1259 | Maximum value for compression parameter |
|
1185 | 1260 | SEARCHLOG_MIN |
|
1186 | 1261 | Minimum value for compression parameter |
|
1187 | 1262 | SEARCHLOG_MAX |
|
1188 | 1263 | Maximum value for compression parameter |
|
1189 | 1264 | SEARCHLENGTH_MIN |
|
1190 | 1265 | Minimum value for compression parameter |
|
1191 | 1266 | SEARCHLENGTH_MAX |
|
1192 | 1267 | Maximum value for compression parameter |
|
1193 | 1268 | TARGETLENGTH_MIN |
|
1194 | 1269 | Minimum value for compression parameter |
|
1195 | 1270 | STRATEGY_FAST |
|
1196 | 1271 | Compression strategy |
|
1197 | 1272 | STRATEGY_DFAST |
|
1198 | 1273 | Compression strategy |
|
1199 | 1274 | STRATEGY_GREEDY |
|
1200 | 1275 | Compression strategy |
|
1201 | 1276 | STRATEGY_LAZY |
|
1202 | 1277 | Compression strategy |
|
1203 | 1278 | STRATEGY_LAZY2 |
|
1204 | 1279 | Compression strategy |
|
1205 | 1280 | STRATEGY_BTLAZY2 |
|
1206 | 1281 | Compression strategy |
|
1207 | 1282 | STRATEGY_BTOPT |
|
1208 | 1283 | Compression strategy |
|
1209 | 1284 | STRATEGY_BTULTRA |
|
1210 | 1285 | Compression strategy |
|
1211 | 1286 | |
|
1212 | 1287 | FORMAT_ZSTD1 |
|
1213 | 1288 | Zstandard frame format |
|
1214 | 1289 | FORMAT_ZSTD1_MAGICLESS |
|
1215 | 1290 | Zstandard frame format without magic header |
|
1216 | 1291 | |
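As an illustrative sketch, some of these constants are handy for cheap sanity
checks (``looks_like_zstd_frame()`` is a hypothetical helper, not part of this
module)::

    import zstandard as zstd

    def looks_like_zstd_frame(data):
        # FRAME_HEADER contains the magic bytes that begin a zstd frame.
        return data[:len(zstd.FRAME_HEADER)] == zstd.FRAME_HEADER

    print(zstd.ZSTD_VERSION)

    frame = zstd.ZstdCompressor().compress(b'payload')
    print(looks_like_zstd_frame(frame))             # True
    print(looks_like_zstd_frame(b'not zstd data'))  # False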
|
1217 | 1292 | Performance Considerations |
|
1218 | 1293 | -------------------------- |
|
1219 | 1294 | |
|
1220 | 1295 | The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a |
|
1221 | 1296 | persistent compression or decompression *context*. Reusing a ``ZstdCompressor`` |
|
1222 | 1297 | or ``ZstdDecompressor`` instance for multiple operations is faster than |
|
1223 | 1298 | instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each |
|
1224 | 1299 | operation. The differences are magnified as the size of data decreases. For |
|
1225 | 1300 | example, the difference between *context* reuse and non-reuse for 100,000 |
|
1226 | 1301 | 100 byte inputs will be significant (possibly over 10x faster to reuse contexts) |
|
1227 | 1302 | whereas 10 100,000,000 byte inputs will be more similar in speed (because the |
|
1228 | 1303 | time spent doing compression dwarfs time spent creating new *contexts*). |
|
1229 | 1304 | |
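A sketch of the reuse pattern described above (``chunks`` is a stand-in for many
small inputs)::

    chunks = [('record %d' % i).encode('ascii') for i in range(100000)]

    # Create the compressor once and reuse its context for every input.
    cctx = zstd.ZstdCompressor()
    compressed = [cctx.compress(chunk) for chunk in chunks]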
|
1230 | 1305 | Buffer Types |
|
1231 | 1306 | ------------ |
|
1232 | 1307 | |
|
1233 | 1308 | The API exposes a handful of custom types for interfacing with memory buffers. |
|
1234 | 1309 | The primary goal of these types is to facilitate efficient multi-object |
|
1235 | 1310 | operations. |
|
1236 | 1311 | |
|
1237 | 1312 | The essential idea is to have a single memory allocation provide backing |
|
1238 | 1313 | storage for multiple logical objects. This has 2 main advantages: fewer |
|
1239 | 1314 | allocations and optimal memory access patterns. This avoids having to allocate |
|
1240 | 1315 | a Python object for each logical object and furthermore ensures that access of |
|
1241 | 1316 | data for objects can be sequential (read: fast) in memory. |
|
1242 | 1317 | |
|
1243 | 1318 | BufferWithSegments |
|
1244 | 1319 | ^^^^^^^^^^^^^^^^^^ |
|
1245 | 1320 | |
|
1246 | 1321 | The ``BufferWithSegments`` type represents a memory buffer containing N |
|
1247 | 1322 | discrete items of known lengths (segments). It is essentially a fixed size |
|
1248 | 1323 | memory address and an array of 2-tuples of ``(offset, length)`` 64-bit |
|
1249 | 1324 | unsigned native endian integers defining the byte offset and length of each |
|
1250 | 1325 | segment within the buffer. |
|
1251 | 1326 | |
|
1252 | 1327 | Instances behave like containers. |
|
1253 | 1328 | |
|
1254 | 1329 | ``len()`` returns the number of segments within the instance. |
|
1255 | 1330 | |
|
1256 | 1331 | ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an |
|
1257 | 1332 | individual segment within the backing buffer. That returned object references |
|
1258 | 1333 | (not copies) memory. This means that iterating all objects doesn't copy |
|
1259 | 1334 | data within the buffer. |
|
1260 | 1335 | |
|
1261 | 1336 | The ``.size`` attribute contains the total size in bytes of the backing |
|
1262 | 1337 | buffer. |
|
1263 | 1338 | |
|
1264 | 1339 | Instances conform to the buffer protocol. So a reference to the backing bytes |
|
1265 | 1340 | can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also |
|
1266 | 1341 | be obtained via ``.tobytes()``. |
|
1267 | 1342 | |
|
1268 | 1343 | The ``.segments`` attribute exposes the array of ``(offset, length)`` for |
|
1269 | 1344 | segments within the buffer. It is a ``BufferSegments`` type. |
|
1270 | 1345 | |
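A small sketch of these behaviors. The two-argument constructor shown here
(a backing buffer plus packed ``(offset, length)`` pairs) is an assumption based
on the description above; treat it as illustrative::

    import struct
    import zstandard as zstd

    backing = b'foofoobar'
    # Two segments: (offset=0, length=3) and (offset=3, length=6),
    # packed as native-endian unsigned 64-bit integer pairs.
    segments = struct.pack('=QQQQ', 0, 3, 3, 6)

    buf = zstd.BufferWithSegments(backing, segments)

    print(len(buf))            # 2 segments
    print(buf.size)            # 9 bytes in the backing buffer
    print(buf[1].tobytes())    # b'foobar' (copy of the second segment)
    view = memoryview(buf)     # zero-copy view of the whole backing buffer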
|
1271 | 1346 | BufferSegment |
|
1272 | 1347 | ^^^^^^^^^^^^^ |
|
1273 | 1348 | |
|
1274 | 1349 | The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``. |
|
1275 | 1350 | It is essentially a reference to N bytes within a ``BufferWithSegments``. |
|
1276 | 1351 | |
|
1277 | 1352 | ``len()`` returns the length of the segment in bytes. |
|
1278 | 1353 | |
|
1279 | 1354 | ``.offset`` contains the byte offset of this segment within its parent |
|
1280 | 1355 | ``BufferWithSegments`` instance. |
|
1281 | 1356 | |
|
1282 | 1357 | The object conforms to the buffer protocol. ``.tobytes()`` can be called to |
|
1283 | 1358 | obtain a ``bytes`` instance with a copy of the backing bytes. |
|
1284 | 1359 | |
|
1285 | 1360 | BufferSegments |
|
1286 | 1361 | ^^^^^^^^^^^^^^ |
|
1287 | 1362 | |
|
1288 | 1363 | This type represents an array of ``(offset, length)`` integers defining segments |
|
1289 | 1364 | within a ``BufferWithSegments``. |
|
1290 | 1365 | |
|
1291 | 1366 | The array members are 64-bit unsigned integers using host/native bit order. |
|
1292 | 1367 | |
|
1293 | 1368 | Instances conform to the buffer protocol. |
|
1294 | 1369 | |
|
1295 | 1370 | BufferWithSegmentsCollection |
|
1296 | 1371 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
1297 | 1372 | |
|
1298 | 1373 | The ``BufferWithSegmentsCollection`` type represents a virtual spanning view |
|
1299 | 1374 | of multiple ``BufferWithSegments`` instances. |
|
1300 | 1375 | |
|
1301 | 1376 | Instances are constructed from 1 or more ``BufferWithSegments`` instances. The |
|
1302 | 1377 | resulting object behaves like an ordered sequence whose members are the |
|
1303 | 1378 | segments within each ``BufferWithSegments``. |
|
1304 | 1379 | |
|
1305 | 1380 | ``len()`` returns the number of segments within all ``BufferWithSegments`` |
|
1306 | 1381 | instances. |
|
1307 | 1382 | |
|
1308 | 1383 | ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at |
|
1309 | 1384 | that offset as if all ``BufferWithSegments`` instances were a single |
|
1310 | 1385 | entity. |
|
1311 | 1386 | |
|
1312 | 1387 | If the object is composed of 2 ``BufferWithSegments`` instances with the |
|
1313 | 1388 | first having 2 segments and the second having 3 segments, then ``b[0]`` |
|
1314 | 1389 | and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``, |
|
1315 | 1390 | and ``b[4]`` access segments from the second. |
|
1316 | 1391 | |
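Continuing that hypothetical (``b1`` and ``b2`` are ``BufferWithSegments``
instances with 2 and 3 segments respectively; the constructor usage is an
assumption), a sketch of the spanning behavior::

    collection = zstd.BufferWithSegmentsCollection(b1, b2)

    print(len(collection))    # 5: all segments across both buffers
    first = collection[0]     # first segment of b1
    last = collection[4]      # last segment of b2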
|
1317 | 1392 | Choosing an API |
|
1318 | 1393 | =============== |
|
1319 | 1394 | |
|
1320 | 1395 | There are multiple APIs for performing compression and decompression. This is |
|
1321 | 1396 | because different applications have different needs and the library wants to |
|
1322 | 1397 | facilitate optimal use in as many use cases as possible. |
|
1323 | 1398 | |
|
1324 | 1399 | From a high-level, APIs are divided into *one-shot* and *streaming*: either you |
|
1325 | 1400 | are operating on all data at once or you operate on it piecemeal. |
|
1326 | 1401 | |
|
1327 | 1402 | The *one-shot* APIs are useful for small data, where the input or output |
|
1328 | 1403 | size is known. (The size can come from a buffer length, file size, or be |
|
1329 | 1404 | stored in the zstd frame header.) A limitation of the *one-shot* APIs is that |
|
1330 | 1405 | input and output must fit in memory simultaneously. For, say, a 4 GB input, |
|
1331 | 1406 | this is often not feasible. |
|
1332 | 1407 | |
|
1333 | 1408 | The *one-shot* APIs also perform all work as a single operation. So, if you |
|
1334 | 1409 | feed it large input, it could take a long time for the function to return. |
|
1335 | 1410 | |
|
1336 | 1411 | The streaming APIs do not have the limitations of the *one-shot* APIs. But the |
|
1337 | 1412 | price you pay for this flexibility is that they are more complex than a |
|
1338 | 1413 | single function call. |
|
1339 | 1414 | |
|
1340 | 1415 | The streaming APIs put the caller in control of compression and decompression |
|
1341 | 1416 | behavior by allowing them to directly control either the input or output side |
|
1342 | 1417 | of the operation. |
|
1343 | 1418 | |
|
1344 | 1419 | With the *streaming input*, *compressor*, and *decompressor* APIs, the caller |
|
1345 | 1420 | has full control over the input to the compression or decompression stream. |
|
1346 | 1421 | They can directly choose when new data is operated on. |
|
1347 | 1422 | |
|
1348 | 1423 | With the *streaming output* APIs, the caller has full control over the output |
|
1349 | 1424 | of the compression or decompression stream. It can choose when to receive |
|
1350 | 1425 | new data. |
|
1351 | 1426 | |
|
1352 | 1427 | When using the *streaming* APIs that operate on file-like or stream objects, |
|
1353 | 1428 | it is important to consider what happens in that object when I/O is requested. |
|
1354 | 1429 | There is potential for long pauses as data is read or written from the |
|
1355 | 1430 | underlying stream (say from interacting with a filesystem or network). This |
|
1356 | 1431 | could add considerable overhead. |
|
1357 | 1432 | |
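A rough sketch contrasting the two families (file names are illustrative)::

    cctx = zstd.ZstdCompressor()

    # One-shot: input and output are both fully in memory.
    compressed = cctx.compress(b'a small payload')

    # Streaming: operate piecemeal, e.g. copying between file-like objects
    # without holding either side fully in memory.
    with open('input.bin', 'rb') as ifh, open('output.zst', 'wb') as ofh:
        cctx.copy_stream(ifh, ofh)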
|
1358 | 1433 | Thread Safety |
|
1359 | 1434 | ============= |
|
1360 | 1435 | |
|
1361 | 1436 | ``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees |
|
1362 | 1437 | about thread safety. Do not operate on the same ``ZstdCompressor`` and |
|
1363 | 1438 | ``ZstdDecompressor`` instance simultaneously from different threads. It is |
|
1364 | 1439 | fine to have different threads call into a single instance, just not at the |
|
1365 | 1440 | same time. |
|
1366 | 1441 | |
|
1367 | 1442 | Some operations require multiple function calls to complete. e.g. streaming |
|
1368 | 1443 | operations. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used |
|
1369 | 1444 | for simultaneously active operations. e.g. you must not start a streaming |
|
1370 | 1445 | operation when another streaming operation is already active. |
|
1371 | 1446 | |
|
1372 | 1447 | The C extension releases the GIL during non-trivial calls into the zstd C |
|
1373 | 1448 | API. Non-trivial calls are notably compression and decompression. Trivial |
|
1374 | 1449 | calls are things like parsing frame parameters. Where the GIL is released |
|
1375 | 1450 | is considered an implementation detail and can change in any release. |
|
1376 | 1451 | |
|
1377 | 1452 | APIs that accept bytes-like objects don't enforce that the underlying object |
|
1378 | 1453 | is read-only. However, it is assumed that the passed object is read-only for |
|
1379 | 1454 | the duration of the function call. It is possible to pass a mutable object |
|
1380 | 1455 | (like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL |
|
1381 | 1456 | released, and mutate the object from another thread. Such a race condition |
|
1382 | 1457 | is a bug in the consumer of python-zstandard. Most Python data types are |
|
1383 | 1458 | immutable, so unless you are doing something fancy, you don't need to |
|
1384 | 1459 | worry about this. |
|
1385 | 1460 | |
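One way to honor these rules in threaded code (a sketch, not the only approach)
is to give each thread its own instances via ``threading.local``::

    import threading

    import zstandard as zstd

    _local = threading.local()

    def compress(data):
        # Lazily create one ZstdCompressor per thread so no instance is
        # ever used from two threads at the same time.
        cctx = getattr(_local, 'cctx', None)
        if cctx is None:
            cctx = _local.cctx = zstd.ZstdCompressor()
        return cctx.compress(data)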
|
1386 | 1461 | Note on Zstandard's *Experimental* API |
|
1387 | 1462 | ====================================== |
|
1388 | 1463 | |
|
1389 | 1464 | Many of the Zstandard APIs used by this module are marked as *experimental* |
|
1390 | 1465 | within the Zstandard project. |
|
1391 | 1466 | |
|
1392 | 1467 | It is unclear how Zstandard's C API will evolve over time, especially with |
|
1393 | 1468 | regards to this *experimental* functionality. We will try to maintain |
|
1394 | 1469 | backwards compatibility at the Python API level. However, we cannot |
|
1395 | 1470 | guarantee this for things not under our control. |
|
1396 | 1471 | |
|
1397 | 1472 | Since a copy of the Zstandard source code is distributed with this |
|
1398 | 1473 | module and since we compile against it, the behavior of a specific |
|
1399 | 1474 | version of this module should be constant for all of time. So if you |
|
1400 | 1475 | pin the version of this module used in your projects (which is a Python |
|
1401 | 1476 | best practice), you should be shielded from unwanted future changes. |
|
1402 | 1477 | |
|
1403 | 1478 | Donate |
|
1404 | 1479 | ====== |
|
1405 | 1480 | |
|
1406 | 1481 | A lot of time has been invested into this project by the author. |
|
1407 | 1482 | |
|
1408 | 1483 | If you find this project useful and would like to thank the author for |
|
1409 | 1484 | their work, consider donating some money. Any amount is appreciated. |
|
1410 | 1485 | |
|
1411 | 1486 | .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif |
|
1412 | 1487 | :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard¤cy_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted |
|
1413 | 1488 | :alt: Donate via PayPal |
|
1414 | 1489 | |
|
1415 | 1490 | .. |ci-status| image:: https://travis-ci.org/indygreg/python-zstandard.svg?branch=master |
|
1416 | 1491 | :target: https://travis-ci.org/indygreg/python-zstandard |
|
1417 | 1492 | |
|
1418 | 1493 | .. |win-ci-status| image:: https://ci.appveyor.com/api/projects/status/github/indygreg/python-zstandard?svg=true |
|
1419 | 1494 | :target: https://ci.appveyor.com/project/indygreg/python-zstandard |
|
1420 | 1495 | :alt: Windows build status |
@@ -1,502 +1,477 | |||
|
1 | 1 | /** |
|
2 | 2 | * Copyright (c) 2016-present, Gregory Szorc |
|
3 | 3 | * All rights reserved. |
|
4 | 4 | * |
|
5 | 5 | * This software may be modified and distributed under the terms |
|
6 | 6 | * of the BSD license. See the LICENSE file for details. |
|
7 | 7 | */ |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 | 11 | extern PyObject* ZstdError; |
|
12 | 12 | |
|
13 | 13 | int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value) { |
|
14 | 14 | size_t zresult = ZSTD_CCtxParam_setParameter(params, param, value); |
|
15 | 15 | if (ZSTD_isError(zresult)) { |
|
16 | 16 | PyErr_Format(ZstdError, "unable to set compression context parameter: %s", |
|
17 | 17 | ZSTD_getErrorName(zresult)); |
|
18 | 18 | return 1; |
|
19 | 19 | } |
|
20 | 20 | |
|
21 | 21 | return 0; |
|
22 | 22 | } |
|
23 | 23 | |
|
24 | 24 | #define TRY_SET_PARAMETER(params, param, value) if (set_parameter(params, param, value)) return -1; |
|
25 | 25 | |
|
26 | 26 | int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) { |
|
27 | 27 | TRY_SET_PARAMETER(params, ZSTD_p_format, obj->format); |
|
28 | 28 | TRY_SET_PARAMETER(params, ZSTD_p_compressionLevel, (unsigned)obj->compressionLevel); |
|
29 | 29 | TRY_SET_PARAMETER(params, ZSTD_p_windowLog, obj->windowLog); |
|
30 | 30 | TRY_SET_PARAMETER(params, ZSTD_p_hashLog, obj->hashLog); |
|
31 | 31 | TRY_SET_PARAMETER(params, ZSTD_p_chainLog, obj->chainLog); |
|
32 | 32 | TRY_SET_PARAMETER(params, ZSTD_p_searchLog, obj->searchLog); |
|
33 | 33 | TRY_SET_PARAMETER(params, ZSTD_p_minMatch, obj->minMatch); |
|
34 | 34 | TRY_SET_PARAMETER(params, ZSTD_p_targetLength, obj->targetLength); |
|
35 | 35 | TRY_SET_PARAMETER(params, ZSTD_p_compressionStrategy, obj->compressionStrategy); |
|
36 | 36 | TRY_SET_PARAMETER(params, ZSTD_p_contentSizeFlag, obj->contentSizeFlag); |
|
37 | 37 | TRY_SET_PARAMETER(params, ZSTD_p_checksumFlag, obj->checksumFlag); |
|
38 | 38 | TRY_SET_PARAMETER(params, ZSTD_p_dictIDFlag, obj->dictIDFlag); |
|
39 | 39 | TRY_SET_PARAMETER(params, ZSTD_p_nbWorkers, obj->threads); |
|
40 | 40 | TRY_SET_PARAMETER(params, ZSTD_p_jobSize, obj->jobSize); |
|
41 | 41 | TRY_SET_PARAMETER(params, ZSTD_p_overlapSizeLog, obj->overlapSizeLog); |
|
42 | TRY_SET_PARAMETER(params, ZSTD_p_compressLiterals, obj->compressLiterals); | |
|
43 | 42 | TRY_SET_PARAMETER(params, ZSTD_p_forceMaxWindow, obj->forceMaxWindow); |
|
44 | 43 | TRY_SET_PARAMETER(params, ZSTD_p_enableLongDistanceMatching, obj->enableLongDistanceMatching); |
|
45 | 44 | TRY_SET_PARAMETER(params, ZSTD_p_ldmHashLog, obj->ldmHashLog); |
|
46 | 45 | TRY_SET_PARAMETER(params, ZSTD_p_ldmMinMatch, obj->ldmMinMatch); |
|
47 | 46 | TRY_SET_PARAMETER(params, ZSTD_p_ldmBucketSizeLog, obj->ldmBucketSizeLog); |
|
48 | 47 | TRY_SET_PARAMETER(params, ZSTD_p_ldmHashEveryLog, obj->ldmHashEveryLog); |
|
49 | 48 | |
|
50 | 49 | return 0; |
|
51 | 50 | } |
|
52 | 51 | |
|
53 | 52 | int reset_params(ZstdCompressionParametersObject* params) { |
|
54 | 53 | if (params->params) { |
|
55 | 54 | ZSTD_CCtxParams_reset(params->params); |
|
56 | 55 | } |
|
57 | 56 | else { |
|
58 | 57 | params->params = ZSTD_createCCtxParams(); |
|
59 | 58 | if (!params->params) { |
|
60 | 59 | PyErr_NoMemory(); |
|
61 | 60 | return 1; |
|
62 | 61 | } |
|
63 | 62 | } |
|
64 | 63 | |
|
65 | 64 | return set_parameters(params->params, params); |
|
66 | 65 | } |
|
67 | 66 | |
|
68 | 67 | static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) { |
|
69 | 68 | static char* kwlist[] = { |
|
70 | 69 | "format", |
|
71 | 70 | "compression_level", |
|
72 | 71 | "window_log", |
|
73 | 72 | "hash_log", |
|
74 | 73 | "chain_log", |
|
75 | 74 | "search_log", |
|
76 | 75 | "min_match", |
|
77 | 76 | "target_length", |
|
78 | 77 | "compression_strategy", |
|
79 | 78 | "write_content_size", |
|
80 | 79 | "write_checksum", |
|
81 | 80 | "write_dict_id", |
|
82 | 81 | "job_size", |
|
83 | 82 | "overlap_size_log", |
|
84 | 83 | "force_max_window", |
|
85 | 84 | "enable_ldm", |
|
86 | 85 | "ldm_hash_log", |
|
87 | 86 | "ldm_min_match", |
|
88 | 87 | "ldm_bucket_size_log", |
|
89 | 88 | "ldm_hash_every_log", |
|
90 | 89 | "threads", |
|
91 | "compress_literals", | |
|
92 | 90 | NULL |
|
93 | 91 | }; |
|
94 | 92 | |
|
95 | 93 | unsigned format = 0; |
|
96 | 94 | int compressionLevel = 0; |
|
97 | 95 | unsigned windowLog = 0; |
|
98 | 96 | unsigned hashLog = 0; |
|
99 | 97 | unsigned chainLog = 0; |
|
100 | 98 | unsigned searchLog = 0; |
|
101 | 99 | unsigned minMatch = 0; |
|
102 | 100 | unsigned targetLength = 0; |
|
103 | 101 | unsigned compressionStrategy = 0; |
|
104 | 102 | unsigned contentSizeFlag = 1; |
|
105 | 103 | unsigned checksumFlag = 0; |
|
106 | 104 | unsigned dictIDFlag = 0; |
|
107 | 105 | unsigned jobSize = 0; |
|
108 | 106 | unsigned overlapSizeLog = 0; |
|
109 | 107 | unsigned forceMaxWindow = 0; |
|
110 | 108 | unsigned enableLDM = 0; |
|
111 | 109 | unsigned ldmHashLog = 0; |
|
112 | 110 | unsigned ldmMinMatch = 0; |
|
113 | 111 | unsigned ldmBucketSizeLog = 0; |
|
114 | 112 | unsigned ldmHashEveryLog = 0; |
|
115 | 113 | int threads = 0; |
|
116 | 114 | |
|
117 | /* Setting value 0 has the effect of disabling. So we use -1 as a default | |
|
118 | * to detect whether to set. Then we automatically derive the expected value | |
|
119 | * based on the level, just like zstandard does itself. */ | |
|
120 | int compressLiterals = -1; | |
|
121 | ||
|
122 | 115 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, |
|
123 | "|IiIIIIIIIIIIIIIIIIIIi |
|
116 | "|IiIIIIIIIIIIIIIIIIIIi:CompressionParameters", |
|
124 | 117 | kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog, |
|
125 | 118 | &searchLog, &minMatch, &targetLength, &compressionStrategy, |
|
126 | 119 | &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapSizeLog, |
|
127 | 120 | &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch, &ldmBucketSizeLog, |
|
128 | &ldmHashEveryLog, &threads, &compressLiterals)) { |
|
121 | &ldmHashEveryLog, &threads)) { |
|
129 | 122 | return -1; |
|
130 | 123 | } |
|
131 | 124 | |
|
132 | 125 | if (threads < 0) { |
|
133 | 126 | threads = cpu_count(); |
|
134 | 127 | } |
|
135 | 128 | |
|
136 | if (compressLiterals < 0) { | |
|
137 | compressLiterals = compressionLevel >= 0; | |
|
138 | } | |
|
139 | ||
|
140 | 129 | self->format = format; |
|
141 | 130 | self->compressionLevel = compressionLevel; |
|
142 | 131 | self->windowLog = windowLog; |
|
143 | 132 | self->hashLog = hashLog; |
|
144 | 133 | self->chainLog = chainLog; |
|
145 | 134 | self->searchLog = searchLog; |
|
146 | 135 | self->minMatch = minMatch; |
|
147 | 136 | self->targetLength = targetLength; |
|
148 | 137 | self->compressionStrategy = compressionStrategy; |
|
149 | 138 | self->contentSizeFlag = contentSizeFlag; |
|
150 | 139 | self->checksumFlag = checksumFlag; |
|
151 | 140 | self->dictIDFlag = dictIDFlag; |
|
152 | 141 | self->threads = threads; |
|
153 | 142 | self->jobSize = jobSize; |
|
154 | 143 | self->overlapSizeLog = overlapSizeLog; |
|
155 | self->compressLiterals = compressLiterals; | |
|
156 | 144 | self->forceMaxWindow = forceMaxWindow; |
|
157 | 145 | self->enableLongDistanceMatching = enableLDM; |
|
158 | 146 | self->ldmHashLog = ldmHashLog; |
|
159 | 147 | self->ldmMinMatch = ldmMinMatch; |
|
160 | 148 | self->ldmBucketSizeLog = ldmBucketSizeLog; |
|
161 | 149 | self->ldmHashEveryLog = ldmHashEveryLog; |
|
162 | 150 | |
|
163 | 151 | if (reset_params(self)) { |
|
164 | 152 | return -1; |
|
165 | 153 | } |
|
166 | 154 | |
|
167 | 155 | return 0; |
|
168 | 156 | } |
|
169 | 157 | |
|
170 | 158 | PyDoc_STRVAR(ZstdCompressionParameters_from_level__doc__, |
|
171 | 159 | "Create a CompressionParameters from a compression level and target sizes\n" |
|
172 | 160 | ); |
|
173 | 161 | |
|
174 | 162 | ZstdCompressionParametersObject* CompressionParameters_from_level(PyObject* undef, PyObject* args, PyObject* kwargs) { |
|
175 | 163 | int managedKwargs = 0; |
|
176 | 164 | int level; |
|
177 | 165 | PyObject* sourceSize = NULL; |
|
178 | 166 | PyObject* dictSize = NULL; |
|
179 | 167 | unsigned PY_LONG_LONG iSourceSize = 0; |
|
180 | 168 | Py_ssize_t iDictSize = 0; |
|
181 | 169 | PyObject* val; |
|
182 | 170 | ZSTD_compressionParameters params; |
|
183 | 171 | ZstdCompressionParametersObject* result = NULL; |
|
184 | 172 | int res; |
|
185 | 173 | |
|
186 | 174 | if (!PyArg_ParseTuple(args, "i:from_level", |
|
187 | 175 | &level)) { |
|
188 | 176 | return NULL; |
|
189 | 177 | } |
|
190 | 178 | |
|
191 | 179 | if (!kwargs) { |
|
192 | 180 | kwargs = PyDict_New(); |
|
193 | 181 | if (!kwargs) { |
|
194 | 182 | return NULL; |
|
195 | 183 | } |
|
196 | 184 | managedKwargs = 1; |
|
197 | 185 | } |
|
198 | 186 | |
|
199 | 187 | sourceSize = PyDict_GetItemString(kwargs, "source_size"); |
|
200 | 188 | if (sourceSize) { |
|
201 | 189 | #if PY_MAJOR_VERSION >= 3 |
|
202 | 190 | iSourceSize = PyLong_AsUnsignedLongLong(sourceSize); |
|
203 | 191 | if (iSourceSize == (unsigned PY_LONG_LONG)(-1)) { |
|
204 | 192 | goto cleanup; |
|
205 | 193 | } |
|
206 | 194 | #else |
|
207 | 195 | iSourceSize = PyInt_AsUnsignedLongLongMask(sourceSize); |
|
208 | 196 | #endif |
|
209 | 197 | |
|
210 | 198 | PyDict_DelItemString(kwargs, "source_size"); |
|
211 | 199 | } |
|
212 | 200 | |
|
213 | 201 | dictSize = PyDict_GetItemString(kwargs, "dict_size"); |
|
214 | 202 | if (dictSize) { |
|
215 | 203 | #if PY_MAJOR_VERSION >= 3 |
|
216 | 204 | iDictSize = PyLong_AsSsize_t(dictSize); |
|
217 | 205 | #else |
|
218 | 206 | iDictSize = PyInt_AsSsize_t(dictSize); |
|
219 | 207 | #endif |
|
220 | 208 | if (iDictSize == -1) { |
|
221 | 209 | goto cleanup; |
|
222 | 210 | } |
|
223 | 211 | |
|
224 | 212 | PyDict_DelItemString(kwargs, "dict_size"); |
|
225 | 213 | } |
|
226 | 214 | |
|
227 | 215 | |
|
228 | 216 | params = ZSTD_getCParams(level, iSourceSize, iDictSize); |
|
229 | 217 | |
|
230 | 218 | /* Values derived from the input level and sizes are passed along to the |
|
231 | 219 | constructor. But only if a value doesn't already exist. */ |
|
232 | 220 | val = PyDict_GetItemString(kwargs, "window_log"); |
|
233 | 221 | if (!val) { |
|
234 | 222 | val = PyLong_FromUnsignedLong(params.windowLog); |
|
235 | 223 | if (!val) { |
|
236 | 224 | goto cleanup; |
|
237 | 225 | } |
|
238 | 226 | PyDict_SetItemString(kwargs, "window_log", val); |
|
239 | 227 | Py_DECREF(val); |
|
240 | 228 | } |
|
241 | 229 | |
|
242 | 230 | val = PyDict_GetItemString(kwargs, "chain_log"); |
|
243 | 231 | if (!val) { |
|
244 | 232 | val = PyLong_FromUnsignedLong(params.chainLog); |
|
245 | 233 | if (!val) { |
|
246 | 234 | goto cleanup; |
|
247 | 235 | } |
|
248 | 236 | PyDict_SetItemString(kwargs, "chain_log", val); |
|
249 | 237 | Py_DECREF(val); |
|
250 | 238 | } |
|
251 | 239 | |
|
252 | 240 | val = PyDict_GetItemString(kwargs, "hash_log"); |
|
253 | 241 | if (!val) { |
|
254 | 242 | val = PyLong_FromUnsignedLong(params.hashLog); |
|
255 | 243 | if (!val) { |
|
256 | 244 | goto cleanup; |
|
257 | 245 | } |
|
258 | 246 | PyDict_SetItemString(kwargs, "hash_log", val); |
|
259 | 247 | Py_DECREF(val); |
|
260 | 248 | } |
|
261 | 249 | |
|
262 | 250 | val = PyDict_GetItemString(kwargs, "search_log"); |
|
263 | 251 | if (!val) { |
|
264 | 252 | val = PyLong_FromUnsignedLong(params.searchLog); |
|
265 | 253 | if (!val) { |
|
266 | 254 | goto cleanup; |
|
267 | 255 | } |
|
268 | 256 | PyDict_SetItemString(kwargs, "search_log", val); |
|
269 | 257 | Py_DECREF(val); |
|
270 | 258 | } |
|
271 | 259 | |
|
272 | 260 | val = PyDict_GetItemString(kwargs, "min_match"); |
|
273 | 261 | if (!val) { |
|
274 | 262 | val = PyLong_FromUnsignedLong(params.searchLength); |
|
275 | 263 | if (!val) { |
|
276 | 264 | goto cleanup; |
|
277 | 265 | } |
|
278 | 266 | PyDict_SetItemString(kwargs, "min_match", val); |
|
279 | 267 | Py_DECREF(val); |
|
280 | 268 | } |
|
281 | 269 | |
|
282 | 270 | val = PyDict_GetItemString(kwargs, "target_length"); |
|
283 | 271 | if (!val) { |
|
284 | 272 | val = PyLong_FromUnsignedLong(params.targetLength); |
|
285 | 273 | if (!val) { |
|
286 | 274 | goto cleanup; |
|
287 | 275 | } |
|
288 | 276 | PyDict_SetItemString(kwargs, "target_length", val); |
|
289 | 277 | Py_DECREF(val); |
|
290 | 278 | } |
|
291 | 279 | |
|
292 | 280 | val = PyDict_GetItemString(kwargs, "compression_strategy"); |
|
293 | 281 | if (!val) { |
|
294 | 282 | val = PyLong_FromUnsignedLong(params.strategy); |
|
295 | 283 | if (!val) { |
|
296 | 284 | goto cleanup; |
|
297 | 285 | } |
|
298 | 286 | PyDict_SetItemString(kwargs, "compression_strategy", val); |
|
299 | 287 | Py_DECREF(val); |
|
300 | 288 | } |
|
301 | 289 | |
|
302 | val = PyDict_GetItemString(kwargs, "compress_literals"); | |
|
303 | if (!val) { | |
|
304 | val = PyLong_FromLong(level >= 0 ? 1 : 0); | |
|
305 | if (!val) { | |
|
306 | goto cleanup; | |
|
307 | } | |
|
308 | PyDict_SetItemString(kwargs, "compress_literals", val); | |
|
309 | Py_DECREF(val); | |
|
310 | } | |
|
311 | ||
|
312 | 290 | result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType); |
|
313 | 291 | if (!result) { |
|
314 | 292 | goto cleanup; |
|
315 | 293 | } |
|
316 | 294 | |
|
317 | 295 | result->params = NULL; |
|
318 | 296 | |
|
319 | 297 | val = PyTuple_New(0); |
|
320 | 298 | if (!val) { |
|
321 | 299 | Py_CLEAR(result); |
|
322 | 300 | goto cleanup; |
|
323 | 301 | } |
|
324 | 302 | |
|
325 | 303 | res = ZstdCompressionParameters_init(result, val, kwargs); |
|
326 | 304 | Py_DECREF(val); |
|
327 | 305 | |
|
328 | 306 | if (res) { |
|
329 | 307 | Py_CLEAR(result); |
|
330 | 308 | goto cleanup; |
|
331 | 309 | } |
|
332 | 310 | |
|
333 | 311 | cleanup: |
|
334 | 312 | if (managedKwargs) { |
|
335 | 313 | Py_DECREF(kwargs); |
|
336 | 314 | } |
|
337 | 315 | |
|
338 | 316 | return result; |
|
339 | 317 | } |
|
340 | 318 | |
|
341 | 319 | PyDoc_STRVAR(ZstdCompressionParameters_estimated_compression_context_size__doc__, |
|
342 | 320 | "Estimate the size in bytes of a compression context for compression parameters\n" |
|
343 | 321 | ); |
|
344 | 322 | |
|
345 | 323 | PyObject* ZstdCompressionParameters_estimated_compression_context_size(ZstdCompressionParametersObject* self) { |
|
346 | 324 | return PyLong_FromSize_t(ZSTD_estimateCCtxSize_usingCCtxParams(self->params)); |
|
347 | 325 | } |
|
348 | 326 | |
|
349 | 327 | PyDoc_STRVAR(ZstdCompressionParameters__doc__, |
|
350 | 328 | "ZstdCompressionParameters: low-level control over zstd compression"); |
|
351 | 329 | |
|
352 | 330 | static void ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject* self) { |
|
353 | 331 | if (self->params) { |
|
354 | 332 | ZSTD_freeCCtxParams(self->params); |
|
355 | 333 | self->params = NULL; |
|
356 | 334 | } |
|
357 | 335 | |
|
358 | 336 | PyObject_Del(self); |
|
359 | 337 | } |
|
360 | 338 | |
|
361 | 339 | static PyMethodDef ZstdCompressionParameters_methods[] = { |
|
362 | 340 | { |
|
363 | 341 | "from_level", |
|
364 | 342 | (PyCFunction)CompressionParameters_from_level, |
|
365 | 343 | METH_VARARGS | METH_KEYWORDS | METH_STATIC, |
|
366 | 344 | ZstdCompressionParameters_from_level__doc__ |
|
367 | 345 | }, |
|
368 | 346 | { |
|
369 | 347 | "estimated_compression_context_size", |
|
370 | 348 | (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size, |
|
371 | 349 | METH_NOARGS, |
|
372 | 350 | ZstdCompressionParameters_estimated_compression_context_size__doc__ |
|
373 | 351 | }, |
|
374 | 352 | { NULL, NULL } |
|
375 | 353 | }; |
|
376 | 354 | |
|
377 | 355 | static PyMemberDef ZstdCompressionParameters_members[] = { |
|
378 | 356 | { "format", T_UINT, |
|
379 | 357 | offsetof(ZstdCompressionParametersObject, format), READONLY, |
|
380 | 358 | "compression format" }, |
|
381 | 359 | { "compression_level", T_INT, |
|
382 | 360 | offsetof(ZstdCompressionParametersObject, compressionLevel), READONLY, |
|
383 | 361 | "compression level" }, |
|
384 | 362 | { "window_log", T_UINT, |
|
385 | 363 | offsetof(ZstdCompressionParametersObject, windowLog), READONLY, |
|
386 | 364 | "window log" }, |
|
387 | 365 | { "hash_log", T_UINT, |
|
388 | 366 | offsetof(ZstdCompressionParametersObject, hashLog), READONLY, |
|
389 | 367 | "hash log" }, |
|
390 | 368 | { "chain_log", T_UINT, |
|
391 | 369 | offsetof(ZstdCompressionParametersObject, chainLog), READONLY, |
|
392 | 370 | "chain log" }, |
|
393 | 371 | { "search_log", T_UINT, |
|
394 | 372 | offsetof(ZstdCompressionParametersObject, searchLog), READONLY, |
|
395 | 373 | "search log" }, |
|
396 | 374 | { "min_match", T_UINT, |
|
397 | 375 | offsetof(ZstdCompressionParametersObject, minMatch), READONLY, |
|
398 | 376 | "search length" }, |
|
399 | 377 | { "target_length", T_UINT, |
|
400 | 378 | offsetof(ZstdCompressionParametersObject, targetLength), READONLY, |
|
401 | 379 | "target length" }, |
|
402 | 380 | { "compression_strategy", T_UINT, |
|
403 | 381 | offsetof(ZstdCompressionParametersObject, compressionStrategy), READONLY, |
|
404 | 382 | "compression strategy" }, |
|
405 | 383 | { "write_content_size", T_UINT, |
|
406 | 384 | offsetof(ZstdCompressionParametersObject, contentSizeFlag), READONLY, |
|
407 | 385 | "whether to write content size in frames" }, |
|
408 | 386 | { "write_checksum", T_UINT, |
|
409 | 387 | offsetof(ZstdCompressionParametersObject, checksumFlag), READONLY, |
|
410 | 388 | "whether to write checksum in frames" }, |
|
411 | 389 | { "write_dict_id", T_UINT, |
|
412 | 390 | offsetof(ZstdCompressionParametersObject, dictIDFlag), READONLY, |
|
413 | 391 | "whether to write dictionary ID in frames" }, |
|
414 | 392 | { "threads", T_UINT, |
|
415 | 393 | offsetof(ZstdCompressionParametersObject, threads), READONLY, |
|
416 | 394 | "number of threads to use" }, |
|
417 | 395 | { "job_size", T_UINT, |
|
418 | 396 | offsetof(ZstdCompressionParametersObject, jobSize), READONLY, |
|
419 | 397 | "size of compression job when using multiple threads" }, |
|
420 | 398 | { "overlap_size_log", T_UINT, |
|
421 | 399 | offsetof(ZstdCompressionParametersObject, overlapSizeLog), READONLY, |
|
422 | 400 | "Size of previous input reloaded at the beginning of each job" }, |
|
423 | { "compress_literals", T_UINT, | |
|
424 | offsetof(ZstdCompressionParametersObject, compressLiterals), READONLY, | |
|
425 | "whether Huffman compression of literals is in use" }, | |
|
426 | 401 | { "force_max_window", T_UINT, |
|
427 | 402 | offsetof(ZstdCompressionParametersObject, forceMaxWindow), READONLY, |
|
428 | 403 | "force back references to remain smaller than window size" }, |
|
429 | 404 | { "enable_ldm", T_UINT, |
|
430 | 405 | offsetof(ZstdCompressionParametersObject, enableLongDistanceMatching), READONLY, |
|
431 | 406 | "whether to enable long distance matching" }, |
|
432 | 407 | { "ldm_hash_log", T_UINT, |
|
433 | 408 | offsetof(ZstdCompressionParametersObject, ldmHashLog), READONLY, |
|
434 | 409 | "Size of the table for long distance matching, as a power of 2" }, |
|
435 | 410 | { "ldm_min_match", T_UINT, |
|
436 | 411 | offsetof(ZstdCompressionParametersObject, ldmMinMatch), READONLY, |
|
437 | 412 | "minimum size of searched matches for long distance matcher" }, |
|
438 | 413 | { "ldm_bucket_size_log", T_UINT, |
|
439 | 414 | offsetof(ZstdCompressionParametersObject, ldmBucketSizeLog), READONLY, |
|
440 | 415 | "log size of each bucket in the LDM hash table for collision resolution" }, |
|
441 | 416 | { "ldm_hash_every_log", T_UINT, |
|
442 | 417 | offsetof(ZstdCompressionParametersObject, ldmHashEveryLog), READONLY, |
|
443 | 418 | "frequency of inserting/looking up entries in the LDM hash table" }, |
|
444 | 419 | { NULL } |
|
445 | 420 | }; |
|
446 | 421 | |
|
447 | 422 | PyTypeObject ZstdCompressionParametersType = { |
|
448 | 423 | PyVarObject_HEAD_INIT(NULL, 0) |
|
449 | 424 | "ZstdCompressionParameters", /* tp_name */ |
|
450 | 425 | sizeof(ZstdCompressionParametersObject), /* tp_basicsize */ |
|
451 | 426 | 0, /* tp_itemsize */ |
|
452 | 427 | (destructor)ZstdCompressionParameters_dealloc, /* tp_dealloc */ |
|
453 | 428 | 0, /* tp_print */ |
|
454 | 429 | 0, /* tp_getattr */ |
|
455 | 430 | 0, /* tp_setattr */ |
|
456 | 431 | 0, /* tp_compare */ |
|
457 | 432 | 0, /* tp_repr */ |
|
458 | 433 | 0, /* tp_as_number */ |
|
459 | 434 | 0, /* tp_as_sequence */ |
|
460 | 435 | 0, /* tp_as_mapping */ |
|
461 | 436 | 0, /* tp_hash */ |
|
462 | 437 | 0, /* tp_call */ |
|
463 | 438 | 0, /* tp_str */ |
|
464 | 439 | 0, /* tp_getattro */ |
|
465 | 440 | 0, /* tp_setattro */ |
|
466 | 441 | 0, /* tp_as_buffer */ |
|
467 | 442 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
468 | 443 | ZstdCompressionParameters__doc__, /* tp_doc */ |
|
469 | 444 | 0, /* tp_traverse */ |
|
470 | 445 | 0, /* tp_clear */ |
|
471 | 446 | 0, /* tp_richcompare */ |
|
472 | 447 | 0, /* tp_weaklistoffset */ |
|
473 | 448 | 0, /* tp_iter */ |
|
474 | 449 | 0, /* tp_iternext */ |
|
475 | 450 | ZstdCompressionParameters_methods, /* tp_methods */ |
|
476 | 451 | ZstdCompressionParameters_members, /* tp_members */ |
|
477 | 452 | 0, /* tp_getset */ |
|
478 | 453 | 0, /* tp_base */ |
|
479 | 454 | 0, /* tp_dict */ |
|
480 | 455 | 0, /* tp_descr_get */ |
|
481 | 456 | 0, /* tp_descr_set */ |
|
482 | 457 | 0, /* tp_dictoffset */ |
|
483 | 458 | (initproc)ZstdCompressionParameters_init, /* tp_init */ |
|
484 | 459 | 0, /* tp_alloc */ |
|
485 | 460 | PyType_GenericNew, /* tp_new */ |
|
486 | 461 | }; |
|
487 | 462 | |
|
488 | 463 | void compressionparams_module_init(PyObject* mod) { |
|
489 | 464 | Py_TYPE(&ZstdCompressionParametersType) = &PyType_Type; |
|
490 | 465 | if (PyType_Ready(&ZstdCompressionParametersType) < 0) { |
|
491 | 466 | return; |
|
492 | 467 | } |
|
493 | 468 | |
|
494 | 469 | Py_INCREF(&ZstdCompressionParametersType); |
|
495 | 470 | PyModule_AddObject(mod, "ZstdCompressionParameters", |
|
496 | 471 | (PyObject*)&ZstdCompressionParametersType); |
|
497 | 472 | |
|
498 | 473 | /* TODO remove deprecated alias. */ |
|
499 | 474 | Py_INCREF(&ZstdCompressionParametersType); |
|
500 | 475 | PyModule_AddObject(mod, "CompressionParameters", |
|
501 | 476 | (PyObject*)&ZstdCompressionParametersType); |
|
502 | 477 | } |
@@ -1,405 +1,386 | |||
|
1 | 1 | /** |
|
2 | 2 | * Copyright (c) 2017-present, Gregory Szorc |
|
3 | 3 | * All rights reserved. |
|
4 | 4 | * |
|
5 | 5 | * This software may be modified and distributed under the terms |
|
6 | 6 | * of the BSD license. See the LICENSE file for details. |
|
7 | 7 | */ |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 | 11 | extern PyObject* ZstdError; |
|
12 | 12 | |
|
13 | 13 | static void set_unsupported_operation(void) { |
|
14 | 14 | PyObject* iomod; |
|
15 | 15 | PyObject* exc; |
|
16 | 16 | |
|
17 | 17 | iomod = PyImport_ImportModule("io"); |
|
18 | 18 | if (NULL == iomod) { |
|
19 | 19 | return; |
|
20 | 20 | } |
|
21 | 21 | |
|
22 | 22 | exc = PyObject_GetAttrString(iomod, "UnsupportedOperation"); |
|
23 | 23 | if (NULL == exc) { |
|
24 | 24 | Py_DECREF(iomod); |
|
25 | 25 | return; |
|
26 | 26 | } |
|
27 | 27 | |
|
28 | 28 | PyErr_SetNone(exc); |
|
29 | 29 | Py_DECREF(exc); |
|
30 | 30 | Py_DECREF(iomod); |
|
31 | 31 | } |
|
32 | 32 | |
|
33 | 33 | static void reader_dealloc(ZstdCompressionReader* self) { |
|
34 | 34 | Py_XDECREF(self->compressor); |
|
35 | 35 | Py_XDECREF(self->reader); |
|
36 | 36 | |
|
37 | 37 | if (self->buffer.buf) { |
|
38 | 38 | PyBuffer_Release(&self->buffer); |
|
39 | 39 | memset(&self->buffer, 0, sizeof(self->buffer)); |
|
40 | 40 | } |
|
41 | 41 | |
|
42 | 42 | PyObject_Del(self); |
|
43 | 43 | } |
|
44 | 44 | |
|
45 | 45 | static ZstdCompressionReader* reader_enter(ZstdCompressionReader* self) { |
|
46 | size_t zresult; | |
|
47 | ||
|
48 | 46 | if (self->entered) { |
|
49 | 47 | PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times"); |
|
50 | 48 | return NULL; |
|
51 | 49 | } |
|
52 | 50 | |
|
53 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->compressor->cctx, self->sourceSize); | |
|
54 | if (ZSTD_isError(zresult)) { | |
|
55 | PyErr_Format(ZstdError, "error setting source size: %s", | |
|
56 | ZSTD_getErrorName(zresult)); | |
|
57 | return NULL; | |
|
58 | } | |
|
59 | ||
|
60 | 51 | self->entered = 1; |
|
61 | 52 | |
|
62 | 53 | Py_INCREF(self); |
|
63 | 54 | return self; |
|
64 | 55 | } |
|
65 | 56 | |
|
66 | 57 | static PyObject* reader_exit(ZstdCompressionReader* self, PyObject* args) { |
|
67 | 58 | PyObject* exc_type; |
|
68 | 59 | PyObject* exc_value; |
|
69 | 60 | PyObject* exc_tb; |
|
70 | 61 | |
|
71 | 62 | if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) { |
|
72 | 63 | return NULL; |
|
73 | 64 | } |
|
74 | 65 | |
|
75 | 66 | self->entered = 0; |
|
76 | 67 | self->closed = 1; |
|
77 | 68 | |
|
78 | 69 | /* Release resources associated with source. */ |
|
79 | 70 | Py_CLEAR(self->reader); |
|
80 | 71 | if (self->buffer.buf) { |
|
81 | 72 | PyBuffer_Release(&self->buffer); |
|
82 | 73 | memset(&self->buffer, 0, sizeof(self->buffer)); |
|
83 | 74 | } |
|
84 | 75 | |
|
85 | 76 | Py_CLEAR(self->compressor); |
|
86 | 77 | |
|
87 | 78 | Py_RETURN_FALSE; |
|
88 | 79 | } |
|
89 | 80 | |
|
90 | 81 | static PyObject* reader_readable(ZstdCompressionReader* self) { |
|
91 | 82 | Py_RETURN_TRUE; |
|
92 | 83 | } |
|
93 | 84 | |
|
94 | 85 | static PyObject* reader_writable(ZstdCompressionReader* self) { |
|
95 | 86 | Py_RETURN_FALSE; |
|
96 | 87 | } |
|
97 | 88 | |
|
98 | 89 | static PyObject* reader_seekable(ZstdCompressionReader* self) { |
|
99 | 90 | Py_RETURN_FALSE; |
|
100 | 91 | } |
|
101 | 92 | |
|
102 | 93 | static PyObject* reader_readline(PyObject* self, PyObject* args) { |
|
103 | 94 | set_unsupported_operation(); |
|
104 | 95 | return NULL; |
|
105 | 96 | } |
|
106 | 97 | |
|
107 | 98 | static PyObject* reader_readlines(PyObject* self, PyObject* args) { |
|
108 | 99 | set_unsupported_operation(); |
|
109 | 100 | return NULL; |
|
110 | 101 | } |
|
111 | 102 | |
|
112 | 103 | static PyObject* reader_write(PyObject* self, PyObject* args) { |
|
113 | 104 | PyErr_SetString(PyExc_OSError, "stream is not writable"); |
|
114 | 105 | return NULL; |
|
115 | 106 | } |
|
116 | 107 | |
|
117 | 108 | static PyObject* reader_writelines(PyObject* self, PyObject* args) { |
|
118 | 109 | PyErr_SetString(PyExc_OSError, "stream is not writable"); |
|
119 | 110 | return NULL; |
|
120 | 111 | } |
|
121 | 112 | |
|
122 | 113 | static PyObject* reader_isatty(PyObject* self) { |
|
123 | 114 | Py_RETURN_FALSE; |
|
124 | 115 | } |
|
125 | 116 | |
|
126 | 117 | static PyObject* reader_flush(PyObject* self) { |
|
127 | 118 | Py_RETURN_NONE; |
|
128 | 119 | } |
|
129 | 120 | |
|
130 | 121 | static PyObject* reader_close(ZstdCompressionReader* self) { |
|
131 | 122 | self->closed = 1; |
|
132 | 123 | Py_RETURN_NONE; |
|
133 | 124 | } |
|
134 | 125 | |
|
135 | static PyObject* reader_closed(ZstdCompressionReader* self) { | |
|
136 | if (self->closed) { | |
|
137 | Py_RETURN_TRUE; | |
|
138 | } | |
|
139 | else { | |
|
140 | Py_RETURN_FALSE; | |
|
141 | } | |
|
142 | } | |
|
143 | ||
|
144 | 126 | static PyObject* reader_tell(ZstdCompressionReader* self) { |
|
145 | 127 | /* TODO should this raise OSError since stream isn't seekable? */ |
|
146 | 128 | return PyLong_FromUnsignedLongLong(self->bytesCompressed); |
|
147 | 129 | } |
|
148 | 130 | |
|
149 | 131 | static PyObject* reader_read(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) { |
|
150 | 132 | static char* kwlist[] = { |
|
151 | 133 | "size", |
|
152 | 134 | NULL |
|
153 | 135 | }; |
|
154 | 136 | |
|
155 | 137 | Py_ssize_t size = -1; |
|
156 | 138 | PyObject* result = NULL; |
|
157 | 139 | char* resultBuffer; |
|
158 | 140 | Py_ssize_t resultSize; |
|
159 | 141 | size_t zresult; |
|
160 | 142 | size_t oldPos; |
|
161 | 143 | |
|
162 | if (!self->entered) { | |
|
163 | PyErr_SetString(ZstdError, "read() must be called from an active context manager"); | |
|
164 | return NULL; | |
|
165 | } | |
|
166 | ||
|
167 | 144 | if (self->closed) { |
|
168 | 145 | PyErr_SetString(PyExc_ValueError, "stream is closed"); |
|
169 | 146 | return NULL; |
|
170 | 147 | } |
|
171 | 148 | |
|
172 | 149 | if (self->finishedOutput) { |
|
173 | 150 | return PyBytes_FromStringAndSize("", 0); |
|
174 | 151 | } |
|
175 | 152 | |
|
176 | 153 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "n", kwlist, &size)) { |
|
177 | 154 | return NULL; |
|
178 | 155 | } |
|
179 | 156 | |
|
180 | 157 | if (size < 1) { |
|
181 | 158 | PyErr_SetString(PyExc_ValueError, "cannot read negative or size 0 amounts"); |
|
182 | 159 | return NULL; |
|
183 | 160 | } |
|
184 | 161 | |
|
185 | 162 | result = PyBytes_FromStringAndSize(NULL, size); |
|
186 | 163 | if (NULL == result) { |
|
187 | 164 | return NULL; |
|
188 | 165 | } |
|
189 | 166 | |
|
190 | 167 | PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize); |
|
191 | 168 | |
|
192 | 169 | self->output.dst = resultBuffer; |
|
193 | 170 | self->output.size = resultSize; |
|
194 | 171 | self->output.pos = 0; |
|
195 | 172 | |
|
196 | 173 | readinput: |
|
197 | 174 | |
|
198 | 175 | /* If we have data left over, consume it. */ |
|
199 | 176 | if (self->input.pos < self->input.size) { |
|
200 | 177 | oldPos = self->output.pos; |
|
201 | 178 | |
|
202 | 179 | Py_BEGIN_ALLOW_THREADS |
|
203 | 180 | zresult = ZSTD_compress_generic(self->compressor->cctx, |
|
204 | 181 | &self->output, &self->input, ZSTD_e_continue); |
|
205 | 182 | |
|
206 | 183 | Py_END_ALLOW_THREADS |
|
207 | 184 | |
|
208 | 185 | self->bytesCompressed += self->output.pos - oldPos; |
|
209 | 186 | |
|
210 | 187 | /* Input exhausted. Clear out state tracking. */ |
|
211 | 188 | if (self->input.pos == self->input.size) { |
|
212 | 189 | memset(&self->input, 0, sizeof(self->input)); |
|
213 | 190 | Py_CLEAR(self->readResult); |
|
214 | 191 | |
|
215 | 192 | if (self->buffer.buf) { |
|
216 | 193 | self->finishedInput = 1; |
|
217 | 194 | } |
|
218 | 195 | } |
|
219 | 196 | |
|
220 | 197 | if (ZSTD_isError(zresult)) { |
|
221 | 198 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); |
|
222 | 199 | return NULL; |
|
223 | 200 | } |
|
224 | 201 | |
|
225 | 202 | if (self->output.pos) { |
|
226 | 203 | /* If no more room in output, emit it. */ |
|
227 | 204 | if (self->output.pos == self->output.size) { |
|
228 | 205 | memset(&self->output, 0, sizeof(self->output)); |
|
229 | 206 | return result; |
|
230 | 207 | } |
|
231 | 208 | |
|
232 | 209 | /* |
|
233 | 210 | * There is room in the output. We fall through to below, which will either |
|
234 | 211 | * get more input for us or will attempt to end the stream. |
|
235 | 212 | */ |
|
236 | 213 | } |
|
237 | 214 | |
|
238 | 215 | /* Fall through to gather more input. */ |
|
239 | 216 | } |
|
240 | 217 | |
|
241 | 218 | if (!self->finishedInput) { |
|
242 | 219 | if (self->reader) { |
|
243 | 220 | Py_buffer buffer; |
|
244 | 221 | |
|
245 | 222 | assert(self->readResult == NULL); |
|
246 | 223 | self->readResult = PyObject_CallMethod(self->reader, "read", |
|
247 | 224 | "k", self->readSize); |
|
248 | 225 | if (self->readResult == NULL) { |
|
249 | 226 | return NULL; |
|
250 | 227 | } |
|
251 | 228 | |
|
252 | 229 | memset(&buffer, 0, sizeof(buffer)); |
|
253 | 230 | |
|
254 | 231 | if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) { |
|
255 | 232 | return NULL; |
|
256 | 233 | } |
|
257 | 234 | |
|
258 | 235 | /* EOF */ |
|
259 | 236 | if (0 == buffer.len) { |
|
260 | 237 | self->finishedInput = 1; |
|
261 | 238 | Py_CLEAR(self->readResult); |
|
262 | 239 | } |
|
263 | 240 | else { |
|
264 | 241 | self->input.src = buffer.buf; |
|
265 | 242 | self->input.size = buffer.len; |
|
266 | 243 | self->input.pos = 0; |
|
267 | 244 | } |
|
268 | 245 | |
|
269 | 246 | PyBuffer_Release(&buffer); |
|
270 | 247 | } |
|
271 | 248 | else { |
|
272 | 249 | assert(self->buffer.buf); |
|
273 | 250 | |
|
274 | 251 | self->input.src = self->buffer.buf; |
|
275 | 252 | self->input.size = self->buffer.len; |
|
276 | 253 | self->input.pos = 0; |
|
277 | 254 | } |
|
278 | 255 | } |
|
279 | 256 | |
|
280 | 257 | if (self->input.size) { |
|
281 | 258 | goto readinput; |
|
282 | 259 | } |
|
283 | 260 | |
|
284 | 261 | /* Else EOF */ |
|
285 | 262 | oldPos = self->output.pos; |
|
286 | 263 | |
|
287 | 264 | zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output, |
|
288 | 265 | &self->input, ZSTD_e_end); |
|
289 | 266 | |
|
290 | 267 | self->bytesCompressed += self->output.pos - oldPos; |
|
291 | 268 | |
|
292 | 269 | if (ZSTD_isError(zresult)) { |
|
293 | 270 | PyErr_Format(ZstdError, "error ending compression stream: %s", |
|
294 | 271 | ZSTD_getErrorName(zresult)); |
|
295 | 272 | return NULL; |
|
296 | 273 | } |
|
297 | 274 | |
|
298 | 275 | assert(self->output.pos); |
|
299 | 276 | |
|
300 | 277 | if (0 == zresult) { |
|
301 | 278 | self->finishedOutput = 1; |
|
302 | 279 | } |
|
303 | 280 | |
|
304 | 281 | if (safe_pybytes_resize(&result, self->output.pos)) { |
|
305 | 282 | Py_XDECREF(result); |
|
306 | 283 | return NULL; |
|
307 | 284 | } |
|
308 | 285 | |
|
309 | 286 | memset(&self->output, 0, sizeof(self->output)); |
|
310 | 287 | |
|
311 | 288 | return result; |
|
312 | 289 | } |
|
313 | 290 | |
|
314 | 291 | static PyObject* reader_readall(PyObject* self) { |
|
315 | 292 | PyErr_SetNone(PyExc_NotImplementedError); |
|
316 | 293 | return NULL; |
|
317 | 294 | } |
|
318 | 295 | |
|
319 | 296 | static PyObject* reader_iter(PyObject* self) { |
|
320 | 297 | set_unsupported_operation(); |
|
321 | 298 | return NULL; |
|
322 | 299 | } |
|
323 | 300 | |
|
324 | 301 | static PyObject* reader_iternext(PyObject* self) { |
|
325 | 302 | set_unsupported_operation(); |
|
326 | 303 | return NULL; |
|
327 | 304 | } |
|
328 | 305 | |
|
329 | 306 | static PyMethodDef reader_methods[] = { |
|
330 | 307 | { "__enter__", (PyCFunction)reader_enter, METH_NOARGS, |
|
331 | 308 | PyDoc_STR("Enter a compression context") }, |
|
332 | 309 | { "__exit__", (PyCFunction)reader_exit, METH_VARARGS, |
|
333 | 310 | PyDoc_STR("Exit a compression context") }, |
|
334 | 311 | { "close", (PyCFunction)reader_close, METH_NOARGS, |
|
335 | 312 | PyDoc_STR("Close the stream so it cannot perform any more operations") }, |
|
336 | { "closed", (PyCFunction)reader_closed, METH_NOARGS, | |
|
337 | PyDoc_STR("Whether stream is closed") }, | |
|
338 | 313 | { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") }, |
|
339 | 314 | { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") }, |
|
340 | 315 | { "readable", (PyCFunction)reader_readable, METH_NOARGS, |
|
341 | 316 | PyDoc_STR("Returns True") }, |
|
342 | 317 | { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data") }, |
|
343 | 318 | { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") }, |
|
344 | 319 | { "readline", (PyCFunction)reader_readline, METH_VARARGS, PyDoc_STR("Not implemented") }, |
|
345 | 320 | { "readlines", (PyCFunction)reader_readlines, METH_VARARGS, PyDoc_STR("Not implemented") }, |
|
346 | 321 | { "seekable", (PyCFunction)reader_seekable, METH_NOARGS, |
|
347 | 322 | PyDoc_STR("Returns False") }, |
|
348 | 323 | { "tell", (PyCFunction)reader_tell, METH_NOARGS, |
|
349 | 324 | PyDoc_STR("Returns current number of bytes compressed") }, |
|
350 | 325 | { "writable", (PyCFunction)reader_writable, METH_NOARGS, |
|
351 | 326 | PyDoc_STR("Returns False") }, |
|
352 | 327 | { "write", reader_write, METH_VARARGS, PyDoc_STR("Raises OSError") }, |
|
353 | 328 | { "writelines", reader_writelines, METH_VARARGS, PyDoc_STR("Not implemented") }, |
|
354 | 329 | { NULL, NULL } |
|
355 | 330 | }; |
|
356 | 331 | |
|
332 | static PyMemberDef reader_members[] = { | |
|
333 | { "closed", T_BOOL, offsetof(ZstdCompressionReader, closed), | |
|
334 | READONLY, "whether stream is closed" }, | |
|
335 | { NULL } | |
|
336 | }; | |
|
337 | ||
|
357 | 338 | PyTypeObject ZstdCompressionReaderType = { |
|
358 | 339 | PyVarObject_HEAD_INIT(NULL, 0) |
|
359 | 340 | "zstd.ZstdCompressionReader", /* tp_name */ |
|
360 | 341 | sizeof(ZstdCompressionReader), /* tp_basicsize */ |
|
361 | 342 | 0, /* tp_itemsize */ |
|
362 | 343 | (destructor)reader_dealloc, /* tp_dealloc */ |
|
363 | 344 | 0, /* tp_print */ |
|
364 | 345 | 0, /* tp_getattr */ |
|
365 | 346 | 0, /* tp_setattr */ |
|
366 | 347 | 0, /* tp_compare */ |
|
367 | 348 | 0, /* tp_repr */ |
|
368 | 349 | 0, /* tp_as_number */ |
|
369 | 350 | 0, /* tp_as_sequence */ |
|
370 | 351 | 0, /* tp_as_mapping */ |
|
371 | 352 | 0, /* tp_hash */ |
|
372 | 353 | 0, /* tp_call */ |
|
373 | 354 | 0, /* tp_str */ |
|
374 | 355 | 0, /* tp_getattro */ |
|
375 | 356 | 0, /* tp_setattro */ |
|
376 | 357 | 0, /* tp_as_buffer */ |
|
377 | 358 | Py_TPFLAGS_DEFAULT, /* tp_flags */ |
|
378 | 359 | 0, /* tp_doc */ |
|
379 | 360 | 0, /* tp_traverse */ |
|
380 | 361 | 0, /* tp_clear */ |
|
381 | 362 | 0, /* tp_richcompare */ |
|
382 | 363 | 0, /* tp_weaklistoffset */ |
|
383 | 364 | reader_iter, /* tp_iter */ |
|
384 | 365 | reader_iternext, /* tp_iternext */ |
|
385 | 366 | reader_methods, /* tp_methods */ |
|
386 | 0, /* tp_members */ |
|
|
|
367 | reader_members, /* tp_members */ | |
|
387 | 368 | 0, /* tp_getset */ |
|
388 | 369 | 0, /* tp_base */ |
|
389 | 370 | 0, /* tp_dict */ |
|
390 | 371 | 0, /* tp_descr_get */ |
|
391 | 372 | 0, /* tp_descr_set */ |
|
392 | 373 | 0, /* tp_dictoffset */ |
|
393 | 374 | 0, /* tp_init */ |
|
394 | 375 | 0, /* tp_alloc */ |
|
395 | 376 | PyType_GenericNew, /* tp_new */ |
|
396 | 377 | }; |
|
397 | 378 | |
|
398 | 379 | void compressionreader_module_init(PyObject* mod) { |
|
399 | 380 | /* TODO make reader a sub-class of io.RawIOBase */ |
|
400 | 381 | |
|
401 | 382 | Py_TYPE(&ZstdCompressionReaderType) = &PyType_Type; |
|
402 | 383 | if (PyType_Ready(&ZstdCompressionReaderType) < 0) { |
|
403 | 384 | return; |
|
404 | 385 | } |
|
405 | 386 | } |
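With ``closed`` now exposed through ``reader_members`` as a read-only ``T_BOOL`` member instead of an entry in the method table, Python callers read it as an attribute. A minimal sketch of the resulting behaviour, assuming the extension is imported as ``zstd`` (e.g. ``import zstandard as zstd``)::

    import zstandard as zstd

    reader = zstd.ZstdCompressor().stream_reader(b"data to compress")
    print(reader.closed)  # attribute access; previously a closed() method
    reader.close()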
@@ -1,315 +1,316 | |||
|
1 | 1 | /** |
|
2 | 2 | * Copyright (c) 2016-present, Gregory Szorc |
|
3 | 3 | * All rights reserved. |
|
4 | 4 | * |
|
5 | 5 | * This software may be modified and distributed under the terms |
|
6 | 6 | * of the BSD license. See the LICENSE file for details. |
|
7 | 7 | */ |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 | 11 | extern PyObject* ZstdError; |
|
12 | 12 | |
|
13 | 13 | PyDoc_STRVAR(ZstdCompresssionWriter__doc__, |
|
14 | 14 | """A context manager used for writing compressed output to a writer.\n" |
|
15 | 15 | ); |
|
16 | 16 | |
|
17 | 17 | static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* self) { |
|
18 | 18 | Py_XDECREF(self->compressor); |
|
19 | 19 | Py_XDECREF(self->writer); |
|
20 | 20 | |
|
21 | 21 | PyObject_Del(self); |
|
22 | 22 | } |
|
23 | 23 | |
|
24 | 24 | static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) { |
|
25 | 25 | size_t zresult; |
|
26 | 26 | |
|
27 | 27 | if (self->entered) { |
|
28 | 28 | PyErr_SetString(ZstdError, "cannot __enter__ multiple times"); |
|
29 | 29 | return NULL; |
|
30 | 30 | } |
|
31 | 31 | |
|
32 | 32 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->compressor->cctx, self->sourceSize); |
|
33 | 33 | if (ZSTD_isError(zresult)) { |
|
34 | 34 | PyErr_Format(ZstdError, "error setting source size: %s", |
|
35 | 35 | ZSTD_getErrorName(zresult)); |
|
36 | 36 | return NULL; |
|
37 | 37 | } |
|
38 | 38 | |
|
39 | 39 | self->entered = 1; |
|
40 | 40 | |
|
41 | 41 | Py_INCREF(self); |
|
42 | 42 | return (PyObject*)self; |
|
43 | 43 | } |
|
44 | 44 | |
|
45 | 45 | static PyObject* ZstdCompressionWriter_exit(ZstdCompressionWriter* self, PyObject* args) { |
|
46 | 46 | PyObject* exc_type; |
|
47 | 47 | PyObject* exc_value; |
|
48 | 48 | PyObject* exc_tb; |
|
49 | 49 | size_t zresult; |
|
50 | 50 | |
|
51 | 51 | ZSTD_outBuffer output; |
|
52 | 52 | PyObject* res; |
|
53 | 53 | |
|
54 | 54 | if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) { |
|
55 | 55 | return NULL; |
|
56 | 56 | } |
|
57 | 57 | |
|
58 | 58 | self->entered = 0; |
|
59 | 59 | |
|
60 | 60 | if (exc_type == Py_None && exc_value == Py_None && exc_tb == Py_None) { |
|
61 | 61 | ZSTD_inBuffer inBuffer; |
|
62 | 62 | |
|
63 | 63 | inBuffer.src = NULL; |
|
64 | 64 | inBuffer.size = 0; |
|
65 | 65 | inBuffer.pos = 0; |
|
66 | 66 | |
|
67 | 67 | output.dst = PyMem_Malloc(self->outSize); |
|
68 | 68 | if (!output.dst) { |
|
69 | 69 | return PyErr_NoMemory(); |
|
70 | 70 | } |
|
71 | 71 | output.size = self->outSize; |
|
72 | 72 | output.pos = 0; |
|
73 | 73 | |
|
74 | 74 | while (1) { |
|
75 | 75 | zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &inBuffer, ZSTD_e_end); |
|
76 | 76 | if (ZSTD_isError(zresult)) { |
|
77 | 77 | PyErr_Format(ZstdError, "error ending compression stream: %s", |
|
78 | 78 | ZSTD_getErrorName(zresult)); |
|
79 | 79 | PyMem_Free(output.dst); |
|
80 | 80 | return NULL; |
|
81 | 81 | } |
|
82 | 82 | |
|
83 | 83 | if (output.pos) { |
|
84 | 84 | #if PY_MAJOR_VERSION >= 3 |
|
85 | 85 | res = PyObject_CallMethod(self->writer, "write", "y#", |
|
86 | 86 | #else |
|
87 | 87 | res = PyObject_CallMethod(self->writer, "write", "s#", |
|
88 | 88 | #endif |
|
89 | 89 | output.dst, output.pos); |
|
90 | 90 | Py_XDECREF(res); |
|
91 | 91 | } |
|
92 | 92 | |
|
93 | 93 | if (!zresult) { |
|
94 | 94 | break; |
|
95 | 95 | } |
|
96 | 96 | |
|
97 | 97 | output.pos = 0; |
|
98 | 98 | } |
|
99 | 99 | |
|
100 | 100 | PyMem_Free(output.dst); |
|
101 | 101 | } |
|
102 | 102 | |
|
103 | 103 | Py_RETURN_FALSE; |
|
104 | 104 | } |
|
105 | 105 | |
|
106 | 106 | static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) { |
|
107 | 107 | return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->compressor->cctx)); |
|
108 | 108 | } |
|
109 | 109 | |
|
110 | 110 | static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) { |
|
111 | 111 | static char* kwlist[] = { |
|
112 | 112 | "data", |
|
113 | 113 | NULL |
|
114 | 114 | }; |
|
115 | 115 | |
|
116 | 116 | PyObject* result = NULL; |
|
117 | 117 | Py_buffer source; |
|
118 | 118 | size_t zresult; |
|
119 | 119 | ZSTD_inBuffer input; |
|
120 | 120 | ZSTD_outBuffer output; |
|
121 | 121 | PyObject* res; |
|
122 | 122 | Py_ssize_t totalWrite = 0; |
|
123 | 123 | |
|
124 | 124 | #if PY_MAJOR_VERSION >= 3 |
|
125 | 125 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write", |
|
126 | 126 | #else |
|
127 | 127 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write", |
|
128 | 128 | #endif |
|
129 | 129 | kwlist, &source)) { |
|
130 | 130 | return NULL; |
|
131 | 131 | } |
|
132 | 132 | |
|
133 | 133 | if (!self->entered) { |
|
134 | 134 | PyErr_SetString(ZstdError, "compress must be called from an active context manager"); |
|
135 | 135 | goto finally; |
|
136 | 136 | } |
|
137 | 137 | |
|
138 | 138 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { |
|
139 | 139 | PyErr_SetString(PyExc_ValueError, |
|
140 | 140 | "data buffer should be contiguous and have at most one dimension"); |
|
141 | 141 | goto finally; |
|
142 | 142 | } |
|
143 | 143 | |
|
144 | 144 | output.dst = PyMem_Malloc(self->outSize); |
|
145 | 145 | if (!output.dst) { |
|
146 | 146 | PyErr_NoMemory(); |
|
147 | 147 | goto finally; |
|
148 | 148 | } |
|
149 | 149 | output.size = self->outSize; |
|
150 | 150 | output.pos = 0; |
|
151 | 151 | |
|
152 | 152 | input.src = source.buf; |
|
153 | 153 | input.size = source.len; |
|
154 | 154 | input.pos = 0; |
|
155 | 155 | |
|
156 | 156 | while ((ssize_t)input.pos < source.len) { |
|
157 | 157 | Py_BEGIN_ALLOW_THREADS |
|
158 | 158 | zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &input, ZSTD_e_continue); |
|
159 | 159 | Py_END_ALLOW_THREADS |
|
160 | 160 | |
|
161 | 161 | if (ZSTD_isError(zresult)) { |
|
162 | 162 | PyMem_Free(output.dst); |
|
163 | 163 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); |
|
164 | 164 | goto finally; |
|
165 | 165 | } |
|
166 | 166 | |
|
167 | 167 | /* Copy data from output buffer to writer. */ |
|
168 | 168 | if (output.pos) { |
|
169 | 169 | #if PY_MAJOR_VERSION >= 3 |
|
170 | 170 | res = PyObject_CallMethod(self->writer, "write", "y#", |
|
171 | 171 | #else |
|
172 | 172 | res = PyObject_CallMethod(self->writer, "write", "s#", |
|
173 | 173 | #endif |
|
174 | 174 | output.dst, output.pos); |
|
175 | 175 | Py_XDECREF(res); |
|
176 | 176 | totalWrite += output.pos; |
|
177 | 177 | self->bytesCompressed += output.pos; |
|
178 | 178 | } |
|
179 | 179 | output.pos = 0; |
|
180 | 180 | } |
|
181 | 181 | |
|
182 | 182 | PyMem_Free(output.dst); |
|
183 | 183 | |
|
184 | 184 | result = PyLong_FromSsize_t(totalWrite); |
|
185 | 185 | |
|
186 | 186 | finally: |
|
187 | 187 | PyBuffer_Release(&source); |
|
188 | 188 | return result; |
|
189 | 189 | } |
|
190 | 190 | |
|
191 | 191 | static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) { |
|
192 | 192 | size_t zresult; |
|
193 | 193 | ZSTD_outBuffer output; |
|
194 | 194 | ZSTD_inBuffer input; |
|
195 | 195 | PyObject* res; |
|
196 | 196 | Py_ssize_t totalWrite = 0; |
|
197 | 197 | |
|
198 | 198 | if (!self->entered) { |
|
199 | 199 | PyErr_SetString(ZstdError, "flush must be called from an active context manager"); |
|
200 | 200 | return NULL; |
|
201 | 201 | } |
|
202 | 202 | |
|
203 | 203 | input.src = NULL; |
|
204 | 204 | input.size = 0; |
|
205 | 205 | input.pos = 0; |
|
206 | 206 | |
|
207 | 207 | output.dst = PyMem_Malloc(self->outSize); |
|
208 | 208 | if (!output.dst) { |
|
209 | 209 | return PyErr_NoMemory(); |
|
210 | 210 | } |
|
211 | 211 | output.size = self->outSize; |
|
212 | 212 | output.pos = 0; |
|
213 | 213 | |
|
214 | 214 | while (1) { |
|
215 | 215 | Py_BEGIN_ALLOW_THREADS |
|
216 | 216 | zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &input, ZSTD_e_flush); |
|
217 | 217 | Py_END_ALLOW_THREADS |
|
218 | 218 | |
|
219 | 219 | if (ZSTD_isError(zresult)) { |
|
220 | 220 | PyMem_Free(output.dst); |
|
221 | 221 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); |
|
222 | 222 | return NULL; |
|
223 | 223 | } |
|
224 | 224 | |
|
225 | if (!output.pos) { | |
|
226 | break; | |
|
227 | } | |
|
228 | ||
|
229 | 225 | /* Copy data from output buffer to writer. */ |
|
230 | 226 | if (output.pos) { |
|
231 | 227 | #if PY_MAJOR_VERSION >= 3 |
|
232 | 228 | res = PyObject_CallMethod(self->writer, "write", "y#", |
|
233 | 229 | #else |
|
234 | 230 | res = PyObject_CallMethod(self->writer, "write", "s#", |
|
235 | 231 | #endif |
|
236 | 232 | output.dst, output.pos); |
|
237 | 233 | Py_XDECREF(res); |
|
238 | 234 | totalWrite += output.pos; |
|
239 | 235 | self->bytesCompressed += output.pos; |
|
240 | 236 | } |
|
237 | ||
|
241 | 238 | output.pos = 0; |
|
239 | ||
|
240 | if (!zresult) { | |
|
241 | break; | |
|
242 | } | |
|
242 | 243 | } |
|
243 | 244 | |
|
244 | 245 | PyMem_Free(output.dst); |
|
245 | 246 | |
|
246 | 247 | return PyLong_FromSsize_t(totalWrite); |
|
247 | 248 | } |
|
248 | 249 | |
|
249 | 250 | static PyObject* ZstdCompressionWriter_tell(ZstdCompressionWriter* self) { |
|
250 | 251 | return PyLong_FromUnsignedLongLong(self->bytesCompressed); |
|
251 | 252 | } |
|
252 | 253 | |
|
253 | 254 | static PyMethodDef ZstdCompressionWriter_methods[] = { |
|
254 | 255 | { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS, |
|
255 | 256 | PyDoc_STR("Enter a compression context.") }, |
|
256 | 257 | { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS, |
|
257 | 258 | PyDoc_STR("Exit a compression context.") }, |
|
258 | 259 | { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS, |
|
259 | 260 | PyDoc_STR("Obtain the memory size of the underlying compressor") }, |
|
260 | 261 | { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS | METH_KEYWORDS, |
|
261 | 262 | PyDoc_STR("Compress data") }, |
|
262 | 263 | { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_NOARGS, |
|
263 | 264 | PyDoc_STR("Flush data and finish a zstd frame") }, |
|
264 | 265 | { "tell", (PyCFunction)ZstdCompressionWriter_tell, METH_NOARGS, |
|
265 | 266 | PyDoc_STR("Returns current number of bytes compressed") }, |
|
266 | 267 | { NULL, NULL } |
|
267 | 268 | }; |
|
268 | 269 | |
|
269 | 270 | PyTypeObject ZstdCompressionWriterType = { |
|
270 | 271 | PyVarObject_HEAD_INIT(NULL, 0) |
|
271 | 272 | "zstd.ZstdCompressionWriter", /* tp_name */ |
|
272 | 273 | sizeof(ZstdCompressionWriter), /* tp_basicsize */ |
|
273 | 274 | 0, /* tp_itemsize */ |
|
274 | 275 | (destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */ |
|
275 | 276 | 0, /* tp_print */ |
|
276 | 277 | 0, /* tp_getattr */ |
|
277 | 278 | 0, /* tp_setattr */ |
|
278 | 279 | 0, /* tp_compare */ |
|
279 | 280 | 0, /* tp_repr */ |
|
280 | 281 | 0, /* tp_as_number */ |
|
281 | 282 | 0, /* tp_as_sequence */ |
|
282 | 283 | 0, /* tp_as_mapping */ |
|
283 | 284 | 0, /* tp_hash */ |
|
284 | 285 | 0, /* tp_call */ |
|
285 | 286 | 0, /* tp_str */ |
|
286 | 287 | 0, /* tp_getattro */ |
|
287 | 288 | 0, /* tp_setattro */ |
|
288 | 289 | 0, /* tp_as_buffer */ |
|
289 | 290 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
290 | 291 | ZstdCompresssionWriter__doc__, /* tp_doc */ |
|
291 | 292 | 0, /* tp_traverse */ |
|
292 | 293 | 0, /* tp_clear */ |
|
293 | 294 | 0, /* tp_richcompare */ |
|
294 | 295 | 0, /* tp_weaklistoffset */ |
|
295 | 296 | 0, /* tp_iter */ |
|
296 | 297 | 0, /* tp_iternext */ |
|
297 | 298 | ZstdCompressionWriter_methods, /* tp_methods */ |
|
298 | 299 | 0, /* tp_members */ |
|
299 | 300 | 0, /* tp_getset */ |
|
300 | 301 | 0, /* tp_base */ |
|
301 | 302 | 0, /* tp_dict */ |
|
302 | 303 | 0, /* tp_descr_get */ |
|
303 | 304 | 0, /* tp_descr_set */ |
|
304 | 305 | 0, /* tp_dictoffset */ |
|
305 | 306 | 0, /* tp_init */ |
|
306 | 307 | 0, /* tp_alloc */ |
|
307 | 308 | PyType_GenericNew, /* tp_new */ |
|
308 | 309 | }; |
|
309 | 310 | |
|
310 | 311 | void compressionwriter_module_init(PyObject* mod) { |
|
311 | 312 | Py_TYPE(&ZstdCompressionWriterType) = &PyType_Type; |
|
312 | 313 | if (PyType_Ready(&ZstdCompressionWriterType) < 0) { |
|
313 | 314 | return; |
|
314 | 315 | } |
|
315 | 316 | } |
@@ -1,273 +1,256 | |||
|
1 | 1 | /** |
|
2 | 2 | * Copyright (c) 2016-present, Gregory Szorc |
|
3 | 3 | * All rights reserved. |
|
4 | 4 | * |
|
5 | 5 | * This software may be modified and distributed under the terms |
|
6 | 6 | * of the BSD license. See the LICENSE file for details. |
|
7 | 7 | */ |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 | 11 | extern PyObject* ZstdError; |
|
12 | 12 | |
|
13 | 13 | PyDoc_STRVAR(ZstdCompressionObj__doc__, |
|
14 | 14 | "Perform compression using a standard library compatible API.\n" |
|
15 | 15 | ); |
|
16 | 16 | |
|
17 | 17 | static void ZstdCompressionObj_dealloc(ZstdCompressionObj* self) { |
|
18 | 18 | PyMem_Free(self->output.dst); |
|
19 | 19 | self->output.dst = NULL; |
|
20 | 20 | |
|
21 | 21 | Py_XDECREF(self->compressor); |
|
22 | 22 | |
|
23 | 23 | PyObject_Del(self); |
|
24 | 24 | } |
|
25 | 25 | |
|
26 | 26 | static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) { |
|
27 | 27 | static char* kwlist[] = { |
|
28 | 28 | "data", |
|
29 | 29 | NULL |
|
30 | 30 | }; |
|
31 | 31 | |
|
32 | 32 | Py_buffer source; |
|
33 | 33 | ZSTD_inBuffer input; |
|
34 | 34 | size_t zresult; |
|
35 | 35 | PyObject* result = NULL; |
|
36 | 36 | Py_ssize_t resultSize = 0; |
|
37 | 37 | |
|
38 | 38 | if (self->finished) { |
|
39 | 39 | PyErr_SetString(ZstdError, "cannot call compress() after compressor finished"); |
|
40 | 40 | return NULL; |
|
41 | 41 | } |
|
42 | 42 | |
|
43 | 43 | #if PY_MAJOR_VERSION >= 3 |
|
44 | 44 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress", |
|
45 | 45 | #else |
|
46 | 46 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress", |
|
47 | 47 | #endif |
|
48 | 48 | kwlist, &source)) { |
|
49 | 49 | return NULL; |
|
50 | 50 | } |
|
51 | 51 | |
|
52 | 52 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { |
|
53 | 53 | PyErr_SetString(PyExc_ValueError, |
|
54 | 54 | "data buffer should be contiguous and have at most one dimension"); |
|
55 | 55 | goto finally; |
|
56 | 56 | } |
|
57 | 57 | |
|
58 | 58 | input.src = source.buf; |
|
59 | 59 | input.size = source.len; |
|
60 | 60 | input.pos = 0; |
|
61 | 61 | |
|
62 | 62 | while ((ssize_t)input.pos < source.len) { |
|
63 | 63 | Py_BEGIN_ALLOW_THREADS |
|
64 | 64 | zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output, |
|
65 | 65 | &input, ZSTD_e_continue); |
|
66 | 66 | Py_END_ALLOW_THREADS |
|
67 | 67 | |
|
68 | 68 | if (ZSTD_isError(zresult)) { |
|
69 | 69 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); |
|
70 | 70 | Py_CLEAR(result); |
|
71 | 71 | goto finally; |
|
72 | 72 | } |
|
73 | 73 | |
|
74 | 74 | if (self->output.pos) { |
|
75 | 75 | if (result) { |
|
76 | 76 | resultSize = PyBytes_GET_SIZE(result); |
|
77 | 77 | |
|
78 | 78 | if (safe_pybytes_resize(&result, resultSize + self->output.pos)) { |
|
79 | 79 | Py_CLEAR(result); |
|
80 | 80 | goto finally; |
|
81 | 81 | } |
|
82 | 82 | |
|
83 | 83 | memcpy(PyBytes_AS_STRING(result) + resultSize, |
|
84 | 84 | self->output.dst, self->output.pos); |
|
85 | 85 | } |
|
86 | 86 | else { |
|
87 | 87 | result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); |
|
88 | 88 | if (!result) { |
|
89 | 89 | goto finally; |
|
90 | 90 | } |
|
91 | 91 | } |
|
92 | 92 | |
|
93 | 93 | self->output.pos = 0; |
|
94 | 94 | } |
|
95 | 95 | } |
|
96 | 96 | |
|
97 | 97 | if (NULL == result) { |
|
98 | 98 | result = PyBytes_FromString(""); |
|
99 | 99 | } |
|
100 | 100 | |
|
101 | 101 | finally: |
|
102 | 102 | PyBuffer_Release(&source); |
|
103 | 103 | |
|
104 | 104 | return result; |
|
105 | 105 | } |
|
106 | 106 | |
|
107 | 107 | static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) { |
|
108 | 108 | static char* kwlist[] = { |
|
109 | 109 | "flush_mode", |
|
110 | 110 | NULL |
|
111 | 111 | }; |
|
112 | 112 | |
|
113 | 113 | int flushMode = compressorobj_flush_finish; |
|
114 | 114 | size_t zresult; |
|
115 | 115 | PyObject* result = NULL; |
|
116 | 116 | Py_ssize_t resultSize = 0; |
|
117 | 117 | ZSTD_inBuffer input; |
|
118 | ZSTD_EndDirective zFlushMode; | |
|
118 | 119 | |
|
119 | 120 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist, &flushMode)) { |
|
120 | 121 | return NULL; |
|
121 | 122 | } |
|
122 | 123 | |
|
123 | 124 | if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) { |
|
124 | 125 | PyErr_SetString(PyExc_ValueError, "flush mode not recognized"); |
|
125 | 126 | return NULL; |
|
126 | 127 | } |
|
127 | 128 | |
|
128 | 129 | if (self->finished) { |
|
129 | 130 | PyErr_SetString(ZstdError, "compressor object already finished"); |
|
130 | 131 | return NULL; |
|
131 | 132 | } |
|
132 | 133 | |
|
134 | switch (flushMode) { | |
|
135 | case compressorobj_flush_block: | |
|
136 | zFlushMode = ZSTD_e_flush; | |
|
137 | break; | |
|
138 | ||
|
139 | case compressorobj_flush_finish: | |
|
140 | zFlushMode = ZSTD_e_end; | |
|
141 | self->finished = 1; | |
|
142 | break; | |
|
143 | ||
|
144 | default: | |
|
145 | PyErr_SetString(ZstdError, "unhandled flush mode"); | |
|
146 | return NULL; | |
|
147 | } | |
|
148 | ||
|
133 | 149 | assert(self->output.pos == 0); |
|
134 | 150 | |
|
135 | 151 | input.src = NULL; |
|
136 | 152 | input.size = 0; |
|
137 | 153 | input.pos = 0; |
|
138 | 154 | |
|
139 | if (flushMode == compressorobj_flush_block) { | |
|
140 | /* The output buffer is of size ZSTD_CStreamOutSize(), which is | |
|
141 | guaranteed to hold a full block. */ | |
|
155 | while (1) { | |
|
142 | 156 | Py_BEGIN_ALLOW_THREADS |
|
143 | 157 |
|
|
144 |
|
|
|
158 | &input, zFlushMode); | |
|
145 | 159 | Py_END_ALLOW_THREADS |
|
146 | 160 | |
|
147 | 161 | if (ZSTD_isError(zresult)) { |
|
148 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); | |
|
149 | return NULL; | |
|
150 | } | |
|
151 | ||
|
152 | /* Output buffer is guaranteed to hold full block. */ | |
|
153 | assert(zresult == 0); | |
|
154 | ||
|
155 | if (self->output.pos) { | |
|
156 | result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); | |
|
157 | if (!result) { | |
|
158 | return NULL; | |
|
159 | } | |
|
160 | } | |
|
161 | ||
|
162 | self->output.pos = 0; | |
|
163 | ||
|
164 | if (result) { | |
|
165 | return result; | |
|
166 | } | |
|
167 | else { | |
|
168 | return PyBytes_FromString(""); | |
|
169 | } | |
|
170 | } | |
|
171 | ||
|
172 | assert(flushMode == compressorobj_flush_finish); | |
|
173 | self->finished = 1; | |
|
174 | ||
|
175 | while (1) { | |
|
176 | zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output, | |
|
177 | &input, ZSTD_e_end); | |
|
178 | if (ZSTD_isError(zresult)) { | |
|
179 | 162 | PyErr_Format(ZstdError, "error ending compression stream: %s", |
|
180 | 163 | ZSTD_getErrorName(zresult)); |
|
181 | 164 | return NULL; |
|
182 | 165 | } |
|
183 | 166 | |
|
184 | 167 | if (self->output.pos) { |
|
185 | 168 | if (result) { |
|
186 | 169 | resultSize = PyBytes_GET_SIZE(result); |
|
187 | 170 | |
|
188 | 171 | if (safe_pybytes_resize(&result, resultSize + self->output.pos)) { |
|
189 | 172 | Py_XDECREF(result); |
|
190 | 173 | return NULL; |
|
191 | 174 | } |
|
192 | 175 | |
|
193 | 176 | memcpy(PyBytes_AS_STRING(result) + resultSize, |
|
194 | 177 | self->output.dst, self->output.pos); |
|
195 | 178 | } |
|
196 | 179 | else { |
|
197 | 180 | result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); |
|
198 | 181 | if (!result) { |
|
199 | 182 | return NULL; |
|
200 | 183 | } |
|
201 | 184 | } |
|
202 | 185 | |
|
203 | 186 | self->output.pos = 0; |
|
204 | 187 | } |
|
205 | 188 | |
|
206 | 189 | if (!zresult) { |
|
207 | 190 | break; |
|
208 | 191 | } |
|
209 | 192 | } |
|
210 | 193 | |
|
211 | 194 | if (result) { |
|
212 | 195 | return result; |
|
213 | 196 | } |
|
214 | 197 | else { |
|
215 | 198 | return PyBytes_FromString(""); |
|
216 | 199 | } |
|
217 | 200 | } |
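The ``flush_mode`` switch above maps the module-level flush constants onto ``ZSTD_e_flush`` and ``ZSTD_e_end``. A hedged Python sketch of the two modes, assuming ``import zstandard as zstd`` and the ``COMPRESSOBJ_FLUSH_*`` constants exposed by the extension::

    import zstandard as zstd

    cobj = zstd.ZstdCompressor().compressobj()
    frame = cobj.compress(b"first part")
    # Flush buffered data as a complete block but keep the frame open
    # (compressorobj_flush_block -> ZSTD_e_flush).
    frame += cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
    frame += cobj.compress(b"second part")
    # End the zstd frame (compressorobj_flush_finish -> ZSTD_e_end, the default).
    frame += cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH)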
|
218 | 201 | |
|
219 | 202 | static PyMethodDef ZstdCompressionObj_methods[] = { |
|
220 | 203 | { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS | METH_KEYWORDS, |
|
221 | 204 | PyDoc_STR("compress data") }, |
|
222 | 205 | { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS | METH_KEYWORDS, |
|
223 | 206 | PyDoc_STR("finish compression operation") }, |
|
224 | 207 | { NULL, NULL } |
|
225 | 208 | }; |
|
226 | 209 | |
|
227 | 210 | PyTypeObject ZstdCompressionObjType = { |
|
228 | 211 | PyVarObject_HEAD_INIT(NULL, 0) |
|
229 | 212 | "zstd.ZstdCompressionObj", /* tp_name */ |
|
230 | 213 | sizeof(ZstdCompressionObj), /* tp_basicsize */ |
|
231 | 214 | 0, /* tp_itemsize */ |
|
232 | 215 | (destructor)ZstdCompressionObj_dealloc, /* tp_dealloc */ |
|
233 | 216 | 0, /* tp_print */ |
|
234 | 217 | 0, /* tp_getattr */ |
|
235 | 218 | 0, /* tp_setattr */ |
|
236 | 219 | 0, /* tp_compare */ |
|
237 | 220 | 0, /* tp_repr */ |
|
238 | 221 | 0, /* tp_as_number */ |
|
239 | 222 | 0, /* tp_as_sequence */ |
|
240 | 223 | 0, /* tp_as_mapping */ |
|
241 | 224 | 0, /* tp_hash */ |
|
242 | 225 | 0, /* tp_call */ |
|
243 | 226 | 0, /* tp_str */ |
|
244 | 227 | 0, /* tp_getattro */ |
|
245 | 228 | 0, /* tp_setattro */ |
|
246 | 229 | 0, /* tp_as_buffer */ |
|
247 | 230 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
248 | 231 | ZstdCompressionObj__doc__, /* tp_doc */ |
|
249 | 232 | 0, /* tp_traverse */ |
|
250 | 233 | 0, /* tp_clear */ |
|
251 | 234 | 0, /* tp_richcompare */ |
|
252 | 235 | 0, /* tp_weaklistoffset */ |
|
253 | 236 | 0, /* tp_iter */ |
|
254 | 237 | 0, /* tp_iternext */ |
|
255 | 238 | ZstdCompressionObj_methods, /* tp_methods */ |
|
256 | 239 | 0, /* tp_members */ |
|
257 | 240 | 0, /* tp_getset */ |
|
258 | 241 | 0, /* tp_base */ |
|
259 | 242 | 0, /* tp_dict */ |
|
260 | 243 | 0, /* tp_descr_get */ |
|
261 | 244 | 0, /* tp_descr_set */ |
|
262 | 245 | 0, /* tp_dictoffset */ |
|
263 | 246 | 0, /* tp_init */ |
|
264 | 247 | 0, /* tp_alloc */ |
|
265 | 248 | PyType_GenericNew, /* tp_new */ |
|
266 | 249 | }; |
|
267 | 250 | |
|
268 | 251 | void compressobj_module_init(PyObject* module) { |
|
269 | 252 | Py_TYPE(&ZstdCompressionObjType) = &PyType_Type; |
|
270 | 253 | if (PyType_Ready(&ZstdCompressionObjType) < 0) { |
|
271 | 254 | return; |
|
272 | 255 | } |
|
273 | 256 | } |
@@ -1,1604 +1,1651 | |||
|
1 | 1 | /** |
|
2 | 2 | * Copyright (c) 2016-present, Gregory Szorc |
|
3 | 3 | * All rights reserved. |
|
4 | 4 | * |
|
5 | 5 | * This software may be modified and distributed under the terms |
|
6 | 6 | * of the BSD license. See the LICENSE file for details. |
|
7 | 7 | */ |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | #include "pool.h" |
|
11 | 11 | |
|
12 | 12 | extern PyObject* ZstdError; |
|
13 | 13 | |
|
14 | int ensure_cctx(ZstdCompressor* compressor) { |
|
|
14 | int setup_cctx(ZstdCompressor* compressor) { | |
|
15 | 15 | size_t zresult; |
|
16 | 16 | |
|
17 | 17 | assert(compressor); |
|
18 | 18 | assert(compressor->cctx); |
|
19 | 19 | assert(compressor->params); |
|
20 | 20 | |
|
21 | ZSTD_CCtx_reset(compressor->cctx); | |
|
22 | ||
|
23 | 21 | zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, compressor->params); |
|
24 | 22 | if (ZSTD_isError(zresult)) { |
|
25 | 23 | PyErr_Format(ZstdError, "could not set compression parameters: %s", |
|
26 | 24 | ZSTD_getErrorName(zresult)); |
|
27 | 25 | return 1; |
|
28 | 26 | } |
|
29 | 27 | |
|
30 | 28 | if (compressor->dict) { |
|
31 | 29 | if (compressor->dict->cdict) { |
|
32 | 30 | zresult = ZSTD_CCtx_refCDict(compressor->cctx, compressor->dict->cdict); |
|
33 | 31 | } |
|
34 | 32 | else { |
|
35 | 33 | zresult = ZSTD_CCtx_loadDictionary_advanced(compressor->cctx, |
|
36 | 34 | compressor->dict->dictData, compressor->dict->dictSize, |
|
37 | 35 | ZSTD_dlm_byRef, compressor->dict->dictType); |
|
38 | 36 | } |
|
39 | 37 | if (ZSTD_isError(zresult)) { |
|
40 | 38 | PyErr_Format(ZstdError, "could not load compression dictionary: %s", |
|
41 | 39 | ZSTD_getErrorName(zresult)); |
|
42 | 40 | return 1; |
|
43 | 41 | } |
|
44 | 42 | } |
|
45 | 43 | |
|
46 | 44 | return 0; |
|
47 | 45 | } |
|
48 | 46 | |
|
49 | 47 | static PyObject* frame_progression(ZSTD_CCtx* cctx) { |
|
50 | 48 | PyObject* result = NULL; |
|
51 | 49 | PyObject* value; |
|
52 | 50 | ZSTD_frameProgression progression; |
|
53 | 51 | |
|
54 | 52 | result = PyTuple_New(3); |
|
55 | 53 | if (!result) { |
|
56 | 54 | return NULL; |
|
57 | 55 | } |
|
58 | 56 | |
|
59 | 57 | progression = ZSTD_getFrameProgression(cctx); |
|
60 | 58 | |
|
61 | 59 | value = PyLong_FromUnsignedLongLong(progression.ingested); |
|
62 | 60 | if (!value) { |
|
63 | 61 | Py_DECREF(result); |
|
64 | 62 | return NULL; |
|
65 | 63 | } |
|
66 | 64 | |
|
67 | 65 | PyTuple_SET_ITEM(result, 0, value); |
|
68 | 66 | |
|
69 | 67 | value = PyLong_FromUnsignedLongLong(progression.consumed); |
|
70 | 68 | if (!value) { |
|
71 | 69 | Py_DECREF(result); |
|
72 | 70 | return NULL; |
|
73 | 71 | } |
|
74 | 72 | |
|
75 | 73 | PyTuple_SET_ITEM(result, 1, value); |
|
76 | 74 | |
|
77 | 75 | value = PyLong_FromUnsignedLongLong(progression.produced); |
|
78 | 76 | if (!value) { |
|
79 | 77 | Py_DECREF(result); |
|
80 | 78 | return NULL; |
|
81 | 79 | } |
|
82 | 80 | |
|
83 | 81 | PyTuple_SET_ITEM(result, 2, value); |
|
84 | 82 | |
|
85 | 83 | return result; |
|
86 | 84 | } |
|
87 | 85 | |
|
88 | 86 | PyDoc_STRVAR(ZstdCompressor__doc__, |
|
89 | 87 | "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n" |
|
90 | 88 | "\n" |
|
91 | 89 | "Create an object used to perform Zstandard compression.\n" |
|
92 | 90 | "\n" |
|
93 | 91 | "An instance can compress data in various ways. Instances can be used multiple\n"
|
94 | 92 | "times. Each compression operation will use the compression parameters\n" |
|
95 | 93 | "defined at construction time.\n" |
|
96 | 94 | "\n" |
|
97 | 95 | "Compression can be configured via the following named arguments:\n"
|
98 | 96 | "\n" |
|
99 | 97 | "level\n" |
|
100 | 98 | " Integer compression level.\n" |
|
101 | 99 | "dict_data\n" |
|
102 | 100 | " A ``ZstdCompressionDict`` to be used to compress with dictionary data.\n" |
|
103 | 101 | "compression_params\n" |
|
104 | 102 | " A ``CompressionParameters`` instance defining low-level compression" |
|
105 | 103 | " parameters. If defined, this will overwrite the ``level`` argument.\n" |
|
106 | 104 | "write_checksum\n" |
|
107 | 105 | " If True, a 4 byte content checksum will be written with the compressed\n" |
|
108 | 106 | " data, allowing the decompressor to perform content verification.\n" |
|
109 | 107 | "write_content_size\n" |
|
110 | 108 | " If True (the default), the decompressed content size will be included in\n" |
|
111 | 109 | " the header of the compressed data. This data will only be written if the\n" |
|
112 | 110 | " compressor knows the size of the input data.\n" |
|
113 | 111 | "write_dict_id\n" |
|
114 | 112 | " Determines whether the dictionary ID will be written into the compressed\n" |
|
115 | 113 | " data. Defaults to True. Only adds content to the compressed data if\n" |
|
116 | 114 | " a dictionary is being used.\n" |
|
117 | 115 | "threads\n" |
|
118 | 116 | " Number of threads to use to compress data concurrently. When set,\n" |
|
119 | 117 | " compression operations are performed on multiple threads. The default\n" |
|
120 | 118 | " value (0) disables multi-threaded compression. A value of ``-1`` means to\n" |
|
121 | 119 | " set the number of threads to the number of detected logical CPUs.\n" |
|
122 | 120 | ); |
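A minimal usage sketch for the constructor arguments documented above, assuming the extension is imported as ``zstd`` (e.g. ``import zstandard as zstd``)::

    import zstandard as zstd

    # Level-only compressor; level defaults to 3 per ZstdCompressor_init below.
    cctx = zstd.ZstdCompressor(level=5)

    # Multi-threaded compression with a content checksum; threads=-1 would use
    # the number of detected logical CPUs.
    cctx_mt = zstd.ZstdCompressor(level=19, threads=4, write_checksum=True)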
|
123 | 121 | |
|
124 | 122 | static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
125 | 123 | static char* kwlist[] = { |
|
126 | 124 | "level", |
|
127 | 125 | "dict_data", |
|
128 | 126 | "compression_params", |
|
129 | 127 | "write_checksum", |
|
130 | 128 | "write_content_size", |
|
131 | 129 | "write_dict_id", |
|
132 | 130 | "threads", |
|
133 | 131 | NULL |
|
134 | 132 | }; |
|
135 | 133 | |
|
136 | 134 | int level = 3; |
|
137 | 135 | ZstdCompressionDict* dict = NULL; |
|
138 | 136 | ZstdCompressionParametersObject* params = NULL; |
|
139 | 137 | PyObject* writeChecksum = NULL; |
|
140 | 138 | PyObject* writeContentSize = NULL; |
|
141 | 139 | PyObject* writeDictID = NULL; |
|
142 | 140 | int threads = 0; |
|
143 | 141 | |
|
144 | 142 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor", |
|
145 | 143 | kwlist, &level, &ZstdCompressionDictType, &dict, |
|
146 | 144 | &ZstdCompressionParametersType, ¶ms, |
|
147 | 145 | &writeChecksum, &writeContentSize, &writeDictID, &threads)) { |
|
148 | 146 | return -1; |
|
149 | 147 | } |
|
150 | 148 | |
|
151 | 149 | if (level > ZSTD_maxCLevel()) { |
|
152 | 150 | PyErr_Format(PyExc_ValueError, "level must be less than %d", |
|
153 | 151 | ZSTD_maxCLevel() + 1); |
|
154 | 152 | return -1; |
|
155 | 153 | } |
|
156 | 154 | |
|
157 | 155 | if (threads < 0) { |
|
158 | 156 | threads = cpu_count(); |
|
159 | 157 | } |
|
160 | 158 | |
|
161 | 159 | /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the |
|
162 | 160 | overhead of each compression operation. */ |
|
163 | 161 | self->cctx = ZSTD_createCCtx(); |
|
164 | 162 | if (!self->cctx) { |
|
165 | 163 | PyErr_NoMemory(); |
|
166 | 164 | return -1; |
|
167 | 165 | } |
|
168 | 166 | |
|
169 | 167 | /* TODO stuff the original parameters away somewhere so we can reset later. This |
|
170 | 168 | will allow us to do things like automatically adjust cparams based on input |
|
171 | 169 | size (assuming zstd isn't doing that internally). */ |
|
172 | 170 | |
|
173 | 171 | self->params = ZSTD_createCCtxParams(); |
|
174 | 172 | if (!self->params) { |
|
175 | 173 | PyErr_NoMemory(); |
|
176 | 174 | return -1; |
|
177 | 175 | } |
|
178 | 176 | |
|
179 | 177 | if (params && writeChecksum) { |
|
180 | 178 | PyErr_SetString(PyExc_ValueError, |
|
181 | 179 | "cannot define compression_params and write_checksum"); |
|
182 | 180 | return -1; |
|
183 | 181 | } |
|
184 | 182 | |
|
185 | 183 | if (params && writeContentSize) { |
|
186 | 184 | PyErr_SetString(PyExc_ValueError, |
|
187 | 185 | "cannot define compression_params and write_content_size"); |
|
188 | 186 | return -1; |
|
189 | 187 | } |
|
190 | 188 | |
|
191 | 189 | if (params && writeDictID) { |
|
192 | 190 | PyErr_SetString(PyExc_ValueError, |
|
193 | 191 | "cannot define compression_params and write_dict_id"); |
|
194 | 192 | return -1; |
|
195 | 193 | } |
|
196 | 194 | |
|
197 | 195 | if (params && threads) { |
|
198 | 196 | PyErr_SetString(PyExc_ValueError, |
|
199 | 197 | "cannot define compression_params and threads"); |
|
200 | 198 | return -1; |
|
201 | 199 | } |
|
202 | 200 | |
|
203 | 201 | if (params) { |
|
204 | 202 | if (set_parameters(self->params, params)) { |
|
205 | 203 | return -1; |
|
206 | 204 | } |
|
207 | 205 | } |
|
208 | 206 | else { |
|
209 | 207 | if (set_parameter(self->params, ZSTD_p_compressionLevel, level)) { |
|
210 | 208 | return -1; |
|
211 | 209 | } |
|
212 | 210 | |
|
213 | 211 | if (set_parameter(self->params, ZSTD_p_contentSizeFlag, |
|
214 | 212 | writeContentSize ? PyObject_IsTrue(writeContentSize) : 1)) { |
|
215 | 213 | return -1; |
|
216 | 214 | } |
|
217 | 215 | |
|
218 | 216 | if (set_parameter(self->params, ZSTD_p_checksumFlag, |
|
219 | 217 | writeChecksum ? PyObject_IsTrue(writeChecksum) : 0)) { |
|
220 | 218 | return -1; |
|
221 | 219 | } |
|
222 | 220 | |
|
223 | 221 | if (set_parameter(self->params, ZSTD_p_dictIDFlag, |
|
224 | 222 | writeDictID ? PyObject_IsTrue(writeDictID) : 1)) { |
|
225 | 223 | return -1; |
|
226 | 224 | } |
|
227 | 225 | |
|
228 | 226 | if (threads) { |
|
229 | 227 | if (set_parameter(self->params, ZSTD_p_nbWorkers, threads)) { |
|
230 | 228 | return -1; |
|
231 | 229 | } |
|
232 | 230 | } |
|
233 | 231 | } |
|
234 | 232 | |
|
235 | 233 | if (dict) { |
|
236 | 234 | self->dict = dict; |
|
237 | 235 | Py_INCREF(dict); |
|
238 | 236 | } |
|
239 | 237 | |
|
240 | if (ensure_cctx(self)) { |
|
|
|
238 | if (setup_cctx(self)) { | |
|
241 | 239 | return -1;
|
|
242 | 240 | } |
|
243 | 241 | |
|
244 | 242 | return 0; |
|
245 | 243 | } |
|
246 | 244 | |
|
247 | 245 | static void ZstdCompressor_dealloc(ZstdCompressor* self) { |
|
248 | 246 | if (self->cctx) { |
|
249 | 247 | ZSTD_freeCCtx(self->cctx); |
|
250 | 248 | self->cctx = NULL; |
|
251 | 249 | } |
|
252 | 250 | |
|
253 | 251 | if (self->params) { |
|
254 | 252 | ZSTD_freeCCtxParams(self->params); |
|
255 | 253 | self->params = NULL; |
|
256 | 254 | } |
|
257 | 255 | |
|
258 | 256 | Py_XDECREF(self->dict); |
|
259 | 257 | PyObject_Del(self); |
|
260 | 258 | } |
|
261 | 259 | |
|
262 | 260 | PyDoc_STRVAR(ZstdCompressor_memory_size__doc__, |
|
263 | 261 | "memory_size()\n" |
|
264 | 262 | "\n" |
|
265 | 263 | "Obtain the memory usage of this compressor, in bytes.\n" |
|
266 | 264 | ); |
|
267 | 265 | |
|
268 | 266 | static PyObject* ZstdCompressor_memory_size(ZstdCompressor* self) { |
|
269 | 267 | if (self->cctx) { |
|
270 | 268 | return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->cctx)); |
|
271 | 269 | } |
|
272 | 270 | else { |
|
273 | 271 | PyErr_SetString(ZstdError, "no compressor context found; this should never happen"); |
|
274 | 272 | return NULL; |
|
275 | 273 | } |
|
276 | 274 | } |
|
277 | 275 | |
|
278 | 276 | PyDoc_STRVAR(ZstdCompressor_frame_progression__doc__, |
|
279 | 277 | "frame_progression()\n" |
|
280 | 278 | "\n" |
|
281 | 279 | "Return information on how much work the compressor has done.\n" |
|
282 | 280 | "\n" |
|
283 | 281 | "Returns a 3-tuple of (ingested, consumed, produced).\n" |
|
284 | 282 | ); |
|
285 | 283 | |
|
286 | 284 | static PyObject* ZstdCompressor_frame_progression(ZstdCompressor* self) { |
|
287 | 285 | return frame_progression(self->cctx); |
|
288 | 286 | } |
|
289 | 287 | |
|
290 | 288 | PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__, |
|
291 | 289 | "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n" |
|
292 | 290 | "compress data between streams\n" |
|
293 | 291 | "\n" |
|
294 | 292 | "Data will be read from ``ifh``, compressed, and written to ``ofh``.\n" |
|
295 | 293 | "``ifh`` must have a ``read(size)`` method. ``ofh`` must have a ``write(data)``\n" |
|
296 | 294 | "method.\n" |
|
297 | 295 | "\n" |
|
298 | 296 | "An optional ``size`` argument specifies the size of the source stream.\n" |
|
299 | 297 | "If defined, compression parameters will be tuned based on the size.\n" |
|
300 | 298 | "\n" |
|
301 | 299 | "Optional arguments ``read_size`` and ``write_size`` define the chunk sizes\n" |
|
302 | 300 | "of ``read()`` and ``write()`` operations, respectively. By default, they use\n" |
|
303 | 301 | "the default compression stream input and output sizes, respectively.\n" |
|
304 | 302 | ); |
|
305 | 303 | |
|
306 | 304 | static PyObject* ZstdCompressor_copy_stream(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
307 | 305 | static char* kwlist[] = { |
|
308 | 306 | "ifh", |
|
309 | 307 | "ofh", |
|
310 | 308 | "size", |
|
311 | 309 | "read_size", |
|
312 | 310 | "write_size", |
|
313 | 311 | NULL |
|
314 | 312 | }; |
|
315 | 313 | |
|
316 | 314 | PyObject* source; |
|
317 | 315 | PyObject* dest; |
|
318 | 316 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; |
|
319 | 317 | size_t inSize = ZSTD_CStreamInSize(); |
|
320 | 318 | size_t outSize = ZSTD_CStreamOutSize(); |
|
321 | 319 | ZSTD_inBuffer input; |
|
322 | 320 | ZSTD_outBuffer output; |
|
323 | 321 | Py_ssize_t totalRead = 0; |
|
324 | 322 | Py_ssize_t totalWrite = 0; |
|
325 | 323 | char* readBuffer; |
|
326 | 324 | Py_ssize_t readSize; |
|
327 | 325 | PyObject* readResult = NULL; |
|
328 | 326 | PyObject* res = NULL; |
|
329 | 327 | size_t zresult; |
|
330 | 328 | PyObject* writeResult; |
|
331 | 329 | PyObject* totalReadPy; |
|
332 | 330 | PyObject* totalWritePy; |
|
333 | 331 | |
|
334 | 332 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|Kkk:copy_stream", kwlist, |
|
335 | 333 | &source, &dest, &sourceSize, &inSize, &outSize)) { |
|
336 | 334 | return NULL; |
|
337 | 335 | } |
|
338 | 336 | |
|
339 | 337 | if (!PyObject_HasAttrString(source, "read")) { |
|
340 | 338 | PyErr_SetString(PyExc_ValueError, "first argument must have a read() method"); |
|
341 | 339 | return NULL; |
|
342 | 340 | } |
|
343 | 341 | |
|
344 | 342 | if (!PyObject_HasAttrString(dest, "write")) { |
|
345 | 343 | PyErr_SetString(PyExc_ValueError, "second argument must have a write() method"); |
|
346 | 344 | return NULL; |
|
347 | 345 | } |
|
348 | 346 | |
|
349 | if (ensure_cctx(self)) { | |
|
350 | return NULL; | |
|
351 | } | |
|
347 | ZSTD_CCtx_reset(self->cctx); | |
|
352 | 348 | |
|
353 | 349 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); |
|
354 | 350 | if (ZSTD_isError(zresult)) { |
|
355 | 351 | PyErr_Format(ZstdError, "error setting source size: %s", |
|
356 | 352 | ZSTD_getErrorName(zresult)); |
|
357 | 353 | return NULL; |
|
358 | 354 | } |
|
359 | 355 | |
|
360 | 356 | /* Prevent free on uninitialized memory in finally. */ |
|
361 | 357 | output.dst = PyMem_Malloc(outSize); |
|
362 | 358 | if (!output.dst) { |
|
363 | 359 | PyErr_NoMemory(); |
|
364 | 360 | res = NULL; |
|
365 | 361 | goto finally; |
|
366 | 362 | } |
|
367 | 363 | output.size = outSize; |
|
368 | 364 | output.pos = 0; |
|
369 | 365 | |
|
370 | 366 | input.src = NULL; |
|
371 | 367 | input.size = 0; |
|
372 | 368 | input.pos = 0; |
|
373 | 369 | |
|
374 | 370 | while (1) { |
|
375 | 371 | /* Try to read from source stream. */ |
|
376 | 372 | readResult = PyObject_CallMethod(source, "read", "n", inSize); |
|
377 | 373 | if (!readResult) { |
|
378 | 374 | PyErr_SetString(ZstdError, "could not read() from source"); |
|
379 | 375 | goto finally; |
|
380 | 376 | } |
|
381 | 377 | |
|
382 | 378 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); |
|
383 | 379 | |
|
384 | 380 | /* If no data was read, we're at EOF. */ |
|
385 | 381 | if (0 == readSize) { |
|
386 | 382 | break; |
|
387 | 383 | } |
|
388 | 384 | |
|
389 | 385 | totalRead += readSize; |
|
390 | 386 | |
|
391 | 387 | /* Send data to compressor */ |
|
392 | 388 | input.src = readBuffer; |
|
393 | 389 | input.size = readSize; |
|
394 | 390 | input.pos = 0; |
|
395 | 391 | |
|
396 | 392 | while (input.pos < input.size) { |
|
397 | 393 | Py_BEGIN_ALLOW_THREADS |
|
398 | 394 | zresult = ZSTD_compress_generic(self->cctx, &output, &input, ZSTD_e_continue); |
|
399 | 395 | Py_END_ALLOW_THREADS |
|
400 | 396 | |
|
401 | 397 | if (ZSTD_isError(zresult)) { |
|
402 | 398 | res = NULL; |
|
403 | 399 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); |
|
404 | 400 | goto finally; |
|
405 | 401 | } |
|
406 | 402 | |
|
407 | 403 | if (output.pos) { |
|
408 | 404 | #if PY_MAJOR_VERSION >= 3 |
|
409 | 405 | writeResult = PyObject_CallMethod(dest, "write", "y#", |
|
410 | 406 | #else |
|
411 | 407 | writeResult = PyObject_CallMethod(dest, "write", "s#", |
|
412 | 408 | #endif |
|
413 | 409 | output.dst, output.pos); |
|
414 | 410 | Py_XDECREF(writeResult); |
|
415 | 411 | totalWrite += output.pos; |
|
416 | 412 | output.pos = 0; |
|
417 | 413 | } |
|
418 | 414 | } |
|
419 | 415 | |
|
420 | 416 | Py_CLEAR(readResult); |
|
421 | 417 | } |
|
422 | 418 | |
|
423 | 419 | /* We've finished reading. Now flush the compressor stream. */ |
|
424 | 420 | assert(input.pos == input.size); |
|
425 | 421 | |
|
426 | 422 | while (1) { |
|
427 | 423 | Py_BEGIN_ALLOW_THREADS |
|
428 | 424 | zresult = ZSTD_compress_generic(self->cctx, &output, &input, ZSTD_e_end); |
|
429 | 425 | Py_END_ALLOW_THREADS |
|
430 | 426 | |
|
431 | 427 | if (ZSTD_isError(zresult)) { |
|
432 | 428 | PyErr_Format(ZstdError, "error ending compression stream: %s", |
|
433 | 429 | ZSTD_getErrorName(zresult)); |
|
434 | 430 | res = NULL; |
|
435 | 431 | goto finally; |
|
436 | 432 | } |
|
437 | 433 | |
|
438 | 434 | if (output.pos) { |
|
439 | 435 | #if PY_MAJOR_VERSION >= 3 |
|
440 | 436 | writeResult = PyObject_CallMethod(dest, "write", "y#", |
|
441 | 437 | #else |
|
442 | 438 | writeResult = PyObject_CallMethod(dest, "write", "s#", |
|
443 | 439 | #endif |
|
444 | 440 | output.dst, output.pos); |
|
445 | 441 | totalWrite += output.pos; |
|
446 | 442 | Py_XDECREF(writeResult); |
|
447 | 443 | output.pos = 0; |
|
448 | 444 | } |
|
449 | 445 | |
|
450 | 446 | if (!zresult) { |
|
451 | 447 | break; |
|
452 | 448 | } |
|
453 | 449 | } |
|
454 | 450 | |
|
455 | 451 | totalReadPy = PyLong_FromSsize_t(totalRead); |
|
456 | 452 | totalWritePy = PyLong_FromSsize_t(totalWrite); |
|
457 | 453 | res = PyTuple_Pack(2, totalReadPy, totalWritePy); |
|
458 | 454 | Py_DECREF(totalReadPy); |
|
459 | 455 | Py_DECREF(totalWritePy); |
|
460 | 456 | |
|
461 | 457 | finally: |
|
462 | 458 | if (output.dst) { |
|
463 | 459 | PyMem_Free(output.dst); |
|
464 | 460 | } |
|
465 | 461 | |
|
466 | 462 | Py_XDECREF(readResult); |
|
467 | 463 | |
|
468 | 464 | return res; |
|
469 | 465 | } |
|
470 | 466 | |
|
471 | 467 | PyDoc_STRVAR(ZstdCompressor_stream_reader__doc__, |
|
472 | 468 | "stream_reader(source, [size=0])\n" |
|
473 | 469 | "\n" |
|
474 | 470 | "Obtain an object that behaves like an I/O stream.\n" |
|
475 | 471 | "\n" |
|
476 | 472 | "The source object can be any object with a ``read(size)`` method\n" |
|
477 | 473 | "or an object that conforms to the buffer protocol.\n" |
|
478 | 474 | ); |
|
479 | 475 | |
|
480 | 476 | static ZstdCompressionReader* ZstdCompressor_stream_reader(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
481 | 477 | static char* kwlist[] = { |
|
482 | 478 | "source", |
|
483 | 479 | "size", |
|
484 | 480 | "read_size", |
|
485 | 481 | NULL |
|
486 | 482 | }; |
|
487 | 483 | |
|
488 | 484 | PyObject* source; |
|
489 | 485 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; |
|
490 | 486 | size_t readSize = ZSTD_CStreamInSize(); |
|
491 | 487 | ZstdCompressionReader* result = NULL; |
|
488 | size_t zresult; | |
|
492 | 489 | |
|
493 | 490 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist, |
|
494 | 491 | &source, &sourceSize, &readSize)) { |
|
495 | 492 | return NULL; |
|
496 | 493 | } |
|
497 | 494 | |
|
498 | 495 | result = (ZstdCompressionReader*)PyObject_CallObject((PyObject*)&ZstdCompressionReaderType, NULL); |
|
499 | 496 | if (!result) { |
|
500 | 497 | return NULL; |
|
501 | 498 | } |
|
502 | 499 | |
|
503 | 500 | if (PyObject_HasAttrString(source, "read")) { |
|
504 | 501 | result->reader = source; |
|
505 | 502 | Py_INCREF(source); |
|
506 | 503 | result->readSize = readSize; |
|
507 | 504 | } |
|
508 | 505 | else if (1 == PyObject_CheckBuffer(source)) { |
|
509 | 506 | if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) { |
|
510 | 507 | goto except; |
|
511 | 508 | } |
|
512 | 509 | |
|
513 | 510 | assert(result->buffer.len >= 0); |
|
514 | 511 | |
|
515 | 512 | sourceSize = result->buffer.len; |
|
516 | 513 | } |
|
517 | 514 | else { |
|
518 | 515 | PyErr_SetString(PyExc_TypeError, |
|
519 | 516 | "must pass an object with a read() method or that conforms to the buffer protocol"); |
|
520 | 517 | goto except; |
|
521 | 518 | } |
|
522 | 519 | |
|
523 | if (ensure_cctx(self)) { | |
|
520 | ZSTD_CCtx_reset(self->cctx); | |
|
521 | ||
|
522 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); | |
|
523 | if (ZSTD_isError(zresult)) { | |
|
524 | PyErr_Format(ZstdError, "error setting source source: %s", | |
|
525 | ZSTD_getErrorName(zresult)); | |
|
524 | 526 | goto except; |
|
525 | 527 | } |
|
526 | 528 | |
|
527 | 529 | result->compressor = self; |
|
528 | 530 | Py_INCREF(self); |
|
529 | result->sourceSize = sourceSize; | |
|
530 | 531 | |
|
531 | 532 | return result; |
|
532 | 533 | |
|
533 | 534 | except: |
|
534 | 535 | Py_CLEAR(result); |
|
535 | 536 | |
|
536 | 537 | return NULL; |
|
537 | 538 | } |
|
538 | 539 | |
|
539 | 540 | PyDoc_STRVAR(ZstdCompressor_compress__doc__, |
|
540 | 541 | "compress(data)\n" |
|
541 | 542 | "\n" |
|
542 | 543 | "Compress data in a single operation.\n" |
|
543 | 544 | "\n" |
|
544 | 545 | "This is the simplest mechanism to perform compression: simply pass in a\n" |
|
545 | 546 | "value and get a compressed value back. It is almost the most prone to abuse.\n" |
|
546 | 547 | "The input and output values must fit in memory, so passing in very large\n" |
|
547 | 548 | "values can result in excessive memory usage. For this reason, one of the\n" |
|
548 | 549 | "streaming based APIs is preferred for larger values.\n" |
|
549 | 550 | ); |
|
550 | 551 | |
|
551 | 552 | static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
552 | 553 | static char* kwlist[] = { |
|
553 | 554 | "data", |
|
554 | 555 | NULL |
|
555 | 556 | }; |
|
556 | 557 | |
|
557 | 558 | Py_buffer source; |
|
558 | 559 | size_t destSize; |
|
559 | 560 | PyObject* output = NULL; |
|
560 | 561 | size_t zresult; |
|
561 | 562 | ZSTD_outBuffer outBuffer; |
|
562 | 563 | ZSTD_inBuffer inBuffer; |
|
563 | 564 | |
|
564 | 565 | #if PY_MAJOR_VERSION >= 3 |
|
565 | 566 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|O:compress", |
|
566 | 567 | #else |
|
567 | 568 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|O:compress", |
|
568 | 569 | #endif |
|
569 | 570 | kwlist, &source)) { |
|
570 | 571 | return NULL; |
|
571 | 572 | } |
|
572 | 573 | |
|
573 | 574 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { |
|
574 | 575 | PyErr_SetString(PyExc_ValueError, |
|
575 | 576 | "data buffer should be contiguous and have at most one dimension"); |
|
576 | 577 | goto finally; |
|
577 | 578 | } |
|
578 | 579 | |
|
579 | if (ensure_cctx(self)) { | |
|
580 | goto finally; | |
|
581 | } | |
|
580 | ZSTD_CCtx_reset(self->cctx); | |
|
582 | 581 | |
|
583 | 582 | destSize = ZSTD_compressBound(source.len); |
|
584 | 583 | output = PyBytes_FromStringAndSize(NULL, destSize); |
|
585 | 584 | if (!output) { |
|
586 | 585 | goto finally; |
|
587 | 586 | } |
|
588 | 587 | |
|
589 | 588 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, source.len); |
|
590 | 589 | if (ZSTD_isError(zresult)) { |
|
591 | 590 | PyErr_Format(ZstdError, "error setting source size: %s", |
|
592 | 591 | ZSTD_getErrorName(zresult)); |
|
593 | 592 | Py_CLEAR(output); |
|
594 | 593 | goto finally; |
|
595 | 594 | } |
|
596 | 595 | |
|
597 | 596 | inBuffer.src = source.buf; |
|
598 | 597 | inBuffer.size = source.len; |
|
599 | 598 | inBuffer.pos = 0; |
|
600 | 599 | |
|
601 | 600 | outBuffer.dst = PyBytes_AsString(output); |
|
602 | 601 | outBuffer.size = destSize; |
|
603 | 602 | outBuffer.pos = 0; |
|
604 | 603 | |
|
605 | 604 | Py_BEGIN_ALLOW_THREADS |
|
606 | 605 | /* By avoiding ZSTD_compress(), we don't necessarily write out content |
|
607 | 606 | size. This means the argument to ZstdCompressor to control frame |
|
608 | 607 | parameters is honored. */ |
|
609 | 608 | zresult = ZSTD_compress_generic(self->cctx, &outBuffer, &inBuffer, ZSTD_e_end); |
|
610 | 609 | Py_END_ALLOW_THREADS |
|
611 | 610 | |
|
612 | 611 | if (ZSTD_isError(zresult)) { |
|
613 | 612 | PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult)); |
|
614 | 613 | Py_CLEAR(output); |
|
615 | 614 | goto finally; |
|
616 | 615 | } |
|
617 | 616 | else if (zresult) { |
|
618 | 617 | PyErr_SetString(ZstdError, "unexpected partial frame flush"); |
|
619 | 618 | Py_CLEAR(output); |
|
620 | 619 | goto finally; |
|
621 | 620 | } |
|
622 | 621 | |
|
623 | 622 | Py_SIZE(output) = outBuffer.pos; |
|
624 | 623 | |
|
625 | 624 | finally: |
|
626 | 625 | PyBuffer_Release(&source); |
|
627 | 626 | return output; |
|
628 | 627 | } |
|
629 | 628 | |
|
630 | 629 | PyDoc_STRVAR(ZstdCompressionObj__doc__, |
|
631 | 630 | "compressobj()\n" |
|
632 | 631 | "\n" |
|
633 | 632 | "Return an object exposing ``compress(data)`` and ``flush()`` methods.\n" |
|
634 | 633 | "\n" |
|
635 | 634 | "The returned object exposes an API similar to ``zlib.compressobj`` and\n" |
|
636 | 635 | "``bz2.BZ2Compressor`` so that callers can swap in the zstd compressor\n" |
|
637 | 636 | "without changing how compression is performed.\n" |
|
638 | 637 | ); |
|
639 | 638 | |
|
640 | 639 | static ZstdCompressionObj* ZstdCompressor_compressobj(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
641 | 640 | static char* kwlist[] = { |
|
642 | 641 | "size", |
|
643 | 642 | NULL |
|
644 | 643 | }; |
|
645 | 644 | |
|
646 | 645 | unsigned long long inSize = ZSTD_CONTENTSIZE_UNKNOWN; |
|
647 | 646 | size_t outSize = ZSTD_CStreamOutSize(); |
|
648 | 647 | ZstdCompressionObj* result = NULL; |
|
649 | 648 | size_t zresult; |
|
650 | 649 | |
|
651 | 650 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|K:compressobj", kwlist, &inSize)) { |
|
652 | 651 | return NULL; |
|
653 | 652 | } |
|
654 | 653 | |
|
655 | if (ensure_cctx(self)) { | |
|
656 | return NULL; | |
|
657 | } | |
|
654 | ZSTD_CCtx_reset(self->cctx); | |
|
658 | 655 | |
|
659 | 656 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize); |
|
660 | 657 | if (ZSTD_isError(zresult)) { |
|
661 | 658 | PyErr_Format(ZstdError, "error setting source size: %s", |
|
662 | 659 | ZSTD_getErrorName(zresult)); |
|
663 | 660 | return NULL; |
|
664 | 661 | } |
|
665 | 662 | |
|
666 | 663 | result = (ZstdCompressionObj*)PyObject_CallObject((PyObject*)&ZstdCompressionObjType, NULL); |
|
667 | 664 | if (!result) { |
|
668 | 665 | return NULL; |
|
669 | 666 | } |
|
670 | 667 | |
|
671 | 668 | result->output.dst = PyMem_Malloc(outSize); |
|
672 | 669 | if (!result->output.dst) { |
|
673 | 670 | PyErr_NoMemory(); |
|
674 | 671 | Py_DECREF(result); |
|
675 | 672 | return NULL; |
|
676 | 673 | } |
|
677 | 674 | result->output.size = outSize; |
|
678 | 675 | result->compressor = self; |
|
679 | 676 | Py_INCREF(result->compressor); |
|
680 | 677 | |
|
681 | 678 | return result; |
|
682 | 679 | } |
|
683 | 680 | |
|
684 | 681 | PyDoc_STRVAR(ZstdCompressor_read_to_iter__doc__, |
|
685 | 682 | "read_to_iter(reader, [size=0, read_size=default, write_size=default])\n" |
|
686 | 683 | "Read uncompressed data from a reader and return an iterator\n" |
|
687 | 684 | "\n" |
|
688 | 685 | "Returns an iterator of compressed data produced from reading from ``reader``.\n" |
|
689 | 686 | "\n" |
|
690 | 687 | "Uncompressed data will be obtained from ``reader`` by calling the\n" |
|
691 | 688 | "``read(size)`` method of it. The source data will be streamed into a\n" |
|
692 | 689 | "compressor. As compressed data is available, it will be exposed to the\n" |
|
693 | 690 | "iterator.\n" |
|
694 | 691 | "\n" |
|
695 | 692 | "Data is read from the source in chunks of ``read_size``. Compressed chunks\n" |
|
696 | 693 | "are at most ``write_size`` bytes. Both values default to the zstd input and\n" |
|
697 | 694 | "and output defaults, respectively.\n" |
|
698 | 695 | "\n" |
|
699 | 696 | "The caller is partially in control of how fast data is fed into the\n" |
|
700 | 697 | "compressor by how it consumes the returned iterator. The compressor will\n" |
|
701 | 698 | "not consume from the reader unless the caller consumes from the iterator.\n" |
|
702 | 699 | ); |
|
703 | 700 | |
|
704 | 701 | static ZstdCompressorIterator* ZstdCompressor_read_to_iter(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
705 | 702 | static char* kwlist[] = { |
|
706 | 703 | "reader", |
|
707 | 704 | "size", |
|
708 | 705 | "read_size", |
|
709 | 706 | "write_size", |
|
710 | 707 | NULL |
|
711 | 708 | }; |
|
712 | 709 | |
|
713 | 710 | PyObject* reader; |
|
714 | 711 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; |
|
715 | 712 | size_t inSize = ZSTD_CStreamInSize(); |
|
716 | 713 | size_t outSize = ZSTD_CStreamOutSize(); |
|
717 | 714 | ZstdCompressorIterator* result; |
|
718 | 715 | size_t zresult; |
|
719 | 716 | |
|
720 | 717 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kkk:read_to_iter", kwlist, |
|
721 | 718 | &reader, &sourceSize, &inSize, &outSize)) { |
|
722 | 719 | return NULL; |
|
723 | 720 | } |
|
724 | 721 | |
|
725 | 722 | result = (ZstdCompressorIterator*)PyObject_CallObject((PyObject*)&ZstdCompressorIteratorType, NULL); |
|
726 | 723 | if (!result) { |
|
727 | 724 | return NULL; |
|
728 | 725 | } |
|
729 | 726 | if (PyObject_HasAttrString(reader, "read")) { |
|
730 | 727 | result->reader = reader; |
|
731 | 728 | Py_INCREF(result->reader); |
|
732 | 729 | } |
|
733 | 730 | else if (1 == PyObject_CheckBuffer(reader)) { |
|
734 | 731 | if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) { |
|
735 | 732 | goto except; |
|
736 | 733 | } |
|
737 | 734 | |
|
738 | 735 | sourceSize = result->buffer.len; |
|
739 | 736 | } |
|
740 | 737 | else { |
|
741 | 738 | PyErr_SetString(PyExc_ValueError, |
|
742 | 739 | "must pass an object with a read() method or conforms to buffer protocol"); |
|
743 | 740 | goto except; |
|
744 | 741 | } |
|
745 | 742 | |
|
746 | if (ensure_cctx(self)) { | |
|
747 | return NULL; | |
|
748 | } | |
|
743 | ZSTD_CCtx_reset(self->cctx); | |
|
749 | 744 | |
|
750 | 745 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); |
|
751 | 746 | if (ZSTD_isError(zresult)) { |
|
752 | 747 | PyErr_Format(ZstdError, "error setting source size: %s", |
|
753 | 748 | ZSTD_getErrorName(zresult)); |
|
754 | 749 | goto except;
|
755 | 750 | } |
|
756 | 751 | |
|
757 | 752 | result->compressor = self; |
|
758 | 753 | Py_INCREF(result->compressor); |
|
759 | 754 | |
|
760 | 755 | result->inSize = inSize; |
|
761 | 756 | result->outSize = outSize; |
|
762 | 757 | |
|
763 | 758 | result->output.dst = PyMem_Malloc(outSize); |
|
764 | 759 | if (!result->output.dst) { |
|
765 | 760 | PyErr_NoMemory(); |
|
766 | 761 | goto except; |
|
767 | 762 | } |
|
768 | 763 | result->output.size = outSize; |
|
769 | 764 | |
|
770 | 765 | goto finally; |
|
771 | 766 | |
|
772 | 767 | except: |
|
773 | 768 | Py_CLEAR(result); |
|
774 | 769 | |
|
775 | 770 | finally: |
|
776 | 771 | return result; |
|
777 | 772 | } |
|
778 | 773 | |
|
779 | 774 | PyDoc_STRVAR(ZstdCompressor_stream_writer___doc__, |
|
780 | 775 | "Create a context manager to write compressed data to an object.\n" |
|
781 | 776 | "\n" |
|
782 | 777 | "The passed object must have a ``write()`` method.\n" |
|
783 | 778 | "\n" |
|
784 | 779 | "The caller feeds input data to the object by calling ``compress(data)``.\n" |
|
785 | 780 | "Compressed data is written to the argument given to this function.\n" |
|
786 | 781 | "\n" |
|
787 | 782 | "The function takes an optional ``size`` argument indicating the total size\n" |
|
788 | 783 | "of the eventual input. If specified, the size will influence compression\n" |
|
789 | 784 | "parameter tuning and could result in the size being written into the\n" |
|
790 | 785 | "header of the compressed data.\n" |
|
791 | 786 | "\n" |
|
792 | 787 | "An optional ``write_size`` argument is also accepted. It defines the maximum\n" |
|
793 | 788 | "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n" |
|
794 | 789 | "for a compressor output stream.\n" |
|
795 | 790 | ); |
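
A small Python sketch of the context-manager usage described above (import name as before; the ``size`` hint is optional and only influences parameter tuning and frame metadata):

    import io
    import zstd  # assumed import name

    cctx = zstd.ZstdCompressor()
    destination = io.BytesIO()  # any object with a write() method

    data = b"hello world"
    # Passing size lets the eventual content size be recorded in the frame header.
    with cctx.stream_writer(destination, size=len(data), write_size=32768) as compressor:
        compressor.compress(data)

    frame = destination.getvalue()  # complete zstd frame once the context exits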
|
796 | 791 | |
|
797 | 792 | static ZstdCompressionWriter* ZstdCompressor_stream_writer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
798 | 793 | static char* kwlist[] = { |
|
799 | 794 | "writer", |
|
800 | 795 | "size", |
|
801 | 796 | "write_size", |
|
802 | 797 | NULL |
|
803 | 798 | }; |
|
804 | 799 | |
|
805 | 800 | PyObject* writer; |
|
806 | 801 | ZstdCompressionWriter* result; |
|
807 | 802 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; |
|
808 | 803 | size_t outSize = ZSTD_CStreamOutSize(); |
|
809 | 804 | |
|
810 | 805 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_writer", kwlist, |
|
811 | 806 | &writer, &sourceSize, &outSize)) { |
|
812 | 807 | return NULL; |
|
813 | 808 | } |
|
814 | 809 | |
|
815 | 810 | if (!PyObject_HasAttrString(writer, "write")) { |
|
816 | 811 | PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method"); |
|
817 | 812 | return NULL; |
|
818 | 813 | } |
|
819 | 814 | |
|
820 | if (ensure_cctx(self)) { | |
|
821 | return NULL; | |
|
822 | } | |
|
815 | ZSTD_CCtx_reset(self->cctx); | |
|
823 | 816 | |
|
824 | 817 | result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL); |
|
825 | 818 | if (!result) { |
|
826 | 819 | return NULL; |
|
827 | 820 | } |
|
828 | 821 | |
|
829 | 822 | result->compressor = self; |
|
830 | 823 | Py_INCREF(result->compressor); |
|
831 | 824 | |
|
832 | 825 | result->writer = writer; |
|
833 | 826 | Py_INCREF(result->writer); |
|
834 | 827 | |
|
835 | 828 | result->sourceSize = sourceSize; |
|
836 | 829 | result->outSize = outSize; |
|
837 | 830 | result->bytesCompressed = 0; |
|
838 | 831 | |
|
839 | 832 | return result; |
|
840 | 833 | } |
|
841 | 834 | |
|
835 | PyDoc_STRVAR(ZstdCompressor_chunker__doc__, | |
|
836 | "Create an object for iterative compressing to same-sized chunks.\n" | |
|
837 | ); | |
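
A hedged Python sketch of how the chunker is meant to be driven; the compress()/finish() calls follow the chunker iterator implemented earlier in this diff, and the import name is an assumption:

    import zstd  # assumed import name

    cctx = zstd.ZstdCompressor()
    chunker = cctx.chunker(chunk_size=16384)

    chunks = []
    for raw in (b"a" * 100000, b"b" * 100000):
        # compress() yields only full chunk_size chunks; leftovers stay buffered.
        chunks.extend(chunker.compress(raw))

    # finish() ends the frame and emits whatever remains; the last chunk may be short.
    chunks.extend(chunker.finish())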
|
838 | ||
|
839 | static ZstdCompressionChunker* ZstdCompressor_chunker(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
|
840 | static char* kwlist[] = { | |
|
841 | "size", | |
|
842 | "chunk_size", | |
|
843 | NULL | |
|
844 | }; | |
|
845 | ||
|
846 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
|
847 | size_t chunkSize = ZSTD_CStreamOutSize(); | |
|
848 | ZstdCompressionChunker* chunker; | |
|
849 | size_t zresult; | |
|
850 | ||
|
851 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|Kk:chunker", kwlist, | |
|
852 | &sourceSize, &chunkSize)) { | |
|
853 | return NULL; | |
|
854 | } | |
|
855 | ||
|
856 | ZSTD_CCtx_reset(self->cctx); | |
|
857 | ||
|
858 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); | |
|
859 | if (ZSTD_isError(zresult)) { | |
|
860 | PyErr_Format(ZstdError, "error setting source size: %s", | |
|
861 | ZSTD_getErrorName(zresult)); | |
|
862 | return NULL; | |
|
863 | } | |
|
864 | ||
|
865 | chunker = (ZstdCompressionChunker*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerType, NULL); | |
|
866 | if (!chunker) { | |
|
867 | return NULL; | |
|
868 | } | |
|
869 | ||
|
870 | chunker->output.dst = PyMem_Malloc(chunkSize); | |
|
871 | if (!chunker->output.dst) { | |
|
872 | PyErr_NoMemory(); | |
|
873 | Py_DECREF(chunker); | |
|
874 | return NULL; | |
|
875 | } | |
|
876 | chunker->output.size = chunkSize; | |
|
877 | chunker->output.pos = 0; | |
|
878 | ||
|
879 | chunker->compressor = self; | |
|
880 | Py_INCREF(chunker->compressor); | |
|
881 | ||
|
882 | chunker->chunkSize = chunkSize; | |
|
883 | ||
|
884 | return chunker; | |
|
885 | } | |
|
886 | ||
|
842 | 887 | typedef struct { |
|
843 | 888 | void* sourceData; |
|
844 | 889 | size_t sourceSize; |
|
845 | 890 | } DataSource; |
|
846 | 891 | |
|
847 | 892 | typedef struct { |
|
848 | 893 | DataSource* sources; |
|
849 | 894 | Py_ssize_t sourcesSize; |
|
850 | 895 | unsigned long long totalSourceSize; |
|
851 | 896 | } DataSources; |
|
852 | 897 | |
|
853 | 898 | typedef struct { |
|
854 | 899 | void* dest; |
|
855 | 900 | Py_ssize_t destSize; |
|
856 | 901 | BufferSegment* segments; |
|
857 | 902 | Py_ssize_t segmentsSize; |
|
858 | 903 | } DestBuffer; |
|
859 | 904 | |
|
860 | 905 | typedef enum { |
|
861 | 906 | WorkerError_none = 0, |
|
862 | 907 | WorkerError_zstd = 1, |
|
863 | 908 | WorkerError_no_memory = 2, |
|
864 | 909 | WorkerError_nospace = 3, |
|
865 | 910 | } WorkerError; |
|
866 | 911 | |
|
867 | 912 | /** |
|
868 | 913 | * Holds state for an individual worker performing multi_compress_to_buffer work. |
|
869 | 914 | */ |
|
870 | 915 | typedef struct { |
|
871 | 916 | /* Used for compression. */ |
|
872 | 917 | ZSTD_CCtx* cctx; |
|
873 | 918 | |
|
874 | 919 | /* What to compress. */ |
|
875 | 920 | DataSource* sources; |
|
876 | 921 | Py_ssize_t sourcesSize; |
|
877 | 922 | Py_ssize_t startOffset; |
|
878 | 923 | Py_ssize_t endOffset; |
|
879 | 924 | unsigned long long totalSourceSize; |
|
880 | 925 | |
|
881 | 926 | /* Result storage. */ |
|
882 | 927 | DestBuffer* destBuffers; |
|
883 | 928 | Py_ssize_t destCount; |
|
884 | 929 | |
|
885 | 930 | /* Error tracking. */ |
|
886 | 931 | WorkerError error; |
|
887 | 932 | size_t zresult; |
|
888 | 933 | Py_ssize_t errorOffset; |
|
889 | 934 | } WorkerState; |
|
890 | 935 | |
|
891 | 936 | static void compress_worker(WorkerState* state) { |
|
892 | 937 | Py_ssize_t inputOffset = state->startOffset; |
|
893 | 938 | Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1; |
|
894 | 939 | Py_ssize_t currentBufferStartOffset = state->startOffset; |
|
895 | 940 | size_t zresult; |
|
896 | 941 | void* newDest; |
|
897 | 942 | size_t allocationSize; |
|
898 | 943 | size_t boundSize; |
|
899 | 944 | Py_ssize_t destOffset = 0; |
|
900 | 945 | DataSource* sources = state->sources; |
|
901 | 946 | DestBuffer* destBuffer; |
|
902 | 947 | |
|
903 | 948 | assert(!state->destBuffers); |
|
904 | 949 | assert(0 == state->destCount); |
|
905 | 950 | |
|
906 | 951 | /* |
|
907 | 952 | * The total size of the compressed data is unknown until we actually |
|
908 | 953 | * compress data. That means we can't pre-allocate the exact size we need. |
|
909 | 954 | * |
|
910 | 955 | * There is a cost to every allocation and reallocation. So, it is in our |
|
911 | 956 | * interest to minimize the number of allocations. |
|
912 | 957 | * |
|
913 | 958 | * There is also a cost to too few allocations. If allocations are too |
|
914 | 959 | * large they may fail. If buffers are shared and inputs are released

915 | 960 | * at different times, then a reference to one segment
|
916 | 961 | * in the buffer will keep the entire buffer alive. This leads to excessive |
|
917 | 962 | * memory usage. |
|
918 | 963 | * |
|
919 | 964 | * Our current strategy is to assume a compression ratio of 16:1 and |
|
920 | 965 | * allocate buffers of that size, rounded up to the nearest power of 2 |
|
921 | 966 | * (because computers like round numbers). That ratio is greater than what |
|
922 | 967 | * most inputs achieve. This is by design: we don't want to over-allocate. |
|
923 | 968 | * But we don't want to under-allocate and lead to too many buffers either. |
|
924 | 969 | */ |
|
925 | 970 | |
|
926 | 971 | state->destCount = 1; |
|
927 | 972 | |
|
928 | 973 | state->destBuffers = calloc(1, sizeof(DestBuffer)); |
|
929 | 974 | if (NULL == state->destBuffers) { |
|
930 | 975 | state->error = WorkerError_no_memory; |
|
931 | 976 | return; |
|
932 | 977 | } |
|
933 | 978 | |
|
934 | 979 | destBuffer = &state->destBuffers[state->destCount - 1]; |
|
935 | 980 | |
|
936 | 981 | /* |
|
937 | 982 | * Rather than track bounds and grow the segments buffer, allocate space |
|
938 | 983 | * to hold remaining items then truncate when we're done with it. |
|
939 | 984 | */ |
|
940 | 985 | destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment)); |
|
941 | 986 | if (NULL == destBuffer->segments) { |
|
942 | 987 | state->error = WorkerError_no_memory; |
|
943 | 988 | return; |
|
944 | 989 | } |
|
945 | 990 | |
|
946 | 991 | destBuffer->segmentsSize = remainingItems; |
|
947 | 992 | |
|
948 | 993 | assert(state->totalSourceSize <= SIZE_MAX); |
|
949 | 994 | allocationSize = roundpow2((size_t)state->totalSourceSize >> 4); |
|
950 | 995 | |
|
951 | 996 | /* If the maximum size of the output is larger than that, round up. */ |
|
952 | 997 | boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize); |
|
953 | 998 | |
|
954 | 999 | if (boundSize > allocationSize) { |
|
955 | 1000 | allocationSize = roundpow2(boundSize); |
|
956 | 1001 | } |
|
957 | 1002 | |
|
958 | 1003 | destBuffer->dest = malloc(allocationSize); |
|
959 | 1004 | if (NULL == destBuffer->dest) { |
|
960 | 1005 | state->error = WorkerError_no_memory; |
|
961 | 1006 | return; |
|
962 | 1007 | } |
|
963 | 1008 | |
|
964 | 1009 | destBuffer->destSize = allocationSize; |
|
965 | 1010 | |
|
966 | 1011 | for (inputOffset = state->startOffset; inputOffset <= state->endOffset; inputOffset++) { |
|
967 | 1012 | void* source = sources[inputOffset].sourceData; |
|
968 | 1013 | size_t sourceSize = sources[inputOffset].sourceSize; |
|
969 | 1014 | size_t destAvailable; |
|
970 | 1015 | void* dest; |
|
971 | 1016 | ZSTD_outBuffer opOutBuffer; |
|
972 | 1017 | ZSTD_inBuffer opInBuffer; |
|
973 | 1018 | |
|
974 | 1019 | destAvailable = destBuffer->destSize - destOffset; |
|
975 | 1020 | boundSize = ZSTD_compressBound(sourceSize); |
|
976 | 1021 | |
|
977 | 1022 | /* |
|
978 | 1023 | * Not enough space in current buffer to hold largest compressed output. |
|
979 | 1024 | * So allocate and switch to a new output buffer. |
|
980 | 1025 | */ |
|
981 | 1026 | if (boundSize > destAvailable) { |
|
982 | 1027 | /* |
|
983 | 1028 | * The downsizing of the existing buffer is optional. It should be cheap |
|
984 | 1029 | * (unlike growing). So we just do it. |
|
985 | 1030 | */ |
|
986 | 1031 | if (destAvailable) { |
|
987 | 1032 | newDest = realloc(destBuffer->dest, destOffset); |
|
988 | 1033 | if (NULL == newDest) { |
|
989 | 1034 | state->error = WorkerError_no_memory; |
|
990 | 1035 | return; |
|
991 | 1036 | } |
|
992 | 1037 | |
|
993 | 1038 | destBuffer->dest = newDest; |
|
994 | 1039 | destBuffer->destSize = destOffset; |
|
995 | 1040 | } |
|
996 | 1041 | |
|
997 | 1042 | /* Truncate segments buffer. */ |
|
998 | 1043 | newDest = realloc(destBuffer->segments, |
|
999 | 1044 | (inputOffset - currentBufferStartOffset + 1) * sizeof(BufferSegment)); |
|
1000 | 1045 | if (NULL == newDest) { |
|
1001 | 1046 | state->error = WorkerError_no_memory; |
|
1002 | 1047 | return; |
|
1003 | 1048 | } |
|
1004 | 1049 | |
|
1005 | 1050 | destBuffer->segments = newDest; |
|
1006 | 1051 | destBuffer->segmentsSize = inputOffset - currentBufferStartOffset; |
|
1007 | 1052 | |
|
1008 | 1053 | /* Grow space for new struct. */ |
|
1009 | 1054 | /* TODO consider over-allocating so we don't do this every time. */ |
|
1010 | 1055 | newDest = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer)); |
|
1011 | 1056 | if (NULL == newDest) { |
|
1012 | 1057 | state->error = WorkerError_no_memory; |
|
1013 | 1058 | return; |
|
1014 | 1059 | } |
|
1015 | 1060 | |
|
1016 | 1061 | state->destBuffers = newDest; |
|
1017 | 1062 | state->destCount++; |
|
1018 | 1063 | |
|
1019 | 1064 | destBuffer = &state->destBuffers[state->destCount - 1]; |
|
1020 | 1065 | |
|
1021 | 1066 | /* Don't take any chances with non-NULL pointers. */ |
|
1022 | 1067 | memset(destBuffer, 0, sizeof(DestBuffer)); |
|
1023 | 1068 | |
|
1024 | 1069 | /** |
|
1025 | 1070 | * We could dynamically update allocation size based on work done so far. |
|
1026 | 1071 | * For now, keep it simple.
|
1027 | 1072 | */ |
|
1028 | 1073 | assert(state->totalSourceSize <= SIZE_MAX); |
|
1029 | 1074 | allocationSize = roundpow2((size_t)state->totalSourceSize >> 4); |
|
1030 | 1075 | |
|
1031 | 1076 | if (boundSize > allocationSize) { |
|
1032 | 1077 | allocationSize = roundpow2(boundSize); |
|
1033 | 1078 | } |
|
1034 | 1079 | |
|
1035 | 1080 | destBuffer->dest = malloc(allocationSize); |
|
1036 | 1081 | if (NULL == destBuffer->dest) { |
|
1037 | 1082 | state->error = WorkerError_no_memory; |
|
1038 | 1083 | return; |
|
1039 | 1084 | } |
|
1040 | 1085 | |
|
1041 | 1086 | destBuffer->destSize = allocationSize; |
|
1042 | 1087 | destAvailable = allocationSize; |
|
1043 | 1088 | destOffset = 0; |
|
1044 | 1089 | |
|
1045 | 1090 | destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment)); |
|
1046 | 1091 | if (NULL == destBuffer->segments) { |
|
1047 | 1092 | state->error = WorkerError_no_memory; |
|
1048 | 1093 | return; |
|
1049 | 1094 | } |
|
1050 | 1095 | |
|
1051 | 1096 | destBuffer->segmentsSize = remainingItems; |
|
1052 | 1097 | currentBufferStartOffset = inputOffset; |
|
1053 | 1098 | } |
|
1054 | 1099 | |
|
1055 | 1100 | dest = (char*)destBuffer->dest + destOffset; |
|
1056 | 1101 | |
|
1057 | 1102 | opInBuffer.src = source; |
|
1058 | 1103 | opInBuffer.size = sourceSize; |
|
1059 | 1104 | opInBuffer.pos = 0; |
|
1060 | 1105 | |
|
1061 | 1106 | opOutBuffer.dst = dest; |
|
1062 | 1107 | opOutBuffer.size = destAvailable; |
|
1063 | 1108 | opOutBuffer.pos = 0; |
|
1064 | 1109 | |
|
1065 | 1110 | zresult = ZSTD_CCtx_setPledgedSrcSize(state->cctx, sourceSize); |
|
1066 | 1111 | if (ZSTD_isError(zresult)) { |
|
1067 | 1112 | state->error = WorkerError_zstd; |
|
1068 | 1113 | state->zresult = zresult; |
|
1069 | 1114 | state->errorOffset = inputOffset; |
|
1070 | 1115 | break; |
|
1071 | 1116 | } |
|
1072 | 1117 | |
|
1073 | 1118 | zresult = ZSTD_compress_generic(state->cctx, &opOutBuffer, &opInBuffer, ZSTD_e_end); |
|
1074 | 1119 | if (ZSTD_isError(zresult)) { |
|
1075 | 1120 | state->error = WorkerError_zstd; |
|
1076 | 1121 | state->zresult = zresult; |
|
1077 | 1122 | state->errorOffset = inputOffset; |
|
1078 | 1123 | break; |
|
1079 | 1124 | } |
|
1080 | 1125 | else if (zresult) { |
|
1081 | 1126 | state->error = WorkerError_nospace; |
|
1082 | 1127 | state->errorOffset = inputOffset; |
|
1083 | 1128 | break; |
|
1084 | 1129 | } |
|
1085 | 1130 | |
|
1086 | 1131 | destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset; |
|
1087 | 1132 | destBuffer->segments[inputOffset - currentBufferStartOffset].length = opOutBuffer.pos; |
|
1088 | 1133 | |
|
1089 | 1134 | destOffset += opOutBuffer.pos; |
|
1090 | 1135 | remainingItems--; |
|
1091 | 1136 | } |
|
1092 | 1137 | |
|
1093 | 1138 | if (destBuffer->destSize > destOffset) { |
|
1094 | 1139 | newDest = realloc(destBuffer->dest, destOffset); |
|
1095 | 1140 | if (NULL == newDest) { |
|
1096 | 1141 | state->error = WorkerError_no_memory; |
|
1097 | 1142 | return; |
|
1098 | 1143 | } |
|
1099 | 1144 | |
|
1100 | 1145 | destBuffer->dest = newDest; |
|
1101 | 1146 | destBuffer->destSize = destOffset; |
|
1102 | 1147 | } |
|
1103 | 1148 | } |
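
The allocation strategy spelled out in the comment at the top of compress_worker() boils down to a simple rule: start from totalSourceSize / 16 rounded up to a power of two, but never below the compress bound of the first item. A rough Python sketch of that arithmetic (the helper names and the example bound are illustrative, not the C implementation):

    def round_pow2(n):
        # Round up to the nearest power of two, mirroring the intent of roundpow2().
        power = 1
        while power < n:
            power *= 2
        return power

    def initial_allocation(total_source_size, first_item_bound):
        # Assume a 16:1 compression ratio for the worker's whole input...
        size = round_pow2(total_source_size >> 4)
        # ...but never allocate less than the worst case for the first item.
        if first_item_bound > size:
            size = round_pow2(first_item_bound)
        return size

    # Example: 10 MiB of input, ~64 KiB worst-case bound for the first item.
    print(initial_allocation(10 * 1024 * 1024, 64 * 1024))  # 1048576 (1 MiB)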
|
1104 | 1149 | |
|
1105 | 1150 | ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor, |
|
1106 | 1151 | DataSources* sources, Py_ssize_t threadCount) { |
|
1107 | 1152 | unsigned long long bytesPerWorker; |
|
1108 | 1153 | POOL_ctx* pool = NULL; |
|
1109 | 1154 | WorkerState* workerStates = NULL; |
|
1110 | 1155 | Py_ssize_t i; |
|
1111 | 1156 | unsigned long long workerBytes = 0; |
|
1112 | 1157 | Py_ssize_t workerStartOffset = 0; |
|
1113 | 1158 | Py_ssize_t currentThread = 0; |
|
1114 | 1159 | int errored = 0; |
|
1115 | 1160 | Py_ssize_t segmentsCount = 0; |
|
1116 | 1161 | Py_ssize_t segmentIndex; |
|
1117 | 1162 | PyObject* segmentsArg = NULL; |
|
1118 | 1163 | ZstdBufferWithSegments* buffer; |
|
1119 | 1164 | ZstdBufferWithSegmentsCollection* result = NULL; |
|
1120 | 1165 | |
|
1121 | 1166 | assert(sources->sourcesSize > 0); |
|
1122 | 1167 | assert(sources->totalSourceSize > 0); |
|
1123 | 1168 | assert(threadCount >= 1); |
|
1124 | 1169 | |
|
1125 | 1170 | /* More threads than inputs makes no sense. */ |
|
1126 | 1171 | threadCount = sources->sourcesSize < threadCount ? sources->sourcesSize |
|
1127 | 1172 | : threadCount; |
|
1128 | 1173 | |
|
1129 | 1174 | /* TODO lower thread count when input size is too small and threads would add |
|
1130 | 1175 | overhead. */ |
|
1131 | 1176 | |
|
1132 | 1177 | workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState)); |
|
1133 | 1178 | if (NULL == workerStates) { |
|
1134 | 1179 | PyErr_NoMemory(); |
|
1135 | 1180 | goto finally; |
|
1136 | 1181 | } |
|
1137 | 1182 | |
|
1138 | 1183 | memset(workerStates, 0, threadCount * sizeof(WorkerState)); |
|
1139 | 1184 | |
|
1140 | 1185 | if (threadCount > 1) { |
|
1141 | 1186 | pool = POOL_create(threadCount, 1); |
|
1142 | 1187 | if (NULL == pool) { |
|
1143 | 1188 | PyErr_SetString(ZstdError, "could not initialize zstd thread pool"); |
|
1144 | 1189 | goto finally; |
|
1145 | 1190 | } |
|
1146 | 1191 | } |
|
1147 | 1192 | |
|
1148 | 1193 | bytesPerWorker = sources->totalSourceSize / threadCount; |
|
1149 | 1194 | |
|
1150 | 1195 | for (i = 0; i < threadCount; i++) { |
|
1151 | 1196 | size_t zresult; |
|
1152 | 1197 | |
|
1153 | 1198 | workerStates[i].cctx = ZSTD_createCCtx(); |
|
1154 | 1199 | if (!workerStates[i].cctx) { |
|
1155 | 1200 | PyErr_NoMemory(); |
|
1156 | 1201 | goto finally; |
|
1157 | 1202 | } |
|
1158 | 1203 | |
|
1159 | 1204 | zresult = ZSTD_CCtx_setParametersUsingCCtxParams(workerStates[i].cctx, |
|
1160 | 1205 | compressor->params); |
|
1161 | 1206 | if (ZSTD_isError(zresult)) { |
|
1162 | 1207 | PyErr_Format(ZstdError, "could not set compression parameters: %s", |
|
1163 | 1208 | ZSTD_getErrorName(zresult)); |
|
1164 | 1209 | goto finally; |
|
1165 | 1210 | } |
|
1166 | 1211 | |
|
1167 | 1212 | if (compressor->dict) { |
|
1168 | 1213 | if (compressor->dict->cdict) { |
|
1169 | 1214 | zresult = ZSTD_CCtx_refCDict(workerStates[i].cctx, compressor->dict->cdict); |
|
1170 | 1215 | } |
|
1171 | 1216 | else { |
|
1172 | 1217 | zresult = ZSTD_CCtx_loadDictionary_advanced( |
|
1173 | 1218 | workerStates[i].cctx, |
|
1174 | 1219 | compressor->dict->dictData, |
|
1175 | 1220 | compressor->dict->dictSize, |
|
1176 | 1221 | ZSTD_dlm_byRef, |
|
1177 | 1222 | compressor->dict->dictType); |
|
1178 | 1223 | } |
|
1179 | 1224 | |
|
1180 | 1225 | if (ZSTD_isError(zresult)) { |
|
1181 | 1226 | PyErr_Format(ZstdError, "could not load compression dictionary: %s", |
|
1182 | 1227 | ZSTD_getErrorName(zresult)); |
|
1183 | 1228 | goto finally; |
|
1184 | 1229 | } |
|
1185 | 1230 | |
|
1186 | 1231 | } |
|
1187 | 1232 | |
|
1188 | 1233 | workerStates[i].sources = sources->sources; |
|
1189 | 1234 | workerStates[i].sourcesSize = sources->sourcesSize; |
|
1190 | 1235 | } |
|
1191 | 1236 | |
|
1192 | 1237 | Py_BEGIN_ALLOW_THREADS |
|
1193 | 1238 | for (i = 0; i < sources->sourcesSize; i++) { |
|
1194 | 1239 | workerBytes += sources->sources[i].sourceSize; |
|
1195 | 1240 | |
|
1196 | 1241 | /* |
|
1197 | 1242 | * The last worker/thread needs to handle all remaining work. Don't |
|
1198 | 1243 | * trigger it prematurely. Defer to the block outside of the loop |
|
1199 | 1244 | * to run the last worker/thread. But do still process this loop |
|
1200 | 1245 | * so workerBytes is correct. |
|
1201 | 1246 | */ |
|
1202 | 1247 | if (currentThread == threadCount - 1) { |
|
1203 | 1248 | continue; |
|
1204 | 1249 | } |
|
1205 | 1250 | |
|
1206 | 1251 | if (workerBytes >= bytesPerWorker) { |
|
1207 | 1252 | assert(currentThread < threadCount); |
|
1208 | 1253 | workerStates[currentThread].totalSourceSize = workerBytes; |
|
1209 | 1254 | workerStates[currentThread].startOffset = workerStartOffset; |
|
1210 | 1255 | workerStates[currentThread].endOffset = i; |
|
1211 | 1256 | |
|
1212 | 1257 | if (threadCount > 1) { |
|
1213 | 1258 | POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]); |
|
1214 | 1259 | } |
|
1215 | 1260 | else { |
|
1216 | 1261 | compress_worker(&workerStates[currentThread]); |
|
1217 | 1262 | } |
|
1218 | 1263 | |
|
1219 | 1264 | currentThread++; |
|
1220 | 1265 | workerStartOffset = i + 1; |
|
1221 | 1266 | workerBytes = 0; |
|
1222 | 1267 | } |
|
1223 | 1268 | } |
|
1224 | 1269 | |
|
1225 | 1270 | if (workerBytes) { |
|
1226 | 1271 | assert(currentThread < threadCount); |
|
1227 | 1272 | workerStates[currentThread].totalSourceSize = workerBytes; |
|
1228 | 1273 | workerStates[currentThread].startOffset = workerStartOffset; |
|
1229 | 1274 | workerStates[currentThread].endOffset = sources->sourcesSize - 1; |
|
1230 | 1275 | |
|
1231 | 1276 | if (threadCount > 1) { |
|
1232 | 1277 | POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]); |
|
1233 | 1278 | } |
|
1234 | 1279 | else { |
|
1235 | 1280 | compress_worker(&workerStates[currentThread]); |
|
1236 | 1281 | } |
|
1237 | 1282 | } |
|
1238 | 1283 | |
|
1239 | 1284 | if (threadCount > 1) { |
|
1240 | 1285 | POOL_free(pool); |
|
1241 | 1286 | pool = NULL; |
|
1242 | 1287 | } |
|
1243 | 1288 | |
|
1244 | 1289 | Py_END_ALLOW_THREADS |
|
1245 | 1290 | |
|
1246 | 1291 | for (i = 0; i < threadCount; i++) { |
|
1247 | 1292 | switch (workerStates[i].error) { |
|
1248 | 1293 | case WorkerError_no_memory: |
|
1249 | 1294 | PyErr_NoMemory(); |
|
1250 | 1295 | errored = 1; |
|
1251 | 1296 | break; |
|
1252 | 1297 | |
|
1253 | 1298 | case WorkerError_zstd: |
|
1254 | 1299 | PyErr_Format(ZstdError, "error compressing item %zd: %s", |
|
1255 | 1300 | workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult)); |
|
1256 | 1301 | errored = 1; |
|
1257 | 1302 | break; |
|
1258 | 1303 | |
|
1259 | 1304 | case WorkerError_nospace: |
|
1260 | 1305 | PyErr_Format(ZstdError, "error compressing item %zd: not enough space in output", |
|
1261 | 1306 | workerStates[i].errorOffset); |
|
1262 | 1307 | errored = 1; |
|
1263 | 1308 | break; |
|
1264 | 1309 | |
|
1265 | 1310 | default: |
|
1266 | 1311 | ; |
|
1267 | 1312 | } |
|
1268 | 1313 | |
|
1269 | 1314 | if (errored) { |
|
1270 | 1315 | break; |
|
1271 | 1316 | } |
|
1272 | 1317 | |
|
1273 | 1318 | } |
|
1274 | 1319 | |
|
1275 | 1320 | if (errored) { |
|
1276 | 1321 | goto finally; |
|
1277 | 1322 | } |
|
1278 | 1323 | |
|
1279 | 1324 | segmentsCount = 0; |
|
1280 | 1325 | for (i = 0; i < threadCount; i++) { |
|
1281 | 1326 | WorkerState* state = &workerStates[i]; |
|
1282 | 1327 | segmentsCount += state->destCount; |
|
1283 | 1328 | } |
|
1284 | 1329 | |
|
1285 | 1330 | segmentsArg = PyTuple_New(segmentsCount); |
|
1286 | 1331 | if (NULL == segmentsArg) { |
|
1287 | 1332 | goto finally; |
|
1288 | 1333 | } |
|
1289 | 1334 | |
|
1290 | 1335 | segmentIndex = 0; |
|
1291 | 1336 | |
|
1292 | 1337 | for (i = 0; i < threadCount; i++) { |
|
1293 | 1338 | Py_ssize_t j; |
|
1294 | 1339 | WorkerState* state = &workerStates[i]; |
|
1295 | 1340 | |
|
1296 | 1341 | for (j = 0; j < state->destCount; j++) { |
|
1297 | 1342 | DestBuffer* destBuffer = &state->destBuffers[j]; |
|
1298 | 1343 | buffer = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize, |
|
1299 | 1344 | destBuffer->segments, destBuffer->segmentsSize); |
|
1300 | 1345 | |
|
1301 | 1346 | if (NULL == buffer) { |
|
1302 | 1347 | goto finally; |
|
1303 | 1348 | } |
|
1304 | 1349 | |
|
1305 | 1350 | /* Tell instance to use free() instead of PyMem_Free(). */
|
1306 | 1351 | buffer->useFree = 1; |
|
1307 | 1352 | |
|
1308 | 1353 | /* |
|
1309 | 1354 | * BufferWithSegments_FromMemory takes ownership of the backing memory. |
|
1310 | 1355 | * Unset it here so it doesn't get freed below. |
|
1311 | 1356 | */ |
|
1312 | 1357 | destBuffer->dest = NULL; |
|
1313 | 1358 | destBuffer->segments = NULL; |
|
1314 | 1359 | |
|
1315 | 1360 | PyTuple_SET_ITEM(segmentsArg, segmentIndex++, (PyObject*)buffer); |
|
1316 | 1361 | } |
|
1317 | 1362 | } |
|
1318 | 1363 | |
|
1319 | 1364 | result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject( |
|
1320 | 1365 | (PyObject*)&ZstdBufferWithSegmentsCollectionType, segmentsArg); |
|
1321 | 1366 | |
|
1322 | 1367 | finally: |
|
1323 | 1368 | Py_CLEAR(segmentsArg); |
|
1324 | 1369 | |
|
1325 | 1370 | if (pool) { |
|
1326 | 1371 | POOL_free(pool); |
|
1327 | 1372 | } |
|
1328 | 1373 | |
|
1329 | 1374 | if (workerStates) { |
|
1330 | 1375 | Py_ssize_t j; |
|
1331 | 1376 | |
|
1332 | 1377 | for (i = 0; i < threadCount; i++) { |
|
1333 | 1378 | WorkerState state = workerStates[i]; |
|
1334 | 1379 | |
|
1335 | 1380 | if (state.cctx) { |
|
1336 | 1381 | ZSTD_freeCCtx(state.cctx); |
|
1337 | 1382 | } |
|
1338 | 1383 | |
|
1339 | 1384 | /* malloc() is used in worker thread. */ |
|
1340 | 1385 | |
|
1341 | 1386 | for (j = 0; j < state.destCount; j++) { |
|
1342 | 1387 | if (state.destBuffers) { |
|
1343 | 1388 | free(state.destBuffers[j].dest); |
|
1344 | 1389 | free(state.destBuffers[j].segments); |
|
1345 | 1390 | } |
|
1346 | 1391 | } |
|
1347 | 1392 | |
|
1348 | 1393 | |
|
1349 | 1394 | free(state.destBuffers); |
|
1350 | 1395 | } |
|
1351 | 1396 | |
|
1352 | 1397 | PyMem_Free(workerStates); |
|
1353 | 1398 | } |
|
1354 | 1399 | |
|
1355 | 1400 | return result; |
|
1356 | 1401 | } |
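
The partitioning loop in compress_from_datasources() assigns consecutive sources to workers so each worker handles roughly totalSourceSize / threadCount bytes, with the last worker absorbing whatever remains. A Python sketch of the same splitting idea (illustrative only, not the C code):

    def partition_sources(sizes, thread_count):
        total = sum(sizes)
        per_worker = total // thread_count
        ranges = []            # (startOffset, endOffset) per worker, inclusive
        start = 0
        acc = 0
        for i, size in enumerate(sizes):
            acc += size
            if len(ranges) == thread_count - 1:
                continue       # the last worker takes everything that is left
            if acc >= per_worker:
                ranges.append((start, i))
                start = i + 1
                acc = 0
        if acc:
            ranges.append((start, len(sizes) - 1))
        return ranges

    print(partition_sources([10, 10, 10, 10, 10, 50], 2))  # [(0, 4), (5, 5)]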
|
1357 | 1402 | |
|
1358 | 1403 | PyDoc_STRVAR(ZstdCompressor_multi_compress_to_buffer__doc__, |
|
1359 | 1404 | "Compress multiple pieces of data as a single operation\n" |
|
1360 | 1405 | "\n" |
|
1361 | 1406 | "Receives a ``BufferWithSegmentsCollection``, a ``BufferWithSegments``, or\n" |
|
1362 | 1407 | "a list of bytes like objects holding data to compress.\n" |
|
1363 | 1408 | "\n" |
|
1364 | 1409 | "Returns a ``BufferWithSegmentsCollection`` holding compressed data.\n" |
|
1365 | 1410 | "\n" |
|
1366 | 1411 | "This function is optimized to perform multiple compression operations as\n" |
|
1367 | 1412 | "as possible with as little overhead as possbile.\n" |
|
1368 | 1413 | ); |
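
A usage sketch for the API documented above, in Python. The import name, and the len()/indexing/tobytes() behavior of the returned collection, are assumptions based on the buffer types used elsewhere in this extension:

    import zstd  # assumed import name

    cctx = zstd.ZstdCompressor()
    inputs = [b"first document" * 100, b"second document" * 100]

    # threads=-1 requests one worker per CPU; consecutive inputs are split
    # across workers as in compress_from_datasources() above.
    collection = cctx.multi_compress_to_buffer(inputs, threads=-1)

    # Each compressed input is exposed as one segment of the collection.
    frames = [collection[i].tobytes() for i in range(len(collection))]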
|
1369 | 1414 | |
|
1370 | 1415 | static ZstdBufferWithSegmentsCollection* ZstdCompressor_multi_compress_to_buffer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
1371 | 1416 | static char* kwlist[] = { |
|
1372 | 1417 | "data", |
|
1373 | 1418 | "threads", |
|
1374 | 1419 | NULL |
|
1375 | 1420 | }; |
|
1376 | 1421 | |
|
1377 | 1422 | PyObject* data; |
|
1378 | 1423 | int threads = 0; |
|
1379 | 1424 | Py_buffer* dataBuffers = NULL; |
|
1380 | 1425 | DataSources sources; |
|
1381 | 1426 | Py_ssize_t i; |
|
1382 | 1427 | Py_ssize_t sourceCount = 0; |
|
1383 | 1428 | ZstdBufferWithSegmentsCollection* result = NULL; |
|
1384 | 1429 | |
|
1385 | 1430 | memset(&sources, 0, sizeof(sources)); |
|
1386 | 1431 | |
|
1387 | 1432 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist, |
|
1388 | 1433 | &data, &threads)) { |
|
1389 | 1434 | return NULL; |
|
1390 | 1435 | } |
|
1391 | 1436 | |
|
1392 | 1437 | if (threads < 0) { |
|
1393 | 1438 | threads = cpu_count(); |
|
1394 | 1439 | } |
|
1395 | 1440 | |
|
1396 | 1441 | if (threads < 2) { |
|
1397 | 1442 | threads = 1; |
|
1398 | 1443 | } |
|
1399 | 1444 | |
|
1400 | 1445 | if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsType)) { |
|
1401 | 1446 | ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)data; |
|
1402 | 1447 | |
|
1403 | 1448 | sources.sources = PyMem_Malloc(buffer->segmentCount * sizeof(DataSource)); |
|
1404 | 1449 | if (NULL == sources.sources) { |
|
1405 | 1450 | PyErr_NoMemory(); |
|
1406 | 1451 | goto finally; |
|
1407 | 1452 | } |
|
1408 | 1453 | |
|
1409 | 1454 | for (i = 0; i < buffer->segmentCount; i++) { |
|
1410 | 1455 | if (buffer->segments[i].length > SIZE_MAX) { |
|
1411 | 1456 | PyErr_Format(PyExc_ValueError, |
|
1412 | 1457 | "buffer segment %zd is too large for this platform", i); |
|
1413 | 1458 | goto finally; |
|
1414 | 1459 | } |
|
1415 | 1460 | |
|
1416 | 1461 | sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset; |
|
1417 | 1462 | sources.sources[i].sourceSize = (size_t)buffer->segments[i].length; |
|
1418 | 1463 | sources.totalSourceSize += buffer->segments[i].length; |
|
1419 | 1464 | } |
|
1420 | 1465 | |
|
1421 | 1466 | sources.sourcesSize = buffer->segmentCount; |
|
1422 | 1467 | } |
|
1423 | 1468 | else if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsCollectionType)) { |
|
1424 | 1469 | Py_ssize_t j; |
|
1425 | 1470 | Py_ssize_t offset = 0; |
|
1426 | 1471 | ZstdBufferWithSegments* buffer; |
|
1427 | 1472 | ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)data; |
|
1428 | 1473 | |
|
1429 | 1474 | sourceCount = BufferWithSegmentsCollection_length(collection); |
|
1430 | 1475 | |
|
1431 | 1476 | sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource)); |
|
1432 | 1477 | if (NULL == sources.sources) { |
|
1433 | 1478 | PyErr_NoMemory(); |
|
1434 | 1479 | goto finally; |
|
1435 | 1480 | } |
|
1436 | 1481 | |
|
1437 | 1482 | for (i = 0; i < collection->bufferCount; i++) { |
|
1438 | 1483 | buffer = collection->buffers[i]; |
|
1439 | 1484 | |
|
1440 | 1485 | for (j = 0; j < buffer->segmentCount; j++) { |
|
1441 | 1486 | if (buffer->segments[j].length > SIZE_MAX) { |
|
1442 | 1487 | PyErr_Format(PyExc_ValueError, |
|
1443 | 1488 | "buffer segment %zd in buffer %zd is too large for this platform", |
|
1444 | 1489 | j, i); |
|
1445 | 1490 | goto finally; |
|
1446 | 1491 | } |
|
1447 | 1492 | |
|
1448 | 1493 | sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset; |
|
1449 | 1494 | sources.sources[offset].sourceSize = (size_t)buffer->segments[j].length; |
|
1450 | 1495 | sources.totalSourceSize += buffer->segments[j].length; |
|
1451 | 1496 | |
|
1452 | 1497 | offset++; |
|
1453 | 1498 | } |
|
1454 | 1499 | } |
|
1455 | 1500 | |
|
1456 | 1501 | sources.sourcesSize = sourceCount; |
|
1457 | 1502 | } |
|
1458 | 1503 | else if (PyList_Check(data)) { |
|
1459 | 1504 | sourceCount = PyList_GET_SIZE(data); |
|
1460 | 1505 | |
|
1461 | 1506 | sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource)); |
|
1462 | 1507 | if (NULL == sources.sources) { |
|
1463 | 1508 | PyErr_NoMemory(); |
|
1464 | 1509 | goto finally; |
|
1465 | 1510 | } |
|
1466 | 1511 | |
|
1467 | 1512 | dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer)); |
|
1468 | 1513 | if (NULL == dataBuffers) { |
|
1469 | 1514 | PyErr_NoMemory(); |
|
1470 | 1515 | goto finally; |
|
1471 | 1516 | } |
|
1472 | 1517 | |
|
1473 | 1518 | memset(dataBuffers, 0, sourceCount * sizeof(Py_buffer)); |
|
1474 | 1519 | |
|
1475 | 1520 | for (i = 0; i < sourceCount; i++) { |
|
1476 | 1521 | if (0 != PyObject_GetBuffer(PyList_GET_ITEM(data, i), |
|
1477 | 1522 | &dataBuffers[i], PyBUF_CONTIG_RO)) { |
|
1478 | 1523 | PyErr_Clear(); |
|
1479 | 1524 | PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i); |
|
1480 | 1525 | goto finally; |
|
1481 | 1526 | } |
|
1482 | 1527 | |
|
1483 | 1528 | sources.sources[i].sourceData = dataBuffers[i].buf; |
|
1484 | 1529 | sources.sources[i].sourceSize = dataBuffers[i].len; |
|
1485 | 1530 | sources.totalSourceSize += dataBuffers[i].len; |
|
1486 | 1531 | } |
|
1487 | 1532 | |
|
1488 | 1533 | sources.sourcesSize = sourceCount; |
|
1489 | 1534 | } |
|
1490 | 1535 | else { |
|
1491 | 1536 | PyErr_SetString(PyExc_TypeError, "argument must be list of BufferWithSegments"); |
|
1492 | 1537 | goto finally; |
|
1493 | 1538 | } |
|
1494 | 1539 | |
|
1495 | 1540 | if (0 == sources.sourcesSize) { |
|
1496 | 1541 | PyErr_SetString(PyExc_ValueError, "no source elements found"); |
|
1497 | 1542 | goto finally; |
|
1498 | 1543 | } |
|
1499 | 1544 | |
|
1500 | 1545 | if (0 == sources.totalSourceSize) { |
|
1501 | 1546 | PyErr_SetString(PyExc_ValueError, "source elements are empty"); |
|
1502 | 1547 | goto finally; |
|
1503 | 1548 | } |
|
1504 | 1549 | |
|
1505 | 1550 | if (sources.totalSourceSize > SIZE_MAX) { |
|
1506 | 1551 | PyErr_SetString(PyExc_ValueError, "sources are too large for this platform"); |
|
1507 | 1552 | goto finally; |
|
1508 | 1553 | } |
|
1509 | 1554 | |
|
1510 | 1555 | result = compress_from_datasources(self, &sources, threads); |
|
1511 | 1556 | |
|
1512 | 1557 | finally: |
|
1513 | 1558 | PyMem_Free(sources.sources); |
|
1514 | 1559 | |
|
1515 | 1560 | if (dataBuffers) { |
|
1516 | 1561 | for (i = 0; i < sourceCount; i++) { |
|
1517 | 1562 | PyBuffer_Release(&dataBuffers[i]); |
|
1518 | 1563 | } |
|
1519 | 1564 | |
|
1520 | 1565 | PyMem_Free(dataBuffers); |
|
1521 | 1566 | } |
|
1522 | 1567 | |
|
1523 | 1568 | return result; |
|
1524 | 1569 | } |
|
1525 | 1570 | |
|
1526 | 1571 | static PyMethodDef ZstdCompressor_methods[] = { |
|
1572 | { "chunker", (PyCFunction)ZstdCompressor_chunker, | |
|
1573 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_chunker__doc__ }, | |
|
1527 | 1574 | { "compress", (PyCFunction)ZstdCompressor_compress, |
|
1528 | 1575 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ }, |
|
1529 | 1576 | { "compressobj", (PyCFunction)ZstdCompressor_compressobj, |
|
1530 | 1577 | METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ }, |
|
1531 | 1578 | { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream, |
|
1532 | 1579 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ }, |
|
1533 | 1580 | { "stream_reader", (PyCFunction)ZstdCompressor_stream_reader, |
|
1534 | 1581 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_reader__doc__ }, |
|
1535 | 1582 | { "stream_writer", (PyCFunction)ZstdCompressor_stream_writer, |
|
1536 | 1583 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ }, |
|
1537 | 1584 | { "read_to_iter", (PyCFunction)ZstdCompressor_read_to_iter, |
|
1538 | 1585 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ }, |
|
1539 | 1586 | /* TODO Remove deprecated API */ |
|
1540 | 1587 | { "read_from", (PyCFunction)ZstdCompressor_read_to_iter, |
|
1541 | 1588 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ }, |
|
1542 | 1589 | /* TODO remove deprecated API */ |
|
1543 | 1590 | { "write_to", (PyCFunction)ZstdCompressor_stream_writer, |
|
1544 | 1591 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ }, |
|
1545 | 1592 | { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer, |
|
1546 | 1593 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ }, |
|
1547 | 1594 | { "memory_size", (PyCFunction)ZstdCompressor_memory_size, |
|
1548 | 1595 | METH_NOARGS, ZstdCompressor_memory_size__doc__ }, |
|
1549 | 1596 | { "frame_progression", (PyCFunction)ZstdCompressor_frame_progression, |
|
1550 | 1597 | METH_NOARGS, ZstdCompressor_frame_progression__doc__ }, |
|
1551 | 1598 | { NULL, NULL } |
|
1552 | 1599 | }; |
|
1553 | 1600 | |
|
1554 | 1601 | PyTypeObject ZstdCompressorType = { |
|
1555 | 1602 | PyVarObject_HEAD_INIT(NULL, 0) |
|
1556 | 1603 | "zstd.ZstdCompressor", /* tp_name */ |
|
1557 | 1604 | sizeof(ZstdCompressor), /* tp_basicsize */ |
|
1558 | 1605 | 0, /* tp_itemsize */ |
|
1559 | 1606 | (destructor)ZstdCompressor_dealloc, /* tp_dealloc */ |
|
1560 | 1607 | 0, /* tp_print */ |
|
1561 | 1608 | 0, /* tp_getattr */ |
|
1562 | 1609 | 0, /* tp_setattr */ |
|
1563 | 1610 | 0, /* tp_compare */ |
|
1564 | 1611 | 0, /* tp_repr */ |
|
1565 | 1612 | 0, /* tp_as_number */ |
|
1566 | 1613 | 0, /* tp_as_sequence */ |
|
1567 | 1614 | 0, /* tp_as_mapping */ |
|
1568 | 1615 | 0, /* tp_hash */ |
|
1569 | 1616 | 0, /* tp_call */ |
|
1570 | 1617 | 0, /* tp_str */ |
|
1571 | 1618 | 0, /* tp_getattro */ |
|
1572 | 1619 | 0, /* tp_setattro */ |
|
1573 | 1620 | 0, /* tp_as_buffer */ |
|
1574 | 1621 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
1575 | 1622 | ZstdCompressor__doc__, /* tp_doc */ |
|
1576 | 1623 | 0, /* tp_traverse */ |
|
1577 | 1624 | 0, /* tp_clear */ |
|
1578 | 1625 | 0, /* tp_richcompare */ |
|
1579 | 1626 | 0, /* tp_weaklistoffset */ |
|
1580 | 1627 | 0, /* tp_iter */ |
|
1581 | 1628 | 0, /* tp_iternext */ |
|
1582 | 1629 | ZstdCompressor_methods, /* tp_methods */ |
|
1583 | 1630 | 0, /* tp_members */ |
|
1584 | 1631 | 0, /* tp_getset */ |
|
1585 | 1632 | 0, /* tp_base */ |
|
1586 | 1633 | 0, /* tp_dict */ |
|
1587 | 1634 | 0, /* tp_descr_get */ |
|
1588 | 1635 | 0, /* tp_descr_set */ |
|
1589 | 1636 | 0, /* tp_dictoffset */ |
|
1590 | 1637 | (initproc)ZstdCompressor_init, /* tp_init */ |
|
1591 | 1638 | 0, /* tp_alloc */ |
|
1592 | 1639 | PyType_GenericNew, /* tp_new */ |
|
1593 | 1640 | }; |
|
1594 | 1641 | |
|
1595 | 1642 | void compressor_module_init(PyObject* mod) { |
|
1596 | 1643 | Py_TYPE(&ZstdCompressorType) = &PyType_Type; |
|
1597 | 1644 | if (PyType_Ready(&ZstdCompressorType) < 0) { |
|
1598 | 1645 | return; |
|
1599 | 1646 | } |
|
1600 | 1647 | |
|
1601 | 1648 | Py_INCREF((PyObject*)&ZstdCompressorType); |
|
1602 | 1649 | PyModule_AddObject(mod, "ZstdCompressor", |
|
1603 | 1650 | (PyObject*)&ZstdCompressorType); |
|
1604 | 1651 | } |
@@ -1,102 +1,103 | |||
|
1 | 1 | /** |
|
2 | 2 | * Copyright (c) 2016-present, Gregory Szorc |
|
3 | 3 | * All rights reserved. |
|
4 | 4 | * |
|
5 | 5 | * This software may be modified and distributed under the terms |
|
6 | 6 | * of the BSD license. See the LICENSE file for details. |
|
7 | 7 | */ |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 | 11 | extern PyObject* ZstdError; |
|
12 | 12 | |
|
13 | 13 | static char frame_header[] = { |
|
14 | 14 | '\x28', |
|
15 | 15 | '\xb5', |
|
16 | 16 | '\x2f', |
|
17 | 17 | '\xfd', |
|
18 | 18 | }; |
|
19 | 19 | |
|
20 | 20 | void constants_module_init(PyObject* mod) { |
|
21 | 21 | PyObject* version; |
|
22 | 22 | PyObject* zstdVersion; |
|
23 | 23 | PyObject* frameHeader; |
|
24 | 24 | |
|
25 | 25 | #if PY_MAJOR_VERSION >= 3 |
|
26 | 26 | version = PyUnicode_FromString(PYTHON_ZSTANDARD_VERSION); |
|
27 | 27 | #else |
|
28 | 28 | version = PyString_FromString(PYTHON_ZSTANDARD_VERSION); |
|
29 | 29 | #endif |
|
30 | Py_INCREF(version); | |
|
31 | 30 | PyModule_AddObject(mod, "__version__", version); |
|
32 | 31 | |
|
33 | 32 | ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL); |
|
34 | 33 | PyModule_AddObject(mod, "ZstdError", ZstdError); |
|
35 | 34 | |
|
36 | 35 | PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_FINISH", compressorobj_flush_finish); |
|
37 | 36 | PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_BLOCK", compressorobj_flush_block); |
|
38 | 37 | |
|
39 | 38 | /* For now, the version is a simple tuple instead of a dedicated type. */ |
|
40 | 39 | zstdVersion = PyTuple_New(3); |
|
41 | 40 | PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR)); |
|
42 | 41 | PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR)); |
|
43 | 42 | PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE)); |
|
44 | Py_INCREF(zstdVersion); | |
|
45 | 43 | PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion); |
|
46 | 44 | |
|
47 | 45 | frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header)); |
|
48 | 46 | if (frameHeader) { |
|
49 | 47 | PyModule_AddObject(mod, "FRAME_HEADER", frameHeader); |
|
50 | 48 | } |
|
51 | 49 | else { |
|
52 | 50 | PyErr_Format(PyExc_ValueError, "could not create frame header object"); |
|
53 | 51 | } |
|
54 | 52 | |
|
55 | 53 | PyModule_AddObject(mod, "CONTENTSIZE_UNKNOWN", |
|
56 | 54 | PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_UNKNOWN)); |
|
57 | 55 | PyModule_AddObject(mod, "CONTENTSIZE_ERROR", |
|
58 | 56 | PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_ERROR)); |
|
59 | 57 | |
|
60 | 58 | PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel()); |
|
61 | 59 | PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE", |
|
62 | 60 | (long)ZSTD_CStreamInSize()); |
|
63 | 61 | PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_OUTPUT_SIZE", |
|
64 | 62 | (long)ZSTD_CStreamOutSize()); |
|
65 | 63 | PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_INPUT_SIZE", |
|
66 | 64 | (long)ZSTD_DStreamInSize()); |
|
67 | 65 | PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE", |
|
68 | 66 | (long)ZSTD_DStreamOutSize()); |
|
69 | 67 | |
|
70 | 68 | PyModule_AddIntConstant(mod, "MAGIC_NUMBER", ZSTD_MAGICNUMBER); |
|
69 | PyModule_AddIntConstant(mod, "BLOCKSIZELOG_MAX", ZSTD_BLOCKSIZELOG_MAX); | |
|
70 | PyModule_AddIntConstant(mod, "BLOCKSIZE_MAX", ZSTD_BLOCKSIZE_MAX); | |
|
71 | 71 | PyModule_AddIntConstant(mod, "WINDOWLOG_MIN", ZSTD_WINDOWLOG_MIN); |
|
72 | 72 | PyModule_AddIntConstant(mod, "WINDOWLOG_MAX", ZSTD_WINDOWLOG_MAX); |
|
73 | 73 | PyModule_AddIntConstant(mod, "CHAINLOG_MIN", ZSTD_CHAINLOG_MIN); |
|
74 | 74 | PyModule_AddIntConstant(mod, "CHAINLOG_MAX", ZSTD_CHAINLOG_MAX); |
|
75 | 75 | PyModule_AddIntConstant(mod, "HASHLOG_MIN", ZSTD_HASHLOG_MIN); |
|
76 | 76 | PyModule_AddIntConstant(mod, "HASHLOG_MAX", ZSTD_HASHLOG_MAX); |
|
77 | 77 | PyModule_AddIntConstant(mod, "HASHLOG3_MAX", ZSTD_HASHLOG3_MAX); |
|
78 | 78 | PyModule_AddIntConstant(mod, "SEARCHLOG_MIN", ZSTD_SEARCHLOG_MIN); |
|
79 | 79 | PyModule_AddIntConstant(mod, "SEARCHLOG_MAX", ZSTD_SEARCHLOG_MAX); |
|
80 | 80 | PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN); |
|
81 | 81 | PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX); |
|
82 | 82 | PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN); |
|
83 | PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX); | |
|
83 | 84 | PyModule_AddIntConstant(mod, "LDM_MINMATCH_MIN", ZSTD_LDM_MINMATCH_MIN); |
|
84 | 85 | PyModule_AddIntConstant(mod, "LDM_MINMATCH_MAX", ZSTD_LDM_MINMATCH_MAX); |
|
85 | 86 | PyModule_AddIntConstant(mod, "LDM_BUCKETSIZELOG_MAX", ZSTD_LDM_BUCKETSIZELOG_MAX); |
|
86 | 87 | |
|
87 | 88 | PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast); |
|
88 | 89 | PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast); |
|
89 | 90 | PyModule_AddIntConstant(mod, "STRATEGY_GREEDY", ZSTD_greedy); |
|
90 | 91 | PyModule_AddIntConstant(mod, "STRATEGY_LAZY", ZSTD_lazy); |
|
91 | 92 | PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2); |
|
92 | 93 | PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2); |
|
93 | 94 | PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt); |
|
94 | 95 | PyModule_AddIntConstant(mod, "STRATEGY_BTULTRA", ZSTD_btultra); |
|
95 | 96 | |
|
96 | 97 | PyModule_AddIntConstant(mod, "DICT_TYPE_AUTO", ZSTD_dct_auto); |
|
97 | 98 | PyModule_AddIntConstant(mod, "DICT_TYPE_RAWCONTENT", ZSTD_dct_rawContent); |
|
98 | 99 | PyModule_AddIntConstant(mod, "DICT_TYPE_FULLDICT", ZSTD_dct_fullDict); |
|
99 | 100 | |
|
100 | 101 | PyModule_AddIntConstant(mod, "FORMAT_ZSTD1", ZSTD_f_zstd1); |
|
101 | 102 | PyModule_AddIntConstant(mod, "FORMAT_ZSTD1_MAGICLESS", ZSTD_f_zstd1_magicless); |
|
102 | 103 | } |
@@ -1,459 +1,440 | |||
|
1 | 1 | /** |
|
2 | 2 | * Copyright (c) 2017-present, Gregory Szorc |
|
3 | 3 | * All rights reserved. |
|
4 | 4 | * |
|
5 | 5 | * This software may be modified and distributed under the terms |
|
6 | 6 | * of the BSD license. See the LICENSE file for details. |
|
7 | 7 | */ |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 | 11 | extern PyObject* ZstdError; |
|
12 | 12 | |
|
13 | 13 | static void set_unsupported_operation(void) { |
|
14 | 14 | PyObject* iomod; |
|
15 | 15 | PyObject* exc; |
|
16 | 16 | |
|
17 | 17 | iomod = PyImport_ImportModule("io"); |
|
18 | 18 | if (NULL == iomod) { |
|
19 | 19 | return; |
|
20 | 20 | } |
|
21 | 21 | |
|
22 | 22 | exc = PyObject_GetAttrString(iomod, "UnsupportedOperation"); |
|
23 | 23 | if (NULL == exc) { |
|
24 | 24 | Py_DECREF(iomod); |
|
25 | 25 | return; |
|
26 | 26 | } |
|
27 | 27 | |
|
28 | 28 | PyErr_SetNone(exc); |
|
29 | 29 | Py_DECREF(exc); |
|
30 | 30 | Py_DECREF(iomod); |
|
31 | 31 | } |
|
32 | 32 | |
|
33 | 33 | static void reader_dealloc(ZstdDecompressionReader* self) { |
|
34 | 34 | Py_XDECREF(self->decompressor); |
|
35 | 35 | Py_XDECREF(self->reader); |
|
36 | 36 | |
|
37 | 37 | if (self->buffer.buf) { |
|
38 | 38 | PyBuffer_Release(&self->buffer); |
|
39 | 39 | } |
|
40 | 40 | |
|
41 | 41 | PyObject_Del(self); |
|
42 | 42 | } |
|
43 | 43 | |
|
44 | 44 | static ZstdDecompressionReader* reader_enter(ZstdDecompressionReader* self) { |
|
45 | 45 | if (self->entered) { |
|
46 | 46 | PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times"); |
|
47 | 47 | return NULL; |
|
48 | 48 | } |
|
49 | 49 | |
|
50 | if (ensure_dctx(self->decompressor, 1)) { | |
|
51 | return NULL; | |
|
52 | } | |
|
53 | ||
|
54 | 50 | self->entered = 1; |
|
55 | 51 | |
|
56 | 52 | Py_INCREF(self); |
|
57 | 53 | return self; |
|
58 | 54 | } |
|
59 | 55 | |
|
60 | 56 | static PyObject* reader_exit(ZstdDecompressionReader* self, PyObject* args) { |
|
61 | 57 | PyObject* exc_type; |
|
62 | 58 | PyObject* exc_value; |
|
63 | 59 | PyObject* exc_tb; |
|
64 | 60 | |
|
65 | 61 | if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) { |
|
66 | 62 | return NULL; |
|
67 | 63 | } |
|
68 | 64 | |
|
69 | 65 | self->entered = 0; |
|
70 | 66 | self->closed = 1; |
|
71 | 67 | |
|
72 | 68 | /* Release resources. */ |
|
73 | 69 | Py_CLEAR(self->reader); |
|
74 | 70 | if (self->buffer.buf) { |
|
75 | 71 | PyBuffer_Release(&self->buffer); |
|
76 | 72 | memset(&self->buffer, 0, sizeof(self->buffer)); |
|
77 | 73 | } |
|
78 | 74 | |
|
79 | 75 | Py_CLEAR(self->decompressor); |
|
80 | 76 | |
|
81 | 77 | Py_RETURN_FALSE; |
|
82 | 78 | } |
|
83 | 79 | |
|
84 | 80 | static PyObject* reader_readable(PyObject* self) { |
|
85 | 81 | Py_RETURN_TRUE; |
|
86 | 82 | } |
|
87 | 83 | |
|
88 | 84 | static PyObject* reader_writable(PyObject* self) { |
|
89 | 85 | Py_RETURN_FALSE; |
|
90 | 86 | } |
|
91 | 87 | |
|
92 | 88 | static PyObject* reader_seekable(PyObject* self) { |
|
93 | 89 | Py_RETURN_TRUE; |
|
94 | 90 | } |
|
95 | 91 | |
|
96 | 92 | static PyObject* reader_close(ZstdDecompressionReader* self) { |
|
97 | 93 | self->closed = 1; |
|
98 | 94 | Py_RETURN_NONE; |
|
99 | 95 | } |
|
100 | 96 | |
|
101 | static PyObject* reader_closed(ZstdDecompressionReader* self) { | |
|
102 | if (self->closed) { | |
|
103 | Py_RETURN_TRUE; | |
|
104 | } | |
|
105 | else { | |
|
106 | Py_RETURN_FALSE; | |
|
107 | } | |
|
108 | } | |
|
109 | ||
|
110 | 97 | static PyObject* reader_flush(PyObject* self) { |
|
111 | 98 | Py_RETURN_NONE; |
|
112 | 99 | } |
|
113 | 100 | |
|
114 | 101 | static PyObject* reader_isatty(PyObject* self) { |
|
115 | 102 | Py_RETURN_FALSE; |
|
116 | 103 | } |
|
117 | 104 | |
|
118 | 105 | static PyObject* reader_read(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) { |
|
119 | 106 | static char* kwlist[] = { |
|
120 | 107 | "size", |
|
121 | 108 | NULL |
|
122 | 109 | }; |
|
123 | 110 | |
|
124 | 111 | Py_ssize_t size = -1; |
|
125 | 112 | PyObject* result = NULL; |
|
126 | 113 | char* resultBuffer; |
|
127 | 114 | Py_ssize_t resultSize; |
|
128 | 115 | ZSTD_outBuffer output; |
|
129 | 116 | size_t zresult; |
|
130 | 117 | |
|
131 | if (!self->entered) { | |
|
132 | PyErr_SetString(ZstdError, "read() must be called from an active context manager"); | |
|
133 | return NULL; | |
|
134 | } | |
|
135 | ||
|
136 | 118 | if (self->closed) { |
|
137 | 119 | PyErr_SetString(PyExc_ValueError, "stream is closed"); |
|
138 | 120 | return NULL; |
|
139 | 121 | } |
|
140 | 122 | |
|
141 | 123 | if (self->finishedOutput) { |
|
142 | 124 | return PyBytes_FromStringAndSize("", 0); |
|
143 | 125 | } |
|
144 | 126 | |
|
145 | 127 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "n", kwlist, &size)) { |
|
146 | 128 | return NULL; |
|
147 | 129 | } |
|
148 | 130 | |
|
149 | 131 | if (size < 1) { |
|
150 | 132 | PyErr_SetString(PyExc_ValueError, "cannot read negative or size 0 amounts"); |
|
151 | 133 | return NULL; |
|
152 | 134 | } |
|
153 | 135 | |
|
154 | 136 | result = PyBytes_FromStringAndSize(NULL, size); |
|
155 | 137 | if (NULL == result) { |
|
156 | 138 | return NULL; |
|
157 | 139 | } |
|
158 | 140 | |
|
159 | 141 | PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize); |
|
160 | 142 | |
|
161 | 143 | output.dst = resultBuffer; |
|
162 | 144 | output.size = resultSize; |
|
163 | 145 | output.pos = 0; |
|
164 | 146 | |
|
165 | 147 | readinput: |
|
166 | 148 | |
|
167 | 149 | /* Consume input data left over from last time. */ |
|
168 | 150 | if (self->input.pos < self->input.size) { |
|
169 | 151 | Py_BEGIN_ALLOW_THREADS |
|
170 | 152 | zresult = ZSTD_decompress_generic(self->decompressor->dctx, |
|
171 | 153 | &output, &self->input); |
|
172 | 154 | Py_END_ALLOW_THREADS |
|
173 | 155 | |
|
174 | 156 | /* Input exhausted. Clear our state tracking. */ |
|
175 | 157 | if (self->input.pos == self->input.size) { |
|
176 | 158 | memset(&self->input, 0, sizeof(self->input)); |
|
177 | 159 | Py_CLEAR(self->readResult); |
|
178 | 160 | |
|
179 | 161 | if (self->buffer.buf) { |
|
180 | 162 | self->finishedInput = 1; |
|
181 | 163 | } |
|
182 | 164 | } |
|
183 | 165 | |
|
184 | 166 | if (ZSTD_isError(zresult)) { |
|
185 | 167 | PyErr_Format(ZstdError, "zstd decompress error: %s", ZSTD_getErrorName(zresult)); |
|
186 | 168 | return NULL; |
|
187 | 169 | } |
|
188 | 170 | else if (0 == zresult) { |
|
189 | 171 | self->finishedOutput = 1; |
|
190 | 172 | } |
|
191 | 173 | |
|
192 | 174 | /* We fulfilled the full read request. Emit it. */ |
|
193 | 175 | if (output.pos && output.pos == output.size) { |
|
194 | 176 | self->bytesDecompressed += output.size; |
|
195 | 177 | return result; |
|
196 | 178 | } |
|
197 | 179 | |
|
198 | 180 | /* |
|
199 | 181 | * There is more room in the output. Fall through to try to collect |
|
200 | 182 | * more data so we can try to fill the output. |
|
201 | 183 | */ |
|
202 | 184 | } |
|
203 | 185 | |
|
204 | 186 | if (!self->finishedInput) { |
|
205 | 187 | if (self->reader) { |
|
206 | 188 | Py_buffer buffer; |
|
207 | 189 | |
|
208 | 190 | assert(self->readResult == NULL); |
|
209 | 191 | self->readResult = PyObject_CallMethod(self->reader, "read", |
|
210 | 192 | "k", self->readSize); |
|
211 | 193 | if (NULL == self->readResult) { |
|
212 | 194 | return NULL; |
|
213 | 195 | } |
|
214 | 196 | |
|
215 | 197 | memset(&buffer, 0, sizeof(buffer)); |
|
216 | 198 | |
|
217 | 199 | if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) { |
|
218 | 200 | return NULL; |
|
219 | 201 | } |
|
220 | 202 | |
|
221 | 203 | /* EOF */ |
|
222 | 204 | if (0 == buffer.len) { |
|
223 | 205 | self->finishedInput = 1; |
|
224 | 206 | Py_CLEAR(self->readResult); |
|
225 | 207 | } |
|
226 | 208 | else { |
|
227 | 209 | self->input.src = buffer.buf; |
|
228 | 210 | self->input.size = buffer.len; |
|
229 | 211 | self->input.pos = 0; |
|
230 | 212 | } |
|
231 | 213 | |
|
232 | 214 | PyBuffer_Release(&buffer); |
|
233 | 215 | } |
|
234 | 216 | else { |
|
235 | 217 | assert(self->buffer.buf); |
|
236 | 218 | /* |
|
237 | 219 | * We should only get here once since above block will exhaust |
|
238 | 220 | * source buffer until finishedInput is set. |
|
239 | 221 | */ |
|
240 | 222 | assert(self->input.src == NULL); |
|
241 | 223 | |
|
242 | 224 | self->input.src = self->buffer.buf; |
|
243 | 225 | self->input.size = self->buffer.len; |
|
244 | 226 | self->input.pos = 0; |
|
245 | 227 | } |
|
246 | 228 | } |
|
247 | 229 | |
|
248 | 230 | if (self->input.size) { |
|
249 | 231 | goto readinput; |
|
250 | 232 | } |
|
251 | 233 | |
|
252 | 234 | /* EOF */ |
|
253 | 235 | self->bytesDecompressed += output.pos; |
|
254 | 236 | |
|
255 | 237 | if (safe_pybytes_resize(&result, output.pos)) { |
|
256 | 238 | Py_XDECREF(result); |
|
257 | 239 | return NULL; |
|
258 | 240 | } |
|
259 | 241 | |
|
260 | 242 | return result; |
|
261 | 243 | } |
|
262 | 244 | |
|
263 | 245 | static PyObject* reader_readall(PyObject* self) { |
|
264 | 246 | PyErr_SetNone(PyExc_NotImplementedError); |
|
265 | 247 | return NULL; |
|
266 | 248 | } |
|
267 | 249 | |
|
268 | 250 | static PyObject* reader_readline(PyObject* self) { |
|
269 | 251 | PyErr_SetNone(PyExc_NotImplementedError); |
|
270 | 252 | return NULL; |
|
271 | 253 | } |
|
272 | 254 | |
|
273 | 255 | static PyObject* reader_readlines(PyObject* self) { |
|
274 | 256 | PyErr_SetNone(PyExc_NotImplementedError); |
|
275 | 257 | return NULL; |
|
276 | 258 | } |
|
277 | 259 | |
|
278 | 260 | static PyObject* reader_seek(ZstdDecompressionReader* self, PyObject* args) { |
|
279 | 261 | Py_ssize_t pos; |
|
280 | 262 | int whence = 0; |
|
281 | 263 | unsigned long long readAmount = 0; |
|
282 | 264 | size_t defaultOutSize = ZSTD_DStreamOutSize(); |
|
283 | 265 | |
|
284 | if (!self->entered) { | |
|
285 | PyErr_SetString(ZstdError, "seek() must be called from an active context manager"); | |
|
286 | return NULL; | |
|
287 | } | |
|
288 | ||
|
289 | 266 | if (self->closed) { |
|
290 | 267 | PyErr_SetString(PyExc_ValueError, "stream is closed"); |
|
291 | 268 | return NULL; |
|
292 | 269 | } |
|
293 | 270 | |
|
294 | 271 | if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &whence)) { |
|
295 | 272 | return NULL; |
|
296 | 273 | } |
|
297 | 274 | |
|
298 | 275 | if (whence == SEEK_SET) { |
|
299 | 276 | if (pos < 0) { |
|
300 | 277 | PyErr_SetString(PyExc_ValueError, |
|
301 | 278 | "cannot seek to negative position with SEEK_SET"); |
|
302 | 279 | return NULL; |
|
303 | 280 | } |
|
304 | 281 | |
|
305 | 282 | if ((unsigned long long)pos < self->bytesDecompressed) { |
|
306 | 283 | PyErr_SetString(PyExc_ValueError, |
|
307 | 284 | "cannot seek zstd decompression stream backwards"); |
|
308 | 285 | return NULL; |
|
309 | 286 | } |
|
310 | 287 | |
|
311 | 288 | readAmount = pos - self->bytesDecompressed; |
|
312 | 289 | } |
|
313 | 290 | else if (whence == SEEK_CUR) { |
|
314 | 291 | if (pos < 0) { |
|
315 | 292 | PyErr_SetString(PyExc_ValueError, |
|
316 | 293 | "cannot seek zstd decompression stream backwards"); |
|
317 | 294 | return NULL; |
|
318 | 295 | } |
|
319 | 296 | |
|
320 | 297 | readAmount = pos; |
|
321 | 298 | } |
|
322 | 299 | else if (whence == SEEK_END) { |
|
323 | 300 | /* We /could/ support this with pos==0. But let's not do that until someone |
|
324 | 301 | needs it. */ |
|
325 | 302 | PyErr_SetString(PyExc_ValueError, |
|
326 | 303 | "zstd decompression streams cannot be seeked with SEEK_END"); |
|
327 | 304 | return NULL; |
|
328 | 305 | } |
|
329 | 306 | |
|
330 | 307 | /* It is a bit inefficient to do this via the Python API. But since there |
|
331 | 308 | is a bit of state tracking involved to read from this type, it is the |
|
332 | 309 | easiest to implement. */ |
|
333 | 310 | while (readAmount) { |
|
334 | 311 | Py_ssize_t readSize; |
|
335 | 312 | PyObject* readResult = PyObject_CallMethod((PyObject*)self, "read", "K", |
|
336 | 313 | readAmount < defaultOutSize ? readAmount : defaultOutSize); |
|
337 | 314 | |
|
338 | 315 | if (!readResult) { |
|
339 | 316 | return NULL; |
|
340 | 317 | } |
|
341 | 318 | |
|
342 | 319 | readSize = PyBytes_GET_SIZE(readResult); |
|
343 | 320 | |
|
344 | 321 | /* Empty read means EOF. */ |
|
345 | 322 | if (!readSize) { |
|
346 | 323 | break; |
|
347 | 324 | } |
|
348 | 325 | |
|
349 | 326 | readAmount -= readSize; |
|
350 | 327 | } |
|
351 | 328 | |
|
352 | 329 | return PyLong_FromUnsignedLongLong(self->bytesDecompressed); |
|
353 | 330 | } |
|
354 | 331 | |
|
355 | 332 | static PyObject* reader_tell(ZstdDecompressionReader* self) { |
|
356 | 333 | /* TODO should this raise OSError since stream isn't seekable? */ |
|
357 | 334 | return PyLong_FromUnsignedLongLong(self->bytesDecompressed); |
|
358 | 335 | } |
|
359 | 336 | |
|
360 | 337 | static PyObject* reader_write(PyObject* self, PyObject* args) { |
|
361 | 338 | set_unsupported_operation(); |
|
362 | 339 | return NULL; |
|
363 | 340 | } |
|
364 | 341 | |
|
365 | 342 | static PyObject* reader_writelines(PyObject* self, PyObject* args) { |
|
366 | 343 | set_unsupported_operation(); |
|
367 | 344 | return NULL; |
|
368 | 345 | } |
|
369 | 346 | |
|
370 | 347 | static PyObject* reader_iter(PyObject* self) { |
|
371 | 348 | PyErr_SetNone(PyExc_NotImplementedError); |
|
372 | 349 | return NULL; |
|
373 | 350 | } |
|
374 | 351 | |
|
375 | 352 | static PyObject* reader_iternext(PyObject* self) { |
|
376 | 353 | PyErr_SetNone(PyExc_NotImplementedError); |
|
377 | 354 | return NULL; |
|
378 | 355 | } |
|
379 | 356 | |
|
380 | 357 | static PyMethodDef reader_methods[] = { |
|
381 | 358 | { "__enter__", (PyCFunction)reader_enter, METH_NOARGS, |
|
382 | 359 | PyDoc_STR("Enter a compression context") }, |
|
383 | 360 | { "__exit__", (PyCFunction)reader_exit, METH_VARARGS, |
|
384 | 361 | PyDoc_STR("Exit a compression context") }, |
|
385 | 362 | { "close", (PyCFunction)reader_close, METH_NOARGS, |
|
386 | 363 | PyDoc_STR("Close the stream so it cannot perform any more operations") }, |
|
387 | { "closed", (PyCFunction)reader_closed, METH_NOARGS, | |
|
388 | PyDoc_STR("Whether stream is closed") }, | |
|
389 | 364 | { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") }, |
|
390 | 365 | { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") }, |
|
391 | 366 | { "readable", (PyCFunction)reader_readable, METH_NOARGS, |
|
392 | 367 | PyDoc_STR("Returns True") }, |
|
393 | 368 | { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS, |
|
394 | 369 | PyDoc_STR("read compressed data") }, |
|
395 | 370 | { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") }, |
|
396 | 371 | { "readline", (PyCFunction)reader_readline, METH_NOARGS, PyDoc_STR("Not implemented") }, |
|
397 | 372 | { "readlines", (PyCFunction)reader_readlines, METH_NOARGS, PyDoc_STR("Not implemented") }, |
|
398 | 373 | { "seek", (PyCFunction)reader_seek, METH_VARARGS, PyDoc_STR("Seek the stream") }, |
|
399 | 374 | { "seekable", (PyCFunction)reader_seekable, METH_NOARGS, |
|
400 | 375 | PyDoc_STR("Returns True") }, |
|
401 | 376 | { "tell", (PyCFunction)reader_tell, METH_NOARGS, |
|
402 | 377 | PyDoc_STR("Returns current number of bytes compressed") }, |
|
403 | 378 | { "writable", (PyCFunction)reader_writable, METH_NOARGS, |
|
404 | 379 | PyDoc_STR("Returns False") }, |
|
405 | 380 | { "write", (PyCFunction)reader_write, METH_VARARGS, PyDoc_STR("unsupported operation") }, |
|
406 | 381 | { "writelines", (PyCFunction)reader_writelines, METH_VARARGS, PyDoc_STR("unsupported operation") }, |
|
407 | 382 | { NULL, NULL } |
|
408 | 383 | }; |
|
409 | 384 | |
|
385 | static PyMemberDef reader_members[] = { | |
|
386 | { "closed", T_BOOL, offsetof(ZstdDecompressionReader, closed), | |
|
387 | READONLY, "whether stream is closed" }, | |
|
388 | { NULL } | |
|
389 | }; | |
|
390 | ||
|
410 | 391 | PyTypeObject ZstdDecompressionReaderType = { |
|
411 | 392 | PyVarObject_HEAD_INIT(NULL, 0) |
|
412 | 393 | "zstd.ZstdDecompressionReader", /* tp_name */ |
|
413 | 394 | sizeof(ZstdDecompressionReader), /* tp_basicsize */ |
|
414 | 395 | 0, /* tp_itemsize */ |
|
415 | 396 | (destructor)reader_dealloc, /* tp_dealloc */ |
|
416 | 397 | 0, /* tp_print */ |
|
417 | 398 | 0, /* tp_getattr */ |
|
418 | 399 | 0, /* tp_setattr */ |
|
419 | 400 | 0, /* tp_compare */ |
|
420 | 401 | 0, /* tp_repr */ |
|
421 | 402 | 0, /* tp_as_number */ |
|
422 | 403 | 0, /* tp_as_sequence */ |
|
423 | 404 | 0, /* tp_as_mapping */ |
|
424 | 405 | 0, /* tp_hash */ |
|
425 | 406 | 0, /* tp_call */ |
|
426 | 407 | 0, /* tp_str */ |
|
427 | 408 | 0, /* tp_getattro */ |
|
428 | 409 | 0, /* tp_setattro */ |
|
429 | 410 | 0, /* tp_as_buffer */ |
|
430 | 411 | Py_TPFLAGS_DEFAULT, /* tp_flags */ |
|
431 | 412 | 0, /* tp_doc */ |
|
432 | 413 | 0, /* tp_traverse */ |
|
433 | 414 | 0, /* tp_clear */ |
|
434 | 415 | 0, /* tp_richcompare */ |
|
435 | 416 | 0, /* tp_weaklistoffset */ |
|
436 | 417 | reader_iter, /* tp_iter */ |
|
437 | 418 | reader_iternext, /* tp_iternext */ |
|
438 | 419 | reader_methods, /* tp_methods */ |
|
439 | 0, /* tp_members */
|
420 | reader_members, /* tp_members */ | |
|
440 | 421 | 0, /* tp_getset */ |
|
441 | 422 | 0, /* tp_base */ |
|
442 | 423 | 0, /* tp_dict */ |
|
443 | 424 | 0, /* tp_descr_get */ |
|
444 | 425 | 0, /* tp_descr_set */ |
|
445 | 426 | 0, /* tp_dictoffset */ |
|
446 | 427 | 0, /* tp_init */ |
|
447 | 428 | 0, /* tp_alloc */ |
|
448 | 429 | PyType_GenericNew, /* tp_new */ |
|
449 | 430 | }; |
|
450 | 431 | |
|
451 | 432 | |
|
452 | 433 | void decompressionreader_module_init(PyObject* mod) { |
|
453 | 434 | /* TODO make reader a sub-class of io.RawIOBase */ |
|
454 | 435 | |
|
455 | 436 | Py_TYPE(&ZstdDecompressionReaderType) = &PyType_Type; |
|
456 | 437 | if (PyType_Ready(&ZstdDecompressionReaderType) < 0) { |
|
457 | 438 | return; |
|
458 | 439 | } |
|
459 | 440 | } |
@@ -1,174 +1,185 | |||
|
1 | 1 | /** |
|
2 | 2 | * Copyright (c) 2016-present, Gregory Szorc |
|
3 | 3 | * All rights reserved. |
|
4 | 4 | * |
|
5 | 5 | * This software may be modified and distributed under the terms |
|
6 | 6 | * of the BSD license. See the LICENSE file for details. |
|
7 | 7 | */ |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 | 11 | extern PyObject* ZstdError; |
|
12 | 12 | |
|
13 | 13 | PyDoc_STRVAR(DecompressionObj__doc__, |
|
14 | 14 | "Perform decompression using a standard library compatible API.\n" |
|
15 | 15 | ); |
|
16 | 16 | |
|
17 | 17 | static void DecompressionObj_dealloc(ZstdDecompressionObj* self) { |
|
18 | 18 | Py_XDECREF(self->decompressor); |
|
19 | 19 | |
|
20 | 20 | PyObject_Del(self); |
|
21 | 21 | } |
|
22 | 22 | |
|
23 | 23 | static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args, PyObject* kwargs) { |
|
24 | 24 | static char* kwlist[] = { |
|
25 | 25 | "data", |
|
26 | 26 | NULL |
|
27 | 27 | }; |
|
28 | 28 | |
|
29 | 29 | Py_buffer source; |
|
30 | 30 | size_t zresult; |
|
31 | 31 | ZSTD_inBuffer input; |
|
32 | 32 | ZSTD_outBuffer output; |
|
33 | 33 | PyObject* result = NULL; |
|
34 | 34 | Py_ssize_t resultSize = 0; |
|
35 | 35 | |
|
36 | output.dst = NULL; | |
|
37 | ||
|
36 | 38 | if (self->finished) { |
|
37 | 39 | PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times"); |
|
38 | 40 | return NULL; |
|
39 | 41 | } |
|
40 | 42 | |
|
41 | 43 | #if PY_MAJOR_VERSION >= 3 |
|
42 | 44 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:decompress", |
|
43 | 45 | #else |
|
44 | 46 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:decompress", |
|
45 | 47 | #endif |
|
46 | 48 | kwlist, &source)) { |
|
47 | 49 | return NULL; |
|
48 | 50 | } |
|
49 | 51 | |
|
50 | 52 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { |
|
51 | 53 | PyErr_SetString(PyExc_ValueError, |
|
52 | 54 | "data buffer should be contiguous and have at most one dimension"); |
|
53 | 55 | goto finally; |
|
54 | 56 | } |
|
55 | 57 | |
|
58 | /* Special case of empty input. Output will always be empty. */ | |
|
59 | if (source.len == 0) { | |
|
60 | result = PyBytes_FromString(""); | |
|
61 | goto finally; | |
|
62 | } | |
|
63 | ||
|
56 | 64 | input.src = source.buf; |
|
57 | 65 | input.size = source.len; |
|
58 | 66 | input.pos = 0; |
|
59 | 67 | |
|
60 | 68 | output.dst = PyMem_Malloc(self->outSize); |
|
61 | 69 | if (!output.dst) { |
|
62 | 70 | PyErr_NoMemory(); |
|
63 | 71 | goto except; |
|
64 | 72 | } |
|
65 | 73 | output.size = self->outSize; |
|
66 | 74 | output.pos = 0; |
|
67 | 75 | |
|
68 | /* Read input until exhausted. */ | |
|
69 | while (input.pos < input.size) { | |
|
76 | while (1) { | |
|
70 | 77 | Py_BEGIN_ALLOW_THREADS |
|
71 | 78 | zresult = ZSTD_decompress_generic(self->decompressor->dctx, &output, &input); |
|
72 | 79 | Py_END_ALLOW_THREADS |
|
73 | 80 | |
|
74 | 81 | if (ZSTD_isError(zresult)) { |
|
75 | 82 | PyErr_Format(ZstdError, "zstd decompressor error: %s", |
|
76 | 83 | ZSTD_getErrorName(zresult)); |
|
77 | 84 | goto except; |
|
78 | 85 | } |
|
79 | 86 | |
|
80 | 87 | if (0 == zresult) { |
|
81 | 88 | self->finished = 1; |
|
82 | 89 | } |
|
83 | 90 | |
|
84 | 91 | if (output.pos) { |
|
85 | 92 | if (result) { |
|
86 | 93 | resultSize = PyBytes_GET_SIZE(result); |
|
87 | 94 | if (-1 == safe_pybytes_resize(&result, resultSize + output.pos)) { |
|
88 | 95 | Py_XDECREF(result); |
|
89 | 96 | goto except; |
|
90 | 97 | } |
|
91 | 98 | |
|
92 | 99 | memcpy(PyBytes_AS_STRING(result) + resultSize, |
|
93 | 100 | output.dst, output.pos); |
|
94 | 101 | } |
|
95 | 102 | else { |
|
96 | 103 | result = PyBytes_FromStringAndSize(output.dst, output.pos); |
|
97 | 104 | if (!result) { |
|
98 | 105 | goto except; |
|
99 | 106 | } |
|
100 | 107 | } |
|
108 | } | |
|
109 | ||
|
110 | if (zresult == 0 || (input.pos == input.size && output.pos == 0)) { | |
|
111 | break; | |
|
112 | } | |
|
101 | 113 | |
|
102 | 114 |
|
|
103 | 115 |
|
|
104 | } | |
|
105 | 116 | |
|
106 | 117 | if (!result) { |
|
107 | 118 | result = PyBytes_FromString(""); |
|
108 | 119 | } |
|
109 | 120 | |
|
110 | 121 | goto finally; |
|
111 | 122 | |
|
112 | 123 | except: |
|
113 | 124 | Py_CLEAR(result); |
|
114 | 125 | |
|
115 | 126 | finally: |
|
116 | 127 | PyMem_Free(output.dst); |
|
117 | 128 | PyBuffer_Release(&source); |
|
118 | 129 | |
|
119 | 130 | return result; |
|
120 | 131 | } |
|
121 | 132 | |
|
122 | 133 | static PyMethodDef DecompressionObj_methods[] = { |
|
123 | 134 | { "decompress", (PyCFunction)DecompressionObj_decompress, |
|
124 | 135 | METH_VARARGS | METH_KEYWORDS, PyDoc_STR("decompress data") }, |
|
125 | 136 | { NULL, NULL } |
|
126 | 137 | }; |
|
127 | 138 | |
|
128 | 139 | PyTypeObject ZstdDecompressionObjType = { |
|
129 | 140 | PyVarObject_HEAD_INIT(NULL, 0) |
|
130 | 141 | "zstd.ZstdDecompressionObj", /* tp_name */ |
|
131 | 142 | sizeof(ZstdDecompressionObj), /* tp_basicsize */ |
|
132 | 143 | 0, /* tp_itemsize */ |
|
133 | 144 | (destructor)DecompressionObj_dealloc, /* tp_dealloc */ |
|
134 | 145 | 0, /* tp_print */ |
|
135 | 146 | 0, /* tp_getattr */ |
|
136 | 147 | 0, /* tp_setattr */ |
|
137 | 148 | 0, /* tp_compare */ |
|
138 | 149 | 0, /* tp_repr */ |
|
139 | 150 | 0, /* tp_as_number */ |
|
140 | 151 | 0, /* tp_as_sequence */ |
|
141 | 152 | 0, /* tp_as_mapping */ |
|
142 | 153 | 0, /* tp_hash */ |
|
143 | 154 | 0, /* tp_call */ |
|
144 | 155 | 0, /* tp_str */ |
|
145 | 156 | 0, /* tp_getattro */ |
|
146 | 157 | 0, /* tp_setattro */ |
|
147 | 158 | 0, /* tp_as_buffer */ |
|
148 | 159 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
149 | 160 | DecompressionObj__doc__, /* tp_doc */ |
|
150 | 161 | 0, /* tp_traverse */ |
|
151 | 162 | 0, /* tp_clear */ |
|
152 | 163 | 0, /* tp_richcompare */ |
|
153 | 164 | 0, /* tp_weaklistoffset */ |
|
154 | 165 | 0, /* tp_iter */ |
|
155 | 166 | 0, /* tp_iternext */ |
|
156 | 167 | DecompressionObj_methods, /* tp_methods */ |
|
157 | 168 | 0, /* tp_members */ |
|
158 | 169 | 0, /* tp_getset */ |
|
159 | 170 | 0, /* tp_base */ |
|
160 | 171 | 0, /* tp_dict */ |
|
161 | 172 | 0, /* tp_descr_get */ |
|
162 | 173 | 0, /* tp_descr_set */ |
|
163 | 174 | 0, /* tp_dictoffset */ |
|
164 | 175 | 0, /* tp_init */ |
|
165 | 176 | 0, /* tp_alloc */ |
|
166 | 177 | PyType_GenericNew, /* tp_new */ |
|
167 | 178 | }; |
|
168 | 179 | |
|
169 | 180 | void decompressobj_module_init(PyObject* module) { |
|
170 | 181 | Py_TYPE(&ZstdDecompressionObjType) = &PyType_Type; |
|
171 | 182 | if (PyType_Ready(&ZstdDecompressionObjType) < 0) { |
|
172 | 183 | return; |
|
173 | 184 | } |
|
174 | 185 | } |
@@ -1,1803 +1,1807 | |||
|
1 | 1 | /** |
|
2 | 2 | * Copyright (c) 2016-present, Gregory Szorc |
|
3 | 3 | * All rights reserved. |
|
4 | 4 | * |
|
5 | 5 | * This software may be modified and distributed under the terms |
|
6 | 6 | * of the BSD license. See the LICENSE file for details. |
|
7 | 7 | */ |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | #include "pool.h" |
|
11 | 11 | |
|
12 | 12 | extern PyObject* ZstdError; |
|
13 | 13 | |
|
14 | 14 | /** |
|
15 | 15 | * Ensure the ZSTD_DCtx on a decompressor is initiated and ready for a new operation. |
|
16 | 16 | */ |
|
17 | 17 | int ensure_dctx(ZstdDecompressor* decompressor, int loadDict) { |
|
18 | 18 | size_t zresult; |
|
19 | 19 | |
|
20 | 20 | ZSTD_DCtx_reset(decompressor->dctx); |
|
21 | 21 | |
|
22 | 22 | if (decompressor->maxWindowSize) { |
|
23 | 23 | zresult = ZSTD_DCtx_setMaxWindowSize(decompressor->dctx, decompressor->maxWindowSize); |
|
24 | 24 | if (ZSTD_isError(zresult)) { |
|
25 | 25 | PyErr_Format(ZstdError, "unable to set max window size: %s", |
|
26 | 26 | ZSTD_getErrorName(zresult)); |
|
27 | 27 | return 1; |
|
28 | 28 | } |
|
29 | 29 | } |
|
30 | 30 | |
|
31 | 31 | zresult = ZSTD_DCtx_setFormat(decompressor->dctx, decompressor->format); |
|
32 | 32 | if (ZSTD_isError(zresult)) { |
|
33 | 33 | PyErr_Format(ZstdError, "unable to set decoding format: %s", |
|
34 | 34 | ZSTD_getErrorName(zresult)); |
|
35 | 35 | return 1; |
|
36 | 36 | } |
|
37 | 37 | |
|
38 | 38 | if (loadDict && decompressor->dict) { |
|
39 | 39 | if (ensure_ddict(decompressor->dict)) { |
|
40 | 40 | return 1; |
|
41 | 41 | } |
|
42 | 42 | |
|
43 | 43 | zresult = ZSTD_DCtx_refDDict(decompressor->dctx, decompressor->dict->ddict); |
|
44 | 44 | if (ZSTD_isError(zresult)) { |
|
45 | 45 | PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s", |
|
46 | 46 | ZSTD_getErrorName(zresult)); |
|
47 | 47 | return 1; |
|
48 | 48 | } |
|
49 | 49 | } |
|
50 | 50 | |
|
51 | 51 | return 0; |
|
52 | 52 | } |
|
53 | 53 | |
|
54 | 54 | PyDoc_STRVAR(Decompressor__doc__, |
|
55 | 55 | "ZstdDecompressor(dict_data=None)\n" |
|
56 | 56 | "\n" |
|
57 | 57 | "Create an object used to perform Zstandard decompression.\n" |
|
58 | 58 | "\n" |
|
59 | 59 | "An instance can perform multiple decompression operations." |
|
60 | 60 | ); |
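A minimal Python-level sketch of constructing the decompressor described by this docstring; the module alias ``zstd`` and the dictionary bytes are assumptions for illustration:

    import zstandard as zstd

    # A decompressor with default settings; the instance can be reused for
    # multiple decompression operations.
    dctx = zstd.ZstdDecompressor()

    # Optionally bind a prepared dictionary and/or cap the window size
    # (dict_bytes is hypothetical example data).
    # dict_data = zstd.ZstdCompressionDict(dict_bytes)
    # dctx = zstd.ZstdDecompressor(dict_data=dict_data, max_window_size=2**27)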
|
61 | 61 | |
|
62 | 62 | static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { |
|
63 | 63 | static char* kwlist[] = { |
|
64 | 64 | "dict_data", |
|
65 | 65 | "max_window_size", |
|
66 | 66 | "format", |
|
67 | 67 | NULL |
|
68 | 68 | }; |
|
69 | 69 | |
|
70 | 70 | ZstdCompressionDict* dict = NULL; |
|
71 | 71 | size_t maxWindowSize = 0; |
|
72 | 72 | ZSTD_format_e format = ZSTD_f_zstd1; |
|
73 | 73 | |
|
74 | 74 | self->dctx = NULL; |
|
75 | 75 | self->dict = NULL; |
|
76 | 76 | |
|
77 | 77 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!II:ZstdDecompressor", kwlist, |
|
78 | 78 | &ZstdCompressionDictType, &dict, &maxWindowSize, &format)) { |
|
79 | 79 | return -1; |
|
80 | 80 | } |
|
81 | 81 | |
|
82 | 82 | self->dctx = ZSTD_createDCtx(); |
|
83 | 83 | if (!self->dctx) { |
|
84 | 84 | PyErr_NoMemory(); |
|
85 | 85 | goto except; |
|
86 | 86 | } |
|
87 | 87 | |
|
88 | 88 | self->maxWindowSize = maxWindowSize; |
|
89 | 89 | self->format = format; |
|
90 | 90 | |
|
91 | 91 | if (dict) { |
|
92 | 92 | self->dict = dict; |
|
93 | 93 | Py_INCREF(dict); |
|
94 | 94 | } |
|
95 | 95 | |
|
96 | 96 | if (ensure_dctx(self, 1)) { |
|
97 | 97 | goto except; |
|
98 | 98 | } |
|
99 | 99 | |
|
100 | 100 | return 0; |
|
101 | 101 | |
|
102 | 102 | except: |
|
103 | 103 | Py_CLEAR(self->dict); |
|
104 | 104 | |
|
105 | 105 | if (self->dctx) { |
|
106 | 106 | ZSTD_freeDCtx(self->dctx); |
|
107 | 107 | self->dctx = NULL; |
|
108 | 108 | } |
|
109 | 109 | |
|
110 | 110 | return -1; |
|
111 | 111 | } |
|
112 | 112 | |
|
113 | 113 | static void Decompressor_dealloc(ZstdDecompressor* self) { |
|
114 | 114 | Py_CLEAR(self->dict); |
|
115 | 115 | |
|
116 | 116 | if (self->dctx) { |
|
117 | 117 | ZSTD_freeDCtx(self->dctx); |
|
118 | 118 | self->dctx = NULL; |
|
119 | 119 | } |
|
120 | 120 | |
|
121 | 121 | PyObject_Del(self); |
|
122 | 122 | } |
|
123 | 123 | |
|
124 | 124 | PyDoc_STRVAR(Decompressor_memory_size__doc__, |
|
125 | 125 | "memory_size() -- Size of decompression context, in bytes\n" |
|
126 | 126 | ); |
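A one-line Python-level sketch of the query described above:

    import zstandard as zstd

    dctx = zstd.ZstdDecompressor()
    size_in_bytes = dctx.memory_size()  # backed by ZSTD_sizeof_DCtx()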
|
127 | 127 | |
|
128 | 128 | static PyObject* Decompressor_memory_size(ZstdDecompressor* self) { |
|
129 | 129 | if (self->dctx) { |
|
130 | 130 | return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->dctx)); |
|
131 | 131 | } |
|
132 | 132 | else { |
|
133 | 133 | PyErr_SetString(ZstdError, "no decompressor context found; this should never happen"); |
|
134 | 134 | return NULL; |
|
135 | 135 | } |
|
136 | 136 | } |
|
137 | 137 | |
|
138 | 138 | PyDoc_STRVAR(Decompressor_copy_stream__doc__, |
|
139 | 139 | "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n" |
|
140 | 140 | "\n" |
|
141 | 141 | "Compressed data will be read from ``ifh``, decompressed, and written to\n" |
|
142 | 142 | "``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n" |
|
143 | 143 | "``write(data)`` method.\n" |
|
144 | 144 | "\n" |
|
145 | 145 | "The optional ``read_size`` and ``write_size`` arguments control the chunk\n" |
|
146 | 146 | "size of data that is ``read()`` and ``write()`` between streams. They default\n" |
|
147 | 147 | "to the default input and output sizes of zstd decompressor streams.\n" |
|
148 | 148 | ); |
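A Python-level usage sketch of the stream copy described above; the file names are hypothetical:

    import zstandard as zstd

    dctx = zstd.ZstdDecompressor()
    with open("input.zst", "rb") as ifh, open("output.bin", "wb") as ofh:
        # Returns a (bytes_read, bytes_written) tuple, matching the
        # PyTuple_Pack(2, ...) result built in the C function below.
        read_count, write_count = dctx.copy_stream(ifh, ofh)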
|
149 | 149 | |
|
150 | 150 | static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { |
|
151 | 151 | static char* kwlist[] = { |
|
152 | 152 | "ifh", |
|
153 | 153 | "ofh", |
|
154 | 154 | "read_size", |
|
155 | 155 | "write_size", |
|
156 | 156 | NULL |
|
157 | 157 | }; |
|
158 | 158 | |
|
159 | 159 | PyObject* source; |
|
160 | 160 | PyObject* dest; |
|
161 | 161 | size_t inSize = ZSTD_DStreamInSize(); |
|
162 | 162 | size_t outSize = ZSTD_DStreamOutSize(); |
|
163 | 163 | ZSTD_inBuffer input; |
|
164 | 164 | ZSTD_outBuffer output; |
|
165 | 165 | Py_ssize_t totalRead = 0; |
|
166 | 166 | Py_ssize_t totalWrite = 0; |
|
167 | 167 | char* readBuffer; |
|
168 | 168 | Py_ssize_t readSize; |
|
169 | 169 | PyObject* readResult = NULL; |
|
170 | 170 | PyObject* res = NULL; |
|
171 | 171 | size_t zresult = 0; |
|
172 | 172 | PyObject* writeResult; |
|
173 | 173 | PyObject* totalReadPy; |
|
174 | 174 | PyObject* totalWritePy; |
|
175 | 175 | |
|
176 | 176 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist, |
|
177 | 177 | &source, &dest, &inSize, &outSize)) { |
|
178 | 178 | return NULL; |
|
179 | 179 | } |
|
180 | 180 | |
|
181 | 181 | if (!PyObject_HasAttrString(source, "read")) { |
|
182 | 182 | PyErr_SetString(PyExc_ValueError, "first argument must have a read() method"); |
|
183 | 183 | return NULL; |
|
184 | 184 | } |
|
185 | 185 | |
|
186 | 186 | if (!PyObject_HasAttrString(dest, "write")) { |
|
187 | 187 | PyErr_SetString(PyExc_ValueError, "second argument must have a write() method"); |
|
188 | 188 | return NULL; |
|
189 | 189 | } |
|
190 | 190 | |
|
191 | 191 | /* Prevent free on uninitialized memory in finally. */ |
|
192 | 192 | output.dst = NULL; |
|
193 | 193 | |
|
194 | 194 | if (ensure_dctx(self, 1)) { |
|
195 | 195 | res = NULL; |
|
196 | 196 | goto finally; |
|
197 | 197 | } |
|
198 | 198 | |
|
199 | 199 | output.dst = PyMem_Malloc(outSize); |
|
200 | 200 | if (!output.dst) { |
|
201 | 201 | PyErr_NoMemory(); |
|
202 | 202 | res = NULL; |
|
203 | 203 | goto finally; |
|
204 | 204 | } |
|
205 | 205 | output.size = outSize; |
|
206 | 206 | output.pos = 0; |
|
207 | 207 | |
|
208 | 208 | /* Read source stream until EOF */ |
|
209 | 209 | while (1) { |
|
210 | 210 | readResult = PyObject_CallMethod(source, "read", "n", inSize); |
|
211 | 211 | if (!readResult) { |
|
212 | 212 | PyErr_SetString(ZstdError, "could not read() from source"); |
|
213 | 213 | goto finally; |
|
214 | 214 | } |
|
215 | 215 | |
|
216 | 216 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); |
|
217 | 217 | |
|
218 | 218 | /* If no data was read, we're at EOF. */ |
|
219 | 219 | if (0 == readSize) { |
|
220 | 220 | break; |
|
221 | 221 | } |
|
222 | 222 | |
|
223 | 223 | totalRead += readSize; |
|
224 | 224 | |
|
225 | 225 | /* Send data to decompressor */ |
|
226 | 226 | input.src = readBuffer; |
|
227 | 227 | input.size = readSize; |
|
228 | 228 | input.pos = 0; |
|
229 | 229 | |
|
230 | 230 | while (input.pos < input.size) { |
|
231 | 231 | Py_BEGIN_ALLOW_THREADS |
|
232 | 232 | zresult = ZSTD_decompress_generic(self->dctx, &output, &input); |
|
233 | 233 | Py_END_ALLOW_THREADS |
|
234 | 234 | |
|
235 | 235 | if (ZSTD_isError(zresult)) { |
|
236 | 236 | PyErr_Format(ZstdError, "zstd decompressor error: %s", |
|
237 | 237 | ZSTD_getErrorName(zresult)); |
|
238 | 238 | res = NULL; |
|
239 | 239 | goto finally; |
|
240 | 240 | } |
|
241 | 241 | |
|
242 | 242 | if (output.pos) { |
|
243 | 243 | #if PY_MAJOR_VERSION >= 3 |
|
244 | 244 | writeResult = PyObject_CallMethod(dest, "write", "y#", |
|
245 | 245 | #else |
|
246 | 246 | writeResult = PyObject_CallMethod(dest, "write", "s#", |
|
247 | 247 | #endif |
|
248 | 248 | output.dst, output.pos); |
|
249 | 249 | |
|
250 | 250 | Py_XDECREF(writeResult); |
|
251 | 251 | totalWrite += output.pos; |
|
252 | 252 | output.pos = 0; |
|
253 | 253 | } |
|
254 | 254 | } |
|
255 | 255 | |
|
256 | 256 | Py_CLEAR(readResult); |
|
257 | 257 | } |
|
258 | 258 | |
|
259 | 259 | /* Source stream is exhausted. Finish up. */ |
|
260 | 260 | |
|
261 | 261 | totalReadPy = PyLong_FromSsize_t(totalRead); |
|
262 | 262 | totalWritePy = PyLong_FromSsize_t(totalWrite); |
|
263 | 263 | res = PyTuple_Pack(2, totalReadPy, totalWritePy); |
|
264 | 264 | Py_DECREF(totalReadPy); |
|
265 | 265 | Py_DECREF(totalWritePy); |
|
266 | 266 | |
|
267 | 267 | finally: |
|
268 | 268 | if (output.dst) { |
|
269 | 269 | PyMem_Free(output.dst); |
|
270 | 270 | } |
|
271 | 271 | |
|
272 | 272 | Py_XDECREF(readResult); |
|
273 | 273 | |
|
274 | 274 | return res; |
|
275 | 275 | } |
|
276 | 276 | |
|
277 | 277 | PyDoc_STRVAR(Decompressor_decompress__doc__, |
|
278 | 278 | "decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n" |
|
279 | 279 | "\n" |
|
280 | 280 | "This method will decompress the entirety of the argument and return the\n" |
|
281 | 281 | "result.\n" |
|
282 | 282 | "\n" |
|
283 | 283 | "The input bytes are expected to contain a full Zstandard frame (something\n" |
|
284 | 284 | "compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n" |
|
285 | 285 | "not contain a full frame, an exception will be raised.\n" |
|
286 | 286 | "\n" |
|
287 | 287 | "If the frame header of the compressed data does not contain the content size\n" |
|
288 | 288 | "``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n" |
|
289 | 289 | "allocation of size ``max_output_size`` will be performed and an attempt will\n" |
|
290 | 290 | "be made to perform decompression into that buffer. If the buffer is too\n" |
|
291 | 291 | "small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n" |
|
292 | 292 | "be resized if it is too large.\n" |
|
293 | 293 | "\n" |
|
294 | 294 | "Uncompressed data could be much larger than compressed data. As a result,\n" |
|
295 | 295 | "calling this function could result in a very large memory allocation being\n" |
|
296 | 296 | "performed to hold the uncompressed data. Therefore it is **highly**\n" |
|
297 | 297 | "recommended to use a streaming decompression method instead of this one.\n" |
|
298 | 298 | ); |
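A Python-level sketch of one-shot decompression as documented above; ``frame`` is assumed to hold one complete zstd frame:

    import zstandard as zstd

    dctx = zstd.ZstdDecompressor()

    # Works directly when the frame header records the content size.
    data = dctx.decompress(frame)

    # Frames lacking an embedded content size need an explicit ceiling.
    data = dctx.decompress(frame, max_output_size=2 ** 20)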
|
299 | 299 | |
|
300 | 300 | PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { |
|
301 | 301 | static char* kwlist[] = { |
|
302 | 302 | "data", |
|
303 | 303 | "max_output_size", |
|
304 | 304 | NULL |
|
305 | 305 | }; |
|
306 | 306 | |
|
307 | 307 | Py_buffer source; |
|
308 | 308 | Py_ssize_t maxOutputSize = 0; |
|
309 | 309 | unsigned long long decompressedSize; |
|
310 | 310 | size_t destCapacity; |
|
311 | 311 | PyObject* result = NULL; |
|
312 | 312 | size_t zresult; |
|
313 | 313 | ZSTD_outBuffer outBuffer; |
|
314 | 314 | ZSTD_inBuffer inBuffer; |
|
315 | 315 | |
|
316 | 316 | #if PY_MAJOR_VERSION >= 3 |
|
317 | 317 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|n:decompress", |
|
318 | 318 | #else |
|
319 | 319 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|n:decompress", |
|
320 | 320 | #endif |
|
321 | 321 | kwlist, &source, &maxOutputSize)) { |
|
322 | 322 | return NULL; |
|
323 | 323 | } |
|
324 | 324 | |
|
325 | 325 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { |
|
326 | 326 | PyErr_SetString(PyExc_ValueError, |
|
327 | 327 | "data buffer should be contiguous and have at most one dimension"); |
|
328 | 328 | goto finally; |
|
329 | 329 | } |
|
330 | 330 | |
|
331 | 331 | if (ensure_dctx(self, 1)) { |
|
332 | 332 | goto finally; |
|
333 | 333 | } |
|
334 | 334 | |
|
335 | 335 | decompressedSize = ZSTD_getFrameContentSize(source.buf, source.len); |
|
336 | 336 | |
|
337 | 337 | if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) { |
|
338 | 338 | PyErr_SetString(ZstdError, "error determining content size from frame header"); |
|
339 | 339 | goto finally; |
|
340 | 340 | } |
|
341 | 341 | /* Special case of empty frame. */ |
|
342 | 342 | else if (0 == decompressedSize) { |
|
343 | 343 | result = PyBytes_FromStringAndSize("", 0); |
|
344 | 344 | goto finally; |
|
345 | 345 | } |
|
346 | 346 | /* Missing content size in frame header. */ |
|
347 | 347 | if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) { |
|
348 | 348 | if (0 == maxOutputSize) { |
|
349 | 349 | PyErr_SetString(ZstdError, "could not determine content size in frame header"); |
|
350 | 350 | goto finally; |
|
351 | 351 | } |
|
352 | 352 | |
|
353 | 353 | result = PyBytes_FromStringAndSize(NULL, maxOutputSize); |
|
354 | 354 | destCapacity = maxOutputSize; |
|
355 | 355 | decompressedSize = 0; |
|
356 | 356 | } |
|
357 | 357 | /* Size is recorded in frame header. */ |
|
358 | 358 | else { |
|
359 | 359 | assert(SIZE_MAX >= PY_SSIZE_T_MAX); |
|
360 | 360 | if (decompressedSize > PY_SSIZE_T_MAX) { |
|
361 | 361 | PyErr_SetString(ZstdError, "frame is too large to decompress on this platform"); |
|
362 | 362 | goto finally; |
|
363 | 363 | } |
|
364 | 364 | |
|
365 | 365 | result = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)decompressedSize); |
|
366 | 366 | destCapacity = (size_t)decompressedSize; |
|
367 | 367 | } |
|
368 | 368 | |
|
369 | 369 | if (!result) { |
|
370 | 370 | goto finally; |
|
371 | 371 | } |
|
372 | 372 | |
|
373 | 373 | outBuffer.dst = PyBytes_AsString(result); |
|
374 | 374 | outBuffer.size = destCapacity; |
|
375 | 375 | outBuffer.pos = 0; |
|
376 | 376 | |
|
377 | 377 | inBuffer.src = source.buf; |
|
378 | 378 | inBuffer.size = source.len; |
|
379 | 379 | inBuffer.pos = 0; |
|
380 | 380 | |
|
381 | 381 | Py_BEGIN_ALLOW_THREADS |
|
382 | 382 | zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer); |
|
383 | 383 | Py_END_ALLOW_THREADS |
|
384 | 384 | |
|
385 | 385 | if (ZSTD_isError(zresult)) { |
|
386 | 386 | PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult)); |
|
387 | 387 | Py_CLEAR(result); |
|
388 | 388 | goto finally; |
|
389 | 389 | } |
|
390 | 390 | else if (zresult) { |
|
391 | 391 | PyErr_Format(ZstdError, "decompression error: did not decompress full frame"); |
|
392 | 392 | Py_CLEAR(result); |
|
393 | 393 | goto finally; |
|
394 | 394 | } |
|
395 | 395 | else if (decompressedSize && outBuffer.pos != decompressedSize) { |
|
396 | 396 | PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu", |
|
397 | 397 | zresult, decompressedSize); |
|
398 | 398 | Py_CLEAR(result); |
|
399 | 399 | goto finally; |
|
400 | 400 | } |
|
401 | 401 | else if (outBuffer.pos < destCapacity) { |
|
402 | 402 | if (safe_pybytes_resize(&result, outBuffer.pos)) { |
|
403 | 403 | Py_CLEAR(result); |
|
404 | 404 | goto finally; |
|
405 | 405 | } |
|
406 | 406 | } |
|
407 | 407 | |
|
408 | 408 | finally: |
|
409 | 409 | PyBuffer_Release(&source); |
|
410 | 410 | return result; |
|
411 | 411 | } |
|
412 | 412 | |
|
413 | 413 | PyDoc_STRVAR(Decompressor_decompressobj__doc__, |
|
414 | 414 | "decompressobj([write_size=default])\n" |
|
415 | 415 | "\n" |
|
416 | 416 | "Incrementally feed data into a decompressor.\n" |
|
417 | 417 | "\n" |
|
418 | 418 | "The returned object exposes a ``decompress(data)`` method. This makes it\n" |
|
419 | 419 | "compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n" |
|
420 | 420 | "callers can swap in the zstd decompressor while using the same API.\n" |
|
421 | 421 | ); |
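A Python-level sketch of the zlib-style incremental API described above; ``chunks`` is a hypothetical iterable of compressed byte strings belonging to one frame:

    import zstandard as zstd

    dctx = zstd.ZstdDecompressor()
    dobj = dctx.decompressobj()
    # Feed compressed pieces as they arrive, as with zlib.decompressobj().
    output = b"".join(dobj.decompress(chunk) for chunk in chunks)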
|
422 | 422 | |
|
423 | 423 | static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { |
|
424 | 424 | static char* kwlist[] = { |
|
425 | 425 | "write_size", |
|
426 | 426 | NULL |
|
427 | 427 | }; |
|
428 | 428 | |
|
429 | 429 | ZstdDecompressionObj* result = NULL; |
|
430 | 430 | size_t outSize = ZSTD_DStreamOutSize(); |
|
431 | 431 | |
|
432 | 432 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|k:decompressobj", kwlist, &outSize)) { |
|
433 | 433 | return NULL; |
|
434 | 434 | } |
|
435 | 435 | |
|
436 | 436 | if (!outSize) { |
|
437 | 437 | PyErr_SetString(PyExc_ValueError, "write_size must be positive"); |
|
438 | 438 | return NULL; |
|
439 | 439 | } |
|
440 | 440 | |
|
441 | 441 | result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL); |
|
442 | 442 | if (!result) { |
|
443 | 443 | return NULL; |
|
444 | 444 | } |
|
445 | 445 | |
|
446 | 446 | if (ensure_dctx(self, 1)) { |
|
447 | 447 | Py_DECREF(result); |
|
448 | 448 | return NULL; |
|
449 | 449 | } |
|
450 | 450 | |
|
451 | 451 | result->decompressor = self; |
|
452 | 452 | Py_INCREF(result->decompressor); |
|
453 | 453 | result->outSize = outSize; |
|
454 | 454 | |
|
455 | 455 | return result; |
|
456 | 456 | } |
|
457 | 457 | |
|
458 | 458 | PyDoc_STRVAR(Decompressor_read_to_iter__doc__, |
|
459 | 459 | "read_to_iter(reader[, read_size=default, write_size=default, skip_bytes=0])\n" |
|
460 | 460 | "Read compressed data and return an iterator\n" |
|
461 | 461 | "\n" |
|
462 | 462 | "Returns an iterator of decompressed data chunks produced from reading from\n" |
|
463 | 463 | "the ``reader``.\n" |
|
464 | 464 | "\n" |
|
465 | 465 | "Compressed data will be obtained from ``reader`` by calling the\n" |
|
466 | 466 | "``read(size)`` method of it. The source data will be streamed into a\n" |
|
467 | 467 | "decompressor. As decompressed data is available, it will be exposed to the\n" |
|
468 | 468 | "returned iterator.\n" |
|
469 | 469 | "\n" |
|
470 | 470 | "Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n" |
|
471 | 471 | "iterator in chunks of size ``write_size``. The default values are the input\n" |
|
472 | 472 | "and output sizes for a zstd streaming decompressor.\n" |
|
473 | 473 | "\n" |
|
474 | 474 | "There is also support for skipping the first ``skip_bytes`` of data from\n" |
|
475 | 475 | "the source.\n" |
|
476 | 476 | ); |
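A Python-level sketch of iterator-based streaming as documented above; the source file name and ``process()`` are placeholders:

    import zstandard as zstd

    dctx = zstd.ZstdDecompressor()
    with open("input.zst", "rb") as fh:
        # Each yielded chunk is already decompressed.
        for chunk in dctx.read_to_iter(fh, read_size=32768):
            process(chunk)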
|
477 | 477 | |
|
478 | 478 | static ZstdDecompressorIterator* Decompressor_read_to_iter(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { |
|
479 | 479 | static char* kwlist[] = { |
|
480 | 480 | "reader", |
|
481 | 481 | "read_size", |
|
482 | 482 | "write_size", |
|
483 | 483 | "skip_bytes", |
|
484 | 484 | NULL |
|
485 | 485 | }; |
|
486 | 486 | |
|
487 | 487 | PyObject* reader; |
|
488 | 488 | size_t inSize = ZSTD_DStreamInSize(); |
|
489 | 489 | size_t outSize = ZSTD_DStreamOutSize(); |
|
490 | 490 | ZstdDecompressorIterator* result; |
|
491 | 491 | size_t skipBytes = 0; |
|
492 | 492 | |
|
493 | 493 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_to_iter", kwlist, |
|
494 | 494 | &reader, &inSize, &outSize, &skipBytes)) { |
|
495 | 495 | return NULL; |
|
496 | 496 | } |
|
497 | 497 | |
|
498 | 498 | if (skipBytes >= inSize) { |
|
499 | 499 | PyErr_SetString(PyExc_ValueError, |
|
500 | 500 | "skip_bytes must be smaller than read_size"); |
|
501 | 501 | return NULL; |
|
502 | 502 | } |
|
503 | 503 | |
|
504 | 504 | result = (ZstdDecompressorIterator*)PyObject_CallObject((PyObject*)&ZstdDecompressorIteratorType, NULL); |
|
505 | 505 | if (!result) { |
|
506 | 506 | return NULL; |
|
507 | 507 | } |
|
508 | 508 | |
|
509 | 509 | if (PyObject_HasAttrString(reader, "read")) { |
|
510 | 510 | result->reader = reader; |
|
511 | 511 | Py_INCREF(result->reader); |
|
512 | 512 | } |
|
513 | 513 | else if (1 == PyObject_CheckBuffer(reader)) { |
|
514 | 514 | /* Object claims it is a buffer. Try to get a handle to it. */ |
|
515 | 515 | if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) { |
|
516 | 516 | goto except; |
|
517 | 517 | } |
|
518 | 518 | } |
|
519 | 519 | else { |
|
520 | 520 | PyErr_SetString(PyExc_ValueError, |
|
521 | 521 | "must pass an object with a read() method or conforms to buffer protocol"); |
|
522 | 522 | goto except; |
|
523 | 523 | } |
|
524 | 524 | |
|
525 | 525 | result->decompressor = self; |
|
526 | 526 | Py_INCREF(result->decompressor); |
|
527 | 527 | |
|
528 | 528 | result->inSize = inSize; |
|
529 | 529 | result->outSize = outSize; |
|
530 | 530 | result->skipBytes = skipBytes; |
|
531 | 531 | |
|
532 | 532 | if (ensure_dctx(self, 1)) { |
|
533 | 533 | goto except; |
|
534 | 534 | } |
|
535 | 535 | |
|
536 | 536 | result->input.src = PyMem_Malloc(inSize); |
|
537 | 537 | if (!result->input.src) { |
|
538 | 538 | PyErr_NoMemory(); |
|
539 | 539 | goto except; |
|
540 | 540 | } |
|
541 | 541 | |
|
542 | 542 | goto finally; |
|
543 | 543 | |
|
544 | 544 | except: |
|
545 | 545 | Py_CLEAR(result); |
|
546 | 546 | |
|
547 | 547 | finally: |
|
548 | 548 | |
|
549 | 549 | return result; |
|
550 | 550 | } |
|
551 | 551 | |
|
552 | 552 | PyDoc_STRVAR(Decompressor_stream_reader__doc__, |
|
553 | 553 | "stream_reader(source, [read_size=default])\n" |
|
554 | 554 | "\n" |
|
555 | 555 | "Obtain an object that behaves like an I/O stream that can be used for\n" |
|
556 | 556 | "reading decompressed output from an object.\n" |
|
557 | 557 | "\n" |
|
558 | 558 | "The source object can be any object with a ``read(size)`` method or that\n" |
|
559 | 559 | "conforms to the buffer protocol.\n" |
|
560 | 560 | ); |
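A Python-level sketch of the read-oriented stream described above; the file name and ``process()`` are placeholders:

    import zstandard as zstd

    dctx = zstd.ZstdDecompressor()
    with open("input.zst", "rb") as fh:
        with dctx.stream_reader(fh) as reader:
            while True:
                chunk = reader.read(16384)
                if not chunk:
                    break
                process(chunk)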
|
561 | 561 | |
|
562 | 562 | static ZstdDecompressionReader* Decompressor_stream_reader(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { |
|
563 | 563 | static char* kwlist[] = { |
|
564 | 564 | "source", |
|
565 | 565 | "read_size", |
|
566 | 566 | NULL |
|
567 | 567 | }; |
|
568 | 568 | |
|
569 | 569 | PyObject* source; |
|
570 | 570 | size_t readSize = ZSTD_DStreamInSize(); |
|
571 | 571 | ZstdDecompressionReader* result; |
|
572 | 572 | |
|
573 | 573 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:stream_reader", kwlist, |
|
574 | 574 | &source, &readSize)) { |
|
575 | 575 | return NULL; |
|
576 | 576 | } |
|
577 | 577 | |
|
578 | if (ensure_dctx(self, 1)) { | |
|
579 | return NULL; | |
|
580 | } | |
|
581 | ||
|
578 | 582 | result = (ZstdDecompressionReader*)PyObject_CallObject((PyObject*)&ZstdDecompressionReaderType, NULL); |
|
579 | 583 | if (NULL == result) { |
|
580 | 584 | return NULL; |
|
581 | 585 | } |
|
582 | 586 | |
|
583 | 587 | if (PyObject_HasAttrString(source, "read")) { |
|
584 | 588 | result->reader = source; |
|
585 | 589 | Py_INCREF(source); |
|
586 | 590 | result->readSize = readSize; |
|
587 | 591 | } |
|
588 | 592 | else if (1 == PyObject_CheckBuffer(source)) { |
|
589 | 593 | if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) { |
|
590 | 594 | Py_CLEAR(result); |
|
591 | 595 | return NULL; |
|
592 | 596 | } |
|
593 | 597 | } |
|
594 | 598 | else { |
|
595 | 599 | PyErr_SetString(PyExc_TypeError, |
|
596 | 600 | "must pass an object with a read() method or that conforms to the buffer protocol"); |
|
597 | 601 | Py_CLEAR(result); |
|
598 | 602 | return NULL; |
|
599 | 603 | } |
|
600 | 604 | |
|
601 | 605 | result->decompressor = self; |
|
602 | 606 | Py_INCREF(self); |
|
603 | 607 | |
|
604 | 608 | return result; |
|
605 | 609 | } |
|
606 | 610 | |
|
607 | 611 | PyDoc_STRVAR(Decompressor_stream_writer__doc__, |
|
608 | 612 | "Create a context manager to write decompressed data to an object.\n" |
|
609 | 613 | "\n" |
|
610 | 614 | "The passed object must have a ``write()`` method.\n" |
|
611 | 615 | "\n" |
|
612 | 616 | "The caller feeds intput data to the object by calling ``write(data)``.\n" |
|
613 | 617 | "Decompressed data is written to the argument given as it is decompressed.\n" |
|
614 | 618 | "\n" |
|
615 | 619 | "An optional ``write_size`` argument defines the size of chunks to\n" |
|
616 | 620 | "``write()`` to the writer. It defaults to the default output size for a zstd\n" |
|
617 | 621 | "streaming decompressor.\n" |
|
618 | 622 | ); |
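A Python-level sketch of the write-oriented stream described above; ``compressed_chunks`` and the output file name are placeholders:

    import zstandard as zstd

    dctx = zstd.ZstdDecompressor()
    with open("output.bin", "wb") as ofh:
        with dctx.stream_writer(ofh) as decompressor:
            for chunk in compressed_chunks:
                # Decompressed bytes are written to ofh as they become available.
                decompressor.write(chunk)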
|
619 | 623 | |
|
620 | 624 | static ZstdDecompressionWriter* Decompressor_stream_writer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { |
|
621 | 625 | static char* kwlist[] = { |
|
622 | 626 | "writer", |
|
623 | 627 | "write_size", |
|
624 | 628 | NULL |
|
625 | 629 | }; |
|
626 | 630 | |
|
627 | 631 | PyObject* writer; |
|
628 | 632 | size_t outSize = ZSTD_DStreamOutSize(); |
|
629 | 633 | ZstdDecompressionWriter* result; |
|
630 | 634 | |
|
631 | 635 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:stream_writer", kwlist, |
|
632 | 636 | &writer, &outSize)) { |
|
633 | 637 | return NULL; |
|
634 | 638 | } |
|
635 | 639 | |
|
636 | 640 | if (!PyObject_HasAttrString(writer, "write")) { |
|
637 | 641 | PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method"); |
|
638 | 642 | return NULL; |
|
639 | 643 | } |
|
640 | 644 | |
|
641 | 645 | result = (ZstdDecompressionWriter*)PyObject_CallObject((PyObject*)&ZstdDecompressionWriterType, NULL); |
|
642 | 646 | if (!result) { |
|
643 | 647 | return NULL; |
|
644 | 648 | } |
|
645 | 649 | |
|
646 | 650 | result->decompressor = self; |
|
647 | 651 | Py_INCREF(result->decompressor); |
|
648 | 652 | |
|
649 | 653 | result->writer = writer; |
|
650 | 654 | Py_INCREF(result->writer); |
|
651 | 655 | |
|
652 | 656 | result->outSize = outSize; |
|
653 | 657 | |
|
654 | 658 | return result; |
|
655 | 659 | } |
|
656 | 660 | |
|
657 | 661 | PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__, |
|
658 | 662 | "Decompress a series of chunks using the content dictionary chaining technique\n" |
|
659 | 663 | ); |
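A Python-level sketch of content dictionary chaining as documented above; ``frames`` is a hypothetical list of zstd frames in which each frame was compressed using the previous frame's decompressed output as its prefix dictionary:

    import zstandard as zstd

    dctx = zstd.ZstdDecompressor()
    # Returns the fully decompressed content of the final frame in the chain.
    data = dctx.decompress_content_dict_chain(frames)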
|
660 | 664 | |
|
661 | 665 | static PyObject* Decompressor_decompress_content_dict_chain(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { |
|
662 | 666 | static char* kwlist[] = { |
|
663 | 667 | "frames", |
|
664 | 668 | NULL |
|
665 | 669 | }; |
|
666 | 670 | |
|
667 | 671 | PyObject* chunks; |
|
668 | 672 | Py_ssize_t chunksLen; |
|
669 | 673 | Py_ssize_t chunkIndex; |
|
670 | 674 | char parity = 0; |
|
671 | 675 | PyObject* chunk; |
|
672 | 676 | char* chunkData; |
|
673 | 677 | Py_ssize_t chunkSize; |
|
674 | 678 | size_t zresult; |
|
675 | 679 | ZSTD_frameHeader frameHeader; |
|
676 | 680 | void* buffer1 = NULL; |
|
677 | 681 | size_t buffer1Size = 0; |
|
678 | 682 | size_t buffer1ContentSize = 0; |
|
679 | 683 | void* buffer2 = NULL; |
|
680 | 684 | size_t buffer2Size = 0; |
|
681 | 685 | size_t buffer2ContentSize = 0; |
|
682 | 686 | void* destBuffer = NULL; |
|
683 | 687 | PyObject* result = NULL; |
|
684 | 688 | ZSTD_outBuffer outBuffer; |
|
685 | 689 | ZSTD_inBuffer inBuffer; |
|
686 | 690 | |
|
687 | 691 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain", |
|
688 | 692 | kwlist, &PyList_Type, &chunks)) { |
|
689 | 693 | return NULL; |
|
690 | 694 | } |
|
691 | 695 | |
|
692 | 696 | chunksLen = PyList_Size(chunks); |
|
693 | 697 | if (!chunksLen) { |
|
694 | 698 | PyErr_SetString(PyExc_ValueError, "empty input chain"); |
|
695 | 699 | return NULL; |
|
696 | 700 | } |
|
697 | 701 | |
|
698 | 702 | /* The first chunk should not be using a dictionary. We handle it specially. */ |
|
699 | 703 | chunk = PyList_GetItem(chunks, 0); |
|
700 | 704 | if (!PyBytes_Check(chunk)) { |
|
701 | 705 | PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes"); |
|
702 | 706 | return NULL; |
|
703 | 707 | } |
|
704 | 708 | |
|
705 | 709 | /* We require that all chunks be zstd frames and that they have content size set. */ |
|
706 | 710 | PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); |
|
707 | 711 | zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize); |
|
708 | 712 | if (ZSTD_isError(zresult)) { |
|
709 | 713 | PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame"); |
|
710 | 714 | return NULL; |
|
711 | 715 | } |
|
712 | 716 | else if (zresult) { |
|
713 | 717 | PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame"); |
|
714 | 718 | return NULL; |
|
715 | 719 | } |
|
716 | 720 | |
|
717 | 721 | if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) { |
|
718 | 722 | PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame"); |
|
719 | 723 | return NULL; |
|
720 | 724 | } |
|
721 | 725 | |
|
722 | 726 | assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize); |
|
723 | 727 | |
|
724 | 728 | /* We check against PY_SSIZE_T_MAX here because we ultimately cast the |
|
725 | 729 | * result to a Python object and its length can be no greater than
|
726 | 730 | * Py_ssize_t. In theory, we could have an intermediate frame that is |
|
727 | 731 | * larger. But a) why would this API be used for frames that large b) |
|
728 | 732 | * it isn't worth the complexity to support. */ |
|
729 | 733 | assert(SIZE_MAX >= PY_SSIZE_T_MAX); |
|
730 | 734 | if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) { |
|
731 | 735 | PyErr_SetString(PyExc_ValueError, |
|
732 | 736 | "chunk 0 is too large to decompress on this platform"); |
|
733 | 737 | return NULL; |
|
734 | 738 | } |
|
735 | 739 | |
|
736 | 740 | if (ensure_dctx(self, 0)) { |
|
737 | 741 | goto finally; |
|
738 | 742 | } |
|
739 | 743 | |
|
740 | 744 | buffer1Size = (size_t)frameHeader.frameContentSize; |
|
741 | 745 | buffer1 = PyMem_Malloc(buffer1Size); |
|
742 | 746 | if (!buffer1) { |
|
743 | 747 | goto finally; |
|
744 | 748 | } |
|
745 | 749 | |
|
746 | 750 | outBuffer.dst = buffer1; |
|
747 | 751 | outBuffer.size = buffer1Size; |
|
748 | 752 | outBuffer.pos = 0; |
|
749 | 753 | |
|
750 | 754 | inBuffer.src = chunkData; |
|
751 | 755 | inBuffer.size = chunkSize; |
|
752 | 756 | inBuffer.pos = 0; |
|
753 | 757 | |
|
754 | 758 | Py_BEGIN_ALLOW_THREADS |
|
755 | 759 | zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer); |
|
756 | 760 | Py_END_ALLOW_THREADS |
|
757 | 761 | if (ZSTD_isError(zresult)) { |
|
758 | 762 | PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult)); |
|
759 | 763 | goto finally; |
|
760 | 764 | } |
|
761 | 765 | else if (zresult) { |
|
762 | 766 | PyErr_Format(ZstdError, "chunk 0 did not decompress full frame"); |
|
763 | 767 | goto finally; |
|
764 | 768 | } |
|
765 | 769 | |
|
766 | 770 | buffer1ContentSize = outBuffer.pos; |
|
767 | 771 | |
|
768 | 772 | /* Special case of a simple chain. */ |
|
769 | 773 | if (1 == chunksLen) { |
|
770 | 774 | result = PyBytes_FromStringAndSize(buffer1, buffer1Size); |
|
771 | 775 | goto finally; |
|
772 | 776 | } |
|
773 | 777 | |
|
774 | 778 | /* This should ideally look at next chunk. But this is slightly simpler. */ |
|
775 | 779 | buffer2Size = (size_t)frameHeader.frameContentSize; |
|
776 | 780 | buffer2 = PyMem_Malloc(buffer2Size); |
|
777 | 781 | if (!buffer2) { |
|
778 | 782 | goto finally; |
|
779 | 783 | } |
|
780 | 784 | |
|
781 | 785 | /* For each subsequent chunk, use the previous fulltext as a content dictionary. |
|
782 | 786 | Our strategy is to have 2 buffers. One holds the previous fulltext (to be |
|
783 | 787 | used as a content dictionary) and the other holds the new fulltext. The |
|
784 | 788 | buffers grow when needed but never decrease in size. This limits the |
|
785 | 789 | memory allocator overhead. |
|
786 | 790 | */ |
|
787 | 791 | for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) { |
|
788 | 792 | chunk = PyList_GetItem(chunks, chunkIndex); |
|
789 | 793 | if (!PyBytes_Check(chunk)) { |
|
790 | 794 | PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex); |
|
791 | 795 | goto finally; |
|
792 | 796 | } |
|
793 | 797 | |
|
794 | 798 | PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); |
|
795 | 799 | zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize); |
|
796 | 800 | if (ZSTD_isError(zresult)) { |
|
797 | 801 | PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex); |
|
798 | 802 | goto finally; |
|
799 | 803 | } |
|
800 | 804 | else if (zresult) { |
|
801 | 805 | PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex); |
|
802 | 806 | goto finally; |
|
803 | 807 | } |
|
804 | 808 | |
|
805 | 809 | if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) { |
|
806 | 810 | PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex); |
|
807 | 811 | goto finally; |
|
808 | 812 | } |
|
809 | 813 | |
|
810 | 814 | assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize); |
|
811 | 815 | |
|
812 | 816 | if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) { |
|
813 | 817 | PyErr_Format(PyExc_ValueError, |
|
814 | 818 | "chunk %zd is too large to decompress on this platform", chunkIndex); |
|
815 | 819 | goto finally; |
|
816 | 820 | } |
|
817 | 821 | |
|
818 | 822 | inBuffer.src = chunkData; |
|
819 | 823 | inBuffer.size = chunkSize; |
|
820 | 824 | inBuffer.pos = 0; |
|
821 | 825 | |
|
822 | 826 | parity = chunkIndex % 2; |
|
823 | 827 | |
|
824 | 828 | /* This could definitely be abstracted to reduce code duplication. */ |
|
825 | 829 | if (parity) { |
|
826 | 830 | /* Resize destination buffer to hold larger content. */ |
|
827 | 831 | if (buffer2Size < frameHeader.frameContentSize) { |
|
828 | 832 | buffer2Size = (size_t)frameHeader.frameContentSize; |
|
829 | 833 | destBuffer = PyMem_Realloc(buffer2, buffer2Size); |
|
830 | 834 | if (!destBuffer) { |
|
831 | 835 | goto finally; |
|
832 | 836 | } |
|
833 | 837 | buffer2 = destBuffer; |
|
834 | 838 | } |
|
835 | 839 | |
|
836 | 840 | Py_BEGIN_ALLOW_THREADS |
|
837 | 841 | zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx, |
|
838 | 842 | buffer1, buffer1ContentSize, ZSTD_dct_rawContent); |
|
839 | 843 | Py_END_ALLOW_THREADS |
|
840 | 844 | if (ZSTD_isError(zresult)) { |
|
841 | 845 | PyErr_Format(ZstdError, |
|
842 | 846 | "failed to load prefix dictionary at chunk %zd", chunkIndex); |
|
843 | 847 | goto finally; |
|
844 | 848 | } |
|
845 | 849 | |
|
846 | 850 | outBuffer.dst = buffer2; |
|
847 | 851 | outBuffer.size = buffer2Size; |
|
848 | 852 | outBuffer.pos = 0; |
|
849 | 853 | |
|
850 | 854 | Py_BEGIN_ALLOW_THREADS |
|
851 | 855 | zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer); |
|
852 | 856 | Py_END_ALLOW_THREADS |
|
853 | 857 | if (ZSTD_isError(zresult)) { |
|
854 | 858 | PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", |
|
855 | 859 | chunkIndex, ZSTD_getErrorName(zresult)); |
|
856 | 860 | goto finally; |
|
857 | 861 | } |
|
858 | 862 | else if (zresult) { |
|
859 | 863 | PyErr_Format(ZstdError, "chunk %zd did not decompress full frame", |
|
860 | 864 | chunkIndex); |
|
861 | 865 | goto finally; |
|
862 | 866 | } |
|
863 | 867 | |
|
864 | 868 | buffer2ContentSize = outBuffer.pos; |
|
865 | 869 | } |
|
866 | 870 | else { |
|
867 | 871 | if (buffer1Size < frameHeader.frameContentSize) { |
|
868 | 872 | buffer1Size = (size_t)frameHeader.frameContentSize; |
|
869 | 873 | destBuffer = PyMem_Realloc(buffer1, buffer1Size); |
|
870 | 874 | if (!destBuffer) { |
|
871 | 875 | goto finally; |
|
872 | 876 | } |
|
873 | 877 | buffer1 = destBuffer; |
|
874 | 878 | } |
|
875 | 879 | |
|
876 | 880 | Py_BEGIN_ALLOW_THREADS |
|
877 | 881 | zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx, |
|
878 | 882 | buffer2, buffer2ContentSize, ZSTD_dct_rawContent); |
|
879 | 883 | Py_END_ALLOW_THREADS |
|
880 | 884 | if (ZSTD_isError(zresult)) { |
|
881 | 885 | PyErr_Format(ZstdError, |
|
882 | 886 | "failed to load prefix dictionary at chunk %zd", chunkIndex); |
|
883 | 887 | goto finally; |
|
884 | 888 | } |
|
885 | 889 | |
|
886 | 890 | outBuffer.dst = buffer1; |
|
887 | 891 | outBuffer.size = buffer1Size; |
|
888 | 892 | outBuffer.pos = 0; |
|
889 | 893 | |
|
890 | 894 | Py_BEGIN_ALLOW_THREADS |
|
891 | 895 | zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer); |
|
892 | 896 | Py_END_ALLOW_THREADS |
|
893 | 897 | if (ZSTD_isError(zresult)) { |
|
894 | 898 | PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", |
|
895 | 899 | chunkIndex, ZSTD_getErrorName(zresult)); |
|
896 | 900 | goto finally; |
|
897 | 901 | } |
|
898 | 902 | else if (zresult) { |
|
899 | 903 | PyErr_Format(ZstdError, "chunk %zd did not decompress full frame", |
|
900 | 904 | chunkIndex); |
|
901 | 905 | goto finally; |
|
902 | 906 | } |
|
903 | 907 | |
|
904 | 908 | buffer1ContentSize = outBuffer.pos; |
|
905 | 909 | } |
|
906 | 910 | } |
|
907 | 911 | |
|
908 | 912 | result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1, |
|
909 | 913 | parity ? buffer2ContentSize : buffer1ContentSize); |
|
910 | 914 | |
|
911 | 915 | finally: |
|
912 | 916 | if (buffer2) { |
|
913 | 917 | PyMem_Free(buffer2); |
|
914 | 918 | } |
|
915 | 919 | if (buffer1) { |
|
916 | 920 | PyMem_Free(buffer1); |
|
917 | 921 | } |
|
918 | 922 | |
|
919 | 923 | return result; |
|
920 | 924 | } |
|
921 | 925 | |
|
922 | 926 | typedef struct { |
|
923 | 927 | void* sourceData; |
|
924 | 928 | size_t sourceSize; |
|
925 | 929 | size_t destSize; |
|
926 | 930 | } FramePointer; |
|
927 | 931 | |
|
928 | 932 | typedef struct { |
|
929 | 933 | FramePointer* frames; |
|
930 | 934 | Py_ssize_t framesSize; |
|
931 | 935 | unsigned long long compressedSize; |
|
932 | 936 | } FrameSources; |
|
933 | 937 | |
|
934 | 938 | typedef struct { |
|
935 | 939 | void* dest; |
|
936 | 940 | Py_ssize_t destSize; |
|
937 | 941 | BufferSegment* segments; |
|
938 | 942 | Py_ssize_t segmentsSize; |
|
939 | 943 | } DestBuffer; |
|
940 | 944 | |
|
941 | 945 | typedef enum { |
|
942 | 946 | WorkerError_none = 0, |
|
943 | 947 | WorkerError_zstd = 1, |
|
944 | 948 | WorkerError_memory = 2, |
|
945 | 949 | WorkerError_sizeMismatch = 3, |
|
946 | 950 | WorkerError_unknownSize = 4, |
|
947 | 951 | } WorkerError; |
|
948 | 952 | |
|
949 | 953 | typedef struct { |
|
950 | 954 | /* Source records and length */ |
|
951 | 955 | FramePointer* framePointers; |
|
952 | 956 | /* Which records to process. */ |
|
953 | 957 | Py_ssize_t startOffset; |
|
954 | 958 | Py_ssize_t endOffset; |
|
955 | 959 | unsigned long long totalSourceSize; |
|
956 | 960 | |
|
957 | 961 | /* Compression state and settings. */ |
|
958 | 962 | ZSTD_DCtx* dctx; |
|
959 | 963 | int requireOutputSizes; |
|
960 | 964 | |
|
961 | 965 | /* Output storage. */ |
|
962 | 966 | DestBuffer* destBuffers; |
|
963 | 967 | Py_ssize_t destCount; |
|
964 | 968 | |
|
965 | 969 | /* Item that error occurred on. */ |
|
966 | 970 | Py_ssize_t errorOffset; |
|
967 | 971 | /* If an error occurred. */ |
|
968 | 972 | WorkerError error; |
|
969 | 973 | /* result from zstd decompression operation */ |
|
970 | 974 | size_t zresult; |
|
971 | 975 | } WorkerState; |
|
972 | 976 | |
|
973 | 977 | static void decompress_worker(WorkerState* state) { |
|
974 | 978 | size_t allocationSize; |
|
975 | 979 | DestBuffer* destBuffer; |
|
976 | 980 | Py_ssize_t frameIndex; |
|
977 | 981 | Py_ssize_t localOffset = 0; |
|
978 | 982 | Py_ssize_t currentBufferStartIndex = state->startOffset; |
|
979 | 983 | Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1; |
|
980 | 984 | void* tmpBuf; |
|
981 | 985 | Py_ssize_t destOffset = 0; |
|
982 | 986 | FramePointer* framePointers = state->framePointers; |
|
983 | 987 | size_t zresult; |
|
984 | 988 | unsigned long long totalOutputSize = 0; |
|
985 | 989 | |
|
986 | 990 | assert(NULL == state->destBuffers); |
|
987 | 991 | assert(0 == state->destCount); |
|
988 | 992 | assert(state->endOffset - state->startOffset >= 0); |
|
989 | 993 | |
|
990 | 994 | /* We could get here due to the way work is allocated. Ideally we wouldn't |
|
991 | 995 | get here. But that would require a bit of a refactor in the caller. */ |
|
992 | 996 | if (state->totalSourceSize > SIZE_MAX) { |
|
993 | 997 | state->error = WorkerError_memory; |
|
994 | 998 | state->errorOffset = 0; |
|
995 | 999 | return; |
|
996 | 1000 | } |
|
997 | 1001 | |
|
998 | 1002 | /* |
|
999 | 1003 | * We need to allocate a buffer to hold decompressed data. How we do this |
|
1000 | 1004 | * depends on what we know about the output. The following scenarios are |
|
1001 | 1005 | * possible: |
|
1002 | 1006 | * |
|
1003 | 1007 | * 1. All structs defining frames declare the output size. |
|
1004 | 1008 | * 2. The decompressed size is embedded within the zstd frame. |
|
1005 | 1009 | * 3. The decompressed size is not stored anywhere. |
|
1006 | 1010 | * |
|
1007 | 1011 | * For now, we only support #1 and #2. |
|
1008 | 1012 | */ |
|
1009 | 1013 | |
|
1010 | 1014 | /* Resolve output segments. */
|
1011 | 1015 | for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) { |
|
1012 | 1016 | FramePointer* fp = &framePointers[frameIndex]; |
|
1013 | 1017 | unsigned long long decompressedSize; |
|
1014 | 1018 | |
|
1015 | 1019 | if (0 == fp->destSize) { |
|
1016 | 1020 | decompressedSize = ZSTD_getFrameContentSize(fp->sourceData, fp->sourceSize); |
|
1017 | 1021 | |
|
1018 | 1022 | if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) { |
|
1019 | 1023 | state->error = WorkerError_unknownSize; |
|
1020 | 1024 | state->errorOffset = frameIndex; |
|
1021 | 1025 | return; |
|
1022 | 1026 | } |
|
1023 | 1027 | else if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) { |
|
1024 | 1028 | if (state->requireOutputSizes) { |
|
1025 | 1029 | state->error = WorkerError_unknownSize; |
|
1026 | 1030 | state->errorOffset = frameIndex; |
|
1027 | 1031 | return; |
|
1028 | 1032 | } |
|
1029 | 1033 | |
|
1030 | 1034 | /* This will fail the assert for .destSize > 0 below. */ |
|
1031 | 1035 | decompressedSize = 0; |
|
1032 | 1036 | } |
|
1033 | 1037 | |
|
1034 | 1038 | if (decompressedSize > SIZE_MAX) { |
|
1035 | 1039 | state->error = WorkerError_memory; |
|
1036 | 1040 | state->errorOffset = frameIndex; |
|
1037 | 1041 | return; |
|
1038 | 1042 | } |
|
1039 | 1043 | |
|
1040 | 1044 | fp->destSize = (size_t)decompressedSize; |
|
1041 | 1045 | } |
|
1042 | 1046 | |
|
1043 | 1047 | totalOutputSize += fp->destSize; |
|
1044 | 1048 | } |
|
1045 | 1049 | |
|
1046 | 1050 | state->destBuffers = calloc(1, sizeof(DestBuffer)); |
|
1047 | 1051 | if (NULL == state->destBuffers) { |
|
1048 | 1052 | state->error = WorkerError_memory; |
|
1049 | 1053 | return; |
|
1050 | 1054 | } |
|
1051 | 1055 | |
|
1052 | 1056 | state->destCount = 1; |
|
1053 | 1057 | |
|
1054 | 1058 | destBuffer = &state->destBuffers[state->destCount - 1]; |
|
1055 | 1059 | |
|
1056 | 1060 | assert(framePointers[state->startOffset].destSize > 0); /* For now. */ |
|
1057 | 1061 | |
|
1058 | 1062 | allocationSize = roundpow2((size_t)state->totalSourceSize); |
|
1059 | 1063 | |
|
1060 | 1064 | if (framePointers[state->startOffset].destSize > allocationSize) { |
|
1061 | 1065 | allocationSize = roundpow2(framePointers[state->startOffset].destSize); |
|
1062 | 1066 | } |
|
1063 | 1067 | |
|
1064 | 1068 | destBuffer->dest = malloc(allocationSize); |
|
1065 | 1069 | if (NULL == destBuffer->dest) { |
|
1066 | 1070 | state->error = WorkerError_memory; |
|
1067 | 1071 | return; |
|
1068 | 1072 | } |
|
1069 | 1073 | |
|
1070 | 1074 | destBuffer->destSize = allocationSize; |
|
1071 | 1075 | |
|
1072 | 1076 | destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment)); |
|
1073 | 1077 | if (NULL == destBuffer->segments) { |
|
1074 | 1078 | /* Caller will free state->dest as part of cleanup. */ |
|
1075 | 1079 | state->error = WorkerError_memory; |
|
1076 | 1080 | return; |
|
1077 | 1081 | } |
|
1078 | 1082 | |
|
1079 | 1083 | destBuffer->segmentsSize = remainingItems; |
|
1080 | 1084 | |
|
1081 | 1085 | for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) { |
|
1082 | 1086 | ZSTD_outBuffer outBuffer; |
|
1083 | 1087 | ZSTD_inBuffer inBuffer; |
|
1084 | 1088 | const void* source = framePointers[frameIndex].sourceData; |
|
1085 | 1089 | const size_t sourceSize = framePointers[frameIndex].sourceSize; |
|
1086 | 1090 | void* dest; |
|
1087 | 1091 | const size_t decompressedSize = framePointers[frameIndex].destSize; |
|
1088 | 1092 | size_t destAvailable = destBuffer->destSize - destOffset; |
|
1089 | 1093 | |
|
1090 | 1094 | assert(decompressedSize > 0); /* For now. */ |
|
1091 | 1095 | |
|
1092 | 1096 | /* |
|
 1093 | 1097 | * Not enough space in the current buffer. Finish the current one, then allocate and |
|
1094 | 1098 | * switch to a new one. |
|
1095 | 1099 | */ |
|
1096 | 1100 | if (decompressedSize > destAvailable) { |
|
1097 | 1101 | /* |
|
1098 | 1102 | * Shrinking the destination buffer is optional. But it should be cheap, |
|
1099 | 1103 | * so we just do it. |
|
1100 | 1104 | */ |
|
1101 | 1105 | if (destAvailable) { |
|
1102 | 1106 | tmpBuf = realloc(destBuffer->dest, destOffset); |
|
1103 | 1107 | if (NULL == tmpBuf) { |
|
1104 | 1108 | state->error = WorkerError_memory; |
|
1105 | 1109 | return; |
|
1106 | 1110 | } |
|
1107 | 1111 | |
|
1108 | 1112 | destBuffer->dest = tmpBuf; |
|
1109 | 1113 | destBuffer->destSize = destOffset; |
|
1110 | 1114 | } |
|
1111 | 1115 | |
|
1112 | 1116 | /* Truncate segments buffer. */ |
|
1113 | 1117 | tmpBuf = realloc(destBuffer->segments, |
|
1114 | 1118 | (frameIndex - currentBufferStartIndex) * sizeof(BufferSegment)); |
|
1115 | 1119 | if (NULL == tmpBuf) { |
|
1116 | 1120 | state->error = WorkerError_memory; |
|
1117 | 1121 | return; |
|
1118 | 1122 | } |
|
1119 | 1123 | |
|
1120 | 1124 | destBuffer->segments = tmpBuf; |
|
1121 | 1125 | destBuffer->segmentsSize = frameIndex - currentBufferStartIndex; |
|
1122 | 1126 | |
|
1123 | 1127 | /* Grow space for new DestBuffer. */ |
|
1124 | 1128 | tmpBuf = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer)); |
|
1125 | 1129 | if (NULL == tmpBuf) { |
|
1126 | 1130 | state->error = WorkerError_memory; |
|
1127 | 1131 | return; |
|
1128 | 1132 | } |
|
1129 | 1133 | |
|
1130 | 1134 | state->destBuffers = tmpBuf; |
|
1131 | 1135 | state->destCount++; |
|
1132 | 1136 | |
|
1133 | 1137 | destBuffer = &state->destBuffers[state->destCount - 1]; |
|
1134 | 1138 | |
|
 1135 | 1139 | /* Don't take any chances with non-NULL pointers. */ |
|
1136 | 1140 | memset(destBuffer, 0, sizeof(DestBuffer)); |
|
1137 | 1141 | |
|
1138 | 1142 | allocationSize = roundpow2((size_t)state->totalSourceSize); |
|
1139 | 1143 | |
|
1140 | 1144 | if (decompressedSize > allocationSize) { |
|
1141 | 1145 | allocationSize = roundpow2(decompressedSize); |
|
1142 | 1146 | } |
|
1143 | 1147 | |
|
1144 | 1148 | destBuffer->dest = malloc(allocationSize); |
|
1145 | 1149 | if (NULL == destBuffer->dest) { |
|
1146 | 1150 | state->error = WorkerError_memory; |
|
1147 | 1151 | return; |
|
1148 | 1152 | } |
|
1149 | 1153 | |
|
1150 | 1154 | destBuffer->destSize = allocationSize; |
|
1151 | 1155 | destAvailable = allocationSize; |
|
1152 | 1156 | destOffset = 0; |
|
1153 | 1157 | localOffset = 0; |
|
1154 | 1158 | |
|
1155 | 1159 | destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment)); |
|
1156 | 1160 | if (NULL == destBuffer->segments) { |
|
1157 | 1161 | state->error = WorkerError_memory; |
|
1158 | 1162 | return; |
|
1159 | 1163 | } |
|
1160 | 1164 | |
|
1161 | 1165 | destBuffer->segmentsSize = remainingItems; |
|
1162 | 1166 | currentBufferStartIndex = frameIndex; |
|
1163 | 1167 | } |
|
1164 | 1168 | |
|
1165 | 1169 | dest = (char*)destBuffer->dest + destOffset; |
|
1166 | 1170 | |
|
1167 | 1171 | outBuffer.dst = dest; |
|
1168 | 1172 | outBuffer.size = decompressedSize; |
|
1169 | 1173 | outBuffer.pos = 0; |
|
1170 | 1174 | |
|
1171 | 1175 | inBuffer.src = source; |
|
1172 | 1176 | inBuffer.size = sourceSize; |
|
1173 | 1177 | inBuffer.pos = 0; |
|
1174 | 1178 | |
|
1175 | 1179 | zresult = ZSTD_decompress_generic(state->dctx, &outBuffer, &inBuffer); |
|
1176 | 1180 | if (ZSTD_isError(zresult)) { |
|
1177 | 1181 | state->error = WorkerError_zstd; |
|
1178 | 1182 | state->zresult = zresult; |
|
1179 | 1183 | state->errorOffset = frameIndex; |
|
1180 | 1184 | return; |
|
1181 | 1185 | } |
|
1182 | 1186 | else if (zresult || outBuffer.pos != decompressedSize) { |
|
1183 | 1187 | state->error = WorkerError_sizeMismatch; |
|
1184 | 1188 | state->zresult = outBuffer.pos; |
|
1185 | 1189 | state->errorOffset = frameIndex; |
|
1186 | 1190 | return; |
|
1187 | 1191 | } |
|
1188 | 1192 | |
|
1189 | 1193 | destBuffer->segments[localOffset].offset = destOffset; |
|
1190 | 1194 | destBuffer->segments[localOffset].length = outBuffer.pos; |
|
1191 | 1195 | destOffset += outBuffer.pos; |
|
1192 | 1196 | localOffset++; |
|
1193 | 1197 | remainingItems--; |
|
1194 | 1198 | } |
|
1195 | 1199 | |
|
1196 | 1200 | if (destBuffer->destSize > destOffset) { |
|
1197 | 1201 | tmpBuf = realloc(destBuffer->dest, destOffset); |
|
1198 | 1202 | if (NULL == tmpBuf) { |
|
1199 | 1203 | state->error = WorkerError_memory; |
|
1200 | 1204 | return; |
|
1201 | 1205 | } |
|
1202 | 1206 | |
|
1203 | 1207 | destBuffer->dest = tmpBuf; |
|
1204 | 1208 | destBuffer->destSize = destOffset; |
|
1205 | 1209 | } |
|
1206 | 1210 | } |
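The size-resolution logic in the worker above maps onto what a Python caller observes: frames whose headers record a content size (strategy #2) need no help, while frames without one require an explicit entry in ``decompressed_sizes``. A minimal sketch of inspecting the embedded size up front, using the ``get_frame_parameters()`` helper declared in this extension (treat the exact ``content_size`` attribute name as an assumption):

    import zstandard as zstd

    cctx = zstd.ZstdCompressor(write_content_size=True)
    frame = cctx.compress(b'payload' * 1024)

    # Strategy #2 above: the decompressed size is embedded in the frame
    # header and can be read without decompressing the frame.
    params = zstd.get_frame_parameters(frame)
    print(params.content_size)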
|
1207 | 1211 | |
|
1208 | 1212 | ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames, |
|
1209 | 1213 | Py_ssize_t threadCount) { |
|
1210 | 1214 | Py_ssize_t i = 0; |
|
1211 | 1215 | int errored = 0; |
|
1212 | 1216 | Py_ssize_t segmentsCount; |
|
1213 | 1217 | ZstdBufferWithSegments* bws = NULL; |
|
1214 | 1218 | PyObject* resultArg = NULL; |
|
1215 | 1219 | Py_ssize_t resultIndex; |
|
1216 | 1220 | ZstdBufferWithSegmentsCollection* result = NULL; |
|
1217 | 1221 | FramePointer* framePointers = frames->frames; |
|
1218 | 1222 | unsigned long long workerBytes = 0; |
|
1219 | 1223 | Py_ssize_t currentThread = 0; |
|
1220 | 1224 | Py_ssize_t workerStartOffset = 0; |
|
1221 | 1225 | POOL_ctx* pool = NULL; |
|
1222 | 1226 | WorkerState* workerStates = NULL; |
|
1223 | 1227 | unsigned long long bytesPerWorker; |
|
1224 | 1228 | |
|
1225 | 1229 | /* Caller should normalize 0 and negative values to 1 or larger. */ |
|
1226 | 1230 | assert(threadCount >= 1); |
|
1227 | 1231 | |
|
1228 | 1232 | /* More threads than inputs makes no sense under any conditions. */ |
|
1229 | 1233 | threadCount = frames->framesSize < threadCount ? frames->framesSize |
|
1230 | 1234 | : threadCount; |
|
1231 | 1235 | |
|
1232 | 1236 | /* TODO lower thread count if input size is too small and threads would just |
|
1233 | 1237 | add overhead. */ |
|
1234 | 1238 | |
|
1235 | 1239 | if (decompressor->dict) { |
|
1236 | 1240 | if (ensure_ddict(decompressor->dict)) { |
|
1237 | 1241 | return NULL; |
|
1238 | 1242 | } |
|
1239 | 1243 | } |
|
1240 | 1244 | |
|
1241 | 1245 | /* If threadCount==1, we don't start a thread pool. But we do leverage the |
|
1242 | 1246 | same API for dispatching work. */ |
|
1243 | 1247 | workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState)); |
|
1244 | 1248 | if (NULL == workerStates) { |
|
1245 | 1249 | PyErr_NoMemory(); |
|
1246 | 1250 | goto finally; |
|
1247 | 1251 | } |
|
1248 | 1252 | |
|
1249 | 1253 | memset(workerStates, 0, threadCount * sizeof(WorkerState)); |
|
1250 | 1254 | |
|
1251 | 1255 | if (threadCount > 1) { |
|
1252 | 1256 | pool = POOL_create(threadCount, 1); |
|
1253 | 1257 | if (NULL == pool) { |
|
1254 | 1258 | PyErr_SetString(ZstdError, "could not initialize zstd thread pool"); |
|
1255 | 1259 | goto finally; |
|
1256 | 1260 | } |
|
1257 | 1261 | } |
|
1258 | 1262 | |
|
1259 | 1263 | bytesPerWorker = frames->compressedSize / threadCount; |
|
1260 | 1264 | |
|
1261 | 1265 | if (bytesPerWorker > SIZE_MAX) { |
|
1262 | 1266 | PyErr_SetString(ZstdError, "too much data per worker for this platform"); |
|
1263 | 1267 | goto finally; |
|
1264 | 1268 | } |
|
1265 | 1269 | |
|
1266 | 1270 | for (i = 0; i < threadCount; i++) { |
|
1267 | 1271 | size_t zresult; |
|
1268 | 1272 | |
|
1269 | 1273 | workerStates[i].dctx = ZSTD_createDCtx(); |
|
1270 | 1274 | if (NULL == workerStates[i].dctx) { |
|
1271 | 1275 | PyErr_NoMemory(); |
|
1272 | 1276 | goto finally; |
|
1273 | 1277 | } |
|
1274 | 1278 | |
|
1275 | 1279 | ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx); |
|
1276 | 1280 | |
|
1277 | 1281 | if (decompressor->dict) { |
|
1278 | 1282 | zresult = ZSTD_DCtx_refDDict(workerStates[i].dctx, decompressor->dict->ddict); |
|
1279 | 1283 | if (zresult) { |
|
1280 | 1284 | PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s", |
|
1281 | 1285 | ZSTD_getErrorName(zresult)); |
|
1282 | 1286 | goto finally; |
|
1283 | 1287 | } |
|
1284 | 1288 | } |
|
1285 | 1289 | |
|
1286 | 1290 | workerStates[i].framePointers = framePointers; |
|
1287 | 1291 | workerStates[i].requireOutputSizes = 1; |
|
1288 | 1292 | } |
|
1289 | 1293 | |
|
1290 | 1294 | Py_BEGIN_ALLOW_THREADS |
|
1291 | 1295 | /* There are many ways to split work among workers. |
|
1292 | 1296 | |
|
1293 | 1297 | For now, we take a simple approach of splitting work so each worker |
|
1294 | 1298 | gets roughly the same number of input bytes. This will result in more |
|
1295 | 1299 | starvation than running N>threadCount jobs. But it avoids complications |
|
1296 | 1300 | around state tracking, which could involve extra locking. |
|
1297 | 1301 | */ |
|
1298 | 1302 | for (i = 0; i < frames->framesSize; i++) { |
|
1299 | 1303 | workerBytes += frames->frames[i].sourceSize; |
|
1300 | 1304 | |
|
1301 | 1305 | /* |
|
1302 | 1306 | * The last worker/thread needs to handle all remaining work. Don't |
|
1303 | 1307 | * trigger it prematurely. Defer to the block outside of the loop. |
|
 1304 | 1308 | * (But still process this loop so workerBytes is correct.) |
|
1305 | 1309 | */ |
|
1306 | 1310 | if (currentThread == threadCount - 1) { |
|
1307 | 1311 | continue; |
|
1308 | 1312 | } |
|
1309 | 1313 | |
|
1310 | 1314 | if (workerBytes >= bytesPerWorker) { |
|
1311 | 1315 | workerStates[currentThread].startOffset = workerStartOffset; |
|
1312 | 1316 | workerStates[currentThread].endOffset = i; |
|
1313 | 1317 | workerStates[currentThread].totalSourceSize = workerBytes; |
|
1314 | 1318 | |
|
1315 | 1319 | if (threadCount > 1) { |
|
1316 | 1320 | POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]); |
|
1317 | 1321 | } |
|
1318 | 1322 | else { |
|
1319 | 1323 | decompress_worker(&workerStates[currentThread]); |
|
1320 | 1324 | } |
|
1321 | 1325 | currentThread++; |
|
1322 | 1326 | workerStartOffset = i + 1; |
|
1323 | 1327 | workerBytes = 0; |
|
1324 | 1328 | } |
|
1325 | 1329 | } |
|
1326 | 1330 | |
|
1327 | 1331 | if (workerBytes) { |
|
1328 | 1332 | workerStates[currentThread].startOffset = workerStartOffset; |
|
1329 | 1333 | workerStates[currentThread].endOffset = frames->framesSize - 1; |
|
1330 | 1334 | workerStates[currentThread].totalSourceSize = workerBytes; |
|
1331 | 1335 | |
|
1332 | 1336 | if (threadCount > 1) { |
|
1333 | 1337 | POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]); |
|
1334 | 1338 | } |
|
1335 | 1339 | else { |
|
1336 | 1340 | decompress_worker(&workerStates[currentThread]); |
|
1337 | 1341 | } |
|
1338 | 1342 | } |
|
1339 | 1343 | |
|
1340 | 1344 | if (threadCount > 1) { |
|
1341 | 1345 | POOL_free(pool); |
|
1342 | 1346 | pool = NULL; |
|
1343 | 1347 | } |
|
1344 | 1348 | Py_END_ALLOW_THREADS |
|
1345 | 1349 | |
|
1346 | 1350 | for (i = 0; i < threadCount; i++) { |
|
1347 | 1351 | switch (workerStates[i].error) { |
|
1348 | 1352 | case WorkerError_none: |
|
1349 | 1353 | break; |
|
1350 | 1354 | |
|
1351 | 1355 | case WorkerError_zstd: |
|
1352 | 1356 | PyErr_Format(ZstdError, "error decompressing item %zd: %s", |
|
1353 | 1357 | workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult)); |
|
1354 | 1358 | errored = 1; |
|
1355 | 1359 | break; |
|
1356 | 1360 | |
|
1357 | 1361 | case WorkerError_memory: |
|
1358 | 1362 | PyErr_NoMemory(); |
|
1359 | 1363 | errored = 1; |
|
1360 | 1364 | break; |
|
1361 | 1365 | |
|
1362 | 1366 | case WorkerError_sizeMismatch: |
|
1363 | 1367 | PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %zu", |
|
1364 | 1368 | workerStates[i].errorOffset, workerStates[i].zresult, |
|
1365 | 1369 | framePointers[workerStates[i].errorOffset].destSize); |
|
1366 | 1370 | errored = 1; |
|
1367 | 1371 | break; |
|
1368 | 1372 | |
|
1369 | 1373 | case WorkerError_unknownSize: |
|
1370 | 1374 | PyErr_Format(PyExc_ValueError, "could not determine decompressed size of item %zd", |
|
1371 | 1375 | workerStates[i].errorOffset); |
|
1372 | 1376 | errored = 1; |
|
1373 | 1377 | break; |
|
1374 | 1378 | |
|
1375 | 1379 | default: |
|
1376 | 1380 | PyErr_Format(ZstdError, "unhandled error type: %d; this is a bug", |
|
1377 | 1381 | workerStates[i].error); |
|
1378 | 1382 | errored = 1; |
|
1379 | 1383 | break; |
|
1380 | 1384 | } |
|
1381 | 1385 | |
|
1382 | 1386 | if (errored) { |
|
1383 | 1387 | break; |
|
1384 | 1388 | } |
|
1385 | 1389 | } |
|
1386 | 1390 | |
|
1387 | 1391 | if (errored) { |
|
1388 | 1392 | goto finally; |
|
1389 | 1393 | } |
|
1390 | 1394 | |
|
1391 | 1395 | segmentsCount = 0; |
|
1392 | 1396 | for (i = 0; i < threadCount; i++) { |
|
1393 | 1397 | segmentsCount += workerStates[i].destCount; |
|
1394 | 1398 | } |
|
1395 | 1399 | |
|
1396 | 1400 | resultArg = PyTuple_New(segmentsCount); |
|
1397 | 1401 | if (NULL == resultArg) { |
|
1398 | 1402 | goto finally; |
|
1399 | 1403 | } |
|
1400 | 1404 | |
|
1401 | 1405 | resultIndex = 0; |
|
1402 | 1406 | |
|
1403 | 1407 | for (i = 0; i < threadCount; i++) { |
|
1404 | 1408 | Py_ssize_t bufferIndex; |
|
1405 | 1409 | WorkerState* state = &workerStates[i]; |
|
1406 | 1410 | |
|
1407 | 1411 | for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) { |
|
1408 | 1412 | DestBuffer* destBuffer = &state->destBuffers[bufferIndex]; |
|
1409 | 1413 | |
|
1410 | 1414 | bws = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize, |
|
1411 | 1415 | destBuffer->segments, destBuffer->segmentsSize); |
|
1412 | 1416 | if (NULL == bws) { |
|
1413 | 1417 | goto finally; |
|
1414 | 1418 | } |
|
1415 | 1419 | |
|
1416 | 1420 | /* |
|
1417 | 1421 | * Memory for buffer and segments was allocated using malloc() in worker |
|
1418 | 1422 | * and the memory is transferred to the BufferWithSegments instance. So |
|
1419 | 1423 | * tell instance to use free() and NULL the reference in the state struct |
|
1420 | 1424 | * so it isn't freed below. |
|
1421 | 1425 | */ |
|
1422 | 1426 | bws->useFree = 1; |
|
1423 | 1427 | destBuffer->dest = NULL; |
|
1424 | 1428 | destBuffer->segments = NULL; |
|
1425 | 1429 | |
|
1426 | 1430 | PyTuple_SET_ITEM(resultArg, resultIndex++, (PyObject*)bws); |
|
1427 | 1431 | } |
|
1428 | 1432 | } |
|
1429 | 1433 | |
|
1430 | 1434 | result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject( |
|
1431 | 1435 | (PyObject*)&ZstdBufferWithSegmentsCollectionType, resultArg); |
|
1432 | 1436 | |
|
1433 | 1437 | finally: |
|
1434 | 1438 | Py_CLEAR(resultArg); |
|
1435 | 1439 | |
|
1436 | 1440 | if (workerStates) { |
|
1437 | 1441 | for (i = 0; i < threadCount; i++) { |
|
1438 | 1442 | Py_ssize_t bufferIndex; |
|
1439 | 1443 | WorkerState* state = &workerStates[i]; |
|
1440 | 1444 | |
|
1441 | 1445 | if (state->dctx) { |
|
1442 | 1446 | ZSTD_freeDCtx(state->dctx); |
|
1443 | 1447 | } |
|
1444 | 1448 | |
|
1445 | 1449 | for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) { |
|
1446 | 1450 | if (state->destBuffers) { |
|
1447 | 1451 | /* |
|
 1448 | 1452 | * Will be NULL if memory was transferred to a BufferWithSegments. |
|
1449 | 1453 | * Otherwise it is left over after an error occurred. |
|
1450 | 1454 | */ |
|
1451 | 1455 | free(state->destBuffers[bufferIndex].dest); |
|
1452 | 1456 | free(state->destBuffers[bufferIndex].segments); |
|
1453 | 1457 | } |
|
1454 | 1458 | } |
|
1455 | 1459 | |
|
1456 | 1460 | free(state->destBuffers); |
|
1457 | 1461 | } |
|
1458 | 1462 | |
|
1459 | 1463 | PyMem_Free(workerStates); |
|
1460 | 1464 | } |
|
1461 | 1465 | |
|
1462 | 1466 | POOL_free(pool); |
|
1463 | 1467 | |
|
1464 | 1468 | return result; |
|
1465 | 1469 | } |
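The partitioning loop above groups frames greedily by input bytes, with the last worker absorbing any remainder. A rough Python sketch of the same scheme, purely illustrative (``split_by_bytes`` is not part of the extension):

    def split_by_bytes(frame_sizes, thread_count):
        """Greedily group frame indexes so each group holds roughly
        total/thread_count input bytes, mirroring the C loop above."""
        per_worker = sum(frame_sizes) // thread_count
        groups, current, current_bytes = [], [], 0

        for i, size in enumerate(frame_sizes):
            current.append(i)
            current_bytes += size
            # All but the last group close once they reach the per-worker
            # quota; the final group takes whatever work remains.
            if current_bytes >= per_worker and len(groups) < thread_count - 1:
                groups.append(current)
                current, current_bytes = [], 0

        if current:
            groups.append(current)
        return groups

    print(split_by_bytes([10, 200, 30, 40, 500, 60], 3))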
|
1466 | 1470 | |
|
1467 | 1471 | PyDoc_STRVAR(Decompressor_multi_decompress_to_buffer__doc__, |
|
1468 | 1472 | "Decompress multiple frames to output buffers\n" |
|
1469 | 1473 | "\n" |
|
1470 | 1474 | "Receives a ``BufferWithSegments``, a ``BufferWithSegmentsCollection`` or a\n" |
|
1471 | 1475 | "list of bytes-like objects. Each item in the passed collection should be a\n" |
|
1472 | 1476 | "compressed zstd frame.\n" |
|
1473 | 1477 | "\n" |
|
1474 | 1478 | "Unless ``decompressed_sizes`` is specified, the content size *must* be\n" |
|
1475 | 1479 | "written into the zstd frame header. If ``decompressed_sizes`` is specified,\n" |
|
1476 | 1480 | "it is an object conforming to the buffer protocol that represents an array\n" |
|
1477 | 1481 | "of 64-bit unsigned integers in the machine's native format. Specifying\n" |
|
1478 | 1482 | "``decompressed_sizes`` avoids a pre-scan of each frame to determine its\n" |
|
1479 | 1483 | "output size.\n" |
|
1480 | 1484 | "\n" |
|
1481 | 1485 | "Returns a ``BufferWithSegmentsCollection`` containing the decompressed\n" |
|
1482 | 1486 | "data. All decompressed data is allocated in a single memory buffer. The\n" |
|
1483 | 1487 | "``BufferWithSegments`` instance tracks which objects are at which offsets\n" |
|
1484 | 1488 | "and their respective lengths.\n" |
|
1485 | 1489 | "\n" |
|
1486 | 1490 | "The ``threads`` argument controls how many threads to use for operations.\n" |
|
1487 | 1491 | "Negative values will use the same number of threads as logical CPUs on the\n" |
|
1488 | 1492 | "machine.\n" |
|
1489 | 1493 | ); |
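A short usage sketch for the API described by this docstring; the frames are generated inline purely for illustration, and passing ``decompressed_sizes`` is optional when the frames record their content size:

    import struct
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    payloads = [b'x' * (1000 + i) for i in range(8)]
    frames = [cctx.compress(p) for p in payloads]

    # Optional: a native-format array of 64-bit unsigned output sizes, which
    # lets the decompressor skip scanning each frame header for its size.
    sizes = struct.pack('=%dQ' % len(payloads), *(len(p) for p in payloads))

    dctx = zstd.ZstdDecompressor()
    result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes,
                                             threads=-1)
    assert len(result) == len(frames)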
|
1490 | 1494 | |
|
1491 | 1495 | static ZstdBufferWithSegmentsCollection* Decompressor_multi_decompress_to_buffer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { |
|
1492 | 1496 | static char* kwlist[] = { |
|
1493 | 1497 | "frames", |
|
1494 | 1498 | "decompressed_sizes", |
|
1495 | 1499 | "threads", |
|
1496 | 1500 | NULL |
|
1497 | 1501 | }; |
|
1498 | 1502 | |
|
1499 | 1503 | PyObject* frames; |
|
1500 | 1504 | Py_buffer frameSizes; |
|
1501 | 1505 | int threads = 0; |
|
1502 | 1506 | Py_ssize_t frameCount; |
|
1503 | 1507 | Py_buffer* frameBuffers = NULL; |
|
1504 | 1508 | FramePointer* framePointers = NULL; |
|
1505 | 1509 | unsigned long long* frameSizesP = NULL; |
|
1506 | 1510 | unsigned long long totalInputSize = 0; |
|
1507 | 1511 | FrameSources frameSources; |
|
1508 | 1512 | ZstdBufferWithSegmentsCollection* result = NULL; |
|
1509 | 1513 | Py_ssize_t i; |
|
1510 | 1514 | |
|
1511 | 1515 | memset(&frameSizes, 0, sizeof(frameSizes)); |
|
1512 | 1516 | |
|
1513 | 1517 | #if PY_MAJOR_VERSION >= 3 |
|
1514 | 1518 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|y*i:multi_decompress_to_buffer", |
|
1515 | 1519 | #else |
|
1516 | 1520 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s*i:multi_decompress_to_buffer", |
|
1517 | 1521 | #endif |
|
1518 | 1522 | kwlist, &frames, &frameSizes, &threads)) { |
|
1519 | 1523 | return NULL; |
|
1520 | 1524 | } |
|
1521 | 1525 | |
|
1522 | 1526 | if (frameSizes.buf) { |
|
1523 | 1527 | if (!PyBuffer_IsContiguous(&frameSizes, 'C') || frameSizes.ndim > 1) { |
|
1524 | 1528 | PyErr_SetString(PyExc_ValueError, "decompressed_sizes buffer should be contiguous and have a single dimension"); |
|
1525 | 1529 | goto finally; |
|
1526 | 1530 | } |
|
1527 | 1531 | |
|
1528 | 1532 | frameSizesP = (unsigned long long*)frameSizes.buf; |
|
1529 | 1533 | } |
|
1530 | 1534 | |
|
1531 | 1535 | if (threads < 0) { |
|
1532 | 1536 | threads = cpu_count(); |
|
1533 | 1537 | } |
|
1534 | 1538 | |
|
1535 | 1539 | if (threads < 2) { |
|
1536 | 1540 | threads = 1; |
|
1537 | 1541 | } |
|
1538 | 1542 | |
|
1539 | 1543 | if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsType)) { |
|
1540 | 1544 | ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)frames; |
|
1541 | 1545 | frameCount = buffer->segmentCount; |
|
1542 | 1546 | |
|
1543 | 1547 | if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) { |
|
1544 | 1548 | PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd", |
|
1545 | 1549 | frameCount * sizeof(unsigned long long), frameSizes.len); |
|
1546 | 1550 | goto finally; |
|
1547 | 1551 | } |
|
1548 | 1552 | |
|
1549 | 1553 | framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer)); |
|
1550 | 1554 | if (!framePointers) { |
|
1551 | 1555 | PyErr_NoMemory(); |
|
1552 | 1556 | goto finally; |
|
1553 | 1557 | } |
|
1554 | 1558 | |
|
1555 | 1559 | for (i = 0; i < frameCount; i++) { |
|
1556 | 1560 | void* sourceData; |
|
1557 | 1561 | unsigned long long sourceSize; |
|
1558 | 1562 | unsigned long long decompressedSize = 0; |
|
1559 | 1563 | |
|
1560 | 1564 | if (buffer->segments[i].offset + buffer->segments[i].length > buffer->dataSize) { |
|
1561 | 1565 | PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", i); |
|
1562 | 1566 | goto finally; |
|
1563 | 1567 | } |
|
1564 | 1568 | |
|
1565 | 1569 | sourceData = (char*)buffer->data + buffer->segments[i].offset; |
|
1566 | 1570 | sourceSize = buffer->segments[i].length; |
|
1567 | 1571 | totalInputSize += sourceSize; |
|
1568 | 1572 | |
|
1569 | 1573 | if (frameSizesP) { |
|
1570 | 1574 | decompressedSize = frameSizesP[i]; |
|
1571 | 1575 | } |
|
1572 | 1576 | |
|
1573 | 1577 | if (sourceSize > SIZE_MAX) { |
|
1574 | 1578 | PyErr_Format(PyExc_ValueError, |
|
1575 | 1579 | "item %zd is too large for this platform", i); |
|
1576 | 1580 | goto finally; |
|
1577 | 1581 | } |
|
1578 | 1582 | |
|
1579 | 1583 | if (decompressedSize > SIZE_MAX) { |
|
1580 | 1584 | PyErr_Format(PyExc_ValueError, |
|
1581 | 1585 | "decompressed size of item %zd is too large for this platform", i); |
|
1582 | 1586 | goto finally; |
|
1583 | 1587 | } |
|
1584 | 1588 | |
|
1585 | 1589 | framePointers[i].sourceData = sourceData; |
|
1586 | 1590 | framePointers[i].sourceSize = (size_t)sourceSize; |
|
1587 | 1591 | framePointers[i].destSize = (size_t)decompressedSize; |
|
1588 | 1592 | } |
|
1589 | 1593 | } |
|
1590 | 1594 | else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) { |
|
1591 | 1595 | Py_ssize_t offset = 0; |
|
1592 | 1596 | ZstdBufferWithSegments* buffer; |
|
1593 | 1597 | ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)frames; |
|
1594 | 1598 | |
|
1595 | 1599 | frameCount = BufferWithSegmentsCollection_length(collection); |
|
1596 | 1600 | |
|
 1597 | 1601 | if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) { |
|
1598 | 1602 | PyErr_Format(PyExc_ValueError, |
|
1599 | 1603 | "decompressed_sizes size mismatch; expected %zd; got %zd", |
|
1600 | 1604 | frameCount * sizeof(unsigned long long), frameSizes.len); |
|
1601 | 1605 | goto finally; |
|
1602 | 1606 | } |
|
1603 | 1607 | |
|
1604 | 1608 | framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer)); |
|
1605 | 1609 | if (NULL == framePointers) { |
|
1606 | 1610 | PyErr_NoMemory(); |
|
1607 | 1611 | goto finally; |
|
1608 | 1612 | } |
|
1609 | 1613 | |
|
1610 | 1614 | /* Iterate the data structure directly because it is faster. */ |
|
1611 | 1615 | for (i = 0; i < collection->bufferCount; i++) { |
|
1612 | 1616 | Py_ssize_t segmentIndex; |
|
1613 | 1617 | buffer = collection->buffers[i]; |
|
1614 | 1618 | |
|
1615 | 1619 | for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) { |
|
1616 | 1620 | unsigned long long decompressedSize = frameSizesP ? frameSizesP[offset] : 0; |
|
1617 | 1621 | |
|
1618 | 1622 | if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) { |
|
1619 | 1623 | PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", |
|
1620 | 1624 | offset); |
|
1621 | 1625 | goto finally; |
|
1622 | 1626 | } |
|
1623 | 1627 | |
|
1624 | 1628 | if (buffer->segments[segmentIndex].length > SIZE_MAX) { |
|
1625 | 1629 | PyErr_Format(PyExc_ValueError, |
|
1626 | 1630 | "item %zd in buffer %zd is too large for this platform", |
|
1627 | 1631 | segmentIndex, i); |
|
1628 | 1632 | goto finally; |
|
1629 | 1633 | } |
|
1630 | 1634 | |
|
1631 | 1635 | if (decompressedSize > SIZE_MAX) { |
|
1632 | 1636 | PyErr_Format(PyExc_ValueError, |
|
1633 | 1637 | "decompressed size of item %zd in buffer %zd is too large for this platform", |
|
1634 | 1638 | segmentIndex, i); |
|
1635 | 1639 | goto finally; |
|
1636 | 1640 | } |
|
1637 | 1641 | |
|
1638 | 1642 | totalInputSize += buffer->segments[segmentIndex].length; |
|
1639 | 1643 | |
|
1640 | 1644 | framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset; |
|
1641 | 1645 | framePointers[offset].sourceSize = (size_t)buffer->segments[segmentIndex].length; |
|
1642 | 1646 | framePointers[offset].destSize = (size_t)decompressedSize; |
|
1643 | 1647 | |
|
1644 | 1648 | offset++; |
|
1645 | 1649 | } |
|
1646 | 1650 | } |
|
1647 | 1651 | } |
|
1648 | 1652 | else if (PyList_Check(frames)) { |
|
1649 | 1653 | frameCount = PyList_GET_SIZE(frames); |
|
1650 | 1654 | |
|
1651 | 1655 | if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) { |
|
1652 | 1656 | PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd", |
|
1653 | 1657 | frameCount * sizeof(unsigned long long), frameSizes.len); |
|
1654 | 1658 | goto finally; |
|
1655 | 1659 | } |
|
1656 | 1660 | |
|
1657 | 1661 | framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer)); |
|
1658 | 1662 | if (!framePointers) { |
|
1659 | 1663 | PyErr_NoMemory(); |
|
1660 | 1664 | goto finally; |
|
1661 | 1665 | } |
|
1662 | 1666 | |
|
1663 | 1667 | frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer)); |
|
1664 | 1668 | if (NULL == frameBuffers) { |
|
1665 | 1669 | PyErr_NoMemory(); |
|
1666 | 1670 | goto finally; |
|
1667 | 1671 | } |
|
1668 | 1672 | |
|
1669 | 1673 | memset(frameBuffers, 0, frameCount * sizeof(Py_buffer)); |
|
1670 | 1674 | |
|
1671 | 1675 | /* Do a pass to assemble info about our input buffers and output sizes. */ |
|
1672 | 1676 | for (i = 0; i < frameCount; i++) { |
|
1673 | 1677 | unsigned long long decompressedSize = frameSizesP ? frameSizesP[i] : 0; |
|
1674 | 1678 | |
|
1675 | 1679 | if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i), |
|
1676 | 1680 | &frameBuffers[i], PyBUF_CONTIG_RO)) { |
|
1677 | 1681 | PyErr_Clear(); |
|
1678 | 1682 | PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i); |
|
1679 | 1683 | goto finally; |
|
1680 | 1684 | } |
|
1681 | 1685 | |
|
1682 | 1686 | if (decompressedSize > SIZE_MAX) { |
|
1683 | 1687 | PyErr_Format(PyExc_ValueError, |
|
1684 | 1688 | "decompressed size of item %zd is too large for this platform", i); |
|
1685 | 1689 | goto finally; |
|
1686 | 1690 | } |
|
1687 | 1691 | |
|
1688 | 1692 | totalInputSize += frameBuffers[i].len; |
|
1689 | 1693 | |
|
1690 | 1694 | framePointers[i].sourceData = frameBuffers[i].buf; |
|
1691 | 1695 | framePointers[i].sourceSize = frameBuffers[i].len; |
|
1692 | 1696 | framePointers[i].destSize = (size_t)decompressedSize; |
|
1693 | 1697 | } |
|
1694 | 1698 | } |
|
1695 | 1699 | else { |
|
1696 | 1700 | PyErr_SetString(PyExc_TypeError, "argument must be list or BufferWithSegments"); |
|
1697 | 1701 | goto finally; |
|
1698 | 1702 | } |
|
1699 | 1703 | |
|
1700 | 1704 | /* We now have an array with info about our inputs and outputs. Feed it into |
|
1701 | 1705 | our generic decompression function. */ |
|
1702 | 1706 | frameSources.frames = framePointers; |
|
1703 | 1707 | frameSources.framesSize = frameCount; |
|
1704 | 1708 | frameSources.compressedSize = totalInputSize; |
|
1705 | 1709 | |
|
1706 | 1710 | result = decompress_from_framesources(self, &frameSources, threads); |
|
1707 | 1711 | |
|
1708 | 1712 | finally: |
|
1709 | 1713 | if (frameSizes.buf) { |
|
1710 | 1714 | PyBuffer_Release(&frameSizes); |
|
1711 | 1715 | } |
|
1712 | 1716 | PyMem_Free(framePointers); |
|
1713 | 1717 | |
|
1714 | 1718 | if (frameBuffers) { |
|
1715 | 1719 | for (i = 0; i < frameCount; i++) { |
|
1716 | 1720 | PyBuffer_Release(&frameBuffers[i]); |
|
1717 | 1721 | } |
|
1718 | 1722 | |
|
1719 | 1723 | PyMem_Free(frameBuffers); |
|
1720 | 1724 | } |
|
1721 | 1725 | |
|
1722 | 1726 | return result; |
|
1723 | 1727 | } |
|
1724 | 1728 | |
|
1725 | 1729 | static PyMethodDef Decompressor_methods[] = { |
|
1726 | 1730 | { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS, |
|
1727 | 1731 | Decompressor_copy_stream__doc__ }, |
|
1728 | 1732 | { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS, |
|
1729 | 1733 | Decompressor_decompress__doc__ }, |
|
1730 | 1734 | { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_VARARGS | METH_KEYWORDS, |
|
1731 | 1735 | Decompressor_decompressobj__doc__ }, |
|
1732 | 1736 | { "read_to_iter", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS, |
|
1733 | 1737 | Decompressor_read_to_iter__doc__ }, |
|
1734 | 1738 | /* TODO Remove deprecated API */ |
|
1735 | 1739 | { "read_from", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS, |
|
1736 | 1740 | Decompressor_read_to_iter__doc__ }, |
|
1737 | 1741 | { "stream_reader", (PyCFunction)Decompressor_stream_reader, |
|
1738 | 1742 | METH_VARARGS | METH_KEYWORDS, Decompressor_stream_reader__doc__ }, |
|
1739 | 1743 | { "stream_writer", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS, |
|
1740 | 1744 | Decompressor_stream_writer__doc__ }, |
|
1741 | 1745 | /* TODO remove deprecated API */ |
|
1742 | 1746 | { "write_to", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS, |
|
1743 | 1747 | Decompressor_stream_writer__doc__ }, |
|
1744 | 1748 | { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain, |
|
1745 | 1749 | METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ }, |
|
1746 | 1750 | { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer, |
|
1747 | 1751 | METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ }, |
|
1748 | 1752 | { "memory_size", (PyCFunction)Decompressor_memory_size, METH_NOARGS, |
|
1749 | 1753 | Decompressor_memory_size__doc__ }, |
|
1750 | 1754 | { NULL, NULL } |
|
1751 | 1755 | }; |
|
1752 | 1756 | |
|
1753 | 1757 | PyTypeObject ZstdDecompressorType = { |
|
1754 | 1758 | PyVarObject_HEAD_INIT(NULL, 0) |
|
1755 | 1759 | "zstd.ZstdDecompressor", /* tp_name */ |
|
1756 | 1760 | sizeof(ZstdDecompressor), /* tp_basicsize */ |
|
1757 | 1761 | 0, /* tp_itemsize */ |
|
1758 | 1762 | (destructor)Decompressor_dealloc, /* tp_dealloc */ |
|
1759 | 1763 | 0, /* tp_print */ |
|
1760 | 1764 | 0, /* tp_getattr */ |
|
1761 | 1765 | 0, /* tp_setattr */ |
|
1762 | 1766 | 0, /* tp_compare */ |
|
1763 | 1767 | 0, /* tp_repr */ |
|
1764 | 1768 | 0, /* tp_as_number */ |
|
1765 | 1769 | 0, /* tp_as_sequence */ |
|
1766 | 1770 | 0, /* tp_as_mapping */ |
|
1767 | 1771 | 0, /* tp_hash */ |
|
1768 | 1772 | 0, /* tp_call */ |
|
1769 | 1773 | 0, /* tp_str */ |
|
1770 | 1774 | 0, /* tp_getattro */ |
|
1771 | 1775 | 0, /* tp_setattro */ |
|
1772 | 1776 | 0, /* tp_as_buffer */ |
|
1773 | 1777 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
1774 | 1778 | Decompressor__doc__, /* tp_doc */ |
|
1775 | 1779 | 0, /* tp_traverse */ |
|
1776 | 1780 | 0, /* tp_clear */ |
|
1777 | 1781 | 0, /* tp_richcompare */ |
|
1778 | 1782 | 0, /* tp_weaklistoffset */ |
|
1779 | 1783 | 0, /* tp_iter */ |
|
1780 | 1784 | 0, /* tp_iternext */ |
|
1781 | 1785 | Decompressor_methods, /* tp_methods */ |
|
1782 | 1786 | 0, /* tp_members */ |
|
1783 | 1787 | 0, /* tp_getset */ |
|
1784 | 1788 | 0, /* tp_base */ |
|
1785 | 1789 | 0, /* tp_dict */ |
|
1786 | 1790 | 0, /* tp_descr_get */ |
|
1787 | 1791 | 0, /* tp_descr_set */ |
|
1788 | 1792 | 0, /* tp_dictoffset */ |
|
1789 | 1793 | (initproc)Decompressor_init, /* tp_init */ |
|
1790 | 1794 | 0, /* tp_alloc */ |
|
1791 | 1795 | PyType_GenericNew, /* tp_new */ |
|
1792 | 1796 | }; |
|
1793 | 1797 | |
|
1794 | 1798 | void decompressor_module_init(PyObject* mod) { |
|
1795 | 1799 | Py_TYPE(&ZstdDecompressorType) = &PyType_Type; |
|
1796 | 1800 | if (PyType_Ready(&ZstdDecompressorType) < 0) { |
|
1797 | 1801 | return; |
|
1798 | 1802 | } |
|
1799 | 1803 | |
|
1800 | 1804 | Py_INCREF((PyObject*)&ZstdDecompressorType); |
|
1801 | 1805 | PyModule_AddObject(mod, "ZstdDecompressor", |
|
1802 | 1806 | (PyObject*)&ZstdDecompressorType); |
|
1803 | 1807 | } |
@@ -1,346 +1,373 | |||
|
1 | 1 | /** |
|
2 | 2 | * Copyright (c) 2016-present, Gregory Szorc |
|
3 | 3 | * All rights reserved. |
|
4 | 4 | * |
|
5 | 5 | * This software may be modified and distributed under the terms |
|
6 | 6 | * of the BSD license. See the LICENSE file for details. |
|
7 | 7 | */ |
|
8 | 8 | |
|
9 | 9 | #define PY_SSIZE_T_CLEAN |
|
10 | 10 | #include <Python.h> |
|
11 | 11 | #include "structmember.h" |
|
12 | 12 | |
|
13 | 13 | #define ZSTD_STATIC_LINKING_ONLY |
|
14 | 14 | #define ZDICT_STATIC_LINKING_ONLY |
|
15 | 15 | #include <zstd.h> |
|
16 | 16 | #include <zdict.h> |
|
17 | 17 | |
|
18 | #define PYTHON_ZSTANDARD_VERSION "0.9.0" | |
|
18 | /* Remember to change the string in zstandard/__init__ as well */ | |
|
19 | #define PYTHON_ZSTANDARD_VERSION "0.10.1" | |
|
19 | 20 | |
|
20 | 21 | typedef enum { |
|
21 | 22 | compressorobj_flush_finish, |
|
22 | 23 | compressorobj_flush_block, |
|
23 | 24 | } CompressorObj_Flush; |
|
24 | 25 | |
|
25 | 26 | /* |
|
26 | 27 | Represents a ZstdCompressionParameters type. |
|
27 | 28 | |
|
28 | 29 | This type holds all the low-level compression parameters that can be set. |
|
29 | 30 | */ |
|
30 | 31 | typedef struct { |
|
31 | 32 | PyObject_HEAD |
|
32 | 33 | ZSTD_CCtx_params* params; |
|
33 | 34 | unsigned format; |
|
34 | 35 | int compressionLevel; |
|
35 | 36 | unsigned windowLog; |
|
36 | 37 | unsigned hashLog; |
|
37 | 38 | unsigned chainLog; |
|
38 | 39 | unsigned searchLog; |
|
39 | 40 | unsigned minMatch; |
|
40 | 41 | unsigned targetLength; |
|
41 | 42 | unsigned compressionStrategy; |
|
42 | 43 | unsigned contentSizeFlag; |
|
43 | 44 | unsigned checksumFlag; |
|
44 | 45 | unsigned dictIDFlag; |
|
45 | 46 | unsigned threads; |
|
46 | 47 | unsigned jobSize; |
|
47 | 48 | unsigned overlapSizeLog; |
|
48 | unsigned compressLiterals; | |
|
49 | 49 | unsigned forceMaxWindow; |
|
50 | 50 | unsigned enableLongDistanceMatching; |
|
51 | 51 | unsigned ldmHashLog; |
|
52 | 52 | unsigned ldmMinMatch; |
|
53 | 53 | unsigned ldmBucketSizeLog; |
|
54 | 54 | unsigned ldmHashEveryLog; |
|
55 | 55 | } ZstdCompressionParametersObject; |
|
56 | 56 | |
|
57 | 57 | extern PyTypeObject ZstdCompressionParametersType; |
|
58 | 58 | |
|
59 | 59 | /* |
|
60 | 60 | Represents a FrameParameters type. |
|
61 | 61 | |
|
62 | 62 | This type is basically a wrapper around ZSTD_frameParams. |
|
63 | 63 | */ |
|
64 | 64 | typedef struct { |
|
65 | 65 | PyObject_HEAD |
|
66 | 66 | unsigned long long frameContentSize; |
|
67 | 67 | unsigned long long windowSize; |
|
68 | 68 | unsigned dictID; |
|
69 | 69 | char checksumFlag; |
|
70 | 70 | } FrameParametersObject; |
|
71 | 71 | |
|
72 | 72 | extern PyTypeObject FrameParametersType; |
|
73 | 73 | |
|
74 | 74 | /* |
|
75 | 75 | Represents a ZstdCompressionDict type. |
|
76 | 76 | |
|
77 | 77 | Instances hold data used for a zstd compression dictionary. |
|
78 | 78 | */ |
|
79 | 79 | typedef struct { |
|
80 | 80 | PyObject_HEAD |
|
81 | 81 | |
|
82 | 82 | /* Pointer to dictionary data. Owned by self. */ |
|
83 | 83 | void* dictData; |
|
84 | 84 | /* Size of dictionary data. */ |
|
85 | 85 | size_t dictSize; |
|
86 | 86 | ZSTD_dictContentType_e dictType; |
|
87 | 87 | /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */ |
|
88 | 88 | unsigned k; |
|
89 | 89 | /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */ |
|
90 | 90 | unsigned d; |
|
91 | 91 | /* Digested dictionary, suitable for reuse. */ |
|
92 | 92 | ZSTD_CDict* cdict; |
|
93 | 93 | ZSTD_DDict* ddict; |
|
94 | 94 | } ZstdCompressionDict; |
|
95 | 95 | |
|
96 | 96 | extern PyTypeObject ZstdCompressionDictType; |
|
97 | 97 | |
|
98 | 98 | /* |
|
99 | 99 | Represents a ZstdCompressor type. |
|
100 | 100 | */ |
|
101 | 101 | typedef struct { |
|
102 | 102 | PyObject_HEAD |
|
103 | 103 | |
|
104 | 104 | /* Number of threads to use for operations. */ |
|
105 | 105 | unsigned int threads; |
|
106 | 106 | /* Pointer to compression dictionary to use. NULL if not using dictionary |
|
107 | 107 | compression. */ |
|
108 | 108 | ZstdCompressionDict* dict; |
|
109 | 109 | /* Compression context to use. Populated during object construction. */ |
|
110 | 110 | ZSTD_CCtx* cctx; |
|
111 | 111 | /* Compression parameters in use. */ |
|
112 | 112 | ZSTD_CCtx_params* params; |
|
113 | 113 | } ZstdCompressor; |
|
114 | 114 | |
|
115 | 115 | extern PyTypeObject ZstdCompressorType; |
|
116 | 116 | |
|
117 | 117 | typedef struct { |
|
118 | 118 | PyObject_HEAD |
|
119 | 119 | |
|
120 | 120 | ZstdCompressor* compressor; |
|
121 | 121 | ZSTD_outBuffer output; |
|
122 | 122 | int finished; |
|
123 | 123 | } ZstdCompressionObj; |
|
124 | 124 | |
|
125 | 125 | extern PyTypeObject ZstdCompressionObjType; |
|
126 | 126 | |
|
127 | 127 | typedef struct { |
|
128 | 128 | PyObject_HEAD |
|
129 | 129 | |
|
130 | 130 | ZstdCompressor* compressor; |
|
131 | 131 | PyObject* writer; |
|
132 | 132 | unsigned long long sourceSize; |
|
133 | 133 | size_t outSize; |
|
134 | 134 | int entered; |
|
135 | 135 | unsigned long long bytesCompressed; |
|
136 | 136 | } ZstdCompressionWriter; |
|
137 | 137 | |
|
138 | 138 | extern PyTypeObject ZstdCompressionWriterType; |
|
139 | 139 | |
|
140 | 140 | typedef struct { |
|
141 | 141 | PyObject_HEAD |
|
142 | 142 | |
|
143 | 143 | ZstdCompressor* compressor; |
|
144 | 144 | PyObject* reader; |
|
145 | 145 | Py_buffer buffer; |
|
146 | 146 | Py_ssize_t bufferOffset; |
|
147 | 147 | size_t inSize; |
|
148 | 148 | size_t outSize; |
|
149 | 149 | |
|
150 | 150 | ZSTD_inBuffer input; |
|
151 | 151 | ZSTD_outBuffer output; |
|
152 | 152 | int finishedOutput; |
|
153 | 153 | int finishedInput; |
|
154 | 154 | PyObject* readResult; |
|
155 | 155 | } ZstdCompressorIterator; |
|
156 | 156 | |
|
157 | 157 | extern PyTypeObject ZstdCompressorIteratorType; |
|
158 | 158 | |
|
159 | 159 | typedef struct { |
|
160 | 160 | PyObject_HEAD |
|
161 | 161 | |
|
162 | 162 | ZstdCompressor* compressor; |
|
163 | 163 | PyObject* reader; |
|
164 | 164 | Py_buffer buffer; |
|
165 | unsigned long long sourceSize; | |
|
166 | 165 | size_t readSize; |
|
167 | 166 | |
|
168 | 167 | int entered; |
|
169 | 168 | int closed; |
|
170 | 169 | unsigned long long bytesCompressed; |
|
171 | 170 | |
|
172 | 171 | ZSTD_inBuffer input; |
|
173 | 172 | ZSTD_outBuffer output; |
|
174 | 173 | int finishedInput; |
|
175 | 174 | int finishedOutput; |
|
176 | 175 | PyObject* readResult; |
|
177 | 176 | } ZstdCompressionReader; |
|
178 | 177 | |
|
179 | 178 | extern PyTypeObject ZstdCompressionReaderType; |
|
180 | 179 | |
|
181 | 180 | typedef struct { |
|
182 | 181 | PyObject_HEAD |
|
183 | 182 | |
|
183 | ZstdCompressor* compressor; | |
|
184 | ZSTD_inBuffer input; | |
|
185 | ZSTD_outBuffer output; | |
|
186 | Py_buffer inBuffer; | |
|
187 | int finished; | |
|
188 | size_t chunkSize; | |
|
189 | } ZstdCompressionChunker; | |
|
190 | ||
|
191 | extern PyTypeObject ZstdCompressionChunkerType; | |
|
192 | ||
|
193 | typedef enum { | |
|
194 | compressionchunker_mode_normal, | |
|
195 | compressionchunker_mode_flush, | |
|
196 | compressionchunker_mode_finish, | |
|
197 | } CompressionChunkerMode; | |
|
198 | ||
|
199 | typedef struct { | |
|
200 | PyObject_HEAD | |
|
201 | ||
|
202 | ZstdCompressionChunker* chunker; | |
|
203 | CompressionChunkerMode mode; | |
|
204 | } ZstdCompressionChunkerIterator; | |
|
205 | ||
|
206 | extern PyTypeObject ZstdCompressionChunkerIteratorType; | |
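These structs back the new ``chunker()`` compression API. A hedged usage sketch from the Python side (the ``chunk_size`` keyword and exact method names are assumptions based on this release's API; ``input.bin`` is a hypothetical file):

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    chunker = cctx.chunker(chunk_size=16384)

    chunks = []
    with open('input.bin', 'rb') as fh:
        while True:
            data = fh.read(65536)
            if not data:
                break
            # compress() yields zero or more fixed-size output chunks as
            # they fill up.
            chunks.extend(chunker.compress(data))

    # finish() flushes remaining buffered data and emits the final,
    # possibly shorter, chunk.
    chunks.extend(chunker.finish())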
|
207 | ||
|
208 | typedef struct { | |
|
209 | PyObject_HEAD | |
|
210 | ||
|
184 | 211 | ZSTD_DCtx* dctx; |
|
185 | 212 | ZstdCompressionDict* dict; |
|
186 | 213 | size_t maxWindowSize; |
|
187 | 214 | ZSTD_format_e format; |
|
188 | 215 | } ZstdDecompressor; |
|
189 | 216 | |
|
190 | 217 | extern PyTypeObject ZstdDecompressorType; |
|
191 | 218 | |
|
192 | 219 | typedef struct { |
|
193 | 220 | PyObject_HEAD |
|
194 | 221 | |
|
195 | 222 | ZstdDecompressor* decompressor; |
|
196 | 223 | size_t outSize; |
|
197 | 224 | int finished; |
|
198 | 225 | } ZstdDecompressionObj; |
|
199 | 226 | |
|
200 | 227 | extern PyTypeObject ZstdDecompressionObjType; |
|
201 | 228 | |
|
202 | 229 | typedef struct { |
|
203 | 230 | PyObject_HEAD |
|
204 | 231 | |
|
205 | 232 | /* Parent decompressor to which this object is associated. */ |
|
206 | 233 | ZstdDecompressor* decompressor; |
|
207 | 234 | /* Object to read() from (if reading from a stream). */ |
|
208 | 235 | PyObject* reader; |
|
209 | 236 | /* Size for read() operations on reader. */ |
|
210 | 237 | size_t readSize; |
|
211 | 238 | /* Buffer to read from (if reading from a buffer). */ |
|
212 | 239 | Py_buffer buffer; |
|
213 | 240 | |
|
214 | 241 | /* Whether the context manager is active. */ |
|
215 | 242 | int entered; |
|
216 | 243 | /* Whether we've closed the stream. */ |
|
217 | 244 | int closed; |
|
218 | 245 | |
|
219 | 246 | /* Number of bytes decompressed and returned to user. */ |
|
220 | 247 | unsigned long long bytesDecompressed; |
|
221 | 248 | |
|
222 | 249 | /* Tracks data going into decompressor. */ |
|
223 | 250 | ZSTD_inBuffer input; |
|
224 | 251 | |
|
225 | 252 | /* Holds output from read() operation on reader. */ |
|
226 | 253 | PyObject* readResult; |
|
227 | 254 | |
|
228 | 255 | /* Whether all input has been sent to the decompressor. */ |
|
229 | 256 | int finishedInput; |
|
230 | 257 | /* Whether all output has been flushed from the decompressor. */ |
|
231 | 258 | int finishedOutput; |
|
232 | 259 | } ZstdDecompressionReader; |
|
233 | 260 | |
|
234 | 261 | extern PyTypeObject ZstdDecompressionReaderType; |
|
235 | 262 | |
|
236 | 263 | typedef struct { |
|
237 | 264 | PyObject_HEAD |
|
238 | 265 | |
|
239 | 266 | ZstdDecompressor* decompressor; |
|
240 | 267 | PyObject* writer; |
|
241 | 268 | size_t outSize; |
|
242 | 269 | int entered; |
|
243 | 270 | } ZstdDecompressionWriter; |
|
244 | 271 | |
|
245 | 272 | extern PyTypeObject ZstdDecompressionWriterType; |
|
246 | 273 | |
|
247 | 274 | typedef struct { |
|
248 | 275 | PyObject_HEAD |
|
249 | 276 | |
|
250 | 277 | ZstdDecompressor* decompressor; |
|
251 | 278 | PyObject* reader; |
|
252 | 279 | Py_buffer buffer; |
|
253 | 280 | Py_ssize_t bufferOffset; |
|
254 | 281 | size_t inSize; |
|
255 | 282 | size_t outSize; |
|
256 | 283 | size_t skipBytes; |
|
257 | 284 | ZSTD_inBuffer input; |
|
258 | 285 | ZSTD_outBuffer output; |
|
259 | 286 | Py_ssize_t readCount; |
|
260 | 287 | int finishedInput; |
|
261 | 288 | int finishedOutput; |
|
262 | 289 | } ZstdDecompressorIterator; |
|
263 | 290 | |
|
264 | 291 | extern PyTypeObject ZstdDecompressorIteratorType; |
|
265 | 292 | |
|
266 | 293 | typedef struct { |
|
267 | 294 | int errored; |
|
268 | 295 | PyObject* chunk; |
|
269 | 296 | } DecompressorIteratorResult; |
|
270 | 297 | |
|
271 | 298 | typedef struct { |
|
272 | 299 | /* The public API is that these are 64-bit unsigned integers. So these can't |
|
273 | 300 | * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may |
|
274 | 301 | * be nonsensical for this platform. */ |
|
275 | 302 | unsigned long long offset; |
|
276 | 303 | unsigned long long length; |
|
277 | 304 | } BufferSegment; |
|
278 | 305 | |
|
279 | 306 | typedef struct { |
|
280 | 307 | PyObject_HEAD |
|
281 | 308 | |
|
282 | 309 | PyObject* parent; |
|
283 | 310 | BufferSegment* segments; |
|
284 | 311 | Py_ssize_t segmentCount; |
|
285 | 312 | } ZstdBufferSegments; |
|
286 | 313 | |
|
287 | 314 | extern PyTypeObject ZstdBufferSegmentsType; |
|
288 | 315 | |
|
289 | 316 | typedef struct { |
|
290 | 317 | PyObject_HEAD |
|
291 | 318 | |
|
292 | 319 | PyObject* parent; |
|
293 | 320 | void* data; |
|
294 | 321 | Py_ssize_t dataSize; |
|
295 | 322 | unsigned long long offset; |
|
296 | 323 | } ZstdBufferSegment; |
|
297 | 324 | |
|
298 | 325 | extern PyTypeObject ZstdBufferSegmentType; |
|
299 | 326 | |
|
300 | 327 | typedef struct { |
|
301 | 328 | PyObject_HEAD |
|
302 | 329 | |
|
303 | 330 | Py_buffer parent; |
|
304 | 331 | void* data; |
|
305 | 332 | unsigned long long dataSize; |
|
306 | 333 | BufferSegment* segments; |
|
307 | 334 | Py_ssize_t segmentCount; |
|
308 | 335 | int useFree; |
|
309 | 336 | } ZstdBufferWithSegments; |
|
310 | 337 | |
|
311 | 338 | extern PyTypeObject ZstdBufferWithSegmentsType; |
|
312 | 339 | |
|
313 | 340 | /** |
|
314 | 341 | * An ordered collection of BufferWithSegments exposed as a squashed collection. |
|
315 | 342 | * |
|
316 | 343 | * This type provides a virtual view spanning multiple BufferWithSegments |
|
317 | 344 | * instances. It allows multiple instances to be "chained" together and |
|
318 | 345 | * exposed as a single collection. e.g. if there are 2 buffers holding |
|
319 | 346 | * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer. |
|
320 | 347 | */ |
|
321 | 348 | typedef struct { |
|
322 | 349 | PyObject_HEAD |
|
323 | 350 | |
|
324 | 351 | /* An array of buffers that should be exposed through this instance. */ |
|
325 | 352 | ZstdBufferWithSegments** buffers; |
|
326 | 353 | /* Number of elements in buffers array. */ |
|
327 | 354 | Py_ssize_t bufferCount; |
|
328 | 355 | /* Array of first offset in each buffer instance. 0th entry corresponds |
|
329 | 356 | to number of elements in the 0th buffer. 1st entry corresponds to the |
|
330 | 357 | sum of elements in 0th and 1st buffers. */ |
|
331 | 358 | Py_ssize_t* firstElements; |
|
332 | 359 | } ZstdBufferWithSegmentsCollection; |
|
333 | 360 | |
|
334 | 361 | extern PyTypeObject ZstdBufferWithSegmentsCollectionType; |
|
335 | 362 | |
|
336 | 363 | int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value); |
|
337 | 364 | int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj); |
|
338 | 365 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs); |
|
339 | 366 | int ensure_ddict(ZstdCompressionDict* dict); |
|
340 | 367 | int ensure_dctx(ZstdDecompressor* decompressor, int loadDict); |
|
341 | 368 | ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs); |
|
342 | 369 | ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize); |
|
343 | 370 | Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*); |
|
344 | 371 | int cpu_count(void); |
|
345 | 372 | size_t roundpow2(size_t); |
|
346 | 373 | int safe_pybytes_resize(PyObject** obj, Py_ssize_t size); |
@@ -1,196 +1,199 | |||
|
1 | 1 | # Copyright (c) 2016-present, Gregory Szorc |
|
2 | 2 | # All rights reserved. |
|
3 | 3 | # |
|
4 | 4 | # This software may be modified and distributed under the terms |
|
5 | 5 | # of the BSD license. See the LICENSE file for details. |
|
6 | 6 | |
|
7 | 7 | from __future__ import absolute_import |
|
8 | 8 | |
|
9 | 9 | import cffi |
|
10 | 10 | import distutils.ccompiler |
|
11 | 11 | import os |
|
12 | 12 | import re |
|
13 | 13 | import subprocess |
|
14 | 14 | import tempfile |
|
15 | 15 | |
|
16 | 16 | |
|
17 | 17 | HERE = os.path.abspath(os.path.dirname(__file__)) |
|
18 | 18 | |
|
19 | 19 | SOURCES = ['zstd/%s' % p for p in ( |
|
20 | 'common/debug.c', | |
|
20 | 21 | 'common/entropy_common.c', |
|
21 | 22 | 'common/error_private.c', |
|
22 | 23 | 'common/fse_decompress.c', |
|
23 | 24 | 'common/pool.c', |
|
24 | 25 | 'common/threading.c', |
|
25 | 26 | 'common/xxhash.c', |
|
26 | 27 | 'common/zstd_common.c', |
|
27 | 28 | 'compress/fse_compress.c', |
|
29 | 'compress/hist.c', | |
|
28 | 30 | 'compress/huf_compress.c', |
|
29 | 31 | 'compress/zstd_compress.c', |
|
30 | 32 | 'compress/zstd_double_fast.c', |
|
31 | 33 | 'compress/zstd_fast.c', |
|
32 | 34 | 'compress/zstd_lazy.c', |
|
33 | 35 | 'compress/zstd_ldm.c', |
|
34 | 36 | 'compress/zstd_opt.c', |
|
35 | 37 | 'compress/zstdmt_compress.c', |
|
36 | 38 | 'decompress/huf_decompress.c', |
|
37 | 39 | 'decompress/zstd_decompress.c', |
|
38 | 40 | 'dictBuilder/cover.c', |
|
41 | 'dictBuilder/fastcover.c', | |
|
39 | 42 | 'dictBuilder/divsufsort.c', |
|
40 | 43 | 'dictBuilder/zdict.c', |
|
41 | 44 | )] |
|
42 | 45 | |
|
43 | 46 | # Headers whose preprocessed output will be fed into cdef(). |
|
44 | 47 | HEADERS = [os.path.join(HERE, 'zstd', *p) for p in ( |
|
45 | 48 | ('zstd.h',), |
|
46 | 49 | ('dictBuilder', 'zdict.h'), |
|
47 | 50 | )] |
|
48 | 51 | |
|
49 | 52 | INCLUDE_DIRS = [os.path.join(HERE, d) for d in ( |
|
50 | 53 | 'zstd', |
|
51 | 54 | 'zstd/common', |
|
52 | 55 | 'zstd/compress', |
|
53 | 56 | 'zstd/decompress', |
|
54 | 57 | 'zstd/dictBuilder', |
|
55 | 58 | )] |
|
56 | 59 | |
|
57 | 60 | # cffi can't parse some of the primitives in zstd.h. So we invoke the |
|
58 | 61 | # preprocessor and feed its output into cffi. |
|
59 | 62 | compiler = distutils.ccompiler.new_compiler() |
|
60 | 63 | |
|
61 | 64 | # Needed for MSVC. |
|
62 | 65 | if hasattr(compiler, 'initialize'): |
|
63 | 66 | compiler.initialize() |
|
64 | 67 | |
|
65 | 68 | # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor |
|
66 | 69 | # manually. |
|
67 | 70 | if compiler.compiler_type == 'unix': |
|
68 | 71 | args = list(compiler.executables['compiler']) |
|
69 | 72 | args.extend([ |
|
70 | 73 | '-E', |
|
71 | 74 | '-DZSTD_STATIC_LINKING_ONLY', |
|
72 | 75 | '-DZDICT_STATIC_LINKING_ONLY', |
|
73 | 76 | ]) |
|
74 | 77 | elif compiler.compiler_type == 'msvc': |
|
75 | 78 | args = [compiler.cc] |
|
76 | 79 | args.extend([ |
|
77 | 80 | '/EP', |
|
78 | 81 | '/DZSTD_STATIC_LINKING_ONLY', |
|
79 | 82 | '/DZDICT_STATIC_LINKING_ONLY', |
|
80 | 83 | ]) |
|
81 | 84 | else: |
|
82 | 85 | raise Exception('unsupported compiler type: %s' % compiler.compiler_type) |
|
83 | 86 | |
|
84 | 87 | def preprocess(path): |
|
85 | 88 | with open(path, 'rb') as fh: |
|
86 | 89 | lines = [] |
|
87 | 90 | it = iter(fh) |
|
88 | 91 | |
|
89 | 92 | for l in it: |
|
90 | 93 | # zstd.h includes <stddef.h>, which is also included by cffi's |
|
91 | 94 | # boilerplate. This can lead to duplicate declarations. So we strip |
|
92 | 95 | # this include from the preprocessor invocation. |
|
93 | 96 | # |
|
 94 | 97 | # The same thing happens for including zstd.h, so give it the same |
|
95 | 98 | # treatment. |
|
96 | 99 | # |
|
97 | 100 | # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline |
|
98 | 101 | # #define in zstdmt_compress.h and results in a compiler warning. So drop |
|
99 | 102 | # the inline #define. |
|
100 | 103 | if l.startswith((b'#include <stddef.h>', |
|
101 | 104 | b'#include "zstd.h"', |
|
102 | 105 | b'#define ZSTD_STATIC_LINKING_ONLY')): |
|
103 | 106 | continue |
|
104 | 107 | |
|
105 | 108 | # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't |
|
106 | 109 | # important so just filter it out. |
|
107 | 110 | if l.startswith(b'ZSTDLIB_API'): |
|
108 | 111 | l = l[len(b'ZSTDLIB_API '):] |
|
109 | 112 | |
|
110 | 113 | lines.append(l) |
|
111 | 114 | |
|
112 | 115 | fd, input_file = tempfile.mkstemp(suffix='.h') |
|
113 | 116 | os.write(fd, b''.join(lines)) |
|
114 | 117 | os.close(fd) |
|
115 | 118 | |
|
116 | 119 | try: |
|
117 | 120 | process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE) |
|
118 | 121 | output = process.communicate()[0] |
|
119 | 122 | ret = process.poll() |
|
120 | 123 | if ret: |
|
121 | 124 | raise Exception('preprocessor exited with error') |
|
122 | 125 | |
|
123 | 126 | return output |
|
124 | 127 | finally: |
|
125 | 128 | os.unlink(input_file) |
|
126 | 129 | |
|
127 | 130 | |
|
128 | 131 | def normalize_output(output): |
|
129 | 132 | lines = [] |
|
130 | 133 | for line in output.splitlines(): |
|
131 | 134 | # CFFI's parser doesn't like __attribute__ on UNIX compilers. |
|
132 | 135 | if line.startswith(b'__attribute__ ((visibility ("default"))) '): |
|
133 | 136 | line = line[len(b'__attribute__ ((visibility ("default"))) '):] |
|
134 | 137 | |
|
135 | 138 | if line.startswith(b'__attribute__((deprecated('): |
|
136 | 139 | continue |
|
137 | 140 | elif b'__declspec(deprecated(' in line: |
|
138 | 141 | continue |
|
139 | 142 | |
|
140 | 143 | lines.append(line) |
|
141 | 144 | |
|
142 | 145 | return b'\n'.join(lines) |
|
143 | 146 | |
|
144 | 147 | |
|
145 | 148 | ffi = cffi.FFI() |
|
 146 | 149 | # zstd.h uses a possibly undefined MIN(). Define it until |
|
147 | 150 | # https://github.com/facebook/zstd/issues/976 is fixed. |
|
148 | 151 | # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning |
|
149 | 152 | # when cffi uses the function. Since we statically link against zstd, even |
|
150 | 153 | # if we use the deprecated functions it shouldn't be a huge problem. |
|
151 | 154 | ffi.set_source('_zstd_cffi', ''' |
|
152 | 155 | #define MIN(a,b) ((a)<(b) ? (a) : (b)) |
|
153 | 156 | #define ZSTD_STATIC_LINKING_ONLY |
|
154 | 157 | #include <zstd.h> |
|
155 | 158 | #define ZDICT_STATIC_LINKING_ONLY |
|
156 | 159 | #define ZDICT_DISABLE_DEPRECATE_WARNINGS |
|
157 | 160 | #include <zdict.h> |
|
158 | 161 | ''', sources=SOURCES, |
|
159 | 162 | include_dirs=INCLUDE_DIRS, |
|
160 | 163 | extra_compile_args=['-DZSTD_MULTITHREAD']) |
|
161 | 164 | |
|
162 | 165 | DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ') |
|
163 | 166 | |
|
164 | 167 | sources = [] |
|
165 | 168 | |
|
166 | 169 | # Feed normalized preprocessor output for headers into the cdef parser. |
|
167 | 170 | for header in HEADERS: |
|
168 | 171 | preprocessed = preprocess(header) |
|
169 | 172 | sources.append(normalize_output(preprocessed)) |
|
170 | 173 | |
|
 171 | 174 | # #define's are effectively erased as part of going through the preprocessor. |
|
172 | 175 | # So perform a manual pass to re-add those to the cdef source. |
|
173 | 176 | with open(header, 'rb') as fh: |
|
174 | 177 | for line in fh: |
|
175 | 178 | line = line.strip() |
|
176 | 179 | m = DEFINE.match(line) |
|
177 | 180 | if not m: |
|
178 | 181 | continue |
|
179 | 182 | |
|
180 | 183 | if m.group(1) == b'ZSTD_STATIC_LINKING_ONLY': |
|
181 | 184 | continue |
|
182 | 185 | |
|
183 | 186 | # The parser doesn't like some constants with complex values. |
|
184 | 187 | if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'): |
|
185 | 188 | continue |
|
186 | 189 | |
|
 187 | 190 | # The ... is magic syntax that tells the cdef parser to resolve the |
|
188 | 191 | # value at compile time. |
|
189 | 192 | sources.append(m.group(0) + b' ...') |
|
190 | 193 | |
|
191 | 194 | cdeflines = b'\n'.join(sources).splitlines() |
|
192 | 195 | cdeflines = [l for l in cdeflines if l.strip()] |
|
193 | 196 | ffi.cdef(b'\n'.join(cdeflines).decode('latin1')) |
|
194 | 197 | |
|
195 | 198 | if __name__ == '__main__': |
|
196 | 199 | ffi.compile() |
@@ -1,160 +1,188 | |||
|
1 | 1 | # Copyright (c) 2016-present, Gregory Szorc |
|
2 | 2 | # All rights reserved. |
|
3 | 3 | # |
|
4 | 4 | # This software may be modified and distributed under the terms |
|
5 | 5 | # of the BSD license. See the LICENSE file for details. |
|
6 | 6 | |
|
7 | 7 | import distutils.ccompiler |
|
8 | 8 | import os |
|
9 | import sys | |
|
10 | 9 | |
|
11 | 10 | from distutils.extension import Extension |
|
12 | 11 | |
|
13 | 12 | |
|
14 | 13 | zstd_sources = ['zstd/%s' % p for p in ( |
|
14 | 'common/debug.c', | |
|
15 | 15 | 'common/entropy_common.c', |
|
16 | 16 | 'common/error_private.c', |
|
17 | 17 | 'common/fse_decompress.c', |
|
18 | 18 | 'common/pool.c', |
|
19 | 19 | 'common/threading.c', |
|
20 | 20 | 'common/xxhash.c', |
|
21 | 21 | 'common/zstd_common.c', |
|
22 | 22 | 'compress/fse_compress.c', |
|
23 | 'compress/hist.c', | |
|
23 | 24 | 'compress/huf_compress.c', |
|
24 | 25 | 'compress/zstd_compress.c', |
|
25 | 26 | 'compress/zstd_double_fast.c', |
|
26 | 27 | 'compress/zstd_fast.c', |
|
27 | 28 | 'compress/zstd_lazy.c', |
|
28 | 29 | 'compress/zstd_ldm.c', |
|
29 | 30 | 'compress/zstd_opt.c', |
|
30 | 31 | 'compress/zstdmt_compress.c', |
|
31 | 32 | 'decompress/huf_decompress.c', |
|
32 | 33 | 'decompress/zstd_decompress.c', |
|
33 | 34 | 'dictBuilder/cover.c', |
|
34 | 35 | 'dictBuilder/divsufsort.c', |
|
36 | 'dictBuilder/fastcover.c', | |
|
35 | 37 | 'dictBuilder/zdict.c', |
|
36 | 38 | )] |
|
37 | 39 | |
|
38 | 40 | zstd_sources_legacy = ['zstd/%s' % p for p in ( |
|
39 | 41 | 'deprecated/zbuff_common.c', |
|
40 | 42 | 'deprecated/zbuff_compress.c', |
|
41 | 43 | 'deprecated/zbuff_decompress.c', |
|
42 | 44 | 'legacy/zstd_v01.c', |
|
43 | 45 | 'legacy/zstd_v02.c', |
|
44 | 46 | 'legacy/zstd_v03.c', |
|
45 | 47 | 'legacy/zstd_v04.c', |
|
46 | 48 | 'legacy/zstd_v05.c', |
|
47 | 49 | 'legacy/zstd_v06.c', |
|
48 | 50 | 'legacy/zstd_v07.c' |
|
49 | 51 | )] |
|
50 | 52 | |
|
51 | 53 | zstd_includes = [ |
|
52 | 54 | 'zstd', |
|
53 | 55 | 'zstd/common', |
|
54 | 56 | 'zstd/compress', |
|
55 | 57 | 'zstd/decompress', |
|
56 | 58 | 'zstd/dictBuilder', |
|
57 | 59 | ] |
|
58 | 60 | |
|
59 | 61 | zstd_includes_legacy = [ |
|
60 | 62 | 'zstd/deprecated', |
|
61 | 63 | 'zstd/legacy', |
|
62 | 64 | ] |
|
63 | 65 | |
|
64 | 66 | ext_includes = [ |
|
65 | 67 | 'c-ext', |
|
66 | 68 | 'zstd/common', |
|
67 | 69 | ] |
|
68 | 70 | |
|
69 | 71 | ext_sources = [ |
|
70 | 72 | 'zstd/common/pool.c', |
|
71 | 73 | 'zstd/common/threading.c', |
|
72 | 74 | 'zstd.c', |
|
73 | 75 | 'c-ext/bufferutil.c', |
|
74 | 76 | 'c-ext/compressiondict.c', |
|
75 | 77 | 'c-ext/compressobj.c', |
|
76 | 78 | 'c-ext/compressor.c', |
|
77 | 79 | 'c-ext/compressoriterator.c', |
|
80 | 'c-ext/compressionchunker.c', | |
|
78 | 81 | 'c-ext/compressionparams.c', |
|
79 | 82 | 'c-ext/compressionreader.c', |
|
80 | 83 | 'c-ext/compressionwriter.c', |
|
81 | 84 | 'c-ext/constants.c', |
|
82 | 85 | 'c-ext/decompressobj.c', |
|
83 | 86 | 'c-ext/decompressor.c', |
|
84 | 87 | 'c-ext/decompressoriterator.c', |
|
85 | 88 | 'c-ext/decompressionreader.c', |
|
86 | 89 | 'c-ext/decompressionwriter.c', |
|
87 | 90 | 'c-ext/frameparams.c', |
|
88 | 91 | ] |
|
89 | 92 | |
|
90 | 93 | zstd_depends = [ |
|
91 | 94 | 'c-ext/python-zstandard.h', |
|
92 | 95 | ] |
|
93 | 96 | |
|
94 | 97 | |
|
95 | 98 | def get_c_extension(support_legacy=False, system_zstd=False, name='zstd', |
|
96 | warnings_as_errors=False): | |
|
97 | """Obtain a distutils.extension.Extension for the C extension.""" | 
|
98 | root = os.path.abspath(os.path.dirname(__file__)) | |
|
99 | warnings_as_errors=False, root=None): | |
|
100 | """Obtain a distutils.extension.Extension for the C extension. | |
|
101 | ||
|
102 | ``support_legacy`` controls whether to compile in legacy zstd format support. | |
|
103 | ||
|
104 | ``system_zstd`` controls whether to compile against the system zstd library. | |
|
105 | For this to work, the system zstd library and headers must match what | |
|
106 | python-zstandard is coded against exactly. | |
|
107 | ||
|
108 | ``name`` is the module name of the C extension to produce. | |
|
109 | ||
|
110 | ``warnings_as_errors`` controls whether compiler warnings are turned into | |
|
111 | compiler errors. | |
|
99 | 112 |
|
|
100 | sources = set([os.path.join(root, p) for p in ext_sources]) | |
|
113 | ``root`` defines a root path that source should be computed as relative | |
|
114 | to. This should be the directory with the main ``setup.py`` that is | |
|
115 | being invoked. If not defined, paths will be relative to this file. | |
|
116 | """ | |
|
117 | actual_root = os.path.abspath(os.path.dirname(__file__)) | |
|
118 | root = root or actual_root | |
|
119 | ||
|
120 | sources = set([os.path.join(actual_root, p) for p in ext_sources]) | |
|
101 | 121 | if not system_zstd: |
|
102 | sources.update([os.path.join(root, p) for p in zstd_sources]) | |
|
122 | sources.update([os.path.join(actual_root, p) for p in zstd_sources]) | |
|
103 | 123 | if support_legacy: |
|
104 |
sources.update([os.path.join(root, p) |
|
|
124 | sources.update([os.path.join(actual_root, p) | |
|
125 | for p in zstd_sources_legacy]) | |
|
105 | 126 | sources = list(sources) |
|
106 | 127 | |
|
107 | include_dirs = set([os.path.join(root, d) for d in ext_includes]) | |
|
128 | include_dirs = set([os.path.join(actual_root, d) for d in ext_includes]) | |
|
108 | 129 | if not system_zstd: |
|
109 |
include_dirs.update([os.path.join(root, d) |
|
|
130 | include_dirs.update([os.path.join(actual_root, d) | |
|
131 | for d in zstd_includes]) | |
|
110 | 132 | if support_legacy: |
|
111 |
include_dirs.update([os.path.join(root, d) |
|
|
133 | include_dirs.update([os.path.join(actual_root, d) | |
|
134 | for d in zstd_includes_legacy]) | |
|
112 | 135 | include_dirs = list(include_dirs) |
|
113 | 136 | |
|
114 | depends = [os.path.join(root, p) for p in zstd_depends] | |
|
137 | depends = [os.path.join(actual_root, p) for p in zstd_depends] | |
|
115 | 138 | |
|
116 | 139 | compiler = distutils.ccompiler.new_compiler() |
|
117 | 140 | |
|
118 | 141 | # Needed for MSVC. |
|
119 | 142 | if hasattr(compiler, 'initialize'): |
|
120 | 143 | compiler.initialize() |
|
121 | 144 | |
|
122 | 145 | if compiler.compiler_type == 'unix': |
|
123 | 146 | compiler_type = 'unix' |
|
124 | 147 | elif compiler.compiler_type == 'msvc': |
|
125 | 148 | compiler_type = 'msvc' |
|
126 | 149 | elif compiler.compiler_type == 'mingw32': |
|
127 | 150 | compiler_type = 'mingw32' |
|
128 | 151 | else: |
|
129 | 152 | raise Exception('unhandled compiler type: %s' % |
|
130 | 153 | compiler.compiler_type) |
|
131 | 154 | |
|
132 | 155 | extra_args = ['-DZSTD_MULTITHREAD'] |
|
133 | 156 | |
|
134 | 157 | if not system_zstd: |
|
135 | 158 | extra_args.append('-DZSTDLIB_VISIBILITY=') |
|
136 | 159 | extra_args.append('-DZDICTLIB_VISIBILITY=') |
|
137 | 160 | extra_args.append('-DZSTDERRORLIB_VISIBILITY=') |
|
138 | 161 | |
|
139 | 162 | if compiler_type == 'unix': |
|
140 | 163 | extra_args.append('-fvisibility=hidden') |
|
141 | 164 | |
|
142 | 165 | if not system_zstd and support_legacy: |
|
143 | 166 | extra_args.append('-DZSTD_LEGACY_SUPPORT=1') |
|
144 | 167 | |
|
145 | 168 | if warnings_as_errors: |
|
146 | 169 | if compiler_type in ('unix', 'mingw32'): |
|
147 | 170 | extra_args.append('-Werror') |
|
148 | 171 | elif compiler_type == 'msvc': |
|
149 | 172 | extra_args.append('/WX') |
|
150 | 173 | else: |
|
151 | 174 | assert False |
|
152 | 175 | |
|
153 | 176 | libraries = ['zstd'] if system_zstd else [] |
|
154 | 177 | |
|
178 | # Python 3.7 doesn't like absolute paths. So normalize to relative. | |
|
179 | sources = [os.path.relpath(p, root) for p in sources] | |
|
180 | include_dirs = [os.path.relpath(p, root) for p in include_dirs] | |
|
181 | depends = [os.path.relpath(p, root) for p in depends] | |
|
182 | ||
|
155 | 183 | # TODO compile with optimizations. |
|
156 | 184 | return Extension(name, sources, |
|
157 | 185 | include_dirs=include_dirs, |
|
158 | 186 | depends=depends, |
|
159 | 187 | extra_compile_args=extra_args, |
|
160 | 188 | libraries=libraries) |
@@ -1,1266 +1,1463 | |||
|
1 | 1 | import hashlib |
|
2 | 2 | import io |
|
3 | 3 | import struct |
|
4 | 4 | import sys |
|
5 | 5 | import tarfile |
|
6 | 6 | import unittest |
|
7 | 7 | |
|
8 | 8 | import zstandard as zstd |
|
9 | 9 | |
|
10 | 10 | from .common import ( |
|
11 | 11 | make_cffi, |
|
12 | 12 | OpCountingBytesIO, |
|
13 | 13 | ) |
|
14 | 14 | |
|
15 | 15 | |
|
16 | 16 | if sys.version_info[0] >= 3: |
|
17 | 17 | next = lambda it: it.__next__() |
|
18 | 18 | else: |
|
19 | 19 | next = lambda it: it.next() |
|
20 | 20 | |
|
21 | 21 | |
|
22 | 22 | def multithreaded_chunk_size(level, source_size=0): |
|
23 | 23 | params = zstd.ZstdCompressionParameters.from_level(level, |
|
24 | 24 | source_size=source_size) |
|
25 | 25 | |
|
26 | 26 | return 1 << (params.window_log + 2) |
|
27 | 27 | |
|
28 | 28 | |
|
29 | 29 | @make_cffi |
|
30 | 30 | class TestCompressor(unittest.TestCase): |
|
31 | 31 | def test_level_bounds(self): |
|
32 | 32 | with self.assertRaises(ValueError): |
|
33 | 33 | zstd.ZstdCompressor(level=23) |
|
34 | 34 | |
|
35 | 35 | def test_memory_size(self): |
|
36 | 36 | cctx = zstd.ZstdCompressor(level=1) |
|
37 | 37 | self.assertGreater(cctx.memory_size(), 100) |
|
38 | 38 | |
|
39 | 39 | |
|
40 | 40 | @make_cffi |
|
41 | 41 | class TestCompressor_compress(unittest.TestCase): |
|
42 | 42 | def test_compress_empty(self): |
|
43 | 43 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
44 | 44 | result = cctx.compress(b'') |
|
45 | 45 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
46 | 46 | params = zstd.get_frame_parameters(result) |
|
47 | 47 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
48 | 48 | self.assertEqual(params.window_size, 524288) |
|
49 | 49 | self.assertEqual(params.dict_id, 0) |
|
50 | 50 | self.assertFalse(params.has_checksum, 0) |
|
51 | 51 | |
|
52 | 52 | cctx = zstd.ZstdCompressor() |
|
53 | 53 | result = cctx.compress(b'') |
|
54 | 54 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00') |
|
55 | 55 | params = zstd.get_frame_parameters(result) |
|
56 | 56 | self.assertEqual(params.content_size, 0) |
|
57 | 57 | |
|
58 | 58 | def test_input_types(self): |
|
59 | 59 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
60 | 60 | expected = b'\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f' |
|
61 | 61 | |
|
62 | 62 | mutable_array = bytearray(3) |
|
63 | 63 | mutable_array[:] = b'foo' |
|
64 | 64 | |
|
65 | 65 | sources = [ |
|
66 | 66 | memoryview(b'foo'), |
|
67 | 67 | bytearray(b'foo'), |
|
68 | 68 | mutable_array, |
|
69 | 69 | ] |
|
70 | 70 | |
|
71 | 71 | for source in sources: |
|
72 | 72 | self.assertEqual(cctx.compress(source), expected) |
|
73 | 73 | |
|
74 | 74 | def test_compress_large(self): |
|
75 | 75 | chunks = [] |
|
76 | 76 | for i in range(255): |
|
77 | 77 | chunks.append(struct.Struct('>B').pack(i) * 16384) |
|
78 | 78 | |
|
79 | 79 | cctx = zstd.ZstdCompressor(level=3, write_content_size=False) |
|
80 | 80 | result = cctx.compress(b''.join(chunks)) |
|
81 | 81 | self.assertEqual(len(result), 999) |
|
82 | 82 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') |
|
83 | 83 | |
|
84 | 84 | # This matches the test for read_to_iter() below. |
|
85 | 85 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
86 | 86 | result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o') |
|
87 | 87 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00' |
|
88 | 88 | b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0' |
|
89 | 89 | b'\x02\x09\x00\x00\x6f') |
|
90 | 90 | |
|
91 | 91 | def test_negative_level(self): |
|
92 | 92 | cctx = zstd.ZstdCompressor(level=-4) |
|
93 | 93 | result = cctx.compress(b'foo' * 256) |
|
94 | 94 | |
|
95 | 95 | def test_no_magic(self): |
|
96 | 96 | params = zstd.ZstdCompressionParameters.from_level( |
|
97 | 97 | 1, format=zstd.FORMAT_ZSTD1) |
|
98 | 98 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
99 | 99 | magic = cctx.compress(b'foobar') |
|
100 | 100 | |
|
101 | 101 | params = zstd.ZstdCompressionParameters.from_level( |
|
102 | 102 | 1, format=zstd.FORMAT_ZSTD1_MAGICLESS) |
|
103 | 103 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
104 | 104 | no_magic = cctx.compress(b'foobar') |
|
105 | 105 | |
|
106 | 106 | self.assertEqual(magic[0:4], b'\x28\xb5\x2f\xfd') |
|
107 | 107 | self.assertEqual(magic[4:], no_magic) |
|
108 | 108 | |
|
109 | 109 | def test_write_checksum(self): |
|
110 | 110 | cctx = zstd.ZstdCompressor(level=1) |
|
111 | 111 | no_checksum = cctx.compress(b'foobar') |
|
112 | 112 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
113 | 113 | with_checksum = cctx.compress(b'foobar') |
|
114 | 114 | |
|
115 | 115 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) |
|
116 | 116 | |
|
117 | 117 | no_params = zstd.get_frame_parameters(no_checksum) |
|
118 | 118 | with_params = zstd.get_frame_parameters(with_checksum) |
|
119 | 119 | |
|
120 | 120 | self.assertFalse(no_params.has_checksum) |
|
121 | 121 | self.assertTrue(with_params.has_checksum) |
|
122 | 122 | |
|
123 | 123 | def test_write_content_size(self): |
|
124 | 124 | cctx = zstd.ZstdCompressor(level=1) |
|
125 | 125 | with_size = cctx.compress(b'foobar' * 256) |
|
126 | 126 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
127 | 127 | no_size = cctx.compress(b'foobar' * 256) |
|
128 | 128 | |
|
129 | 129 | self.assertEqual(len(with_size), len(no_size) + 1) |
|
130 | 130 | |
|
131 | 131 | no_params = zstd.get_frame_parameters(no_size) |
|
132 | 132 | with_params = zstd.get_frame_parameters(with_size) |
|
133 | 133 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
134 | 134 | self.assertEqual(with_params.content_size, 1536) |
|
135 | 135 | |
|
136 | 136 | def test_no_dict_id(self): |
|
137 | 137 | samples = [] |
|
138 | 138 | for i in range(128): |
|
139 | 139 | samples.append(b'foo' * 64) |
|
140 | 140 | samples.append(b'bar' * 64) |
|
141 | 141 | samples.append(b'foobar' * 64) |
|
142 | 142 | |
|
143 | 143 | d = zstd.train_dictionary(1024, samples) |
|
144 | 144 | |
|
145 | 145 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
146 | 146 | with_dict_id = cctx.compress(b'foobarfoobar') |
|
147 | 147 | |
|
148 | 148 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) |
|
149 | 149 | no_dict_id = cctx.compress(b'foobarfoobar') |
|
150 | 150 | |
|
151 | 151 | self.assertEqual(len(with_dict_id), len(no_dict_id) + 4) |
|
152 | 152 | |
|
153 | 153 | no_params = zstd.get_frame_parameters(no_dict_id) |
|
154 | 154 | with_params = zstd.get_frame_parameters(with_dict_id) |
|
155 | 155 | self.assertEqual(no_params.dict_id, 0) |
|
156 | self.assertEqual(with_params.dict_id, 1
|
156 | self.assertEqual(with_params.dict_id, 1880053135) | |
|
157 | 157 | |
|
158 | 158 | def test_compress_dict_multiple(self): |
|
159 | 159 | samples = [] |
|
160 | 160 | for i in range(128): |
|
161 | 161 | samples.append(b'foo' * 64) |
|
162 | 162 | samples.append(b'bar' * 64) |
|
163 | 163 | samples.append(b'foobar' * 64) |
|
164 | 164 | |
|
165 | 165 | d = zstd.train_dictionary(8192, samples) |
|
166 | 166 | |
|
167 | 167 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
168 | 168 | |
|
169 | 169 | for i in range(32): |
|
170 | 170 | cctx.compress(b'foo bar foobar foo bar foobar') |
|
171 | 171 | |
|
172 | 172 | def test_dict_precompute(self): |
|
173 | 173 | samples = [] |
|
174 | 174 | for i in range(128): |
|
175 | 175 | samples.append(b'foo' * 64) |
|
176 | 176 | samples.append(b'bar' * 64) |
|
177 | 177 | samples.append(b'foobar' * 64) |
|
178 | 178 | |
|
179 | 179 | d = zstd.train_dictionary(8192, samples) |
|
180 | 180 | d.precompute_compress(level=1) |
|
181 | 181 | |
|
182 | 182 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
183 | 183 | |
|
184 | 184 | for i in range(32): |
|
185 | 185 | cctx.compress(b'foo bar foobar foo bar foobar') |
|
186 | 186 | |
|
187 | 187 | def test_multithreaded(self): |
|
188 | 188 | chunk_size = multithreaded_chunk_size(1) |
|
189 | 189 | source = b''.join([b'x' * chunk_size, b'y' * chunk_size]) |
|
190 | 190 | |
|
191 | 191 | cctx = zstd.ZstdCompressor(level=1, threads=2) |
|
192 | 192 | compressed = cctx.compress(source) |
|
193 | 193 | |
|
194 | 194 | params = zstd.get_frame_parameters(compressed) |
|
195 | 195 | self.assertEqual(params.content_size, chunk_size * 2) |
|
196 | 196 | self.assertEqual(params.dict_id, 0) |
|
197 | 197 | self.assertFalse(params.has_checksum) |
|
198 | 198 | |
|
199 | 199 | dctx = zstd.ZstdDecompressor() |
|
200 | 200 | self.assertEqual(dctx.decompress(compressed), source) |
|
201 | 201 | |
|
202 | 202 | def test_multithreaded_dict(self): |
|
203 | 203 | samples = [] |
|
204 | 204 | for i in range(128): |
|
205 | 205 | samples.append(b'foo' * 64) |
|
206 | 206 | samples.append(b'bar' * 64) |
|
207 | 207 | samples.append(b'foobar' * 64) |
|
208 | 208 | |
|
209 | 209 | d = zstd.train_dictionary(1024, samples) |
|
210 | 210 | |
|
211 | 211 | cctx = zstd.ZstdCompressor(dict_data=d, threads=2) |
|
212 | 212 | |
|
213 | 213 | result = cctx.compress(b'foo') |
|
214 | 214 | params = zstd.get_frame_parameters(result); |
|
215 | 215 | self.assertEqual(params.content_size, 3); |
|
216 | 216 | self.assertEqual(params.dict_id, d.dict_id()) |
|
217 | 217 | |
|
218 | 218 | self.assertEqual(result, |
|
219 | b'\x28\xb5\x2f\xfd\x23\x
|
219 | b'\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00' | |
|
220 | 220 | b'\x66\x6f\x6f') |
|
221 | 221 | |
|
222 | 222 | def test_multithreaded_compression_params(self): |
|
223 | 223 | params = zstd.ZstdCompressionParameters.from_level(0, threads=2) |
|
224 | 224 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
225 | 225 | |
|
226 | 226 | result = cctx.compress(b'foo') |
|
227 | 227 | params = zstd.get_frame_parameters(result); |
|
228 | 228 | self.assertEqual(params.content_size, 3); |
|
229 | 229 | |
|
230 | 230 | self.assertEqual(result, |
|
231 | 231 | b'\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f') |
|
232 | 232 | |
|
233 | 233 | |
|
234 | 234 | @make_cffi |
|
235 | 235 | class TestCompressor_compressobj(unittest.TestCase): |
|
236 | 236 | def test_compressobj_empty(self): |
|
237 | 237 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
238 | 238 | cobj = cctx.compressobj() |
|
239 | 239 | self.assertEqual(cobj.compress(b''), b'') |
|
240 | 240 | self.assertEqual(cobj.flush(), |
|
241 | 241 | b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
242 | 242 | |
|
243 | 243 | def test_input_types(self): |
|
244 | 244 | expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f' |
|
245 | 245 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
246 | 246 | |
|
247 | 247 | mutable_array = bytearray(3) |
|
248 | 248 | mutable_array[:] = b'foo' |
|
249 | 249 | |
|
250 | 250 | sources = [ |
|
251 | 251 | memoryview(b'foo'), |
|
252 | 252 | bytearray(b'foo'), |
|
253 | 253 | mutable_array, |
|
254 | 254 | ] |
|
255 | 255 | |
|
256 | 256 | for source in sources: |
|
257 | 257 | cobj = cctx.compressobj() |
|
258 | 258 | self.assertEqual(cobj.compress(source), b'') |
|
259 | 259 | self.assertEqual(cobj.flush(), expected) |
|
260 | 260 | |
|
261 | 261 | def test_compressobj_large(self): |
|
262 | 262 | chunks = [] |
|
263 | 263 | for i in range(255): |
|
264 | 264 | chunks.append(struct.Struct('>B').pack(i) * 16384) |
|
265 | 265 | |
|
266 | 266 | cctx = zstd.ZstdCompressor(level=3) |
|
267 | 267 | cobj = cctx.compressobj() |
|
268 | 268 | |
|
269 | 269 | result = cobj.compress(b''.join(chunks)) + cobj.flush() |
|
270 | 270 | self.assertEqual(len(result), 999) |
|
271 | 271 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') |
|
272 | 272 | |
|
273 | 273 | params = zstd.get_frame_parameters(result) |
|
274 | 274 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
275 | 275 | self.assertEqual(params.window_size, 1048576) |
|
276 | 276 | self.assertEqual(params.dict_id, 0) |
|
277 | 277 | self.assertFalse(params.has_checksum) |
|
278 | 278 | |
|
279 | 279 | def test_write_checksum(self): |
|
280 | 280 | cctx = zstd.ZstdCompressor(level=1) |
|
281 | 281 | cobj = cctx.compressobj() |
|
282 | 282 | no_checksum = cobj.compress(b'foobar') + cobj.flush() |
|
283 | 283 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
284 | 284 | cobj = cctx.compressobj() |
|
285 | 285 | with_checksum = cobj.compress(b'foobar') + cobj.flush() |
|
286 | 286 | |
|
287 | 287 | no_params = zstd.get_frame_parameters(no_checksum) |
|
288 | 288 | with_params = zstd.get_frame_parameters(with_checksum) |
|
289 | 289 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
290 | 290 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
291 | 291 | self.assertEqual(no_params.dict_id, 0) |
|
292 | 292 | self.assertEqual(with_params.dict_id, 0) |
|
293 | 293 | self.assertFalse(no_params.has_checksum) |
|
294 | 294 | self.assertTrue(with_params.has_checksum) |
|
295 | 295 | |
|
296 | 296 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) |
|
297 | 297 | |
|
298 | 298 | def test_write_content_size(self): |
|
299 | 299 | cctx = zstd.ZstdCompressor(level=1) |
|
300 | 300 | cobj = cctx.compressobj(size=len(b'foobar' * 256)) |
|
301 | 301 | with_size = cobj.compress(b'foobar' * 256) + cobj.flush() |
|
302 | 302 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
303 | 303 | cobj = cctx.compressobj(size=len(b'foobar' * 256)) |
|
304 | 304 | no_size = cobj.compress(b'foobar' * 256) + cobj.flush() |
|
305 | 305 | |
|
306 | 306 | no_params = zstd.get_frame_parameters(no_size) |
|
307 | 307 | with_params = zstd.get_frame_parameters(with_size) |
|
308 | 308 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
309 | 309 | self.assertEqual(with_params.content_size, 1536) |
|
310 | 310 | self.assertEqual(no_params.dict_id, 0) |
|
311 | 311 | self.assertEqual(with_params.dict_id, 0) |
|
312 | 312 | self.assertFalse(no_params.has_checksum) |
|
313 | 313 | self.assertFalse(with_params.has_checksum) |
|
314 | 314 | |
|
315 | 315 | self.assertEqual(len(with_size), len(no_size) + 1) |
|
316 | 316 | |
|
317 | 317 | def test_compress_after_finished(self): |
|
318 | 318 | cctx = zstd.ZstdCompressor() |
|
319 | 319 | cobj = cctx.compressobj() |
|
320 | 320 | |
|
321 | 321 | cobj.compress(b'foo') |
|
322 | 322 | cobj.flush() |
|
323 | 323 | |
|
324 | 324 | with self.assertRaisesRegexp(zstd.ZstdError, 'cannot call compress\(\) after compressor'): |
|
325 | 325 | cobj.compress(b'foo') |
|
326 | 326 | |
|
327 | 327 | with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'): |
|
328 | 328 | cobj.flush() |
|
329 | 329 | |
|
330 | 330 | def test_flush_block_repeated(self): |
|
331 | 331 | cctx = zstd.ZstdCompressor(level=1) |
|
332 | 332 | cobj = cctx.compressobj() |
|
333 | 333 | |
|
334 | 334 | self.assertEqual(cobj.compress(b'foo'), b'') |
|
335 | 335 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), |
|
336 | 336 | b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo') |
|
337 | 337 | self.assertEqual(cobj.compress(b'bar'), b'') |
|
338 | 338 | # 3 byte header plus content. |
|
339 | self.assertEqual(cobj.flush(),
|
339 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), | |
|
340 | b'\x18\x00\x00bar') | |
|
341 | self.assertEqual(cobj.flush(), b'\x01\x00\x00') | |
|
340 | 342 | |
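
The byte literals asserted in the rewritten test above are plain zstd block framing: each flushed block starts with a 3-byte little-endian header (bit 0 is the last-block flag, bits 1-2 the block type, the remaining bits the content size), followed by the block content, and the final flush() emits an empty last block that closes the frame. A small standalone decoder for the two literals (illustrative only, not part of the test suite):

    import struct

    def decode_block_header(raw):
        # zstd block header: 3 bytes, little-endian.
        value = struct.unpack('<I', raw + b'\x00')[0]
        return value & 1, (value >> 1) & 3, value >> 3  # last, type (0 = raw), size

    print(decode_block_header(b'\x18\x00\x00'))  # (0, 0, 3): not last, raw, 3 bytes -> b'bar'
    print(decode_block_header(b'\x01\x00\x00'))  # (1, 0, 0): last, raw, empty -> ends the frame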
|
341 | 343 | def test_flush_empty_block(self): |
|
342 | 344 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
343 | 345 | cobj = cctx.compressobj() |
|
344 | 346 | |
|
345 | 347 | cobj.compress(b'foobar') |
|
346 | 348 | cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) |
|
347 | 349 | # No-op if no block is active (this is internal to zstd). |
|
348 | 350 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'') |
|
349 | 351 | |
|
350 | 352 | trailing = cobj.flush() |
|
351 | 353 | # 3 bytes block header + 4 bytes frame checksum |
|
352 | 354 | self.assertEqual(len(trailing), 7) |
|
353 | 355 | header = trailing[0:3] |
|
354 | 356 | self.assertEqual(header, b'\x01\x00\x00') |
|
355 | 357 | |
|
356 | 358 | def test_multithreaded(self): |
|
357 | 359 | source = io.BytesIO() |
|
358 | 360 | source.write(b'a' * 1048576) |
|
359 | 361 | source.write(b'b' * 1048576) |
|
360 | 362 | source.write(b'c' * 1048576) |
|
361 | 363 | source.seek(0) |
|
362 | 364 | |
|
363 | 365 | cctx = zstd.ZstdCompressor(level=1, threads=2) |
|
364 | 366 | cobj = cctx.compressobj() |
|
365 | 367 | |
|
366 | 368 | chunks = [] |
|
367 | 369 | while True: |
|
368 | 370 | d = source.read(8192) |
|
369 | 371 | if not d: |
|
370 | 372 | break |
|
371 | 373 | |
|
372 | 374 | chunks.append(cobj.compress(d)) |
|
373 | 375 | |
|
374 | 376 | chunks.append(cobj.flush()) |
|
375 | 377 | |
|
376 | 378 | compressed = b''.join(chunks) |
|
377 | 379 | |
|
378 | 380 | self.assertEqual(len(compressed), 295) |
|
379 | 381 | |
|
380 | 382 | def test_frame_progression(self): |
|
381 | 383 | cctx = zstd.ZstdCompressor() |
|
382 | 384 | |
|
383 | 385 | self.assertEqual(cctx.frame_progression(), (0, 0, 0)) |
|
384 | 386 | |
|
385 | 387 | cobj = cctx.compressobj() |
|
386 | 388 | |
|
387 | 389 | cobj.compress(b'foobar') |
|
388 | 390 | self.assertEqual(cctx.frame_progression(), (6, 0, 0)) |
|
389 | 391 | |
|
390 | 392 | cobj.flush() |
|
391 | 393 | self.assertEqual(cctx.frame_progression(), (6, 6, 15)) |
|
392 | 394 | |
|
393 | 395 | def test_bad_size(self): |
|
394 | 396 | cctx = zstd.ZstdCompressor() |
|
395 | 397 | |
|
396 | 398 | cobj = cctx.compressobj(size=2) |
|
397 | 399 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): |
|
398 | 400 | cobj.compress(b'foo') |
|
399 | 401 | |
|
400 | 402 | # Try another operation on this instance. |
|
401 | 403 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): |
|
402 | 404 | cobj.compress(b'aa') |
|
403 | 405 | |
|
404 | 406 | # Try another operation on the compressor. |
|
405 | 407 | cctx.compressobj(size=4) |
|
406 | 408 | cctx.compress(b'foobar') |
|
407 | 409 | |
|
408 | 410 | |
|
409 | 411 | @make_cffi |
|
410 | 412 | class TestCompressor_copy_stream(unittest.TestCase): |
|
411 | 413 | def test_no_read(self): |
|
412 | 414 | source = object() |
|
413 | 415 | dest = io.BytesIO() |
|
414 | 416 | |
|
415 | 417 | cctx = zstd.ZstdCompressor() |
|
416 | 418 | with self.assertRaises(ValueError): |
|
417 | 419 | cctx.copy_stream(source, dest) |
|
418 | 420 | |
|
419 | 421 | def test_no_write(self): |
|
420 | 422 | source = io.BytesIO() |
|
421 | 423 | dest = object() |
|
422 | 424 | |
|
423 | 425 | cctx = zstd.ZstdCompressor() |
|
424 | 426 | with self.assertRaises(ValueError): |
|
425 | 427 | cctx.copy_stream(source, dest) |
|
426 | 428 | |
|
427 | 429 | def test_empty(self): |
|
428 | 430 | source = io.BytesIO() |
|
429 | 431 | dest = io.BytesIO() |
|
430 | 432 | |
|
431 | 433 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
432 | 434 | r, w = cctx.copy_stream(source, dest) |
|
433 | 435 | self.assertEqual(int(r), 0) |
|
434 | 436 | self.assertEqual(w, 9) |
|
435 | 437 | |
|
436 | 438 | self.assertEqual(dest.getvalue(), |
|
437 | 439 | b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
438 | 440 | |
|
439 | 441 | def test_large_data(self): |
|
440 | 442 | source = io.BytesIO() |
|
441 | 443 | for i in range(255): |
|
442 | 444 | source.write(struct.Struct('>B').pack(i) * 16384) |
|
443 | 445 | source.seek(0) |
|
444 | 446 | |
|
445 | 447 | dest = io.BytesIO() |
|
446 | 448 | cctx = zstd.ZstdCompressor() |
|
447 | 449 | r, w = cctx.copy_stream(source, dest) |
|
448 | 450 | |
|
449 | 451 | self.assertEqual(r, 255 * 16384) |
|
450 | 452 | self.assertEqual(w, 999) |
|
451 | 453 | |
|
452 | 454 | params = zstd.get_frame_parameters(dest.getvalue()) |
|
453 | 455 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
454 | 456 | self.assertEqual(params.window_size, 1048576) |
|
455 | 457 | self.assertEqual(params.dict_id, 0) |
|
456 | 458 | self.assertFalse(params.has_checksum) |
|
457 | 459 | |
|
458 | 460 | def test_write_checksum(self): |
|
459 | 461 | source = io.BytesIO(b'foobar') |
|
460 | 462 | no_checksum = io.BytesIO() |
|
461 | 463 | |
|
462 | 464 | cctx = zstd.ZstdCompressor(level=1) |
|
463 | 465 | cctx.copy_stream(source, no_checksum) |
|
464 | 466 | |
|
465 | 467 | source.seek(0) |
|
466 | 468 | with_checksum = io.BytesIO() |
|
467 | 469 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
468 | 470 | cctx.copy_stream(source, with_checksum) |
|
469 | 471 | |
|
470 | 472 | self.assertEqual(len(with_checksum.getvalue()), |
|
471 | 473 | len(no_checksum.getvalue()) + 4) |
|
472 | 474 | |
|
473 | 475 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) |
|
474 | 476 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) |
|
475 | 477 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
476 | 478 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
477 | 479 | self.assertEqual(no_params.dict_id, 0) |
|
478 | 480 | self.assertEqual(with_params.dict_id, 0) |
|
479 | 481 | self.assertFalse(no_params.has_checksum) |
|
480 | 482 | self.assertTrue(with_params.has_checksum) |
|
481 | 483 | |
|
482 | 484 | def test_write_content_size(self): |
|
483 | 485 | source = io.BytesIO(b'foobar' * 256) |
|
484 | 486 | no_size = io.BytesIO() |
|
485 | 487 | |
|
486 | 488 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
487 | 489 | cctx.copy_stream(source, no_size) |
|
488 | 490 | |
|
489 | 491 | source.seek(0) |
|
490 | 492 | with_size = io.BytesIO() |
|
491 | 493 | cctx = zstd.ZstdCompressor(level=1) |
|
492 | 494 | cctx.copy_stream(source, with_size) |
|
493 | 495 | |
|
494 | 496 | # Source content size is unknown, so no content size written. |
|
495 | 497 | self.assertEqual(len(with_size.getvalue()), |
|
496 | 498 | len(no_size.getvalue())) |
|
497 | 499 | |
|
498 | 500 | source.seek(0) |
|
499 | 501 | with_size = io.BytesIO() |
|
500 | 502 | cctx.copy_stream(source, with_size, size=len(source.getvalue())) |
|
501 | 503 | |
|
502 | 504 | # We specified source size, so content size header is present. |
|
503 | 505 | self.assertEqual(len(with_size.getvalue()), |
|
504 | 506 | len(no_size.getvalue()) + 1) |
|
505 | 507 | |
|
506 | 508 | no_params = zstd.get_frame_parameters(no_size.getvalue()) |
|
507 | 509 | with_params = zstd.get_frame_parameters(with_size.getvalue()) |
|
508 | 510 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
509 | 511 | self.assertEqual(with_params.content_size, 1536) |
|
510 | 512 | self.assertEqual(no_params.dict_id, 0) |
|
511 | 513 | self.assertEqual(with_params.dict_id, 0) |
|
512 | 514 | self.assertFalse(no_params.has_checksum) |
|
513 | 515 | self.assertFalse(with_params.has_checksum) |
|
514 | 516 | |
|
515 | 517 | def test_read_write_size(self): |
|
516 | 518 | source = OpCountingBytesIO(b'foobarfoobar') |
|
517 | 519 | dest = OpCountingBytesIO() |
|
518 | 520 | cctx = zstd.ZstdCompressor() |
|
519 | 521 | r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1) |
|
520 | 522 | |
|
521 | 523 | self.assertEqual(r, len(source.getvalue())) |
|
522 | 524 | self.assertEqual(w, 21) |
|
523 | 525 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) |
|
524 | 526 | self.assertEqual(dest._write_count, len(dest.getvalue())) |
|
525 | 527 | |
|
526 | 528 | def test_multithreaded(self): |
|
527 | 529 | source = io.BytesIO() |
|
528 | 530 | source.write(b'a' * 1048576) |
|
529 | 531 | source.write(b'b' * 1048576) |
|
530 | 532 | source.write(b'c' * 1048576) |
|
531 | 533 | source.seek(0) |
|
532 | 534 | |
|
533 | 535 | dest = io.BytesIO() |
|
534 | 536 | cctx = zstd.ZstdCompressor(threads=2, write_content_size=False) |
|
535 | 537 | r, w = cctx.copy_stream(source, dest) |
|
536 | 538 | self.assertEqual(r, 3145728) |
|
537 | 539 | self.assertEqual(w, 295) |
|
538 | 540 | |
|
539 | 541 | params = zstd.get_frame_parameters(dest.getvalue()) |
|
540 | 542 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
541 | 543 | self.assertEqual(params.dict_id, 0) |
|
542 | 544 | self.assertFalse(params.has_checksum) |
|
543 | 545 | |
|
544 | 546 | # Writing content size and checksum works. |
|
545 | 547 | cctx = zstd.ZstdCompressor(threads=2, write_checksum=True) |
|
546 | 548 | dest = io.BytesIO() |
|
547 | 549 | source.seek(0) |
|
548 | 550 | cctx.copy_stream(source, dest, size=len(source.getvalue())) |
|
549 | 551 | |
|
550 | 552 | params = zstd.get_frame_parameters(dest.getvalue()) |
|
551 | 553 | self.assertEqual(params.content_size, 3145728) |
|
552 | 554 | self.assertEqual(params.dict_id, 0) |
|
553 | 555 | self.assertTrue(params.has_checksum) |
|
554 | 556 | |
|
555 | 557 | def test_bad_size(self): |
|
556 | 558 | source = io.BytesIO() |
|
557 | 559 | source.write(b'a' * 32768) |
|
558 | 560 | source.write(b'b' * 32768) |
|
559 | 561 | source.seek(0) |
|
560 | 562 | |
|
561 | 563 | dest = io.BytesIO() |
|
562 | 564 | |
|
563 | 565 | cctx = zstd.ZstdCompressor() |
|
564 | 566 | |
|
565 | 567 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): |
|
566 | 568 | cctx.copy_stream(source, dest, size=42) |
|
567 | 569 | |
|
568 | 570 | # Try another operation on this compressor. |
|
569 | 571 | source.seek(0) |
|
570 | 572 | dest = io.BytesIO() |
|
571 | 573 | cctx.copy_stream(source, dest) |
|
572 | 574 | |
|
573 | 575 | |
|
574 | 576 | @make_cffi |
|
575 | 577 | class TestCompressor_stream_reader(unittest.TestCase): |
|
576 | 578 | def test_context_manager(self): |
|
577 | 579 | cctx = zstd.ZstdCompressor() |
|
578 | 580 | |
|
579 | reader = cctx.stream_reader(b'foo' * 60) | |
|
580 | with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): | |
|
581 | reader.read(10) | |
|
582 | ||
|
583 | 581 | with cctx.stream_reader(b'foo') as reader: |
|
584 | 582 | with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'): |
|
585 | 583 | with reader as reader2: |
|
586 | 584 | pass |
|
587 | 585 | |
|
586 | def test_no_context_manager(self): | |
|
587 | cctx = zstd.ZstdCompressor() | |
|
588 | ||
|
589 | reader = cctx.stream_reader(b'foo') | |
|
590 | reader.read(4) | |
|
591 | self.assertFalse(reader.closed) | |
|
592 | ||
|
593 | reader.close() | |
|
594 | self.assertTrue(reader.closed) | |
|
595 | with self.assertRaisesRegexp(ValueError, 'stream is closed'): | |
|
596 | reader.read(1) | |
|
597 | ||
|
588 | 598 | def test_not_implemented(self): |
|
589 | 599 | cctx = zstd.ZstdCompressor() |
|
590 | 600 | |
|
591 | 601 | with cctx.stream_reader(b'foo' * 60) as reader: |
|
592 | 602 | with self.assertRaises(io.UnsupportedOperation): |
|
593 | 603 | reader.readline() |
|
594 | 604 | |
|
595 | 605 | with self.assertRaises(io.UnsupportedOperation): |
|
596 | 606 | reader.readlines() |
|
597 | 607 | |
|
598 | 608 | # This could probably be implemented someday. |
|
599 | 609 | with self.assertRaises(NotImplementedError): |
|
600 | 610 | reader.readall() |
|
601 | 611 | |
|
602 | 612 | with self.assertRaises(io.UnsupportedOperation): |
|
603 | 613 | iter(reader) |
|
604 | 614 | |
|
605 | 615 | with self.assertRaises(io.UnsupportedOperation): |
|
606 | 616 | next(reader) |
|
607 | 617 | |
|
608 | 618 | with self.assertRaises(OSError): |
|
609 | 619 | reader.writelines([]) |
|
610 | 620 | |
|
611 | 621 | with self.assertRaises(OSError): |
|
612 | 622 | reader.write(b'foo') |
|
613 | 623 | |
|
614 | 624 | def test_constant_methods(self): |
|
615 | 625 | cctx = zstd.ZstdCompressor() |
|
616 | 626 | |
|
617 | 627 | with cctx.stream_reader(b'boo') as reader: |
|
618 | 628 | self.assertTrue(reader.readable()) |
|
619 | 629 | self.assertFalse(reader.writable()) |
|
620 | 630 | self.assertFalse(reader.seekable()) |
|
621 | 631 | self.assertFalse(reader.isatty()) |
|
632 | self.assertFalse(reader.closed) | |
|
622 | 633 | self.assertIsNone(reader.flush()) |
|
634 | self.assertFalse(reader.closed) | |
|
635 | ||
|
636 | self.assertTrue(reader.closed) | |
|
623 | 637 | |
|
624 | 638 | def test_read_closed(self): |
|
625 | 639 | cctx = zstd.ZstdCompressor() |
|
626 | 640 | |
|
627 | 641 | with cctx.stream_reader(b'foo' * 60) as reader: |
|
628 | 642 | reader.close() |
|
643 | self.assertTrue(reader.closed) | |
|
629 | 644 | with self.assertRaisesRegexp(ValueError, 'stream is closed'): |
|
630 | 645 | reader.read(10) |
|
631 | 646 | |
|
632 | 647 | def test_read_bad_size(self): |
|
633 | 648 | cctx = zstd.ZstdCompressor() |
|
634 | 649 | |
|
635 | 650 | with cctx.stream_reader(b'foo') as reader: |
|
636 | 651 | with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'): |
|
637 | 652 | reader.read(-1) |
|
638 | 653 | |
|
639 | 654 | with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'): |
|
640 | 655 | reader.read(0) |
|
641 | 656 | |
|
642 | 657 | def test_read_buffer(self): |
|
643 | 658 | cctx = zstd.ZstdCompressor() |
|
644 | 659 | |
|
645 | 660 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) |
|
646 | 661 | frame = cctx.compress(source) |
|
647 | 662 | |
|
648 | 663 | with cctx.stream_reader(source) as reader: |
|
649 | 664 | self.assertEqual(reader.tell(), 0) |
|
650 | 665 | |
|
651 | 666 | # We should get entire frame in one read. |
|
652 | 667 | result = reader.read(8192) |
|
653 | 668 | self.assertEqual(result, frame) |
|
654 | 669 | self.assertEqual(reader.tell(), len(result)) |
|
655 | 670 | self.assertEqual(reader.read(), b'') |
|
656 | 671 | self.assertEqual(reader.tell(), len(result)) |
|
657 | 672 | |
|
658 | 673 | def test_read_buffer_small_chunks(self): |
|
659 | 674 | cctx = zstd.ZstdCompressor() |
|
660 | 675 | |
|
661 | 676 | source = b'foo' * 60 |
|
662 | 677 | chunks = [] |
|
663 | 678 | |
|
664 | 679 | with cctx.stream_reader(source) as reader: |
|
665 | 680 | self.assertEqual(reader.tell(), 0) |
|
666 | 681 | |
|
667 | 682 | while True: |
|
668 | 683 | chunk = reader.read(1) |
|
669 | 684 | if not chunk: |
|
670 | 685 | break |
|
671 | 686 | |
|
672 | 687 | chunks.append(chunk) |
|
673 | 688 | self.assertEqual(reader.tell(), sum(map(len, chunks))) |
|
674 | 689 | |
|
675 | 690 | self.assertEqual(b''.join(chunks), cctx.compress(source)) |
|
676 | 691 | |
|
677 | 692 | def test_read_stream(self): |
|
678 | 693 | cctx = zstd.ZstdCompressor() |
|
679 | 694 | |
|
680 | 695 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) |
|
681 | 696 | frame = cctx.compress(source) |
|
682 | 697 | |
|
683 | 698 | with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: |
|
684 | 699 | self.assertEqual(reader.tell(), 0) |
|
685 | 700 | |
|
686 | 701 | chunk = reader.read(8192) |
|
687 | 702 | self.assertEqual(chunk, frame) |
|
688 | 703 | self.assertEqual(reader.tell(), len(chunk)) |
|
689 | 704 | self.assertEqual(reader.read(), b'') |
|
690 | 705 | self.assertEqual(reader.tell(), len(chunk)) |
|
691 | 706 | |
|
692 | 707 | def test_read_stream_small_chunks(self): |
|
693 | 708 | cctx = zstd.ZstdCompressor() |
|
694 | 709 | |
|
695 | 710 | source = b'foo' * 60 |
|
696 | 711 | chunks = [] |
|
697 | 712 | |
|
698 | 713 | with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: |
|
699 | 714 | self.assertEqual(reader.tell(), 0) |
|
700 | 715 | |
|
701 | 716 | while True: |
|
702 | 717 | chunk = reader.read(1) |
|
703 | 718 | if not chunk: |
|
704 | 719 | break |
|
705 | 720 | |
|
706 | 721 | chunks.append(chunk) |
|
707 | 722 | self.assertEqual(reader.tell(), sum(map(len, chunks))) |
|
708 | 723 | |
|
709 | 724 | self.assertEqual(b''.join(chunks), cctx.compress(source)) |
|
710 | 725 | |
|
711 | 726 | def test_read_after_exit(self): |
|
712 | 727 | cctx = zstd.ZstdCompressor() |
|
713 | 728 | |
|
714 | 729 | with cctx.stream_reader(b'foo' * 60) as reader: |
|
715 | 730 | while reader.read(8192): |
|
716 | 731 | pass |
|
717 | 732 | |
|
718 | with self.assertRaisesRegexp(
|
733 | with self.assertRaisesRegexp(ValueError, 'stream is closed'): | |
|
719 | 734 | reader.read(10) |
|
720 | 735 | |
|
721 | 736 | def test_bad_size(self): |
|
722 | 737 | cctx = zstd.ZstdCompressor() |
|
723 | 738 | |
|
724 | 739 | source = io.BytesIO(b'foobar') |
|
725 | 740 | |
|
726 | 741 | with cctx.stream_reader(source, size=2) as reader: |
|
727 | 742 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): |
|
728 | 743 | reader.read(10) |
|
729 | 744 | |
|
730 | 745 | # Try another compression operation. |
|
731 | 746 | with cctx.stream_reader(source, size=42): |
|
732 | 747 | pass |
|
733 | 748 | |
|
734 | 749 | |
|
735 | 750 | @make_cffi |
|
736 | 751 | class TestCompressor_stream_writer(unittest.TestCase): |
|
737 | 752 | def test_empty(self): |
|
738 | 753 | buffer = io.BytesIO() |
|
739 | 754 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
740 | 755 | with cctx.stream_writer(buffer) as compressor: |
|
741 | 756 | compressor.write(b'') |
|
742 | 757 | |
|
743 | 758 | result = buffer.getvalue() |
|
744 | 759 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
745 | 760 | |
|
746 | 761 | params = zstd.get_frame_parameters(result) |
|
747 | 762 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
748 | 763 | self.assertEqual(params.window_size, 524288) |
|
749 | 764 | self.assertEqual(params.dict_id, 0) |
|
750 | 765 | self.assertFalse(params.has_checksum) |
|
751 | 766 | |
|
752 | 767 | def test_input_types(self): |
|
753 | 768 | expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f' |
|
754 | 769 | cctx = zstd.ZstdCompressor(level=1) |
|
755 | 770 | |
|
756 | 771 | mutable_array = bytearray(3) |
|
757 | 772 | mutable_array[:] = b'foo' |
|
758 | 773 | |
|
759 | 774 | sources = [ |
|
760 | 775 | memoryview(b'foo'), |
|
761 | 776 | bytearray(b'foo'), |
|
762 | 777 | mutable_array, |
|
763 | 778 | ] |
|
764 | 779 | |
|
765 | 780 | for source in sources: |
|
766 | 781 | buffer = io.BytesIO() |
|
767 | 782 | with cctx.stream_writer(buffer) as compressor: |
|
768 | 783 | compressor.write(source) |
|
769 | 784 | |
|
770 | 785 | self.assertEqual(buffer.getvalue(), expected) |
|
771 | 786 | |
|
772 | 787 | def test_multiple_compress(self): |
|
773 | 788 | buffer = io.BytesIO() |
|
774 | 789 | cctx = zstd.ZstdCompressor(level=5) |
|
775 | 790 | with cctx.stream_writer(buffer) as compressor: |
|
776 | 791 | self.assertEqual(compressor.write(b'foo'), 0) |
|
777 | 792 | self.assertEqual(compressor.write(b'bar'), 0) |
|
778 | 793 | self.assertEqual(compressor.write(b'x' * 8192), 0) |
|
779 | 794 | |
|
780 | 795 | result = buffer.getvalue() |
|
781 | 796 | self.assertEqual(result, |
|
782 | 797 | b'\x28\xb5\x2f\xfd\x00\x50\x75\x00\x00\x38\x66\x6f' |
|
783 | 798 | b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23') |
|
784 | 799 | |
|
785 | 800 | def test_dictionary(self): |
|
786 | 801 | samples = [] |
|
787 | 802 | for i in range(128): |
|
788 | 803 | samples.append(b'foo' * 64) |
|
789 | 804 | samples.append(b'bar' * 64) |
|
790 | 805 | samples.append(b'foobar' * 64) |
|
791 | 806 | |
|
792 | 807 | d = zstd.train_dictionary(8192, samples) |
|
793 | 808 | |
|
794 | 809 | h = hashlib.sha1(d.as_bytes()).hexdigest() |
|
795 | self.assertEqual(h, '3040faa0ddc37d50e71a4dd28052cb8db5d9d027') | |
|
810 | self.assertEqual(h, '2b3b6428da5bf2c9cc9d4bb58ba0bc5990dd0e79') | |
|
796 | 811 | |
|
797 | 812 | buffer = io.BytesIO() |
|
798 | 813 | cctx = zstd.ZstdCompressor(level=9, dict_data=d) |
|
799 | 814 | with cctx.stream_writer(buffer) as compressor: |
|
800 | 815 | self.assertEqual(compressor.write(b'foo'), 0) |
|
801 | 816 | self.assertEqual(compressor.write(b'bar'), 0) |
|
802 | 817 | self.assertEqual(compressor.write(b'foo' * 16384), 0) |
|
803 | 818 | |
|
804 | 819 | compressed = buffer.getvalue() |
|
805 | 820 | |
|
806 | 821 | params = zstd.get_frame_parameters(compressed) |
|
807 | 822 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
808 | 823 | self.assertEqual(params.window_size, 2097152) |
|
809 | 824 | self.assertEqual(params.dict_id, d.dict_id()) |
|
810 | 825 | self.assertFalse(params.has_checksum) |
|
811 | self.assertEqual(compressed, | |
|
812 | b'\x28\xb5\x2f\xfd\x03\x58\x06\x59\xb5\x52\x5d\x00' | |
|
813 | b'\x00\x00\x02\xfc\x3d\x3f\xd9\xb0\x51\x03\x45\x89') | |
|
826 | ||
|
827 | h = hashlib.sha1(compressed).hexdigest() | |
|
828 | self.assertEqual(h, '23f88344263678478f5f82298e0a5d1833125786') | |
|
829 | ||
|
830 | source = b'foo' + b'bar' + (b'foo' * 16384) | |
|
831 | ||
|
832 | dctx = zstd.ZstdDecompressor(dict_data=d) | |
|
833 | ||
|
834 | self.assertEqual(dctx.decompress(compressed, max_output_size=len(source)), | |
|
835 | source) | |
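
The round-trip check added above passes max_output_size because frames produced through stream_writer() carry no content size (the surrounding tests assert CONTENTSIZE_UNKNOWN), so the one-shot decompress() cannot size its output buffer from the frame header alone. A streaming decompressor sidesteps that; a short sketch reusing the test's ``d`` and ``compressed`` (names taken from the test above, not new API):

    dctx = zstd.ZstdDecompressor(dict_data=d)
    dobj = dctx.decompressobj()
    # No max_output_size needed: output is produced as the frame is decoded.
    assert dobj.decompress(compressed) == b'foo' + b'bar' + (b'foo' * 16384)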
|
814 | 836 | |
|
815 | 837 | def test_compression_params(self): |
|
816 | 838 | params = zstd.ZstdCompressionParameters( |
|
817 | 839 | window_log=20, |
|
818 | 840 | chain_log=6, |
|
819 | 841 | hash_log=12, |
|
820 | 842 | min_match=5, |
|
821 | 843 | search_log=4, |
|
822 | 844 | target_length=10, |
|
823 | 845 | compression_strategy=zstd.STRATEGY_FAST) |
|
824 | 846 | |
|
825 | 847 | buffer = io.BytesIO() |
|
826 | 848 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
827 | 849 | with cctx.stream_writer(buffer) as compressor: |
|
828 | 850 | self.assertEqual(compressor.write(b'foo'), 0) |
|
829 | 851 | self.assertEqual(compressor.write(b'bar'), 0) |
|
830 | 852 | self.assertEqual(compressor.write(b'foobar' * 16384), 0) |
|
831 | 853 | |
|
832 | 854 | compressed = buffer.getvalue() |
|
833 | 855 | |
|
834 | 856 | params = zstd.get_frame_parameters(compressed) |
|
835 | 857 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
836 | 858 | self.assertEqual(params.window_size, 1048576) |
|
837 | 859 | self.assertEqual(params.dict_id, 0) |
|
838 | 860 | self.assertFalse(params.has_checksum) |
|
839 | 861 | |
|
840 | 862 | h = hashlib.sha1(compressed).hexdigest() |
|
841 | 863 | self.assertEqual(h, '2a8111d72eb5004cdcecbdac37da9f26720d30ef') |
|
842 | 864 | |
|
843 | 865 | def test_write_checksum(self): |
|
844 | 866 | no_checksum = io.BytesIO() |
|
845 | 867 | cctx = zstd.ZstdCompressor(level=1) |
|
846 | 868 | with cctx.stream_writer(no_checksum) as compressor: |
|
847 | 869 | self.assertEqual(compressor.write(b'foobar'), 0) |
|
848 | 870 | |
|
849 | 871 | with_checksum = io.BytesIO() |
|
850 | 872 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
851 | 873 | with cctx.stream_writer(with_checksum) as compressor: |
|
852 | 874 | self.assertEqual(compressor.write(b'foobar'), 0) |
|
853 | 875 | |
|
854 | 876 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) |
|
855 | 877 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) |
|
856 | 878 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
857 | 879 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
858 | 880 | self.assertEqual(no_params.dict_id, 0) |
|
859 | 881 | self.assertEqual(with_params.dict_id, 0) |
|
860 | 882 | self.assertFalse(no_params.has_checksum) |
|
861 | 883 | self.assertTrue(with_params.has_checksum) |
|
862 | 884 | |
|
863 | 885 | self.assertEqual(len(with_checksum.getvalue()), |
|
864 | 886 | len(no_checksum.getvalue()) + 4) |
|
865 | 887 | |
|
866 | 888 | def test_write_content_size(self): |
|
867 | 889 | no_size = io.BytesIO() |
|
868 | 890 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
869 | 891 | with cctx.stream_writer(no_size) as compressor: |
|
870 | 892 | self.assertEqual(compressor.write(b'foobar' * 256), 0) |
|
871 | 893 | |
|
872 | 894 | with_size = io.BytesIO() |
|
873 | 895 | cctx = zstd.ZstdCompressor(level=1) |
|
874 | 896 | with cctx.stream_writer(with_size) as compressor: |
|
875 | 897 | self.assertEqual(compressor.write(b'foobar' * 256), 0) |
|
876 | 898 | |
|
877 | 899 | # Source size is not known in streaming mode, so header not |
|
878 | 900 | # written. |
|
879 | 901 | self.assertEqual(len(with_size.getvalue()), |
|
880 | 902 | len(no_size.getvalue())) |
|
881 | 903 | |
|
882 | 904 | # Declaring size will write the header. |
|
883 | 905 | with_size = io.BytesIO() |
|
884 | 906 | with cctx.stream_writer(with_size, size=len(b'foobar' * 256)) as compressor: |
|
885 | 907 | self.assertEqual(compressor.write(b'foobar' * 256), 0) |
|
886 | 908 | |
|
887 | 909 | no_params = zstd.get_frame_parameters(no_size.getvalue()) |
|
888 | 910 | with_params = zstd.get_frame_parameters(with_size.getvalue()) |
|
889 | 911 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
890 | 912 | self.assertEqual(with_params.content_size, 1536) |
|
891 | 913 | self.assertEqual(no_params.dict_id, 0) |
|
892 | 914 | self.assertEqual(with_params.dict_id, 0) |
|
893 | 915 | self.assertFalse(no_params.has_checksum) |
|
894 | 916 | self.assertFalse(with_params.has_checksum) |
|
895 | 917 | |
|
896 | 918 | self.assertEqual(len(with_size.getvalue()), |
|
897 | 919 | len(no_size.getvalue()) + 1) |
|
898 | 920 | |
|
899 | 921 | def test_no_dict_id(self): |
|
900 | 922 | samples = [] |
|
901 | 923 | for i in range(128): |
|
902 | 924 | samples.append(b'foo' * 64) |
|
903 | 925 | samples.append(b'bar' * 64) |
|
904 | 926 | samples.append(b'foobar' * 64) |
|
905 | 927 | |
|
906 | 928 | d = zstd.train_dictionary(1024, samples) |
|
907 | 929 | |
|
908 | 930 | with_dict_id = io.BytesIO() |
|
909 | 931 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
910 | 932 | with cctx.stream_writer(with_dict_id) as compressor: |
|
911 | 933 | self.assertEqual(compressor.write(b'foobarfoobar'), 0) |
|
912 | 934 | |
|
913 | 935 | self.assertEqual(with_dict_id.getvalue()[4:5], b'\x03') |
|
914 | 936 | |
|
915 | 937 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) |
|
916 | 938 | no_dict_id = io.BytesIO() |
|
917 | 939 | with cctx.stream_writer(no_dict_id) as compressor: |
|
918 | 940 | self.assertEqual(compressor.write(b'foobarfoobar'), 0) |
|
919 | 941 | |
|
920 | 942 | self.assertEqual(no_dict_id.getvalue()[4:5], b'\x00') |
|
921 | 943 | |
|
922 | 944 | no_params = zstd.get_frame_parameters(no_dict_id.getvalue()) |
|
923 | 945 | with_params = zstd.get_frame_parameters(with_dict_id.getvalue()) |
|
924 | 946 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
925 | 947 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
926 | 948 | self.assertEqual(no_params.dict_id, 0) |
|
927 | 949 | self.assertEqual(with_params.dict_id, d.dict_id()) |
|
928 | 950 | self.assertFalse(no_params.has_checksum) |
|
929 | 951 | self.assertFalse(with_params.has_checksum) |
|
930 | 952 | |
|
931 | 953 | self.assertEqual(len(with_dict_id.getvalue()), |
|
932 | 954 | len(no_dict_id.getvalue()) + 4) |
|
933 | 955 | |
|
934 | 956 | def test_memory_size(self): |
|
935 | 957 | cctx = zstd.ZstdCompressor(level=3) |
|
936 | 958 | buffer = io.BytesIO() |
|
937 | 959 | with cctx.stream_writer(buffer) as compressor: |
|
938 | 960 | compressor.write(b'foo') |
|
939 | 961 | size = compressor.memory_size() |
|
940 | 962 | |
|
941 | 963 | self.assertGreater(size, 100000) |
|
942 | 964 | |
|
943 | 965 | def test_write_size(self): |
|
944 | 966 | cctx = zstd.ZstdCompressor(level=3) |
|
945 | 967 | dest = OpCountingBytesIO() |
|
946 | 968 | with cctx.stream_writer(dest, write_size=1) as compressor: |
|
947 | 969 | self.assertEqual(compressor.write(b'foo'), 0) |
|
948 | 970 | self.assertEqual(compressor.write(b'bar'), 0) |
|
949 | 971 | self.assertEqual(compressor.write(b'foobar'), 0) |
|
950 | 972 | |
|
951 | 973 | self.assertEqual(len(dest.getvalue()), dest._write_count) |
|
952 | 974 | |
|
953 | 975 | def test_flush_repeated(self): |
|
954 | 976 | cctx = zstd.ZstdCompressor(level=3) |
|
955 | 977 | dest = OpCountingBytesIO() |
|
956 | 978 | with cctx.stream_writer(dest) as compressor: |
|
957 | 979 | self.assertEqual(compressor.write(b'foo'), 0) |
|
958 | 980 | self.assertEqual(dest._write_count, 0) |
|
959 | 981 | self.assertEqual(compressor.flush(), 12) |
|
960 | 982 | self.assertEqual(dest._write_count, 1) |
|
961 | 983 | self.assertEqual(compressor.write(b'bar'), 0) |
|
962 | 984 | self.assertEqual(dest._write_count, 1) |
|
963 | 985 | self.assertEqual(compressor.flush(), 6) |
|
964 | 986 | self.assertEqual(dest._write_count, 2) |
|
965 | 987 | self.assertEqual(compressor.write(b'baz'), 0) |
|
966 | 988 | |
|
967 | 989 | self.assertEqual(dest._write_count, 3) |
|
968 | 990 | |
|
969 | 991 | def test_flush_empty_block(self): |
|
970 | 992 | cctx = zstd.ZstdCompressor(level=3, write_checksum=True) |
|
971 | 993 | dest = OpCountingBytesIO() |
|
972 | 994 | with cctx.stream_writer(dest) as compressor: |
|
973 | 995 | self.assertEqual(compressor.write(b'foobar' * 8192), 0) |
|
974 | 996 | count = dest._write_count |
|
975 | 997 | offset = dest.tell() |
|
976 | 998 | self.assertEqual(compressor.flush(), 23) |
|
977 | 999 | self.assertGreater(dest._write_count, count) |
|
978 | 1000 | self.assertGreater(dest.tell(), offset) |
|
979 | 1001 | offset = dest.tell() |
|
980 | 1002 | # Ending the write here should cause an empty block to be written |
|
981 | 1003 | # to denote end of frame. |
|
982 | 1004 | |
|
983 | 1005 | trailing = dest.getvalue()[offset:] |
|
984 | 1006 | # 3 bytes block header + 4 bytes frame checksum |
|
985 | 1007 | self.assertEqual(len(trailing), 7) |
|
986 | 1008 | |
|
987 | 1009 | header = trailing[0:3] |
|
988 | 1010 | self.assertEqual(header, b'\x01\x00\x00') |
|
989 | 1011 | |
|
990 | 1012 | def test_multithreaded(self): |
|
991 | 1013 | dest = io.BytesIO() |
|
992 | 1014 | cctx = zstd.ZstdCompressor(threads=2) |
|
993 | 1015 | with cctx.stream_writer(dest) as compressor: |
|
994 | 1016 | compressor.write(b'a' * 1048576) |
|
995 | 1017 | compressor.write(b'b' * 1048576) |
|
996 | 1018 | compressor.write(b'c' * 1048576) |
|
997 | 1019 | |
|
998 | 1020 | self.assertEqual(len(dest.getvalue()), 295) |
|
999 | 1021 | |
|
1000 | 1022 | def test_tell(self): |
|
1001 | 1023 | dest = io.BytesIO() |
|
1002 | 1024 | cctx = zstd.ZstdCompressor() |
|
1003 | 1025 | with cctx.stream_writer(dest) as compressor: |
|
1004 | 1026 | self.assertEqual(compressor.tell(), 0) |
|
1005 | 1027 | |
|
1006 | 1028 | for i in range(256): |
|
1007 | 1029 | compressor.write(b'foo' * (i + 1)) |
|
1008 | 1030 | self.assertEqual(compressor.tell(), dest.tell()) |
|
1009 | 1031 | |
|
1010 | 1032 | def test_bad_size(self): |
|
1011 | 1033 | cctx = zstd.ZstdCompressor() |
|
1012 | 1034 | |
|
1013 | 1035 | dest = io.BytesIO() |
|
1014 | 1036 | |
|
1015 | 1037 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): |
|
1016 | 1038 | with cctx.stream_writer(dest, size=2) as compressor: |
|
1017 | 1039 | compressor.write(b'foo') |
|
1018 | 1040 | |
|
1019 | 1041 | # Test another operation. |
|
1020 | 1042 | with cctx.stream_writer(dest, size=42): |
|
1021 | 1043 | pass |
|
1022 | 1044 | |
|
1023 | 1045 | def test_tarfile_compat(self): |
|
1024 | 1046 | raise unittest.SkipTest('not yet fully working') |
|
1025 | 1047 | |
|
1026 | 1048 | dest = io.BytesIO() |
|
1027 | 1049 | cctx = zstd.ZstdCompressor() |
|
1028 | 1050 | with cctx.stream_writer(dest) as compressor: |
|
1029 | 1051 | with tarfile.open('tf', mode='w', fileobj=compressor) as tf: |
|
1030 | 1052 | tf.add(__file__, 'test_compressor.py') |
|
1031 | 1053 | |
|
1032 | 1054 | dest.seek(0) |
|
1033 | 1055 | |
|
1034 | 1056 | dctx = zstd.ZstdDecompressor() |
|
1035 | 1057 | with dctx.stream_reader(dest) as reader: |
|
1036 | 1058 | with tarfile.open(mode='r:', fileobj=reader) as tf: |
|
1037 | 1059 | for member in tf: |
|
1038 | 1060 | self.assertEqual(member.name, 'test_compressor.py') |
|
1039 | 1061 | |
|
1040 | 1062 | @make_cffi |
|
1041 | 1063 | class TestCompressor_read_to_iter(unittest.TestCase): |
|
1042 | 1064 | def test_type_validation(self): |
|
1043 | 1065 | cctx = zstd.ZstdCompressor() |
|
1044 | 1066 | |
|
1045 | 1067 | # Object with read() works. |
|
1046 | 1068 | for chunk in cctx.read_to_iter(io.BytesIO()): |
|
1047 | 1069 | pass |
|
1048 | 1070 | |
|
1049 | 1071 | # Buffer protocol works. |
|
1050 | 1072 | for chunk in cctx.read_to_iter(b'foobar'): |
|
1051 | 1073 | pass |
|
1052 | 1074 | |
|
1053 | 1075 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): |
|
1054 | 1076 | for chunk in cctx.read_to_iter(True): |
|
1055 | 1077 | pass |
|
1056 | 1078 | |
|
1057 | 1079 | def test_read_empty(self): |
|
1058 | 1080 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
1059 | 1081 | |
|
1060 | 1082 | source = io.BytesIO() |
|
1061 | 1083 | it = cctx.read_to_iter(source) |
|
1062 | 1084 | chunks = list(it) |
|
1063 | 1085 | self.assertEqual(len(chunks), 1) |
|
1064 | 1086 | compressed = b''.join(chunks) |
|
1065 | 1087 | self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
1066 | 1088 | |
|
1067 | 1089 | # And again with the buffer protocol. |
|
1068 | 1090 | it = cctx.read_to_iter(b'') |
|
1069 | 1091 | chunks = list(it) |
|
1070 | 1092 | self.assertEqual(len(chunks), 1) |
|
1071 | 1093 | compressed2 = b''.join(chunks) |
|
1072 | 1094 | self.assertEqual(compressed2, compressed) |
|
1073 | 1095 | |
|
1074 | 1096 | def test_read_large(self): |
|
1075 | 1097 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
1076 | 1098 | |
|
1077 | 1099 | source = io.BytesIO() |
|
1078 | 1100 | source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) |
|
1079 | 1101 | source.write(b'o') |
|
1080 | 1102 | source.seek(0) |
|
1081 | 1103 | |
|
1082 | 1104 | # Creating an iterator should not perform any compression until |
|
1083 | 1105 | # first read. |
|
1084 | 1106 | it = cctx.read_to_iter(source, size=len(source.getvalue())) |
|
1085 | 1107 | self.assertEqual(source.tell(), 0) |
|
1086 | 1108 | |
|
1087 | 1109 | # We should have exactly 2 output chunks. |
|
1088 | 1110 | chunks = [] |
|
1089 | 1111 | chunk = next(it) |
|
1090 | 1112 | self.assertIsNotNone(chunk) |
|
1091 | 1113 | self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) |
|
1092 | 1114 | chunks.append(chunk) |
|
1093 | 1115 | chunk = next(it) |
|
1094 | 1116 | self.assertIsNotNone(chunk) |
|
1095 | 1117 | chunks.append(chunk) |
|
1096 | 1118 | |
|
1097 | 1119 | self.assertEqual(source.tell(), len(source.getvalue())) |
|
1098 | 1120 | |
|
1099 | 1121 | with self.assertRaises(StopIteration): |
|
1100 | 1122 | next(it) |
|
1101 | 1123 | |
|
1102 | 1124 | # And again for good measure. |
|
1103 | 1125 | with self.assertRaises(StopIteration): |
|
1104 | 1126 | next(it) |
|
1105 | 1127 | |
|
1106 | 1128 | # We should get the same output as the one-shot compression mechanism. |
|
1107 | 1129 | self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) |
|
1108 | 1130 | |
|
1109 | 1131 | params = zstd.get_frame_parameters(b''.join(chunks)) |
|
1110 | 1132 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1111 | 1133 | self.assertEqual(params.window_size, 262144) |
|
1112 | 1134 | self.assertEqual(params.dict_id, 0) |
|
1113 | 1135 | self.assertFalse(params.has_checksum) |
|
1114 | 1136 | |
|
1115 | 1137 | # Now check the buffer protocol. |
|
1116 | 1138 | it = cctx.read_to_iter(source.getvalue()) |
|
1117 | 1139 | chunks = list(it) |
|
1118 | 1140 | self.assertEqual(len(chunks), 2) |
|
1119 | 1141 | |
|
1120 | 1142 | params = zstd.get_frame_parameters(b''.join(chunks)) |
|
1121 | 1143 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1122 | 1144 | #self.assertEqual(params.window_size, 262144) |
|
1123 | 1145 | self.assertEqual(params.dict_id, 0) |
|
1124 | 1146 | self.assertFalse(params.has_checksum) |
|
1125 | 1147 | |
|
1126 | 1148 | self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) |
|
1127 | 1149 | |
|
1128 | 1150 | def test_read_write_size(self): |
|
1129 | 1151 | source = OpCountingBytesIO(b'foobarfoobar') |
|
1130 | 1152 | cctx = zstd.ZstdCompressor(level=3) |
|
1131 | 1153 | for chunk in cctx.read_to_iter(source, read_size=1, write_size=1): |
|
1132 | 1154 | self.assertEqual(len(chunk), 1) |
|
1133 | 1155 | |
|
1134 | 1156 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) |
|
1135 | 1157 | |
|
1136 | 1158 | def test_multithreaded(self): |
|
1137 | 1159 | source = io.BytesIO() |
|
1138 | 1160 | source.write(b'a' * 1048576) |
|
1139 | 1161 | source.write(b'b' * 1048576) |
|
1140 | 1162 | source.write(b'c' * 1048576) |
|
1141 | 1163 | source.seek(0) |
|
1142 | 1164 | |
|
1143 | 1165 | cctx = zstd.ZstdCompressor(threads=2) |
|
1144 | 1166 | |
|
1145 | 1167 | compressed = b''.join(cctx.read_to_iter(source)) |
|
1146 | 1168 | self.assertEqual(len(compressed), 295) |
|
1147 | 1169 | |
|
1148 | 1170 | def test_bad_size(self): |
|
1149 | 1171 | cctx = zstd.ZstdCompressor() |
|
1150 | 1172 | |
|
1151 | 1173 | source = io.BytesIO(b'a' * 42) |
|
1152 | 1174 | |
|
1153 | 1175 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): |
|
1154 | 1176 | b''.join(cctx.read_to_iter(source, size=2)) |
|
1155 | 1177 | |
|
1156 | 1178 | # Test another operation on errored compressor. |
|
1157 | 1179 | b''.join(cctx.read_to_iter(source)) |
|
1158 | 1180 | |
|
1159 | 1181 | |
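The read_to_iter tests above drive compression as a generator over either a file-like object or a buffer. A minimal sketch of that pattern (level and size values here are illustrative, not taken from the tests):

```python
import io

import zstandard as zstd

cctx = zstd.ZstdCompressor(level=3)
source = io.BytesIO(b'data to compress' * 1024)

# read_to_iter() pulls read_size bytes from the source per iteration and
# yields compressed chunks of at most write_size bytes; no data is read
# from the source until the first chunk is requested.
compressed = b''.join(cctx.read_to_iter(source, read_size=8192, write_size=8192))
```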
|
1182 | @make_cffi | |
|
1183 | class TestCompressor_chunker(unittest.TestCase): | |
|
1184 | def test_empty(self): | |
|
1185 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
|
1186 | chunker = cctx.chunker() | |
|
1187 | ||
|
1188 | it = chunker.compress(b'') | |
|
1189 | ||
|
1190 | with self.assertRaises(StopIteration): | |
|
1191 | next(it) | |
|
1192 | ||
|
1193 | it = chunker.finish() | |
|
1194 | ||
|
1195 | self.assertEqual(next(it), b'\x28\xb5\x2f\xfd\x00\x50\x01\x00\x00') | |
|
1196 | ||
|
1197 | with self.assertRaises(StopIteration): | |
|
1198 | next(it) | |
|
1199 | ||
|
1200 | def test_simple_input(self): | |
|
1201 | cctx = zstd.ZstdCompressor() | |
|
1202 | chunker = cctx.chunker() | |
|
1203 | ||
|
1204 | it = chunker.compress(b'foobar') | |
|
1205 | ||
|
1206 | with self.assertRaises(StopIteration): | |
|
1207 | next(it) | |
|
1208 | ||
|
1209 | it = chunker.compress(b'baz' * 30) | |
|
1210 | ||
|
1211 | with self.assertRaises(StopIteration): | |
|
1212 | next(it) | |
|
1213 | ||
|
1214 | it = chunker.finish() | |
|
1215 | ||
|
1216 | self.assertEqual(next(it), | |
|
1217 | b'\x28\xb5\x2f\xfd\x00\x50\x7d\x00\x00\x48\x66\x6f' | |
|
1218 | b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e') | |
|
1219 | ||
|
1220 | with self.assertRaises(StopIteration): | |
|
1221 | next(it) | |
|
1222 | ||
|
1223 | def test_input_size(self): | |
|
1224 | cctx = zstd.ZstdCompressor() | |
|
1225 | chunker = cctx.chunker(size=1024) | |
|
1226 | ||
|
1227 | it = chunker.compress(b'x' * 1000) | |
|
1228 | ||
|
1229 | with self.assertRaises(StopIteration): | |
|
1230 | next(it) | |
|
1231 | ||
|
1232 | it = chunker.compress(b'y' * 24) | |
|
1233 | ||
|
1234 | with self.assertRaises(StopIteration): | |
|
1235 | next(it) | |
|
1236 | ||
|
1237 | chunks = list(chunker.finish()) | |
|
1238 | ||
|
1239 | self.assertEqual(chunks, [ | |
|
1240 | b'\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00' | |
|
1241 | b'\xa0\x16\xe3\x2b\x80\x05' | |
|
1242 | ]) | |
|
1243 | ||
|
1244 | dctx = zstd.ZstdDecompressor() | |
|
1245 | ||
|
1246 | self.assertEqual(dctx.decompress(b''.join(chunks)), | |
|
1247 | (b'x' * 1000) + (b'y' * 24)) | |
|
1248 | ||
|
1249 | def test_small_chunk_size(self): | |
|
1250 | cctx = zstd.ZstdCompressor() | |
|
1251 | chunker = cctx.chunker(chunk_size=1) | |
|
1252 | ||
|
1253 | chunks = list(chunker.compress(b'foo' * 1024)) | |
|
1254 | self.assertEqual(chunks, []) | |
|
1255 | ||
|
1256 | chunks = list(chunker.finish()) | |
|
1257 | self.assertTrue(all(len(chunk) == 1 for chunk in chunks)) | |
|
1258 | ||
|
1259 | self.assertEqual( | |
|
1260 | b''.join(chunks), | |
|
1261 | b'\x28\xb5\x2f\xfd\x00\x50\x55\x00\x00\x18\x66\x6f\x6f\x01\x00' | |
|
1262 | b'\xfa\xd3\x77\x43') | |
|
1263 | ||
|
1264 | dctx = zstd.ZstdDecompressor() | |
|
1265 | self.assertEqual(dctx.decompress(b''.join(chunks), | |
|
1266 | max_output_size=10000), | |
|
1267 | b'foo' * 1024) | |
|
1268 | ||
|
1269 | def test_input_types(self): | |
|
1270 | cctx = zstd.ZstdCompressor() | |
|
1271 | ||
|
1272 | mutable_array = bytearray(3) | |
|
1273 | mutable_array[:] = b'foo' | |
|
1274 | ||
|
1275 | sources = [ | |
|
1276 | memoryview(b'foo'), | |
|
1277 | bytearray(b'foo'), | |
|
1278 | mutable_array, | |
|
1279 | ] | |
|
1280 | ||
|
1281 | for source in sources: | |
|
1282 | chunker = cctx.chunker() | |
|
1283 | ||
|
1284 | self.assertEqual(list(chunker.compress(source)), []) | |
|
1285 | self.assertEqual(list(chunker.finish()), [ | |
|
1286 | b'\x28\xb5\x2f\xfd\x00\x50\x19\x00\x00\x66\x6f\x6f' | |
|
1287 | ]) | |
|
1288 | ||
|
1289 | def test_flush(self): | |
|
1290 | cctx = zstd.ZstdCompressor() | |
|
1291 | chunker = cctx.chunker() | |
|
1292 | ||
|
1293 | self.assertEqual(list(chunker.compress(b'foo' * 1024)), []) | |
|
1294 | self.assertEqual(list(chunker.compress(b'bar' * 1024)), []) | |
|
1295 | ||
|
1296 | chunks1 = list(chunker.flush()) | |
|
1297 | ||
|
1298 | self.assertEqual(chunks1, [ | |
|
1299 | b'\x28\xb5\x2f\xfd\x00\x50\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72' | |
|
1300 | b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02' | |
|
1301 | ]) | |
|
1302 | ||
|
1303 | self.assertEqual(list(chunker.flush()), []) | |
|
1304 | self.assertEqual(list(chunker.flush()), []) | |
|
1305 | ||
|
1306 | self.assertEqual(list(chunker.compress(b'baz' * 1024)), []) | |
|
1307 | ||
|
1308 | chunks2 = list(chunker.flush()) | |
|
1309 | self.assertEqual(len(chunks2), 1) | |
|
1310 | ||
|
1311 | chunks3 = list(chunker.finish()) | |
|
1312 | self.assertEqual(len(chunks3), 1) | 
|
1313 | ||
|
1314 | dctx = zstd.ZstdDecompressor() | |
|
1315 | ||
|
1316 | self.assertEqual(dctx.decompress(b''.join(chunks1 + chunks2 + chunks3), | |
|
1317 | max_output_size=10000), | |
|
1318 | (b'foo' * 1024) + (b'bar' * 1024) + (b'baz' * 1024)) | |
|
1319 | ||
|
1320 | def test_compress_after_finish(self): | |
|
1321 | cctx = zstd.ZstdCompressor() | |
|
1322 | chunker = cctx.chunker() | |
|
1323 | ||
|
1324 | list(chunker.compress(b'foo')) | |
|
1325 | list(chunker.finish()) | |
|
1326 | ||
|
1327 | with self.assertRaisesRegexp( | |
|
1328 | zstd.ZstdError, | |
|
1329 | 'cannot call compress\(\) after compression finished'): | |
|
1330 | list(chunker.compress(b'foo')) | |
|
1331 | ||
|
1332 | def test_flush_after_finish(self): | |
|
1333 | cctx = zstd.ZstdCompressor() | |
|
1334 | chunker = cctx.chunker() | |
|
1335 | ||
|
1336 | list(chunker.compress(b'foo')) | |
|
1337 | list(chunker.finish()) | |
|
1338 | ||
|
1339 | with self.assertRaisesRegexp( | |
|
1340 | zstd.ZstdError, | |
|
1341 | 'cannot call flush\(\) after compression finished'): | |
|
1342 | list(chunker.flush()) | |
|
1343 | ||
|
1344 | def test_finish_after_finish(self): | |
|
1345 | cctx = zstd.ZstdCompressor() | |
|
1346 | chunker = cctx.chunker() | |
|
1347 | ||
|
1348 | list(chunker.compress(b'foo')) | |
|
1349 | list(chunker.finish()) | |
|
1350 | ||
|
1351 | with self.assertRaisesRegexp( | |
|
1352 | zstd.ZstdError, | |
|
1353 | 'cannot call finish\(\) after compression finished'): | |
|
1354 | list(chunker.finish()) | |
|
1355 | ||
|
1356 | ||
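The new chunker tests above all follow one protocol: compress() and flush() buffer input and yield only complete chunk_size pieces, while finish() drains whatever remains and ends the frame. A condensed sketch of that flow, assuming this version's API (the exact compressed bytes asserted above depend on the bundled zstd):

```python
import zstandard as zstd

cctx = zstd.ZstdCompressor()
chunker = cctx.chunker(chunk_size=32768)

chunks = []
for piece in (b'foo' * 1024, b'bar' * 1024):
    # compress() yields only complete 32768-byte chunks; the rest is buffered.
    chunks.extend(chunker.compress(piece))

# flush() emits whatever is buffered without ending the frame; calling it
# again with nothing pending yields no chunks.
chunks.extend(chunker.flush())

# finish() ends the frame; compress()/flush()/finish() raise ZstdError afterwards.
chunks.extend(chunker.finish())

frame = b''.join(chunks)
```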
|
1160 | 1357 | class TestCompressor_multi_compress_to_buffer(unittest.TestCase): |
|
1161 | 1358 | def test_invalid_inputs(self): |
|
1162 | 1359 | cctx = zstd.ZstdCompressor() |
|
1163 | 1360 | |
|
1164 | 1361 | with self.assertRaises(TypeError): |
|
1165 | 1362 | cctx.multi_compress_to_buffer(True) |
|
1166 | 1363 | |
|
1167 | 1364 | with self.assertRaises(TypeError): |
|
1168 | 1365 | cctx.multi_compress_to_buffer((1, 2)) |
|
1169 | 1366 | |
|
1170 | 1367 | with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'): |
|
1171 | 1368 | cctx.multi_compress_to_buffer([u'foo']) |
|
1172 | 1369 | |
|
1173 | 1370 | def test_empty_input(self): |
|
1174 | 1371 | cctx = zstd.ZstdCompressor() |
|
1175 | 1372 | |
|
1176 | 1373 | with self.assertRaisesRegexp(ValueError, 'no source elements found'): |
|
1177 | 1374 | cctx.multi_compress_to_buffer([]) |
|
1178 | 1375 | |
|
1179 | 1376 | with self.assertRaisesRegexp(ValueError, 'source elements are empty'): |
|
1180 | 1377 | cctx.multi_compress_to_buffer([b'', b'', b'']) |
|
1181 | 1378 | |
|
1182 | 1379 | def test_list_input(self): |
|
1183 | 1380 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
1184 | 1381 | |
|
1185 | 1382 | original = [b'foo' * 12, b'bar' * 6] |
|
1186 | 1383 | frames = [cctx.compress(c) for c in original] |
|
1187 | 1384 | b = cctx.multi_compress_to_buffer(original) |
|
1188 | 1385 | |
|
1189 | 1386 | self.assertIsInstance(b, zstd.BufferWithSegmentsCollection) |
|
1190 | 1387 | |
|
1191 | 1388 | self.assertEqual(len(b), 2) |
|
1192 | 1389 | self.assertEqual(b.size(), 44) |
|
1193 | 1390 | |
|
1194 | 1391 | self.assertEqual(b[0].tobytes(), frames[0]) |
|
1195 | 1392 | self.assertEqual(b[1].tobytes(), frames[1]) |
|
1196 | 1393 | |
|
1197 | 1394 | def test_buffer_with_segments_input(self): |
|
1198 | 1395 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
1199 | 1396 | |
|
1200 | 1397 | original = [b'foo' * 4, b'bar' * 6] |
|
1201 | 1398 | frames = [cctx.compress(c) for c in original] |
|
1202 | 1399 | |
|
1203 | 1400 | offsets = struct.pack('=QQQQ', 0, len(original[0]), |
|
1204 | 1401 | len(original[0]), len(original[1])) |
|
1205 | 1402 | segments = zstd.BufferWithSegments(b''.join(original), offsets) |
|
1206 | 1403 | |
|
1207 | 1404 | result = cctx.multi_compress_to_buffer(segments) |
|
1208 | 1405 | |
|
1209 | 1406 | self.assertEqual(len(result), 2) |
|
1210 | 1407 | self.assertEqual(result.size(), 47) |
|
1211 | 1408 | |
|
1212 | 1409 | self.assertEqual(result[0].tobytes(), frames[0]) |
|
1213 | 1410 | self.assertEqual(result[1].tobytes(), frames[1]) |
|
1214 | 1411 | |
|
1215 | 1412 | def test_buffer_with_segments_collection_input(self): |
|
1216 | 1413 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
1217 | 1414 | |
|
1218 | 1415 | original = [ |
|
1219 | 1416 | b'foo1', |
|
1220 | 1417 | b'foo2' * 2, |
|
1221 | 1418 | b'foo3' * 3, |
|
1222 | 1419 | b'foo4' * 4, |
|
1223 | 1420 | b'foo5' * 5, |
|
1224 | 1421 | ] |
|
1225 | 1422 | |
|
1226 | 1423 | frames = [cctx.compress(c) for c in original] |
|
1227 | 1424 | |
|
1228 | 1425 | b = b''.join([original[0], original[1]]) |
|
1229 | 1426 | b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ', |
|
1230 | 1427 | 0, len(original[0]), |
|
1231 | 1428 | len(original[0]), len(original[1]))) |
|
1232 | 1429 | b = b''.join([original[2], original[3], original[4]]) |
|
1233 | 1430 | b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ', |
|
1234 | 1431 | 0, len(original[2]), |
|
1235 | 1432 | len(original[2]), len(original[3]), |
|
1236 | 1433 | len(original[2]) + len(original[3]), len(original[4]))) |
|
1237 | 1434 | |
|
1238 | 1435 | c = zstd.BufferWithSegmentsCollection(b1, b2) |
|
1239 | 1436 | |
|
1240 | 1437 | result = cctx.multi_compress_to_buffer(c) |
|
1241 | 1438 | |
|
1242 | 1439 | self.assertEqual(len(result), len(frames)) |
|
1243 | 1440 | |
|
1244 | 1441 | for i, frame in enumerate(frames): |
|
1245 | 1442 | self.assertEqual(result[i].tobytes(), frame) |
|
1246 | 1443 | |
|
1247 | 1444 | def test_multiple_threads(self): |
|
1248 | 1445 | # threads argument will cause multi-threaded ZSTD APIs to be used, which will |
|
1249 | 1446 | # make output different. |
|
1250 | 1447 | refcctx = zstd.ZstdCompressor(write_checksum=True) |
|
1251 | 1448 | reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)] |
|
1252 | 1449 | |
|
1253 | 1450 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
1254 | 1451 | |
|
1255 | 1452 | frames = [] |
|
1256 | 1453 | frames.extend(b'x' * 64 for i in range(256)) |
|
1257 | 1454 | frames.extend(b'y' * 64 for i in range(256)) |
|
1258 | 1455 | |
|
1259 | 1456 | result = cctx.multi_compress_to_buffer(frames, threads=-1) |
|
1260 | 1457 | |
|
1261 | 1458 | self.assertEqual(len(result), 512) |
|
1262 | 1459 | for i in range(512): |
|
1263 | 1460 | if i < 256: |
|
1264 | 1461 | self.assertEqual(result[i].tobytes(), reference[0]) |
|
1265 | 1462 | else: |
|
1266 | 1463 | self.assertEqual(result[i].tobytes(), reference[1]) |
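The multi_compress_to_buffer tests above accept a list of buffers, a BufferWithSegments, or a BufferWithSegmentsCollection and get back a BufferWithSegmentsCollection holding one frame per input. A minimal sketch of the list form (threads=-1 mirrors the tests; the inputs are illustrative):

```python
import zstandard as zstd

cctx = zstd.ZstdCompressor(write_checksum=True)
sources = [b'foo' * 64, b'bar' * 64, b'baz' * 64]

# One call compresses every input; each element of the collection is an
# independent zstd frame that can be copied out with tobytes().
result = cctx.multi_compress_to_buffer(sources, threads=-1)

frames = [result[i].tobytes() for i in range(len(result))]
```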
@@ -1,188 +1,320 | |||
|
1 | 1 | import io |
|
2 | 2 | import os |
|
3 | 3 | import unittest |
|
4 | 4 | |
|
5 | 5 | try: |
|
6 | 6 | import hypothesis |
|
7 | 7 | import hypothesis.strategies as strategies |
|
8 | 8 | except ImportError: |
|
9 | 9 | raise unittest.SkipTest('hypothesis not available') |
|
10 | 10 | |
|
11 | 11 | import zstandard as zstd |
|
12 | 12 | |
|
13 | 13 | from . common import ( |
|
14 | 14 | make_cffi, |
|
15 | 15 | random_input_data, |
|
16 | 16 | ) |
|
17 | 17 | |
|
18 | 18 | |
|
19 | 19 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
20 | 20 | @make_cffi |
|
21 | 21 | class TestCompressor_stream_reader_fuzzing(unittest.TestCase): |
|
22 | 22 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
23 | 23 | level=strategies.integers(min_value=1, max_value=5), |
|
24 | 24 | source_read_size=strategies.integers(1, 16384), |
|
25 | 25 | read_sizes=strategies.data()) |
|
26 | 26 | def test_stream_source_read_variance(self, original, level, source_read_size, |
|
27 | 27 | read_sizes): |
|
28 | 28 | refctx = zstd.ZstdCompressor(level=level) |
|
29 | 29 | ref_frame = refctx.compress(original) |
|
30 | 30 | |
|
31 | 31 | cctx = zstd.ZstdCompressor(level=level) |
|
32 | 32 | with cctx.stream_reader(io.BytesIO(original), size=len(original), |
|
33 | 33 | read_size=source_read_size) as reader: |
|
34 | 34 | chunks = [] |
|
35 | 35 | while True: |
|
36 | 36 | read_size = read_sizes.draw(strategies.integers(1, 16384)) |
|
37 | 37 | chunk = reader.read(read_size) |
|
38 | 38 | |
|
39 | 39 | if not chunk: |
|
40 | 40 | break |
|
41 | 41 | chunks.append(chunk) |
|
42 | 42 | |
|
43 | 43 | self.assertEqual(b''.join(chunks), ref_frame) |
|
44 | 44 | |
|
45 | 45 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
46 | 46 | level=strategies.integers(min_value=1, max_value=5), |
|
47 | 47 | source_read_size=strategies.integers(1, 16384), |
|
48 | 48 | read_sizes=strategies.data()) |
|
49 | 49 | def test_buffer_source_read_variance(self, original, level, source_read_size, |
|
50 | 50 | read_sizes): |
|
51 | 51 | |
|
52 | 52 | refctx = zstd.ZstdCompressor(level=level) |
|
53 | 53 | ref_frame = refctx.compress(original) |
|
54 | 54 | |
|
55 | 55 | cctx = zstd.ZstdCompressor(level=level) |
|
56 | 56 | with cctx.stream_reader(original, size=len(original), |
|
57 | 57 | read_size=source_read_size) as reader: |
|
58 | 58 | chunks = [] |
|
59 | 59 | while True: |
|
60 | 60 | read_size = read_sizes.draw(strategies.integers(1, 16384)) |
|
61 | 61 | chunk = reader.read(read_size) |
|
62 | 62 | if not chunk: |
|
63 | 63 | break |
|
64 | 64 | chunks.append(chunk) |
|
65 | 65 | |
|
66 | 66 | self.assertEqual(b''.join(chunks), ref_frame) |
|
67 | 67 | |
|
68 | 68 | |
|
69 | 69 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
70 | 70 | @make_cffi |
|
71 | 71 | class TestCompressor_stream_writer_fuzzing(unittest.TestCase): |
|
72 | 72 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
73 | 73 | level=strategies.integers(min_value=1, max_value=5), |
|
74 | 74 | write_size=strategies.integers(min_value=1, max_value=1048576)) |
|
75 | 75 | def test_write_size_variance(self, original, level, write_size): |
|
76 | 76 | refctx = zstd.ZstdCompressor(level=level) |
|
77 | 77 | ref_frame = refctx.compress(original) |
|
78 | 78 | |
|
79 | 79 | cctx = zstd.ZstdCompressor(level=level) |
|
80 | 80 | b = io.BytesIO() |
|
81 | 81 | with cctx.stream_writer(b, size=len(original), write_size=write_size) as compressor: |
|
82 | 82 | compressor.write(original) |
|
83 | 83 | |
|
84 | 84 | self.assertEqual(b.getvalue(), ref_frame) |
|
85 | 85 | |
|
86 | 86 | |
|
87 | 87 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
88 | 88 | @make_cffi |
|
89 | 89 | class TestCompressor_copy_stream_fuzzing(unittest.TestCase): |
|
90 | 90 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
91 | 91 | level=strategies.integers(min_value=1, max_value=5), |
|
92 | 92 | read_size=strategies.integers(min_value=1, max_value=1048576), |
|
93 | 93 | write_size=strategies.integers(min_value=1, max_value=1048576)) |
|
94 | 94 | def test_read_write_size_variance(self, original, level, read_size, write_size): |
|
95 | 95 | refctx = zstd.ZstdCompressor(level=level) |
|
96 | 96 | ref_frame = refctx.compress(original) |
|
97 | 97 | |
|
98 | 98 | cctx = zstd.ZstdCompressor(level=level) |
|
99 | 99 | source = io.BytesIO(original) |
|
100 | 100 | dest = io.BytesIO() |
|
101 | 101 | |
|
102 | 102 | cctx.copy_stream(source, dest, size=len(original), read_size=read_size, |
|
103 | 103 | write_size=write_size) |
|
104 | 104 | |
|
105 | 105 | self.assertEqual(dest.getvalue(), ref_frame) |
|
106 | 106 | |
|
107 | 107 | |
|
108 | 108 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
109 | 109 | @make_cffi |
|
110 | 110 | class TestCompressor_compressobj_fuzzing(unittest.TestCase): |
|
111 | 111 | @hypothesis.settings( |
|
112 | 112 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
113 | 113 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
114 | 114 | level=strategies.integers(min_value=1, max_value=5), |
|
115 | 115 | chunk_sizes=strategies.data()) |
|
116 | 116 | def test_random_input_sizes(self, original, level, chunk_sizes): |
|
117 | 117 | refctx = zstd.ZstdCompressor(level=level) |
|
118 | 118 | ref_frame = refctx.compress(original) |
|
119 | 119 | |
|
120 | 120 | cctx = zstd.ZstdCompressor(level=level) |
|
121 | 121 | cobj = cctx.compressobj(size=len(original)) |
|
122 | 122 | |
|
123 | 123 | chunks = [] |
|
124 | 124 | i = 0 |
|
125 | 125 | while True: |
|
126 | 126 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) |
|
127 | 127 | source = original[i:i + chunk_size] |
|
128 | 128 | if not source: |
|
129 | 129 | break |
|
130 | 130 | |
|
131 | 131 | chunks.append(cobj.compress(source)) |
|
132 | 132 | i += chunk_size |
|
133 | 133 | |
|
134 | 134 | chunks.append(cobj.flush()) |
|
135 | 135 | |
|
136 | 136 | self.assertEqual(b''.join(chunks), ref_frame) |
|
137 | 137 | |
|
138 | @hypothesis.settings( | |
|
139 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
140 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
141 | level=strategies.integers(min_value=1, max_value=5), | |
|
142 | chunk_sizes=strategies.data(), | |
|
143 | flushes=strategies.data()) | |
|
144 | def test_flush_block(self, original, level, chunk_sizes, flushes): | |
|
145 | cctx = zstd.ZstdCompressor(level=level) | |
|
146 | cobj = cctx.compressobj() | |
|
147 | ||
|
148 | dctx = zstd.ZstdDecompressor() | |
|
149 | dobj = dctx.decompressobj() | |
|
150 | ||
|
151 | compressed_chunks = [] | |
|
152 | decompressed_chunks = [] | |
|
153 | i = 0 | |
|
154 | while True: | |
|
155 | input_size = chunk_sizes.draw(strategies.integers(1, 4096)) | |
|
156 | source = original[i:i + input_size] | |
|
157 | if not source: | |
|
158 | break | |
|
159 | ||
|
160 | i += input_size | |
|
161 | ||
|
162 | chunk = cobj.compress(source) | |
|
163 | compressed_chunks.append(chunk) | |
|
164 | decompressed_chunks.append(dobj.decompress(chunk)) | |
|
165 | ||
|
166 | if not flushes.draw(strategies.booleans()): | |
|
167 | continue | |
|
168 | ||
|
169 | chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) | |
|
170 | compressed_chunks.append(chunk) | |
|
171 | decompressed_chunks.append(dobj.decompress(chunk)) | |
|
172 | ||
|
173 | self.assertEqual(b''.join(decompressed_chunks), original[0:i]) | |
|
174 | ||
|
175 | chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH) | |
|
176 | compressed_chunks.append(chunk) | |
|
177 | decompressed_chunks.append(dobj.decompress(chunk)) | |
|
178 | ||
|
179 | self.assertEqual(dctx.decompress(b''.join(compressed_chunks), | |
|
180 | max_output_size=len(original)), | |
|
181 | original) | |
|
182 | self.assertEqual(b''.join(decompressed_chunks), original) | |
|
138 | 183 | |
|
139 | 184 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
140 | 185 | @make_cffi |
|
141 | 186 | class TestCompressor_read_to_iter_fuzzing(unittest.TestCase): |
|
142 | 187 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
143 | 188 | level=strategies.integers(min_value=1, max_value=5), |
|
144 | 189 | read_size=strategies.integers(min_value=1, max_value=4096), |
|
145 | 190 | write_size=strategies.integers(min_value=1, max_value=4096)) |
|
146 | 191 | def test_read_write_size_variance(self, original, level, read_size, write_size): |
|
147 | 192 | refcctx = zstd.ZstdCompressor(level=level) |
|
148 | 193 | ref_frame = refcctx.compress(original) |
|
149 | 194 | |
|
150 | 195 | source = io.BytesIO(original) |
|
151 | 196 | |
|
152 | 197 | cctx = zstd.ZstdCompressor(level=level) |
|
153 | 198 | chunks = list(cctx.read_to_iter(source, size=len(original), |
|
154 | 199 | read_size=read_size, |
|
155 | 200 | write_size=write_size)) |
|
156 | 201 | |
|
157 | 202 | self.assertEqual(b''.join(chunks), ref_frame) |
|
158 | 203 | |
|
159 | 204 | |
|
160 | 205 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
161 | 206 | class TestCompressor_multi_compress_to_buffer_fuzzing(unittest.TestCase): |
|
162 | 207 | @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()), |
|
163 | 208 | min_size=1, max_size=1024), |
|
164 | 209 | threads=strategies.integers(min_value=1, max_value=8), |
|
165 | 210 | use_dict=strategies.booleans()) |
|
166 | 211 | def test_data_equivalence(self, original, threads, use_dict): |
|
167 | 212 | kwargs = {} |
|
168 | 213 | |
|
169 | 214 | # Use a content dictionary because it is cheap to create. |
|
170 | 215 | if use_dict: |
|
171 | 216 | kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0]) |
|
172 | 217 | |
|
173 | 218 | cctx = zstd.ZstdCompressor(level=1, |
|
174 | 219 | write_checksum=True, |
|
175 | 220 | **kwargs) |
|
176 | 221 | |
|
177 | 222 | result = cctx.multi_compress_to_buffer(original, threads=-1) |
|
178 | 223 | |
|
179 | 224 | self.assertEqual(len(result), len(original)) |
|
180 | 225 | |
|
181 | 226 | # The frame produced via the batch APIs may not be bit identical to that |
|
182 | 227 | # produced by compress() because compression parameters are adjusted |
|
183 | 228 | # from the first input in batch mode. So the only thing we can do is |
|
184 | 229 | # verify the decompressed data matches the input. |
|
185 | 230 | dctx = zstd.ZstdDecompressor(**kwargs) |
|
186 | 231 | |
|
187 | 232 | for i, frame in enumerate(result): |
|
188 | 233 | self.assertEqual(dctx.decompress(frame), original[i]) |
|
234 | ||
|
235 | ||
|
236 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |
|
237 | @make_cffi | |
|
238 | class TestCompressor_chunker_fuzzing(unittest.TestCase): | |
|
239 | @hypothesis.settings( | |
|
240 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
241 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
242 | level=strategies.integers(min_value=1, max_value=5), | |
|
243 | chunk_size=strategies.integers( | |
|
244 | min_value=1, | |
|
245 | max_value=32 * 1048576), | |
|
246 | input_sizes=strategies.data()) | |
|
247 | def test_random_input_sizes(self, original, level, chunk_size, input_sizes): | |
|
248 | cctx = zstd.ZstdCompressor(level=level) | |
|
249 | chunker = cctx.chunker(chunk_size=chunk_size) | |
|
250 | ||
|
251 | chunks = [] | |
|
252 | i = 0 | |
|
253 | while True: | |
|
254 | input_size = input_sizes.draw(strategies.integers(1, 4096)) | |
|
255 | source = original[i:i + input_size] | |
|
256 | if not source: | |
|
257 | break | |
|
258 | ||
|
259 | chunks.extend(chunker.compress(source)) | |
|
260 | i += input_size | |
|
261 | ||
|
262 | chunks.extend(chunker.finish()) | |
|
263 | ||
|
264 | dctx = zstd.ZstdDecompressor() | |
|
265 | ||
|
266 | self.assertEqual(dctx.decompress(b''.join(chunks), | |
|
267 | max_output_size=len(original)), | |
|
268 | original) | |
|
269 | ||
|
270 | self.assertTrue(all(len(chunk) == chunk_size for chunk in chunks[:-1])) | |
|
271 | ||
|
272 | @hypothesis.settings( | |
|
273 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
274 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
275 | level=strategies.integers(min_value=1, max_value=5), | |
|
276 | chunk_size=strategies.integers( | |
|
277 | min_value=1, | |
|
278 | max_value=32 * 1048576), | |
|
279 | input_sizes=strategies.data(), | |
|
280 | flushes=strategies.data()) | |
|
281 | def test_flush_block(self, original, level, chunk_size, input_sizes, | |
|
282 | flushes): | |
|
283 | cctx = zstd.ZstdCompressor(level=level) | |
|
284 | chunker = cctx.chunker(chunk_size=chunk_size) | |
|
285 | ||
|
286 | dctx = zstd.ZstdDecompressor() | |
|
287 | dobj = dctx.decompressobj() | |
|
288 | ||
|
289 | compressed_chunks = [] | |
|
290 | decompressed_chunks = [] | |
|
291 | i = 0 | |
|
292 | while True: | |
|
293 | input_size = input_sizes.draw(strategies.integers(1, 4096)) | |
|
294 | source = original[i:i + input_size] | |
|
295 | if not source: | |
|
296 | break | |
|
297 | ||
|
298 | i += input_size | |
|
299 | ||
|
300 | chunks = list(chunker.compress(source)) | |
|
301 | compressed_chunks.extend(chunks) | |
|
302 | decompressed_chunks.append(dobj.decompress(b''.join(chunks))) | |
|
303 | ||
|
304 | if not flushes.draw(strategies.booleans()): | |
|
305 | continue | |
|
306 | ||
|
307 | chunks = list(chunker.flush()) | |
|
308 | compressed_chunks.extend(chunks) | |
|
309 | decompressed_chunks.append(dobj.decompress(b''.join(chunks))) | |
|
310 | ||
|
311 | self.assertEqual(b''.join(decompressed_chunks), original[0:i]) | |
|
312 | ||
|
313 | chunks = list(chunker.finish()) | |
|
314 | compressed_chunks.extend(chunks) | |
|
315 | decompressed_chunks.append(dobj.decompress(b''.join(chunks))) | |
|
316 | ||
|
317 | self.assertEqual(dctx.decompress(b''.join(compressed_chunks), | |
|
318 | max_output_size=len(original)), | |
|
319 | original) | |
|
320 | self.assertEqual(b''.join(decompressed_chunks), original) No newline at end of file |
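Both new test_flush_block cases above (the compressobj variant and the chunker variant) interleave flushes with a decompressobj to verify that every flushed block is immediately decodable. A condensed sketch of the compressobj round trip without the hypothesis-driven sizing (inputs are illustrative):

```python
import zstandard as zstd

cctx = zstd.ZstdCompressor(level=3)
cobj = cctx.compressobj()

dctx = zstd.ZstdDecompressor()
dobj = dctx.decompressobj()

seen = b''
for piece in (b'spam' * 100, b'eggs' * 100):
    data = cobj.compress(piece)
    # Flushing a block makes all input fed so far decodable right away.
    data += cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
    seen += dobj.decompress(data)

# End the frame and drain anything still pending.
seen += dobj.decompress(cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH))
assert seen == b'spam' * 100 + b'eggs' * 100
```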
@@ -1,202 +1,194 | |||
|
1 | 1 | import sys |
|
2 | 2 | import unittest |
|
3 | 3 | |
|
4 | 4 | import zstandard as zstd |
|
5 | 5 | |
|
6 | 6 | from . common import ( |
|
7 | 7 | make_cffi, |
|
8 | 8 | ) |
|
9 | 9 | |
|
10 | 10 | |
|
11 | 11 | @make_cffi |
|
12 | 12 | class TestCompressionParameters(unittest.TestCase): |
|
13 | 13 | def test_bounds(self): |
|
14 | 14 | zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MIN, |
|
15 | 15 | chain_log=zstd.CHAINLOG_MIN, |
|
16 | 16 | hash_log=zstd.HASHLOG_MIN, |
|
17 | 17 | search_log=zstd.SEARCHLOG_MIN, |
|
18 | 18 | min_match=zstd.SEARCHLENGTH_MIN + 1, |
|
19 | 19 | target_length=zstd.TARGETLENGTH_MIN, |
|
20 | 20 | compression_strategy=zstd.STRATEGY_FAST) |
|
21 | 21 | |
|
22 | 22 | zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MAX, |
|
23 | 23 | chain_log=zstd.CHAINLOG_MAX, |
|
24 | 24 | hash_log=zstd.HASHLOG_MAX, |
|
25 | 25 | search_log=zstd.SEARCHLOG_MAX, |
|
26 | 26 | min_match=zstd.SEARCHLENGTH_MAX - 1, |
|
27 | target_length=zstd.TARGETLENGTH_MAX, | |
|
27 | 28 | compression_strategy=zstd.STRATEGY_BTULTRA) |
|
28 | 29 | |
|
29 | 30 | def test_from_level(self): |
|
30 | 31 | p = zstd.ZstdCompressionParameters.from_level(1) |
|
31 | 32 | self.assertIsInstance(p, zstd.CompressionParameters) |
|
32 | 33 | |
|
33 | 34 | self.assertEqual(p.window_log, 19) |
|
34 | 35 | |
|
35 | 36 | p = zstd.ZstdCompressionParameters.from_level(-4) |
|
36 | 37 | self.assertEqual(p.window_log, 19) |
|
37 | self.assertEqual(p.compress_literals, 0) | |
|
38 | 38 | |
|
39 | 39 | def test_members(self): |
|
40 | 40 | p = zstd.ZstdCompressionParameters(window_log=10, |
|
41 | 41 | chain_log=6, |
|
42 | 42 | hash_log=7, |
|
43 | 43 | search_log=4, |
|
44 | 44 | min_match=5, |
|
45 | 45 | target_length=8, |
|
46 | 46 | compression_strategy=1) |
|
47 | 47 | self.assertEqual(p.window_log, 10) |
|
48 | 48 | self.assertEqual(p.chain_log, 6) |
|
49 | 49 | self.assertEqual(p.hash_log, 7) |
|
50 | 50 | self.assertEqual(p.search_log, 4) |
|
51 | 51 | self.assertEqual(p.min_match, 5) |
|
52 | 52 | self.assertEqual(p.target_length, 8) |
|
53 | 53 | self.assertEqual(p.compression_strategy, 1) |
|
54 | 54 | |
|
55 | 55 | p = zstd.ZstdCompressionParameters(compression_level=2) |
|
56 | 56 | self.assertEqual(p.compression_level, 2) |
|
57 | 57 | |
|
58 | 58 | p = zstd.ZstdCompressionParameters(threads=4) |
|
59 | 59 | self.assertEqual(p.threads, 4) |
|
60 | 60 | |
|
61 | 61 | p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576, |
|
62 | 62 | overlap_size_log=6) |
|
63 | 63 | self.assertEqual(p.threads, 2) |
|
64 | 64 | self.assertEqual(p.job_size, 1048576) |
|
65 | 65 | self.assertEqual(p.overlap_size_log, 6) |
|
66 | 66 | |
|
67 | p = zstd.ZstdCompressionParameters(compression_level=2) | |
|
68 | self.assertEqual(p.compress_literals, 1) | |
|
69 | ||
|
70 | p = zstd.ZstdCompressionParameters(compress_literals=False) | |
|
71 | self.assertEqual(p.compress_literals, 0) | |
|
72 | ||
|
73 | 67 | p = zstd.ZstdCompressionParameters(compression_level=-1) |
|
74 | 68 | self.assertEqual(p.compression_level, -1) |
|
75 | self.assertEqual(p.compress_literals, 0) | |
|
76 | 69 | |
|
77 | p = zstd.ZstdCompressionParameters(compression_level=-2 | 

70 | p = zstd.ZstdCompressionParameters(compression_level=-2) | 
|
78 | 71 | self.assertEqual(p.compression_level, -2) |
|
79 | self.assertEqual(p.compress_literals, 1) | |
|
80 | 72 | |
|
81 | 73 | p = zstd.ZstdCompressionParameters(force_max_window=True) |
|
82 | 74 | self.assertEqual(p.force_max_window, 1) |
|
83 | 75 | |
|
84 | 76 | p = zstd.ZstdCompressionParameters(enable_ldm=True) |
|
85 | 77 | self.assertEqual(p.enable_ldm, 1) |
|
86 | 78 | |
|
87 | 79 | p = zstd.ZstdCompressionParameters(ldm_hash_log=7) |
|
88 | 80 | self.assertEqual(p.ldm_hash_log, 7) |
|
89 | 81 | |
|
90 | 82 | p = zstd.ZstdCompressionParameters(ldm_min_match=6) |
|
91 | 83 | self.assertEqual(p.ldm_min_match, 6) |
|
92 | 84 | |
|
93 | 85 | p = zstd.ZstdCompressionParameters(ldm_bucket_size_log=7) |
|
94 | 86 | self.assertEqual(p.ldm_bucket_size_log, 7) |
|
95 | 87 | |
|
96 | 88 | p = zstd.ZstdCompressionParameters(ldm_hash_every_log=8) |
|
97 | 89 | self.assertEqual(p.ldm_hash_every_log, 8) |
|
98 | 90 | |
|
99 | 91 | def test_estimated_compression_context_size(self): |
|
100 | 92 | p = zstd.ZstdCompressionParameters(window_log=20, |
|
101 | 93 | chain_log=16, |
|
102 | 94 | hash_log=17, |
|
103 | 95 | search_log=1, |
|
104 | 96 | min_match=5, |
|
105 | 97 | target_length=16, |
|
106 | 98 | compression_strategy=zstd.STRATEGY_DFAST) |
|
107 | 99 | |
|
108 | 100 | # 32-bit has slightly different values from 64-bit. |
|
109 | 101 | self.assertAlmostEqual(p.estimated_compression_context_size(), 1294072, |
|
110 | 102 | delta=250) |
|
111 | 103 | |
|
112 | 104 | |
|
113 | 105 | @make_cffi |
|
114 | 106 | class TestFrameParameters(unittest.TestCase): |
|
115 | 107 | def test_invalid_type(self): |
|
116 | 108 | with self.assertRaises(TypeError): |
|
117 | 109 | zstd.get_frame_parameters(None) |
|
118 | 110 | |
|
119 | 111 | # Python 3 doesn't appear to convert unicode to Py_buffer. |
|
120 | 112 | if sys.version_info[0] >= 3: |
|
121 | 113 | with self.assertRaises(TypeError): |
|
122 | 114 | zstd.get_frame_parameters(u'foobarbaz') |
|
123 | 115 | else: |
|
124 | 116 | # CPython will convert unicode to Py_buffer. But CFFI won't. |
|
125 | 117 | if zstd.backend == 'cffi': |
|
126 | 118 | with self.assertRaises(TypeError): |
|
127 | 119 | zstd.get_frame_parameters(u'foobarbaz') |
|
128 | 120 | else: |
|
129 | 121 | with self.assertRaises(zstd.ZstdError): |
|
130 | 122 | zstd.get_frame_parameters(u'foobarbaz') |
|
131 | 123 | |
|
132 | 124 | def test_invalid_input_sizes(self): |
|
133 | 125 | with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'): |
|
134 | 126 | zstd.get_frame_parameters(b'') |
|
135 | 127 | |
|
136 | 128 | with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'): |
|
137 | 129 | zstd.get_frame_parameters(zstd.FRAME_HEADER) |
|
138 | 130 | |
|
139 | 131 | def test_invalid_frame(self): |
|
140 | 132 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): |
|
141 | 133 | zstd.get_frame_parameters(b'foobarbaz') |
|
142 | 134 | |
|
143 | 135 | def test_attributes(self): |
|
144 | 136 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00') |
|
145 | 137 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
146 | 138 | self.assertEqual(params.window_size, 1024) |
|
147 | 139 | self.assertEqual(params.dict_id, 0) |
|
148 | 140 | self.assertFalse(params.has_checksum) |
|
149 | 141 | |
|
150 | 142 | # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte. |
|
151 | 143 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff') |
|
152 | 144 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
153 | 145 | self.assertEqual(params.window_size, 1024) |
|
154 | 146 | self.assertEqual(params.dict_id, 255) |
|
155 | 147 | self.assertFalse(params.has_checksum) |
|
156 | 148 | |
|
157 | 149 | # Lowest 3rd bit indicates if checksum is present. |
|
158 | 150 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00') |
|
159 | 151 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
160 | 152 | self.assertEqual(params.window_size, 1024) |
|
161 | 153 | self.assertEqual(params.dict_id, 0) |
|
162 | 154 | self.assertTrue(params.has_checksum) |
|
163 | 155 | |
|
164 | 156 | # Upper 2 bits indicate content size. |
|
165 | 157 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00') |
|
166 | 158 | self.assertEqual(params.content_size, 511) |
|
167 | 159 | self.assertEqual(params.window_size, 1024) |
|
168 | 160 | self.assertEqual(params.dict_id, 0) |
|
169 | 161 | self.assertFalse(params.has_checksum) |
|
170 | 162 | |
|
171 | 163 | # Window descriptor is 2nd byte after frame header. |
|
172 | 164 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40') |
|
173 | 165 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
174 | 166 | self.assertEqual(params.window_size, 262144) |
|
175 | 167 | self.assertEqual(params.dict_id, 0) |
|
176 | 168 | self.assertFalse(params.has_checksum) |
|
177 | 169 | |
|
178 | 170 | # Set multiple things. |
|
179 | 171 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00') |
|
180 | 172 | self.assertEqual(params.content_size, 272) |
|
181 | 173 | self.assertEqual(params.window_size, 262144) |
|
182 | 174 | self.assertEqual(params.dict_id, 15) |
|
183 | 175 | self.assertTrue(params.has_checksum) |
|
184 | 176 | |
|
185 | 177 | def test_input_types(self): |
|
186 | 178 | v = zstd.FRAME_HEADER + b'\x00\x00' |
|
187 | 179 | |
|
188 | 180 | mutable_array = bytearray(len(v)) |
|
189 | 181 | mutable_array[:] = v |
|
190 | 182 | |
|
191 | 183 | sources = [ |
|
192 | 184 | memoryview(v), |
|
193 | 185 | bytearray(v), |
|
194 | 186 | mutable_array, |
|
195 | 187 | ] |
|
196 | 188 | |
|
197 | 189 | for source in sources: |
|
198 | 190 | params = zstd.get_frame_parameters(source) |
|
199 | 191 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
200 | 192 | self.assertEqual(params.window_size, 1024) |
|
201 | 193 | self.assertEqual(params.dict_id, 0) |
|
202 | 194 | self.assertFalse(params.has_checksum) |
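The frame-parameter tests above read fields straight out of a handcrafted header: the low two bits of the frame descriptor select the dictionary-ID width, bit 2 is the checksum flag, the top two bits give the content-size field width, and the following byte is the window descriptor. A small sketch of inspecting a real frame the same way (input is illustrative):

```python
import zstandard as zstd

cctx = zstd.ZstdCompressor(write_checksum=True)
frame = cctx.compress(b'hello world')

params = zstd.get_frame_parameters(frame)

# content_size is known because this compressor writes it by default;
# dict_id stays 0 because no dictionary was used.
print(params.content_size, params.window_size, params.dict_id, params.has_checksum)
```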
@@ -1,75 +1,75 | |||
|
1 | 1 | import io |
|
2 | 2 | import os |
|
3 | 3 | import sys |
|
4 | 4 | import unittest |
|
5 | 5 | |
|
6 | 6 | try: |
|
7 | 7 | import hypothesis |
|
8 | 8 | import hypothesis.strategies as strategies |
|
9 | 9 | except ImportError: |
|
10 | 10 | raise unittest.SkipTest('hypothesis not available') |
|
11 | 11 | |
|
12 | 12 | import zstandard as zstd |
|
13 | 13 | |
|
14 | 14 | from .common import ( |
|
15 | 15 | make_cffi, |
|
16 | 16 | ) |
|
17 | 17 | |
|
18 | 18 | |
|
19 | 19 | s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN, |
|
20 | 20 | max_value=zstd.WINDOWLOG_MAX) |
|
21 | 21 | s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN, |
|
22 | 22 | max_value=zstd.CHAINLOG_MAX) |
|
23 | 23 | s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN, |
|
24 | 24 | max_value=zstd.HASHLOG_MAX) |
|
25 | 25 | s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN, |
|
26 | 26 | max_value=zstd.SEARCHLOG_MAX) |
|
27 | 27 | s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN, |
|
28 | 28 | max_value=zstd.SEARCHLENGTH_MAX) |
|
29 | 29 | s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN, |
|
30 | max_value= | 

30 | max_value=zstd.TARGETLENGTH_MAX) | 
|
31 | 31 | s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST, |
|
32 | 32 | zstd.STRATEGY_DFAST, |
|
33 | 33 | zstd.STRATEGY_GREEDY, |
|
34 | 34 | zstd.STRATEGY_LAZY, |
|
35 | 35 | zstd.STRATEGY_LAZY2, |
|
36 | 36 | zstd.STRATEGY_BTLAZY2, |
|
37 | 37 | zstd.STRATEGY_BTOPT, |
|
38 | 38 | zstd.STRATEGY_BTULTRA)) |
|
39 | 39 | |
|
40 | 40 | |
|
41 | 41 | @make_cffi |
|
42 | 42 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
43 | 43 | class TestCompressionParametersHypothesis(unittest.TestCase): |
|
44 | 44 | @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, |
|
45 | 45 | s_searchlength, s_targetlength, s_strategy) |
|
46 | 46 | def test_valid_init(self, windowlog, chainlog, hashlog, searchlog, |
|
47 | 47 | searchlength, targetlength, strategy): |
|
48 | 48 | zstd.ZstdCompressionParameters(window_log=windowlog, |
|
49 | 49 | chain_log=chainlog, |
|
50 | 50 | hash_log=hashlog, |
|
51 | 51 | search_log=searchlog, |
|
52 | 52 | min_match=searchlength, |
|
53 | 53 | target_length=targetlength, |
|
54 | 54 | compression_strategy=strategy) |
|
55 | 55 | |
|
56 | 56 | @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, |
|
57 | 57 | s_searchlength, s_targetlength, s_strategy) |
|
58 | 58 | def test_estimated_compression_context_size(self, windowlog, chainlog, |
|
59 | 59 | hashlog, searchlog, |
|
60 | 60 | searchlength, targetlength, |
|
61 | 61 | strategy): |
|
62 | 62 | if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY): |
|
63 | 63 | searchlength += 1 |
|
64 | 64 | elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST: |
|
65 | 65 | searchlength -= 1 |
|
66 | 66 | |
|
67 | 67 | p = zstd.ZstdCompressionParameters(window_log=windowlog, |
|
68 | 68 | chain_log=chainlog, |
|
69 | 69 | hash_log=hashlog, |
|
70 | 70 | search_log=searchlog, |
|
71 | 71 | min_match=searchlength, |
|
72 | 72 | target_length=targetlength, |
|
73 | 73 | compression_strategy=strategy) |
|
74 | 74 | size = p.estimated_compression_context_size() |
|
75 | 75 |
@@ -1,1139 +1,1178 | |||
|
1 | 1 | import io |
|
2 | 2 | import os |
|
3 | 3 | import random |
|
4 | 4 | import struct |
|
5 | 5 | import sys |
|
6 | 6 | import unittest |
|
7 | 7 | |
|
8 | 8 | import zstandard as zstd |
|
9 | 9 | |
|
10 | 10 | from .common import ( |
|
11 | 11 | generate_samples, |
|
12 | 12 | make_cffi, |
|
13 | 13 | OpCountingBytesIO, |
|
14 | 14 | ) |
|
15 | 15 | |
|
16 | 16 | |
|
17 | 17 | if sys.version_info[0] >= 3: |
|
18 | 18 | next = lambda it: it.__next__() |
|
19 | 19 | else: |
|
20 | 20 | next = lambda it: it.next() |
|
21 | 21 | |
|
22 | 22 | |
|
23 | 23 | @make_cffi |
|
24 | 24 | class TestFrameHeaderSize(unittest.TestCase): |
|
25 | 25 | def test_empty(self): |
|
26 | 26 | with self.assertRaisesRegexp( |
|
27 | 27 | zstd.ZstdError, 'could not determine frame header size: Src size ' |
|
28 | 28 | 'is incorrect'): |
|
29 | 29 | zstd.frame_header_size(b'') |
|
30 | 30 | |
|
31 | 31 | def test_too_small(self): |
|
32 | 32 | with self.assertRaisesRegexp( |
|
33 | 33 | zstd.ZstdError, 'could not determine frame header size: Src size ' |
|
34 | 34 | 'is incorrect'): |
|
35 | 35 | zstd.frame_header_size(b'foob') |
|
36 | 36 | |
|
37 | 37 | def test_basic(self): |
|
38 | 38 | # It doesn't matter that it isn't a valid frame. |
|
39 | 39 | self.assertEqual(zstd.frame_header_size(b'long enough but no magic'), 6) |
|
40 | 40 | |
|
41 | 41 | |
|
42 | 42 | @make_cffi |
|
43 | 43 | class TestFrameContentSize(unittest.TestCase): |
|
44 | 44 | def test_empty(self): |
|
45 | 45 | with self.assertRaisesRegexp(zstd.ZstdError, |
|
46 | 46 | 'error when determining content size'): |
|
47 | 47 | zstd.frame_content_size(b'') |
|
48 | 48 | |
|
49 | 49 | def test_too_small(self): |
|
50 | 50 | with self.assertRaisesRegexp(zstd.ZstdError, |
|
51 | 51 | 'error when determining content size'): |
|
52 | 52 | zstd.frame_content_size(b'foob') |
|
53 | 53 | |
|
54 | 54 | def test_bad_frame(self): |
|
55 | 55 | with self.assertRaisesRegexp(zstd.ZstdError, |
|
56 | 56 | 'error when determining content size'): |
|
57 | 57 | zstd.frame_content_size(b'invalid frame header') |
|
58 | 58 | |
|
59 | 59 | def test_unknown(self): |
|
60 | 60 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
61 | 61 | frame = cctx.compress(b'foobar') |
|
62 | 62 | |
|
63 | 63 | self.assertEqual(zstd.frame_content_size(frame), -1) |
|
64 | 64 | |
|
65 | 65 | def test_empty(self): |
|
66 | 66 | cctx = zstd.ZstdCompressor() |
|
67 | 67 | frame = cctx.compress(b'') |
|
68 | 68 | |
|
69 | 69 | self.assertEqual(zstd.frame_content_size(frame), 0) |
|
70 | 70 | |
|
71 | 71 | def test_basic(self): |
|
72 | 72 | cctx = zstd.ZstdCompressor() |
|
73 | 73 | frame = cctx.compress(b'foobar') |
|
74 | 74 | |
|
75 | 75 | self.assertEqual(zstd.frame_content_size(frame), 6) |
|
76 | 76 | |
|
77 | 77 | |
|
78 | 78 | @make_cffi |
|
79 | 79 | class TestDecompressor(unittest.TestCase): |
|
80 | 80 | def test_memory_size(self): |
|
81 | 81 | dctx = zstd.ZstdDecompressor() |
|
82 | 82 | |
|
83 | 83 | self.assertGreater(dctx.memory_size(), 100) |
|
84 | 84 | |
|
85 | 85 | |
|
86 | 86 | @make_cffi |
|
87 | 87 | class TestDecompressor_decompress(unittest.TestCase): |
|
88 | 88 | def test_empty_input(self): |
|
89 | 89 | dctx = zstd.ZstdDecompressor() |
|
90 | 90 | |
|
91 | 91 | with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'): |
|
92 | 92 | dctx.decompress(b'') |
|
93 | 93 | |
|
94 | 94 | def test_invalid_input(self): |
|
95 | 95 | dctx = zstd.ZstdDecompressor() |
|
96 | 96 | |
|
97 | 97 | with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'): |
|
98 | 98 | dctx.decompress(b'foobar') |
|
99 | 99 | |
|
100 | 100 | def test_input_types(self): |
|
101 | 101 | cctx = zstd.ZstdCompressor(level=1) |
|
102 | 102 | compressed = cctx.compress(b'foo') |
|
103 | 103 | |
|
104 | 104 | mutable_array = bytearray(len(compressed)) |
|
105 | 105 | mutable_array[:] = compressed |
|
106 | 106 | |
|
107 | 107 | sources = [ |
|
108 | 108 | memoryview(compressed), |
|
109 | 109 | bytearray(compressed), |
|
110 | 110 | mutable_array, |
|
111 | 111 | ] |
|
112 | 112 | |
|
113 | 113 | dctx = zstd.ZstdDecompressor() |
|
114 | 114 | for source in sources: |
|
115 | 115 | self.assertEqual(dctx.decompress(source), b'foo') |
|
116 | 116 | |
|
117 | 117 | def test_no_content_size_in_frame(self): |
|
118 | 118 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
119 | 119 | compressed = cctx.compress(b'foobar') |
|
120 | 120 | |
|
121 | 121 | dctx = zstd.ZstdDecompressor() |
|
122 | 122 | with self.assertRaisesRegexp(zstd.ZstdError, 'could not determine content size in frame header'): |
|
123 | 123 | dctx.decompress(compressed) |
|
124 | 124 | |
|
125 | 125 | def test_content_size_present(self): |
|
126 | 126 | cctx = zstd.ZstdCompressor() |
|
127 | 127 | compressed = cctx.compress(b'foobar') |
|
128 | 128 | |
|
129 | 129 | dctx = zstd.ZstdDecompressor() |
|
130 | 130 | decompressed = dctx.decompress(compressed) |
|
131 | 131 | self.assertEqual(decompressed, b'foobar') |
|
132 | 132 | |
|
133 | 133 | def test_empty_roundtrip(self): |
|
134 | 134 | cctx = zstd.ZstdCompressor() |
|
135 | 135 | compressed = cctx.compress(b'') |
|
136 | 136 | |
|
137 | 137 | dctx = zstd.ZstdDecompressor() |
|
138 | 138 | decompressed = dctx.decompress(compressed) |
|
139 | 139 | |
|
140 | 140 | self.assertEqual(decompressed, b'') |
|
141 | 141 | |
|
142 | 142 | def test_max_output_size(self): |
|
143 | 143 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
144 | 144 | source = b'foobar' * 256 |
|
145 | 145 | compressed = cctx.compress(source) |
|
146 | 146 | |
|
147 | 147 | dctx = zstd.ZstdDecompressor() |
|
148 | 148 | # Will fit into buffer exactly the size of input. |
|
149 | 149 | decompressed = dctx.decompress(compressed, max_output_size=len(source)) |
|
150 | 150 | self.assertEqual(decompressed, source) |
|
151 | 151 | |
|
152 | 152 | # Input size - 1 fails |
|
153 | 153 | with self.assertRaisesRegexp(zstd.ZstdError, |
|
154 | 154 | 'decompression error: did not decompress full frame'): |
|
155 | 155 | dctx.decompress(compressed, max_output_size=len(source) - 1) |
|
156 | 156 | |
|
157 | 157 | # Input size + 1 works |
|
158 | 158 | decompressed = dctx.decompress(compressed, max_output_size=len(source) + 1) |
|
159 | 159 | self.assertEqual(decompressed, source) |
|
160 | 160 | |
|
161 | 161 | # A much larger buffer works. |
|
162 | 162 | decompressed = dctx.decompress(compressed, max_output_size=len(source) * 64) |
|
163 | 163 | self.assertEqual(decompressed, source) |
|
164 | 164 | |
|
165 | 165 | def test_stupidly_large_output_buffer(self): |
|
166 | 166 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
167 | 167 | compressed = cctx.compress(b'foobar' * 256) |
|
168 | 168 | dctx = zstd.ZstdDecompressor() |
|
169 | 169 | |
|
170 | 170 | # Will get OverflowError on some Python distributions that can't |
|
171 | 171 | # handle really large integers. |
|
172 | 172 | with self.assertRaises((MemoryError, OverflowError)): |
|
173 | 173 | dctx.decompress(compressed, max_output_size=2**62) |
|
174 | 174 | |
|
175 | 175 | def test_dictionary(self): |
|
176 | 176 | samples = [] |
|
177 | 177 | for i in range(128): |
|
178 | 178 | samples.append(b'foo' * 64) |
|
179 | 179 | samples.append(b'bar' * 64) |
|
180 | 180 | samples.append(b'foobar' * 64) |
|
181 | 181 | |
|
182 | 182 | d = zstd.train_dictionary(8192, samples) |
|
183 | 183 | |
|
184 | 184 | orig = b'foobar' * 16384 |
|
185 | 185 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
186 | 186 | compressed = cctx.compress(orig) |
|
187 | 187 | |
|
188 | 188 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
189 | 189 | decompressed = dctx.decompress(compressed) |
|
190 | 190 | |
|
191 | 191 | self.assertEqual(decompressed, orig) |
|
192 | 192 | |
|
193 | 193 | def test_dictionary_multiple(self): |
|
194 | 194 | samples = [] |
|
195 | 195 | for i in range(128): |
|
196 | 196 | samples.append(b'foo' * 64) |
|
197 | 197 | samples.append(b'bar' * 64) |
|
198 | 198 | samples.append(b'foobar' * 64) |
|
199 | 199 | |
|
200 | 200 | d = zstd.train_dictionary(8192, samples) |
|
201 | 201 | |
|
202 | 202 | sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192) |
|
203 | 203 | compressed = [] |
|
204 | 204 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
205 | 205 | for source in sources: |
|
206 | 206 | compressed.append(cctx.compress(source)) |
|
207 | 207 | |
|
208 | 208 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
209 | 209 | for i in range(len(sources)): |
|
210 | 210 | decompressed = dctx.decompress(compressed[i]) |
|
211 | 211 | self.assertEqual(decompressed, sources[i]) |
|
212 | 212 | |
|
213 | 213 | def test_max_window_size(self): |
|
214 | 214 | with open(__file__, 'rb') as fh: |
|
215 | 215 | source = fh.read() |
|
216 | 216 | |
|
217 | 217 | # If we write a content size, the decompressor engages single pass |
|
218 | 218 | # mode and the window size doesn't come into play. |
|
219 | 219 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
220 | 220 | frame = cctx.compress(source) |
|
221 | 221 | |
|
222 | 222 | dctx = zstd.ZstdDecompressor(max_window_size=1) |
|
223 | 223 | |
|
224 | 224 | with self.assertRaisesRegexp( |
|
225 | 225 | zstd.ZstdError, 'decompression error: Frame requires too much memory'): |
|
226 | 226 | dctx.decompress(frame, max_output_size=len(source)) |
|
227 | 227 | |
|
228 | 228 | |
|
229 | 229 | @make_cffi |
|
230 | 230 | class TestDecompressor_copy_stream(unittest.TestCase): |
|
231 | 231 | def test_no_read(self): |
|
232 | 232 | source = object() |
|
233 | 233 | dest = io.BytesIO() |
|
234 | 234 | |
|
235 | 235 | dctx = zstd.ZstdDecompressor() |
|
236 | 236 | with self.assertRaises(ValueError): |
|
237 | 237 | dctx.copy_stream(source, dest) |
|
238 | 238 | |
|
239 | 239 | def test_no_write(self): |
|
240 | 240 | source = io.BytesIO() |
|
241 | 241 | dest = object() |
|
242 | 242 | |
|
243 | 243 | dctx = zstd.ZstdDecompressor() |
|
244 | 244 | with self.assertRaises(ValueError): |
|
245 | 245 | dctx.copy_stream(source, dest) |
|
246 | 246 | |
|
247 | 247 | def test_empty(self): |
|
248 | 248 | source = io.BytesIO() |
|
249 | 249 | dest = io.BytesIO() |
|
250 | 250 | |
|
251 | 251 | dctx = zstd.ZstdDecompressor() |
|
252 | 252 | # TODO should this raise an error? |
|
253 | 253 | r, w = dctx.copy_stream(source, dest) |
|
254 | 254 | |
|
255 | 255 | self.assertEqual(r, 0) |
|
256 | 256 | self.assertEqual(w, 0) |
|
257 | 257 | self.assertEqual(dest.getvalue(), b'') |
|
258 | 258 | |
|
259 | 259 | def test_large_data(self): |
|
260 | 260 | source = io.BytesIO() |
|
261 | 261 | for i in range(255): |
|
262 | 262 | source.write(struct.Struct('>B').pack(i) * 16384) |
|
263 | 263 | source.seek(0) |
|
264 | 264 | |
|
265 | 265 | compressed = io.BytesIO() |
|
266 | 266 | cctx = zstd.ZstdCompressor() |
|
267 | 267 | cctx.copy_stream(source, compressed) |
|
268 | 268 | |
|
269 | 269 | compressed.seek(0) |
|
270 | 270 | dest = io.BytesIO() |
|
271 | 271 | dctx = zstd.ZstdDecompressor() |
|
272 | 272 | r, w = dctx.copy_stream(compressed, dest) |
|
273 | 273 | |
|
274 | 274 | self.assertEqual(r, len(compressed.getvalue())) |
|
275 | 275 | self.assertEqual(w, len(source.getvalue())) |
|
276 | 276 | |
|
277 | 277 | def test_read_write_size(self): |
|
278 | 278 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress( |
|
279 | 279 | b'foobarfoobar')) |
|
280 | 280 | |
|
281 | 281 | dest = OpCountingBytesIO() |
|
282 | 282 | dctx = zstd.ZstdDecompressor() |
|
283 | 283 | r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1) |
|
284 | 284 | |
|
285 | 285 | self.assertEqual(r, len(source.getvalue())) |
|
286 | 286 | self.assertEqual(w, len(b'foobarfoobar')) |
|
287 | 287 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) |
|
288 | 288 | self.assertEqual(dest._write_count, len(dest.getvalue())) |
|
289 | 289 | |
|
290 | 290 | |
|
291 | 291 | @make_cffi |
|
292 | 292 | class TestDecompressor_stream_reader(unittest.TestCase): |
|
293 | 293 | def test_context_manager(self): |
|
294 | 294 | dctx = zstd.ZstdDecompressor() |
|
295 | 295 | |
|
296 | reader = dctx.stream_reader(b'foo') | |
|
297 | with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): | |
|
298 | reader.read(1) | |
|
299 | ||
|
300 | 296 | with dctx.stream_reader(b'foo') as reader: |
|
301 | 297 | with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'): |
|
302 | 298 | with reader as reader2: |
|
303 | 299 | pass |
|
304 | 300 | |
|
305 | 301 | def test_not_implemented(self): |
|
306 | 302 | dctx = zstd.ZstdDecompressor() |
|
307 | 303 | |
|
308 | 304 | with dctx.stream_reader(b'foo') as reader: |
|
309 | 305 | with self.assertRaises(NotImplementedError): |
|
310 | 306 | reader.readline() |
|
311 | 307 | |
|
312 | 308 | with self.assertRaises(NotImplementedError): |
|
313 | 309 | reader.readlines() |
|
314 | 310 | |
|
315 | 311 | with self.assertRaises(NotImplementedError): |
|
316 | 312 | reader.readall() |
|
317 | 313 | |
|
318 | 314 | with self.assertRaises(NotImplementedError): |
|
319 | 315 | iter(reader) |
|
320 | 316 | |
|
321 | 317 | with self.assertRaises(NotImplementedError): |
|
322 | 318 | next(reader) |
|
323 | 319 | |
|
324 | 320 | with self.assertRaises(io.UnsupportedOperation): |
|
325 | 321 | reader.write(b'foo') |
|
326 | 322 | |
|
327 | 323 | with self.assertRaises(io.UnsupportedOperation): |
|
328 | 324 | reader.writelines([]) |
|
329 | 325 | |
|
330 | 326 | def test_constant_methods(self): |
|
331 | 327 | dctx = zstd.ZstdDecompressor() |
|
332 | 328 | |
|
333 | 329 | with dctx.stream_reader(b'foo') as reader: |
|
330 | self.assertFalse(reader.closed) | |
|
334 | 331 | self.assertTrue(reader.readable()) |
|
335 | 332 | self.assertFalse(reader.writable()) |
|
336 | 333 | self.assertTrue(reader.seekable()) |
|
337 | 334 | self.assertFalse(reader.isatty()) |
|
335 | self.assertFalse(reader.closed) | |
|
338 | 336 | self.assertIsNone(reader.flush()) |
|
337 | self.assertFalse(reader.closed) | |
|
338 | ||
|
339 | self.assertTrue(reader.closed) | |
|
339 | 340 | |
|
340 | 341 | def test_read_closed(self): |
|
341 | 342 | dctx = zstd.ZstdDecompressor() |
|
342 | 343 | |
|
343 | 344 | with dctx.stream_reader(b'foo') as reader: |
|
344 | 345 | reader.close() |
|
346 | self.assertTrue(reader.closed) | |
|
345 | 347 | with self.assertRaisesRegexp(ValueError, 'stream is closed'): |
|
346 | 348 | reader.read(1) |
|
347 | 349 | |
|
348 | 350 | def test_bad_read_size(self): |
|
349 | 351 | dctx = zstd.ZstdDecompressor() |
|
350 | 352 | |
|
351 | 353 | with dctx.stream_reader(b'foo') as reader: |
|
352 | 354 | with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'): |
|
353 | 355 | reader.read(-1) |
|
354 | 356 | |
|
355 | 357 | with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'): |
|
356 | 358 | reader.read(0) |
|
357 | 359 | |
|
358 | 360 | def test_read_buffer(self): |
|
359 | 361 | cctx = zstd.ZstdCompressor() |
|
360 | 362 | |
|
361 | 363 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) |
|
362 | 364 | frame = cctx.compress(source) |
|
363 | 365 | |
|
364 | 366 | dctx = zstd.ZstdDecompressor() |
|
365 | 367 | |
|
366 | 368 | with dctx.stream_reader(frame) as reader: |
|
367 | 369 | self.assertEqual(reader.tell(), 0) |
|
368 | 370 | |
|
369 | 371 | # We should get entire frame in one read. |
|
370 | 372 | result = reader.read(8192) |
|
371 | 373 | self.assertEqual(result, source) |
|
372 | 374 | self.assertEqual(reader.tell(), len(source)) |
|
373 | 375 | |
|
374 | 376 | # Read after EOF should return empty bytes. |
|
375 | self.assertEqual(reader.read(), b'') | |
|
377 | self.assertEqual(reader.read(1), b'') | |
|
376 | 378 | self.assertEqual(reader.tell(), len(result)) |
|
377 | 379 | |
|
378 | self.assertTrue(reader.closed) |

380 | self.assertTrue(reader.closed) | |
|
379 | 381 | |
|
380 | 382 | def test_read_buffer_small_chunks(self): |
|
381 | 383 | cctx = zstd.ZstdCompressor() |
|
382 | 384 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) |
|
383 | 385 | frame = cctx.compress(source) |
|
384 | 386 | |
|
385 | 387 | dctx = zstd.ZstdDecompressor() |
|
386 | 388 | chunks = [] |
|
387 | 389 | |
|
388 | 390 | with dctx.stream_reader(frame, read_size=1) as reader: |
|
389 | 391 | while True: |
|
390 | 392 | chunk = reader.read(1) |
|
391 | 393 | if not chunk: |
|
392 | 394 | break |
|
393 | 395 | |
|
394 | 396 | chunks.append(chunk) |
|
395 | 397 | self.assertEqual(reader.tell(), sum(map(len, chunks))) |
|
396 | 398 | |
|
397 | 399 | self.assertEqual(b''.join(chunks), source) |
|
398 | 400 | |
|
399 | 401 | def test_read_stream(self): |
|
400 | 402 | cctx = zstd.ZstdCompressor() |
|
401 | 403 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) |
|
402 | 404 | frame = cctx.compress(source) |
|
403 | 405 | |
|
404 | 406 | dctx = zstd.ZstdDecompressor() |
|
405 | 407 | with dctx.stream_reader(io.BytesIO(frame)) as reader: |
|
406 | 408 | self.assertEqual(reader.tell(), 0) |
|
407 | 409 | |
|
408 | 410 | chunk = reader.read(8192) |
|
409 | 411 | self.assertEqual(chunk, source) |
|
410 | 412 | self.assertEqual(reader.tell(), len(source)) |
|
411 | self.assertEqual(reader.read(), b'') | |
|
413 | self.assertEqual(reader.read(1), b'') | |
|
412 | 414 | self.assertEqual(reader.tell(), len(source)) |
|
415 | self.assertFalse(reader.closed) | |
|
416 | ||
|
417 | self.assertTrue(reader.closed) | |
|
413 | 418 | |
|
414 | 419 | def test_read_stream_small_chunks(self): |
|
415 | 420 | cctx = zstd.ZstdCompressor() |
|
416 | 421 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) |
|
417 | 422 | frame = cctx.compress(source) |
|
418 | 423 | |
|
419 | 424 | dctx = zstd.ZstdDecompressor() |
|
420 | 425 | chunks = [] |
|
421 | 426 | |
|
422 | 427 | with dctx.stream_reader(io.BytesIO(frame), read_size=1) as reader: |
|
423 | 428 | while True: |
|
424 | 429 | chunk = reader.read(1) |
|
425 | 430 | if not chunk: |
|
426 | 431 | break |
|
427 | 432 | |
|
428 | 433 | chunks.append(chunk) |
|
429 | 434 | self.assertEqual(reader.tell(), sum(map(len, chunks))) |
|
430 | 435 | |
|
431 | 436 | self.assertEqual(b''.join(chunks), source) |
|
432 | 437 | |
|
433 | 438 | def test_read_after_exit(self): |
|
434 | 439 | cctx = zstd.ZstdCompressor() |
|
435 | 440 | frame = cctx.compress(b'foo' * 60) |
|
436 | 441 | |
|
437 | 442 | dctx = zstd.ZstdDecompressor() |
|
438 | 443 | |
|
439 | 444 | with dctx.stream_reader(frame) as reader: |
|
440 | 445 | while reader.read(16): |
|
441 | 446 | pass |
|
442 | 447 | |
|
443 | with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): | |
|
448 | self.assertTrue(reader.closed) | |
|
449 | ||
|
450 | with self.assertRaisesRegexp(ValueError, 'stream is closed'): | |
|
444 | 451 | reader.read(10) |
|
445 | 452 | |
|
446 | 453 | def test_illegal_seeks(self): |
|
447 | 454 | cctx = zstd.ZstdCompressor() |
|
448 | 455 | frame = cctx.compress(b'foo' * 60) |
|
449 | 456 | |
|
450 | 457 | dctx = zstd.ZstdDecompressor() |
|
451 | 458 | |
|
452 | 459 | with dctx.stream_reader(frame) as reader: |
|
453 | 460 | with self.assertRaisesRegexp(ValueError, |
|
454 | 461 | 'cannot seek to negative position'): |
|
455 | 462 | reader.seek(-1, os.SEEK_SET) |
|
456 | 463 | |
|
457 | 464 | reader.read(1) |
|
458 | 465 | |
|
459 | 466 | with self.assertRaisesRegexp( |
|
460 | 467 | ValueError, 'cannot seek zstd decompression stream backwards'): |
|
461 | 468 | reader.seek(0, os.SEEK_SET) |
|
462 | 469 | |
|
463 | 470 | with self.assertRaisesRegexp( |
|
464 | 471 | ValueError, 'cannot seek zstd decompression stream backwards'): |
|
465 | 472 | reader.seek(-1, os.SEEK_CUR) |
|
466 | 473 | |
|
467 | 474 | with self.assertRaisesRegexp( |
|
468 | 475 | ValueError, |
|
469 | 476 | 'zstd decompression streams cannot be seeked with SEEK_END'): |
|
470 | 477 | reader.seek(0, os.SEEK_END) |
|
471 | 478 | |
|
472 | 479 | reader.close() |
|
473 | 480 | |
|
474 | 481 | with self.assertRaisesRegexp(ValueError, 'stream is closed'): |
|
475 | 482 | reader.seek(4, os.SEEK_SET) |
|
476 | 483 | |
|
477 | with self.assertRaisesRegexp( | |
|
478 | zstd.ZstdError, 'seek\(\) must be called from an active context'): | |
|
484 | with self.assertRaisesRegexp(ValueError, 'stream is closed'): | |
|
479 | 485 | reader.seek(0) |
|
480 | 486 | |
|
481 | 487 | def test_seek(self): |
|
482 | 488 | source = b'foobar' * 60 |
|
483 | 489 | cctx = zstd.ZstdCompressor() |
|
484 | 490 | frame = cctx.compress(source) |
|
485 | 491 | |
|
486 | 492 | dctx = zstd.ZstdDecompressor() |
|
487 | 493 | |
|
488 | 494 | with dctx.stream_reader(frame) as reader: |
|
489 | 495 | reader.seek(3) |
|
490 | 496 | self.assertEqual(reader.read(3), b'bar') |
|
491 | 497 | |
|
492 | 498 | reader.seek(4, os.SEEK_CUR) |
|
493 | 499 | self.assertEqual(reader.read(2), b'ar') |
|
494 | 500 | |
|
501 | def test_no_context_manager(self): | |
|
502 | source = b'foobar' * 60 | |
|
503 | cctx = zstd.ZstdCompressor() | |
|
504 | frame = cctx.compress(source) | |
|
505 | ||
|
506 | dctx = zstd.ZstdDecompressor() | |
|
507 | reader = dctx.stream_reader(frame) | |
|
508 | ||
|
509 | self.assertEqual(reader.read(6), b'foobar') | |
|
510 | self.assertEqual(reader.read(18), b'foobar' * 3) | |
|
511 | self.assertFalse(reader.closed) | |
|
512 | ||
|
513 | # Calling close prevents subsequent use. | |
|
514 | reader.close() | |
|
515 | self.assertTrue(reader.closed) | |
|
516 | ||
|
517 | with self.assertRaisesRegexp(ValueError, 'stream is closed'): | |
|
518 | reader.read(6) | |
|
519 | ||
|
520 | def test_read_after_error(self): | |
|
521 | source = io.BytesIO(b'') | |
|
522 | dctx = zstd.ZstdDecompressor() | |
|
523 | ||
|
524 | reader = dctx.stream_reader(source) | |
|
525 | ||
|
526 | with reader: | |
|
527 | with self.assertRaises(TypeError): | |
|
528 | reader.read() | |
|
529 | ||
|
530 | with reader: | |
|
531 | with self.assertRaisesRegexp(ValueError, 'stream is closed'): | |
|
532 | reader.read(100) | |
|
533 | ||
|
495 | 534 | |
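
The tests above pin down the reader semantics: read() returns b'' at end of frame, tell() tracks the decompressed offset, and leaving the context manager (or calling close()) closes the stream. A minimal usage sketch, assuming only the zstandard package from this series:

    import io

    import zstandard as zstd

    # Build a frame to decompress.
    frame = zstd.ZstdCompressor().compress(b'foobar' * 60)

    dctx = zstd.ZstdDecompressor()

    # The source may be a bytes-like object or any object with a read() method.
    with dctx.stream_reader(io.BytesIO(frame)) as reader:
        chunks = []
        while True:
            chunk = reader.read(8192)
            if not chunk:
                break
            chunks.append(chunk)

    assert b''.join(chunks) == b'foobar' * 60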
|
496 | 535 | @make_cffi |
|
497 | 536 | class TestDecompressor_decompressobj(unittest.TestCase): |
|
498 | 537 | def test_simple(self): |
|
499 | 538 | data = zstd.ZstdCompressor(level=1).compress(b'foobar') |
|
500 | 539 | |
|
501 | 540 | dctx = zstd.ZstdDecompressor() |
|
502 | 541 | dobj = dctx.decompressobj() |
|
503 | 542 | self.assertEqual(dobj.decompress(data), b'foobar') |
|
504 | 543 | |
|
505 | 544 | def test_input_types(self): |
|
506 | 545 | compressed = zstd.ZstdCompressor(level=1).compress(b'foo') |
|
507 | 546 | |
|
508 | 547 | dctx = zstd.ZstdDecompressor() |
|
509 | 548 | |
|
510 | 549 | mutable_array = bytearray(len(compressed)) |
|
511 | 550 | mutable_array[:] = compressed |
|
512 | 551 | |
|
513 | 552 | sources = [ |
|
514 | 553 | memoryview(compressed), |
|
515 | 554 | bytearray(compressed), |
|
516 | 555 | mutable_array, |
|
517 | 556 | ] |
|
518 | 557 | |
|
519 | 558 | for source in sources: |
|
520 | 559 | dobj = dctx.decompressobj() |
|
521 | 560 | self.assertEqual(dobj.decompress(source), b'foo') |
|
522 | 561 | |
|
523 | 562 | def test_reuse(self): |
|
524 | 563 | data = zstd.ZstdCompressor(level=1).compress(b'foobar') |
|
525 | 564 | |
|
526 | 565 | dctx = zstd.ZstdDecompressor() |
|
527 | 566 | dobj = dctx.decompressobj() |
|
528 | 567 | dobj.decompress(data) |
|
529 | 568 | |
|
530 | 569 | with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'): |
|
531 | 570 | dobj.decompress(data) |
|
532 | 571 | |
|
533 | 572 | def test_bad_write_size(self): |
|
534 | 573 | dctx = zstd.ZstdDecompressor() |
|
535 | 574 | |
|
536 | 575 | with self.assertRaisesRegexp(ValueError, 'write_size must be positive'): |
|
537 | 576 | dctx.decompressobj(write_size=0) |
|
538 | 577 | |
|
539 | 578 | def test_write_size(self): |
|
540 | 579 | source = b'foo' * 64 + b'bar' * 128 |
|
541 | 580 | data = zstd.ZstdCompressor(level=1).compress(source) |
|
542 | 581 | |
|
543 | 582 | dctx = zstd.ZstdDecompressor() |
|
544 | 583 | |
|
545 | 584 | for i in range(128): |
|
546 | 585 | dobj = dctx.decompressobj(write_size=i + 1) |
|
547 | 586 | self.assertEqual(dobj.decompress(data), source) |
|
548 | 587 | |
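
For callers used to the zlib/bz2 style, decompressobj() works as the tests above describe: one object per frame, with an optional write_size controlling the output buffer. A hedged sketch:

    import zstandard as zstd

    data = zstd.ZstdCompressor(level=1).compress(b'foobar')

    dctx = zstd.ZstdDecompressor()
    dobj = dctx.decompressobj(write_size=8192)

    # Per test_reuse above, a decompressobj cannot be fed data again once a
    # frame has been fully decompressed; create a new one per frame.
    assert dobj.decompress(data) == b'foobar'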
|
549 | 588 | def decompress_via_writer(data): |
|
550 | 589 | buffer = io.BytesIO() |
|
551 | 590 | dctx = zstd.ZstdDecompressor() |
|
552 | 591 | with dctx.stream_writer(buffer) as decompressor: |
|
553 | 592 | decompressor.write(data) |
|
554 | 593 | return buffer.getvalue() |
|
555 | 594 | |
|
556 | 595 | |
|
557 | 596 | @make_cffi |
|
558 | 597 | class TestDecompressor_stream_writer(unittest.TestCase): |
|
559 | 598 | def test_empty_roundtrip(self): |
|
560 | 599 | cctx = zstd.ZstdCompressor() |
|
561 | 600 | empty = cctx.compress(b'') |
|
562 | 601 | self.assertEqual(decompress_via_writer(empty), b'') |
|
563 | 602 | |
|
564 | 603 | def test_input_types(self): |
|
565 | 604 | cctx = zstd.ZstdCompressor(level=1) |
|
566 | 605 | compressed = cctx.compress(b'foo') |
|
567 | 606 | |
|
568 | 607 | mutable_array = bytearray(len(compressed)) |
|
569 | 608 | mutable_array[:] = compressed |
|
570 | 609 | |
|
571 | 610 | sources = [ |
|
572 | 611 | memoryview(compressed), |
|
573 | 612 | bytearray(compressed), |
|
574 | 613 | mutable_array, |
|
575 | 614 | ] |
|
576 | 615 | |
|
577 | 616 | dctx = zstd.ZstdDecompressor() |
|
578 | 617 | for source in sources: |
|
579 | 618 | buffer = io.BytesIO() |
|
580 | 619 | with dctx.stream_writer(buffer) as decompressor: |
|
581 | 620 | decompressor.write(source) |
|
582 | 621 | |
|
583 | 622 | self.assertEqual(buffer.getvalue(), b'foo') |
|
584 | 623 | |
|
585 | 624 | def test_large_roundtrip(self): |
|
586 | 625 | chunks = [] |
|
587 | 626 | for i in range(255): |
|
588 | 627 | chunks.append(struct.Struct('>B').pack(i) * 16384) |
|
589 | 628 | orig = b''.join(chunks) |
|
590 | 629 | cctx = zstd.ZstdCompressor() |
|
591 | 630 | compressed = cctx.compress(orig) |
|
592 | 631 | |
|
593 | 632 | self.assertEqual(decompress_via_writer(compressed), orig) |
|
594 | 633 | |
|
595 | 634 | def test_multiple_calls(self): |
|
596 | 635 | chunks = [] |
|
597 | 636 | for i in range(255): |
|
598 | 637 | for j in range(255): |
|
599 | 638 | chunks.append(struct.Struct('>B').pack(j) * i) |
|
600 | 639 | |
|
601 | 640 | orig = b''.join(chunks) |
|
602 | 641 | cctx = zstd.ZstdCompressor() |
|
603 | 642 | compressed = cctx.compress(orig) |
|
604 | 643 | |
|
605 | 644 | buffer = io.BytesIO() |
|
606 | 645 | dctx = zstd.ZstdDecompressor() |
|
607 | 646 | with dctx.stream_writer(buffer) as decompressor: |
|
608 | 647 | pos = 0 |
|
609 | 648 | while pos < len(compressed): |
|
610 | 649 | pos2 = pos + 8192 |
|
611 | 650 | decompressor.write(compressed[pos:pos2]) |
|
612 | 651 | pos += 8192 |
|
613 | 652 | self.assertEqual(buffer.getvalue(), orig) |
|
614 | 653 | |
|
615 | 654 | def test_dictionary(self): |
|
616 | 655 | samples = [] |
|
617 | 656 | for i in range(128): |
|
618 | 657 | samples.append(b'foo' * 64) |
|
619 | 658 | samples.append(b'bar' * 64) |
|
620 | 659 | samples.append(b'foobar' * 64) |
|
621 | 660 | |
|
622 | 661 | d = zstd.train_dictionary(8192, samples) |
|
623 | 662 | |
|
624 | 663 | orig = b'foobar' * 16384 |
|
625 | 664 | buffer = io.BytesIO() |
|
626 | 665 | cctx = zstd.ZstdCompressor(dict_data=d) |
|
627 | 666 | with cctx.stream_writer(buffer) as compressor: |
|
628 | 667 | self.assertEqual(compressor.write(orig), 0) |
|
629 | 668 | |
|
630 | 669 | compressed = buffer.getvalue() |
|
631 | 670 | buffer = io.BytesIO() |
|
632 | 671 | |
|
633 | 672 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
634 | 673 | with dctx.stream_writer(buffer) as decompressor: |
|
635 | 674 | self.assertEqual(decompressor.write(compressed), len(orig)) |
|
636 | 675 | |
|
637 | 676 | self.assertEqual(buffer.getvalue(), orig) |
|
638 | 677 | |
|
639 | 678 | def test_memory_size(self): |
|
640 | 679 | dctx = zstd.ZstdDecompressor() |
|
641 | 680 | buffer = io.BytesIO() |
|
642 | 681 | with dctx.stream_writer(buffer) as decompressor: |
|
643 | 682 | size = decompressor.memory_size() |
|
644 | 683 | |
|
645 | 684 | self.assertGreater(size, 100000) |
|
646 | 685 | |
|
647 | 686 | def test_write_size(self): |
|
648 | 687 | source = zstd.ZstdCompressor().compress(b'foobarfoobar') |
|
649 | 688 | dest = OpCountingBytesIO() |
|
650 | 689 | dctx = zstd.ZstdDecompressor() |
|
651 | 690 | with dctx.stream_writer(dest, write_size=1) as decompressor: |
|
652 | 691 | s = struct.Struct('>B') |
|
653 | 692 | for c in source: |
|
654 | 693 | if not isinstance(c, str): |
|
655 | 694 | c = s.pack(c) |
|
656 | 695 | decompressor.write(c) |
|
657 | 696 | |
|
658 | 697 | self.assertEqual(dest.getvalue(), b'foobarfoobar') |
|
659 | 698 | self.assertEqual(dest._write_count, len(dest.getvalue())) |
|
660 | 699 | |
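
The writer API exercised above pushes decompressed bytes into any object with a write() method; write() returns the number of decompressed bytes sent to the sink (see test_dictionary). A standalone sketch:

    import io

    import zstandard as zstd

    compressed = zstd.ZstdCompressor().compress(b'foobarfoobar')

    buffer = io.BytesIO()
    dctx = zstd.ZstdDecompressor()
    with dctx.stream_writer(buffer) as decompressor:
        # Returns the number of decompressed bytes written to buffer.
        decompressor.write(compressed)

    assert buffer.getvalue() == b'foobarfoobar'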
|
661 | 700 | |
|
662 | 701 | @make_cffi |
|
663 | 702 | class TestDecompressor_read_to_iter(unittest.TestCase): |
|
664 | 703 | def test_type_validation(self): |
|
665 | 704 | dctx = zstd.ZstdDecompressor() |
|
666 | 705 | |
|
667 | 706 | # Object with read() works. |
|
668 | 707 | dctx.read_to_iter(io.BytesIO()) |
|
669 | 708 | |
|
670 | 709 | # Buffer protocol works. |
|
671 | 710 | dctx.read_to_iter(b'foobar') |
|
672 | 711 | |
|
673 | 712 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): |
|
674 | 713 | b''.join(dctx.read_to_iter(True)) |
|
675 | 714 | |
|
676 | 715 | def test_empty_input(self): |
|
677 | 716 | dctx = zstd.ZstdDecompressor() |
|
678 | 717 | |
|
679 | 718 | source = io.BytesIO() |
|
680 | 719 | it = dctx.read_to_iter(source) |
|
681 | 720 | # TODO this is arguably wrong. Should get an error about missing frame foo. |
|
682 | 721 | with self.assertRaises(StopIteration): |
|
683 | 722 | next(it) |
|
684 | 723 | |
|
685 | 724 | it = dctx.read_to_iter(b'') |
|
686 | 725 | with self.assertRaises(StopIteration): |
|
687 | 726 | next(it) |
|
688 | 727 | |
|
689 | 728 | def test_invalid_input(self): |
|
690 | 729 | dctx = zstd.ZstdDecompressor() |
|
691 | 730 | |
|
692 | 731 | source = io.BytesIO(b'foobar') |
|
693 | 732 | it = dctx.read_to_iter(source) |
|
694 | 733 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): |
|
695 | 734 | next(it) |
|
696 | 735 | |
|
697 | 736 | it = dctx.read_to_iter(b'foobar') |
|
698 | 737 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): |
|
699 | 738 | next(it) |
|
700 | 739 | |
|
701 | 740 | def test_empty_roundtrip(self): |
|
702 | 741 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
703 | 742 | empty = cctx.compress(b'') |
|
704 | 743 | |
|
705 | 744 | source = io.BytesIO(empty) |
|
706 | 745 | source.seek(0) |
|
707 | 746 | |
|
708 | 747 | dctx = zstd.ZstdDecompressor() |
|
709 | 748 | it = dctx.read_to_iter(source) |
|
710 | 749 | |
|
711 | 750 | # No chunks should be emitted since there is no data. |
|
712 | 751 | with self.assertRaises(StopIteration): |
|
713 | 752 | next(it) |
|
714 | 753 | |
|
715 | 754 | # Again for good measure. |
|
716 | 755 | with self.assertRaises(StopIteration): |
|
717 | 756 | next(it) |
|
718 | 757 | |
|
719 | 758 | def test_skip_bytes_too_large(self): |
|
720 | 759 | dctx = zstd.ZstdDecompressor() |
|
721 | 760 | |
|
722 | 761 | with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'): |
|
723 | 762 | b''.join(dctx.read_to_iter(b'', skip_bytes=1, read_size=1)) |
|
724 | 763 | |
|
725 | 764 | with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'): |
|
726 | 765 | b''.join(dctx.read_to_iter(b'foobar', skip_bytes=10)) |
|
727 | 766 | |
|
728 | 767 | def test_skip_bytes(self): |
|
729 | 768 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
730 | 769 | compressed = cctx.compress(b'foobar') |
|
731 | 770 | |
|
732 | 771 | dctx = zstd.ZstdDecompressor() |
|
733 | 772 | output = b''.join(dctx.read_to_iter(b'hdr' + compressed, skip_bytes=3)) |
|
734 | 773 | self.assertEqual(output, b'foobar') |
|
735 | 774 | |
|
736 | 775 | def test_large_output(self): |
|
737 | 776 | source = io.BytesIO() |
|
738 | 777 | source.write(b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE) |
|
739 | 778 | source.write(b'o') |
|
740 | 779 | source.seek(0) |
|
741 | 780 | |
|
742 | 781 | cctx = zstd.ZstdCompressor(level=1) |
|
743 | 782 | compressed = io.BytesIO(cctx.compress(source.getvalue())) |
|
744 | 783 | compressed.seek(0) |
|
745 | 784 | |
|
746 | 785 | dctx = zstd.ZstdDecompressor() |
|
747 | 786 | it = dctx.read_to_iter(compressed) |
|
748 | 787 | |
|
749 | 788 | chunks = [] |
|
750 | 789 | chunks.append(next(it)) |
|
751 | 790 | chunks.append(next(it)) |
|
752 | 791 | |
|
753 | 792 | with self.assertRaises(StopIteration): |
|
754 | 793 | next(it) |
|
755 | 794 | |
|
756 | 795 | decompressed = b''.join(chunks) |
|
757 | 796 | self.assertEqual(decompressed, source.getvalue()) |
|
758 | 797 | |
|
759 | 798 | # And again with buffer protocol. |
|
760 | 799 | it = dctx.read_to_iter(compressed.getvalue()) |
|
761 | 800 | chunks = [] |
|
762 | 801 | chunks.append(next(it)) |
|
763 | 802 | chunks.append(next(it)) |
|
764 | 803 | |
|
765 | 804 | with self.assertRaises(StopIteration): |
|
766 | 805 | next(it) |
|
767 | 806 | |
|
768 | 807 | decompressed = b''.join(chunks) |
|
769 | 808 | self.assertEqual(decompressed, source.getvalue()) |
|
770 | 809 | |
|
771 | 810 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
772 | 811 | def test_large_input(self): |
|
773 | 812 | bytes = list(struct.Struct('>B').pack(i) for i in range(256)) |
|
774 | 813 | compressed = io.BytesIO() |
|
775 | 814 | input_size = 0 |
|
776 | 815 | cctx = zstd.ZstdCompressor(level=1) |
|
777 | 816 | with cctx.stream_writer(compressed) as compressor: |
|
778 | 817 | while True: |
|
779 | 818 | compressor.write(random.choice(bytes)) |
|
780 | 819 | input_size += 1 |
|
781 | 820 | |
|
782 | 821 | have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE |
|
783 | 822 | have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2 |
|
784 | 823 | if have_compressed and have_raw: |
|
785 | 824 | break |
|
786 | 825 | |
|
787 | 826 | compressed.seek(0) |
|
788 | 827 | self.assertGreater(len(compressed.getvalue()), |
|
789 | 828 | zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE) |
|
790 | 829 | |
|
791 | 830 | dctx = zstd.ZstdDecompressor() |
|
792 | 831 | it = dctx.read_to_iter(compressed) |
|
793 | 832 | |
|
794 | 833 | chunks = [] |
|
795 | 834 | chunks.append(next(it)) |
|
796 | 835 | chunks.append(next(it)) |
|
797 | 836 | chunks.append(next(it)) |
|
798 | 837 | |
|
799 | 838 | with self.assertRaises(StopIteration): |
|
800 | 839 | next(it) |
|
801 | 840 | |
|
802 | 841 | decompressed = b''.join(chunks) |
|
803 | 842 | self.assertEqual(len(decompressed), input_size) |
|
804 | 843 | |
|
805 | 844 | # And again with buffer protocol. |
|
806 | 845 | it = dctx.read_to_iter(compressed.getvalue()) |
|
807 | 846 | |
|
808 | 847 | chunks = [] |
|
809 | 848 | chunks.append(next(it)) |
|
810 | 849 | chunks.append(next(it)) |
|
811 | 850 | chunks.append(next(it)) |
|
812 | 851 | |
|
813 | 852 | with self.assertRaises(StopIteration): |
|
814 | 853 | next(it) |
|
815 | 854 | |
|
816 | 855 | decompressed = b''.join(chunks) |
|
817 | 856 | self.assertEqual(len(decompressed), input_size) |
|
818 | 857 | |
|
819 | 858 | def test_interesting(self): |
|
820 | 859 | # Found this edge case via fuzzing. |
|
821 | 860 | cctx = zstd.ZstdCompressor(level=1) |
|
822 | 861 | |
|
823 | 862 | source = io.BytesIO() |
|
824 | 863 | |
|
825 | 864 | compressed = io.BytesIO() |
|
826 | 865 | with cctx.stream_writer(compressed) as compressor: |
|
827 | 866 | for i in range(256): |
|
828 | 867 | chunk = b'\0' * 1024 |
|
829 | 868 | compressor.write(chunk) |
|
830 | 869 | source.write(chunk) |
|
831 | 870 | |
|
832 | 871 | dctx = zstd.ZstdDecompressor() |
|
833 | 872 | |
|
834 | 873 | simple = dctx.decompress(compressed.getvalue(), |
|
835 | 874 | max_output_size=len(source.getvalue())) |
|
836 | 875 | self.assertEqual(simple, source.getvalue()) |
|
837 | 876 | |
|
838 | 877 | compressed.seek(0) |
|
839 | 878 | streamed = b''.join(dctx.read_to_iter(compressed)) |
|
840 | 879 | self.assertEqual(streamed, source.getvalue()) |
|
841 | 880 | |
|
842 | 881 | def test_read_write_size(self): |
|
843 | 882 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar')) |
|
844 | 883 | dctx = zstd.ZstdDecompressor() |
|
845 | 884 | for chunk in dctx.read_to_iter(source, read_size=1, write_size=1): |
|
846 | 885 | self.assertEqual(len(chunk), 1) |
|
847 | 886 | |
|
848 | 887 | self.assertEqual(source._read_count, len(source.getvalue())) |
|
849 | 888 | |
|
850 | 889 | def test_magic_less(self): |
|
851 | 890 | params = zstd.CompressionParameters.from_level( |
|
852 | 891 | 1, format=zstd.FORMAT_ZSTD1_MAGICLESS) |
|
853 | 892 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
854 | 893 | frame = cctx.compress(b'foobar') |
|
855 | 894 | |
|
856 | 895 | self.assertNotEqual(frame[0:4], b'\x28\xb5\x2f\xfd') |
|
857 | 896 | |
|
858 | 897 | dctx = zstd.ZstdDecompressor() |
|
859 | 898 | with self.assertRaisesRegexp( |
|
860 | 899 | zstd.ZstdError, 'error determining content size from frame header'): |
|
861 | 900 | dctx.decompress(frame) |
|
862 | 901 | |
|
863 | 902 | dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS) |
|
864 | 903 | res = b''.join(dctx.read_to_iter(frame)) |
|
865 | 904 | self.assertEqual(res, b'foobar') |
|
866 | 905 | |
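
read_to_iter() turns either a bytes-like object or a file-like object into an iterator of decompressed chunks, with read_size and write_size bounding how much is read from the source and emitted per chunk. A rough sketch:

    import io

    import zstandard as zstd

    frame = zstd.ZstdCompressor(level=1).compress(b'foo' * 1024)

    dctx = zstd.ZstdDecompressor()

    pieces = []
    for chunk in dctx.read_to_iter(io.BytesIO(frame), read_size=8192, write_size=8192):
        pieces.append(chunk)

    assert b''.join(pieces) == b'foo' * 1024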
|
867 | 906 | |
|
868 | 907 | @make_cffi |
|
869 | 908 | class TestDecompressor_content_dict_chain(unittest.TestCase): |
|
870 | 909 | def test_bad_inputs_simple(self): |
|
871 | 910 | dctx = zstd.ZstdDecompressor() |
|
872 | 911 | |
|
873 | 912 | with self.assertRaises(TypeError): |
|
874 | 913 | dctx.decompress_content_dict_chain(b'foo') |
|
875 | 914 | |
|
876 | 915 | with self.assertRaises(TypeError): |
|
877 | 916 | dctx.decompress_content_dict_chain((b'foo', b'bar')) |
|
878 | 917 | |
|
879 | 918 | with self.assertRaisesRegexp(ValueError, 'empty input chain'): |
|
880 | 919 | dctx.decompress_content_dict_chain([]) |
|
881 | 920 | |
|
882 | 921 | with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'): |
|
883 | 922 | dctx.decompress_content_dict_chain([u'foo']) |
|
884 | 923 | |
|
885 | 924 | with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'): |
|
886 | 925 | dctx.decompress_content_dict_chain([True]) |
|
887 | 926 | |
|
888 | 927 | with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'): |
|
889 | 928 | dctx.decompress_content_dict_chain([zstd.FRAME_HEADER]) |
|
890 | 929 | |
|
891 | 930 | with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'): |
|
892 | 931 | dctx.decompress_content_dict_chain([b'foo' * 8]) |
|
893 | 932 | |
|
894 | 933 | no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64) |
|
895 | 934 | |
|
896 | 935 | with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'): |
|
897 | 936 | dctx.decompress_content_dict_chain([no_size]) |
|
898 | 937 | |
|
899 | 938 | # Corrupt first frame. |
|
900 | 939 | frame = zstd.ZstdCompressor().compress(b'foo' * 64) |
|
901 | 940 | frame = frame[0:12] + frame[15:] |
|
902 | 941 | with self.assertRaisesRegexp(zstd.ZstdError, |
|
903 | 942 | 'chunk 0 did not decompress full frame'): |
|
904 | 943 | dctx.decompress_content_dict_chain([frame]) |
|
905 | 944 | |
|
906 | 945 | def test_bad_subsequent_input(self): |
|
907 | 946 | initial = zstd.ZstdCompressor().compress(b'foo' * 64) |
|
908 | 947 | |
|
909 | 948 | dctx = zstd.ZstdDecompressor() |
|
910 | 949 | |
|
911 | 950 | with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'): |
|
912 | 951 | dctx.decompress_content_dict_chain([initial, u'foo']) |
|
913 | 952 | |
|
914 | 953 | with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'): |
|
915 | 954 | dctx.decompress_content_dict_chain([initial, None]) |
|
916 | 955 | |
|
917 | 956 | with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'): |
|
918 | 957 | dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER]) |
|
919 | 958 | |
|
920 | 959 | with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'): |
|
921 | 960 | dctx.decompress_content_dict_chain([initial, b'foo' * 8]) |
|
922 | 961 | |
|
923 | 962 | no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64) |
|
924 | 963 | |
|
925 | 964 | with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'): |
|
926 | 965 | dctx.decompress_content_dict_chain([initial, no_size]) |
|
927 | 966 | |
|
928 | 967 | # Corrupt second frame. |
|
929 | 968 | cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b'foo' * 64)) |
|
930 | 969 | frame = cctx.compress(b'bar' * 64) |
|
931 | 970 | frame = frame[0:12] + frame[15:] |
|
932 | 971 | |
|
933 | 972 | with self.assertRaisesRegexp(zstd.ZstdError, 'chunk 1 did not decompress full frame'): |
|
934 | 973 | dctx.decompress_content_dict_chain([initial, frame]) |
|
935 | 974 | |
|
936 | 975 | def test_simple(self): |
|
937 | 976 | original = [ |
|
938 | 977 | b'foo' * 64, |
|
939 | 978 | b'foobar' * 64, |
|
940 | 979 | b'baz' * 64, |
|
941 | 980 | b'foobaz' * 64, |
|
942 | 981 | b'foobarbaz' * 64, |
|
943 | 982 | ] |
|
944 | 983 | |
|
945 | 984 | chunks = [] |
|
946 | 985 | chunks.append(zstd.ZstdCompressor().compress(original[0])) |
|
947 | 986 | for i, chunk in enumerate(original[1:]): |
|
948 | 987 | d = zstd.ZstdCompressionDict(original[i]) |
|
949 | 988 | cctx = zstd.ZstdCompressor(dict_data=d) |
|
950 | 989 | chunks.append(cctx.compress(chunk)) |
|
951 | 990 | |
|
952 | 991 | for i in range(1, len(original)): |
|
953 | 992 | chain = chunks[0:i] |
|
954 | 993 | expected = original[i - 1] |
|
955 | 994 | dctx = zstd.ZstdDecompressor() |
|
956 | 995 | decompressed = dctx.decompress_content_dict_chain(chain) |
|
957 | 996 | self.assertEqual(decompressed, expected) |
|
958 | 997 | |
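
decompress_content_dict_chain() expects a list of frames in which frame N was compressed using the decompressed content of frame N-1 as its dictionary, and it returns the decompressed content of the final frame. A sketch mirroring test_simple:

    import zstandard as zstd

    original = [b'foo' * 64, b'foobar' * 64, b'baz' * 64]

    # First frame is compressed normally; each later frame uses the previous
    # item's plaintext as its dictionary.
    chunks = [zstd.ZstdCompressor().compress(original[0])]
    for i, data in enumerate(original[1:]):
        d = zstd.ZstdCompressionDict(original[i])
        chunks.append(zstd.ZstdCompressor(dict_data=d).compress(data))

    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress_content_dict_chain(chunks) == original[-1]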
|
959 | 998 | |
|
960 | 999 | # TODO enable for CFFI |
|
961 | 1000 | class TestDecompressor_multi_decompress_to_buffer(unittest.TestCase): |
|
962 | 1001 | def test_invalid_inputs(self): |
|
963 | 1002 | dctx = zstd.ZstdDecompressor() |
|
964 | 1003 | |
|
965 | 1004 | with self.assertRaises(TypeError): |
|
966 | 1005 | dctx.multi_decompress_to_buffer(True) |
|
967 | 1006 | |
|
968 | 1007 | with self.assertRaises(TypeError): |
|
969 | 1008 | dctx.multi_decompress_to_buffer((1, 2)) |
|
970 | 1009 | |
|
971 | 1010 | with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'): |
|
972 | 1011 | dctx.multi_decompress_to_buffer([u'foo']) |
|
973 | 1012 | |
|
974 | 1013 | with self.assertRaisesRegexp(ValueError, 'could not determine decompressed size of item 0'): |
|
975 | 1014 | dctx.multi_decompress_to_buffer([b'foobarbaz']) |
|
976 | 1015 | |
|
977 | 1016 | def test_list_input(self): |
|
978 | 1017 | cctx = zstd.ZstdCompressor() |
|
979 | 1018 | |
|
980 | 1019 | original = [b'foo' * 4, b'bar' * 6] |
|
981 | 1020 | frames = [cctx.compress(d) for d in original] |
|
982 | 1021 | |
|
983 | 1022 | dctx = zstd.ZstdDecompressor() |
|
984 | 1023 | result = dctx.multi_decompress_to_buffer(frames) |
|
985 | 1024 | |
|
986 | 1025 | self.assertEqual(len(result), len(frames)) |
|
987 | 1026 | self.assertEqual(result.size(), sum(map(len, original))) |
|
988 | 1027 | |
|
989 | 1028 | for i, data in enumerate(original): |
|
990 | 1029 | self.assertEqual(result[i].tobytes(), data) |
|
991 | 1030 | |
|
992 | 1031 | self.assertEqual(result[0].offset, 0) |
|
993 | 1032 | self.assertEqual(len(result[0]), 12) |
|
994 | 1033 | self.assertEqual(result[1].offset, 12) |
|
995 | 1034 | self.assertEqual(len(result[1]), 18) |
|
996 | 1035 | |
|
997 | 1036 | def test_list_input_frame_sizes(self): |
|
998 | 1037 | cctx = zstd.ZstdCompressor() |
|
999 | 1038 | |
|
1000 | 1039 | original = [b'foo' * 4, b'bar' * 6, b'baz' * 8] |
|
1001 | 1040 | frames = [cctx.compress(d) for d in original] |
|
1002 | 1041 | sizes = struct.pack('=' + 'Q' * len(original), *map(len, original)) |
|
1003 | 1042 | |
|
1004 | 1043 | dctx = zstd.ZstdDecompressor() |
|
1005 | 1044 | result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes) |
|
1006 | 1045 | |
|
1007 | 1046 | self.assertEqual(len(result), len(frames)) |
|
1008 | 1047 | self.assertEqual(result.size(), sum(map(len, original))) |
|
1009 | 1048 | |
|
1010 | 1049 | for i, data in enumerate(original): |
|
1011 | 1050 | self.assertEqual(result[i].tobytes(), data) |
|
1012 | 1051 | |
|
1013 | 1052 | def test_buffer_with_segments_input(self): |
|
1014 | 1053 | cctx = zstd.ZstdCompressor() |
|
1015 | 1054 | |
|
1016 | 1055 | original = [b'foo' * 4, b'bar' * 6] |
|
1017 | 1056 | frames = [cctx.compress(d) for d in original] |
|
1018 | 1057 | |
|
1019 | 1058 | dctx = zstd.ZstdDecompressor() |
|
1020 | 1059 | |
|
1021 | 1060 | segments = struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]), len(frames[1])) |
|
1022 | 1061 | b = zstd.BufferWithSegments(b''.join(frames), segments) |
|
1023 | 1062 | |
|
1024 | 1063 | result = dctx.multi_decompress_to_buffer(b) |
|
1025 | 1064 | |
|
1026 | 1065 | self.assertEqual(len(result), len(frames)) |
|
1027 | 1066 | self.assertEqual(result[0].offset, 0) |
|
1028 | 1067 | self.assertEqual(len(result[0]), 12) |
|
1029 | 1068 | self.assertEqual(result[1].offset, 12) |
|
1030 | 1069 | self.assertEqual(len(result[1]), 18) |
|
1031 | 1070 | |
|
1032 | 1071 | def test_buffer_with_segments_sizes(self): |
|
1033 | 1072 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
1034 | 1073 | original = [b'foo' * 4, b'bar' * 6, b'baz' * 8] |
|
1035 | 1074 | frames = [cctx.compress(d) for d in original] |
|
1036 | 1075 | sizes = struct.pack('=' + 'Q' * len(original), *map(len, original)) |
|
1037 | 1076 | |
|
1038 | 1077 | segments = struct.pack('=QQQQQQ', 0, len(frames[0]), |
|
1039 | 1078 | len(frames[0]), len(frames[1]), |
|
1040 | 1079 | len(frames[0]) + len(frames[1]), len(frames[2])) |
|
1041 | 1080 | b = zstd.BufferWithSegments(b''.join(frames), segments) |
|
1042 | 1081 | |
|
1043 | 1082 | dctx = zstd.ZstdDecompressor() |
|
1044 | 1083 | result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes) |
|
1045 | 1084 | |
|
1046 | 1085 | self.assertEqual(len(result), len(frames)) |
|
1047 | 1086 | self.assertEqual(result.size(), sum(map(len, original))) |
|
1048 | 1087 | |
|
1049 | 1088 | for i, data in enumerate(original): |
|
1050 | 1089 | self.assertEqual(result[i].tobytes(), data) |
|
1051 | 1090 | |
|
1052 | 1091 | def test_buffer_with_segments_collection_input(self): |
|
1053 | 1092 | cctx = zstd.ZstdCompressor() |
|
1054 | 1093 | |
|
1055 | 1094 | original = [ |
|
1056 | 1095 | b'foo0' * 2, |
|
1057 | 1096 | b'foo1' * 3, |
|
1058 | 1097 | b'foo2' * 4, |
|
1059 | 1098 | b'foo3' * 5, |
|
1060 | 1099 | b'foo4' * 6, |
|
1061 | 1100 | ] |
|
1062 | 1101 | |
|
1063 | 1102 | frames = cctx.multi_compress_to_buffer(original) |
|
1064 | 1103 | |
|
1065 | 1104 | # Check round trip. |
|
1066 | 1105 | dctx = zstd.ZstdDecompressor() |
|
1067 | 1106 | decompressed = dctx.multi_decompress_to_buffer(frames, threads=3) |
|
1068 | 1107 | |
|
1069 | 1108 | self.assertEqual(len(decompressed), len(original)) |
|
1070 | 1109 | |
|
1071 | 1110 | for i, data in enumerate(original): |
|
1072 | 1111 | self.assertEqual(data, decompressed[i].tobytes()) |
|
1073 | 1112 | |
|
1074 | 1113 | # And a manual mode. |
|
1075 | 1114 | b = b''.join([frames[0].tobytes(), frames[1].tobytes()]) |
|
1076 | 1115 | b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ', |
|
1077 | 1116 | 0, len(frames[0]), |
|
1078 | 1117 | len(frames[0]), len(frames[1]))) |
|
1079 | 1118 | |
|
1080 | 1119 | b = b''.join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()]) |
|
1081 | 1120 | b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ', |
|
1082 | 1121 | 0, len(frames[2]), |
|
1083 | 1122 | len(frames[2]), len(frames[3]), |
|
1084 | 1123 | len(frames[2]) + len(frames[3]), len(frames[4]))) |
|
1085 | 1124 | |
|
1086 | 1125 | c = zstd.BufferWithSegmentsCollection(b1, b2) |
|
1087 | 1126 | |
|
1088 | 1127 | dctx = zstd.ZstdDecompressor() |
|
1089 | 1128 | decompressed = dctx.multi_decompress_to_buffer(c) |
|
1090 | 1129 | |
|
1091 | 1130 | self.assertEqual(len(decompressed), 5) |
|
1092 | 1131 | for i in range(5): |
|
1093 | 1132 | self.assertEqual(decompressed[i].tobytes(), original[i]) |
|
1094 | 1133 | |
|
1095 | 1134 | def test_dict(self): |
|
1096 | 1135 | d = zstd.train_dictionary(16384, generate_samples(), k=64, d=16) |
|
1097 | 1136 | |
|
1098 | 1137 | cctx = zstd.ZstdCompressor(dict_data=d, level=1) |
|
1099 | 1138 | frames = [cctx.compress(s) for s in generate_samples()] |
|
1100 | 1139 | |
|
1101 | 1140 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
1102 | 1141 | result = dctx.multi_decompress_to_buffer(frames) |
|
1103 | 1142 | self.assertEqual([o.tobytes() for o in result], generate_samples()) |
|
1104 | 1143 | |
|
1105 | 1144 | def test_multiple_threads(self): |
|
1106 | 1145 | cctx = zstd.ZstdCompressor() |
|
1107 | 1146 | |
|
1108 | 1147 | frames = [] |
|
1109 | 1148 | frames.extend(cctx.compress(b'x' * 64) for i in range(256)) |
|
1110 | 1149 | frames.extend(cctx.compress(b'y' * 64) for i in range(256)) |
|
1111 | 1150 | |
|
1112 | 1151 | dctx = zstd.ZstdDecompressor() |
|
1113 | 1152 | result = dctx.multi_decompress_to_buffer(frames, threads=-1) |
|
1114 | 1153 | |
|
1115 | 1154 | self.assertEqual(len(result), len(frames)) |
|
1116 | 1155 | self.assertEqual(result.size(), 2 * 64 * 256) |
|
1117 | 1156 | self.assertEqual(result[0].tobytes(), b'x' * 64) |
|
1118 | 1157 | self.assertEqual(result[256].tobytes(), b'y' * 64) |
|
1119 | 1158 | |
|
1120 | 1159 | def test_item_failure(self): |
|
1121 | 1160 | cctx = zstd.ZstdCompressor() |
|
1122 | 1161 | frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)] |
|
1123 | 1162 | |
|
1124 | 1163 | frames[1] = frames[1][0:15] + b'extra' + frames[1][15:] |
|
1125 | 1164 | |
|
1126 | 1165 | dctx = zstd.ZstdDecompressor() |
|
1127 | 1166 | |
|
1128 | 1167 | with self.assertRaisesRegexp(zstd.ZstdError, |
|
1129 | 1168 | 'error decompressing item 1: (' |
|
1130 | 1169 | 'Corrupted block|' |
|
1131 | 1170 | 'Destination buffer is too small)'): |
|
1132 | 1171 | dctx.multi_decompress_to_buffer(frames) |
|
1133 | 1172 | |
|
1134 | 1173 | with self.assertRaisesRegexp(zstd.ZstdError, |
|
1135 | 1174 | 'error decompressing item 1: (' |
|
1136 | 1175 | 'Corrupted block|' |
|
1137 | 1176 | 'Destination buffer is too small)'): |
|
1138 | 1177 | dctx.multi_decompress_to_buffer(frames, threads=2) |
|
1139 | 1178 |
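
multi_decompress_to_buffer() decompresses a batch of frames into one contiguous buffer, optionally across threads; each frame must declare its content size, or decompressed_sizes must be supplied. A sketch of the common case (C extension backend only, per the TODO above):

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    original = [b'foo' * 4, b'bar' * 6]
    frames = [cctx.compress(d) for d in original]

    dctx = zstd.ZstdDecompressor()
    # threads=-1 requests one worker per logical CPU.
    result = dctx.multi_decompress_to_buffer(frames, threads=-1)

    assert [segment.tobytes() for segment in result] == original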
@@ -1,59 +1,64 | |||
|
1 | 1 | from __future__ import unicode_literals |
|
2 | 2 | |
|
3 | 3 | import unittest |
|
4 | 4 | |
|
5 | 5 | import zstandard as zstd |
|
6 | 6 | |
|
7 | 7 | from . common import ( |
|
8 | 8 | make_cffi, |
|
9 | 9 | ) |
|
10 | 10 | |
|
11 | 11 | |
|
12 | 12 | @make_cffi |
|
13 | 13 | class TestModuleAttributes(unittest.TestCase): |
|
14 | 14 | def test_version(self): |
|
15 |
self.assertEqual(zstd.ZSTD_VERSION, (1, 3, |
|
|
15 | self.assertEqual(zstd.ZSTD_VERSION, (1, 3, 6)) | |
|
16 | ||
|
17 | self.assertEqual(zstd.__version__, '0.10.1') | |
|
16 | 18 | |
|
17 | 19 | def test_constants(self): |
|
18 | 20 | self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22) |
|
19 | 21 | self.assertEqual(zstd.FRAME_HEADER, b'\x28\xb5\x2f\xfd') |
|
20 | 22 | |
|
21 | 23 | def test_hasattr(self): |
|
22 | 24 | attrs = ( |
|
23 | 25 | 'CONTENTSIZE_UNKNOWN', |
|
24 | 26 | 'CONTENTSIZE_ERROR', |
|
25 | 27 | 'COMPRESSION_RECOMMENDED_INPUT_SIZE', |
|
26 | 28 | 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE', |
|
27 | 29 | 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE', |
|
28 | 30 | 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE', |
|
29 | 31 | 'MAGIC_NUMBER', |
|
32 | 'BLOCKSIZELOG_MAX', | |
|
33 | 'BLOCKSIZE_MAX', | |
|
30 | 34 | 'WINDOWLOG_MIN', |
|
31 | 35 | 'WINDOWLOG_MAX', |
|
32 | 36 | 'CHAINLOG_MIN', |
|
33 | 37 | 'CHAINLOG_MAX', |
|
34 | 38 | 'HASHLOG_MIN', |
|
35 | 39 | 'HASHLOG_MAX', |
|
36 | 40 | 'HASHLOG3_MAX', |
|
37 | 41 | 'SEARCHLOG_MIN', |
|
38 | 42 | 'SEARCHLOG_MAX', |
|
39 | 43 | 'SEARCHLENGTH_MIN', |
|
40 | 44 | 'SEARCHLENGTH_MAX', |
|
41 | 45 | 'TARGETLENGTH_MIN', |
|
46 | 'TARGETLENGTH_MAX', | |
|
42 | 47 | 'LDM_MINMATCH_MIN', |
|
43 | 48 | 'LDM_MINMATCH_MAX', |
|
44 | 49 | 'LDM_BUCKETSIZELOG_MAX', |
|
45 | 50 | 'STRATEGY_FAST', |
|
46 | 51 | 'STRATEGY_DFAST', |
|
47 | 52 | 'STRATEGY_GREEDY', |
|
48 | 53 | 'STRATEGY_LAZY', |
|
49 | 54 | 'STRATEGY_LAZY2', |
|
50 | 55 | 'STRATEGY_BTLAZY2', |
|
51 | 56 | 'STRATEGY_BTOPT', |
|
52 | 57 | 'STRATEGY_BTULTRA', |
|
53 | 58 | 'DICT_TYPE_AUTO', |
|
54 | 59 | 'DICT_TYPE_RAWCONTENT', |
|
55 | 60 | 'DICT_TYPE_FULLDICT', |
|
56 | 61 | ) |
|
57 | 62 | |
|
58 | 63 | for a in attrs: |
|
59 | 64 | self.assertTrue(hasattr(zstd, a), a) |
@@ -1,87 +1,88 | |||
|
1 | 1 | import struct |
|
2 | 2 | import sys |
|
3 | 3 | import unittest |
|
4 | 4 | |
|
5 | 5 | import zstandard as zstd |
|
6 | 6 | |
|
7 | 7 | from . common import ( |
|
8 | 8 | generate_samples, |
|
9 | 9 | make_cffi, |
|
10 | 10 | ) |
|
11 | 11 | |
|
12 | 12 | if sys.version_info[0] >= 3: |
|
13 | 13 | int_type = int |
|
14 | 14 | else: |
|
15 | 15 | int_type = long |
|
16 | 16 | |
|
17 | 17 | |
|
18 | 18 | @make_cffi |
|
19 | 19 | class TestTrainDictionary(unittest.TestCase): |
|
20 | 20 | def test_no_args(self): |
|
21 | 21 | with self.assertRaises(TypeError): |
|
22 | 22 | zstd.train_dictionary() |
|
23 | 23 | |
|
24 | 24 | def test_bad_args(self): |
|
25 | 25 | with self.assertRaises(TypeError): |
|
26 | 26 | zstd.train_dictionary(8192, u'foo') |
|
27 | 27 | |
|
28 | 28 | with self.assertRaises(ValueError): |
|
29 | 29 | zstd.train_dictionary(8192, [u'foo']) |
|
30 | 30 | |
|
31 | 31 | def test_no_params(self): |
|
32 | 32 | d = zstd.train_dictionary(8192, generate_samples()) |
|
33 | 33 | self.assertIsInstance(d.dict_id(), int_type) |
|
34 | 34 | |
|
35 | 35 | # The dictionary ID may be different across platforms. |
|
36 | 36 | expected = b'\x37\xa4\x30\xec' + struct.pack('<I', d.dict_id()) |
|
37 | 37 | |
|
38 | 38 | data = d.as_bytes() |
|
39 | 39 | self.assertEqual(data[0:8], expected) |
|
40 | 40 | |
|
41 | 41 | def test_basic(self): |
|
42 | 42 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16) |
|
43 | 43 | self.assertIsInstance(d.dict_id(), int_type) |
|
44 | 44 | |
|
45 | 45 | data = d.as_bytes() |
|
46 | 46 | self.assertEqual(data[0:4], b'\x37\xa4\x30\xec') |
|
47 | 47 | |
|
48 | 48 | self.assertEqual(d.k, 64) |
|
49 | 49 | self.assertEqual(d.d, 16) |
|
50 | 50 | |
|
51 | 51 | def test_set_dict_id(self): |
|
52 | 52 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16, |
|
53 | 53 | dict_id=42) |
|
54 | 54 | self.assertEqual(d.dict_id(), 42) |
|
55 | 55 | |
|
56 | 56 | def test_optimize(self): |
|
57 | 57 | d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1, |
|
58 | 58 | d=16) |
|
59 | 59 | |
|
60 | self.assertEqual(d.k, 50) | |
|
60 | # This varies by platform. | |
|
61 | self.assertIn(d.k, (50, 2000)) | |
|
61 | 62 | self.assertEqual(d.d, 16) |
|
62 | 63 | |
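
A usage sketch of the training API these tests cover, reusing the sample-building pattern from test_dictionary in the decompressor tests (the sample data is illustrative; real inputs should be many representative small documents):

    import zstandard as zstd

    samples = []
    for i in range(128):
        samples.append(b'foo' * 64)
        samples.append(b'bar' * 64)
        samples.append(b'foobar' * 64)

    d = zstd.train_dictionary(8192, samples)

    # The trained dictionary seeds both compression and decompression contexts.
    cctx = zstd.ZstdCompressor(dict_data=d, level=1)
    dctx = zstd.ZstdDecompressor(dict_data=d)

    frame = cctx.compress(b'foobar' * 64)
    assert dctx.decompress(frame) == b'foobar' * 64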
|
63 | 64 | @make_cffi |
|
64 | 65 | class TestCompressionDict(unittest.TestCase): |
|
65 | 66 | def test_bad_mode(self): |
|
66 | 67 | with self.assertRaisesRegexp(ValueError, 'invalid dictionary load mode'): |
|
67 | 68 | zstd.ZstdCompressionDict(b'foo', dict_type=42) |
|
68 | 69 | |
|
69 | 70 | def test_bad_precompute_compress(self): |
|
70 | 71 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16) |
|
71 | 72 | |
|
72 | 73 | with self.assertRaisesRegexp(ValueError, 'must specify one of level or '): |
|
73 | 74 | d.precompute_compress() |
|
74 | 75 | |
|
75 | 76 | with self.assertRaisesRegexp(ValueError, 'must only specify one of level or '): |
|
76 | 77 | d.precompute_compress(level=3, |
|
77 | 78 | compression_params=zstd.CompressionParameters()) |
|
78 | 79 | |
|
79 | 80 | def test_precompute_compress_rawcontent(self): |
|
80 | 81 | d = zstd.ZstdCompressionDict(b'dictcontent' * 64, |
|
81 | 82 | dict_type=zstd.DICT_TYPE_RAWCONTENT) |
|
82 | 83 | d.precompute_compress(level=1) |
|
83 | 84 | |
|
84 | 85 | d = zstd.ZstdCompressionDict(b'dictcontent' * 64, |
|
85 | 86 | dict_type=zstd.DICT_TYPE_FULLDICT) |
|
86 | 87 | with self.assertRaisesRegexp(zstd.ZstdError, 'unable to precompute dictionary'): |
|
87 | 88 | d.precompute_compress(level=1) |
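
precompute_compress() digests a dictionary for a fixed level (or a CompressionParameters instance, but not both) so the work is done once up front; per the tests above it succeeds for raw-content dictionaries and fails when the bytes are not a real full dictionary. A hedged sketch, with the follow-on compression step being an assumed use rather than something these tests show:

    import zstandard as zstd

    # Raw content: the bytes are used directly rather than parsed as a zstd dict.
    d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
                                 dict_type=zstd.DICT_TYPE_RAWCONTENT)

    # Exactly one of level or compression_params may be given.
    d.precompute_compress(level=1)

    cctx = zstd.ZstdCompressor(dict_data=d, level=1)
    frame = cctx.compress(b'dictcontent' * 16)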
@@ -1,62 +1,65 | |||
|
1 | 1 | # Copyright (c) 2017-present, Gregory Szorc |
|
2 | 2 | # All rights reserved. |
|
3 | 3 | # |
|
4 | 4 | # This software may be modified and distributed under the terms |
|
5 | 5 | # of the BSD license. See the LICENSE file for details. |
|
6 | 6 | |
|
7 | 7 | """Python interface to the Zstandard (zstd) compression library.""" |
|
8 | 8 | |
|
9 | 9 | from __future__ import absolute_import, unicode_literals |
|
10 | 10 | |
|
11 | 11 | # This module serves 2 roles: |
|
12 | 12 | # |
|
13 | 13 | # 1) Export the C or CFFI "backend" through a central module. |
|
14 | 14 | # 2) Implement additional functionality built on top of C or CFFI backend. |
|
15 | 15 | |
|
16 | 16 | import os |
|
17 | 17 | import platform |
|
18 | 18 | |
|
19 | 19 | # Some Python implementations don't support C extensions. That's why we have |
|
20 | 20 | # a CFFI implementation in the first place. The code here import one of our |
|
21 | 21 | # "backends" then re-exports the symbols from this module. For convenience, |
|
22 | 22 | # we support falling back to the CFFI backend if the C extension can't be |
|
23 | 23 | # imported. But for performance reasons, we only do this on unknown Python |
|
24 | 24 | # implementation. Notably, for CPython we require the C extension by default. |
|
25 | 25 | # Because someone will inevitably want special behavior, the behavior is |
|
26 | 26 | # configurable via an environment variable. A potentially better way to handle |
|
27 | 27 | # this is to import a special ``__importpolicy__`` module or something |
|
28 | 28 | # defining a variable and `setup.py` could write the file with whatever |
|
29 | 29 | # policy was specified at build time. Until someone needs it, we go with |
|
30 | 30 | # the hacky but simple environment variable approach. |
|
31 | 31 | _module_policy = os.environ.get('PYTHON_ZSTANDARD_IMPORT_POLICY', 'default') |
|
32 | 32 | |
|
33 | 33 | if _module_policy == 'default': |
|
34 | 34 | if platform.python_implementation() in ('CPython',): |
|
35 | 35 | from zstd import * |
|
36 | 36 | backend = 'cext' |
|
37 | 37 | elif platform.python_implementation() in ('PyPy',): |
|
38 | 38 | from zstd_cffi import * |
|
39 | 39 | backend = 'cffi' |
|
40 | 40 | else: |
|
41 | 41 | try: |
|
42 | 42 | from zstd import * |
|
43 | 43 | backend = 'cext' |
|
44 | 44 | except ImportError: |
|
45 | 45 | from zstd_cffi import * |
|
46 | 46 | backend = 'cffi' |
|
47 | 47 | elif _module_policy == 'cffi_fallback': |
|
48 | 48 | try: |
|
49 | 49 | from zstd import * |
|
50 | 50 | backend = 'cext' |
|
51 | 51 | except ImportError: |
|
52 | 52 | from zstd_cffi import * |
|
53 | 53 | backend = 'cffi' |
|
54 | 54 | elif _module_policy == 'cext': |
|
55 | 55 | from zstd import * |
|
56 | 56 | backend = 'cext' |
|
57 | 57 | elif _module_policy == 'cffi': |
|
58 | 58 | from zstd_cffi import * |
|
59 | 59 | backend = 'cffi' |
|
60 | 60 | else: |
|
61 | 61 | raise ImportError('unknown module import policy: %s; use default, cffi_fallback, ' |
|
62 | 62 | 'cext, or cffi' % _module_policy) |
|
63 | ||
|
64 | # Keep this in sync with python-zstandard.h. | |
|
65 | __version__ = '0.10.1' |
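
The only knob here is the environment variable read at import time; a minimal sketch of forcing a backend (the value must be set before zstandard is first imported):

    import os

    # One of: default, cffi_fallback, cext, cffi (mirroring the branches above).
    os.environ['PYTHON_ZSTANDARD_IMPORT_POLICY'] = 'cffi_fallback'

    import zstandard

    # 'cext' if the C extension loaded, 'cffi' otherwise.
    print(zstandard.backend, zstandard.__version__)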
@@ -1,342 +1,344 | |||
|
1 | 1 | /** |
|
2 | 2 | * Copyright (c) 2016-present, Gregory Szorc |
|
3 | 3 | * All rights reserved. |
|
4 | 4 | * |
|
5 | 5 | * This software may be modified and distributed under the terms |
|
6 | 6 | * of the BSD license. See the LICENSE file for details. |
|
7 | 7 | */ |
|
8 | 8 | |
|
9 | 9 | /* A Python C extension for Zstandard. */ |
|
10 | 10 | |
|
11 | 11 | #if defined(_WIN32) |
|
12 | 12 | #define WIN32_LEAN_AND_MEAN |
|
13 | 13 | #include <Windows.h> |
|
14 | 14 | #elif defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) |
|
15 | 15 | #include <sys/types.h> |
|
16 | 16 | #include <sys/sysctl.h> |
|
17 | 17 | #endif |
|
18 | 18 | |
|
19 | 19 | #include "python-zstandard.h" |
|
20 | 20 | |
|
21 | 21 | PyObject *ZstdError; |
|
22 | 22 | |
|
23 | 23 | PyDoc_STRVAR(estimate_decompression_context_size__doc__, |
|
24 | 24 | "estimate_decompression_context_size()\n" |
|
25 | 25 | "\n" |
|
26 | 26 | "Estimate the amount of memory allocated to a decompression context.\n" |
|
27 | 27 | ); |
|
28 | 28 | |
|
29 | 29 | static PyObject* estimate_decompression_context_size(PyObject* self) { |
|
30 | 30 | return PyLong_FromSize_t(ZSTD_estimateDCtxSize()); |
|
31 | 31 | } |
|
32 | 32 | |
|
33 | 33 | PyDoc_STRVAR(frame_content_size__doc__, |
|
34 | 34 | "frame_content_size(data)\n" |
|
35 | 35 | "\n" |
|
36 | 36 | "Obtain the decompressed size of a frame." |
|
37 | 37 | ); |
|
38 | 38 | |
|
39 | 39 | static PyObject* frame_content_size(PyObject* self, PyObject* args, PyObject* kwargs) { |
|
40 | 40 | static char* kwlist[] = { |
|
41 | 41 | "source", |
|
42 | 42 | NULL |
|
43 | 43 | }; |
|
44 | 44 | |
|
45 | 45 | Py_buffer source; |
|
46 | 46 | PyObject* result = NULL; |
|
47 | 47 | unsigned long long size; |
|
48 | 48 | |
|
49 | 49 | #if PY_MAJOR_VERSION >= 3 |
|
50 | 50 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_content_size", |
|
51 | 51 | #else |
|
52 | 52 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_content_size", |
|
53 | 53 | #endif |
|
54 | 54 | kwlist, &source)) { |
|
55 | 55 | return NULL; |
|
56 | 56 | } |
|
57 | 57 | |
|
58 | 58 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { |
|
59 | 59 | PyErr_SetString(PyExc_ValueError, |
|
60 | 60 | "data buffer should be contiguous and have at most one dimension"); |
|
61 | 61 | goto finally; |
|
62 | 62 | } |
|
63 | 63 | |
|
64 | 64 | size = ZSTD_getFrameContentSize(source.buf, source.len); |
|
65 | 65 | |
|
66 | 66 | if (size == ZSTD_CONTENTSIZE_ERROR) { |
|
67 | 67 | PyErr_SetString(ZstdError, "error when determining content size"); |
|
68 | 68 | } |
|
69 | 69 | else if (size == ZSTD_CONTENTSIZE_UNKNOWN) { |
|
70 | 70 | result = PyLong_FromLong(-1); |
|
71 | 71 | } |
|
72 | 72 | else { |
|
73 | 73 | result = PyLong_FromUnsignedLongLong(size); |
|
74 | 74 | } |
|
75 | 75 | |
|
76 | 76 | finally: |
|
77 | 77 | PyBuffer_Release(&source); |
|
78 | 78 | |
|
79 | 79 | return result; |
|
80 | 80 | } |
|
81 | 81 | |
|
82 | 82 | PyDoc_STRVAR(frame_header_size__doc__, |
|
83 | 83 | "frame_header_size(data)\n" |
|
84 | 84 | "\n" |
|
85 | 85 | "Obtain the size of a frame header.\n" |
|
86 | 86 | ); |
|
87 | 87 | |
|
88 | 88 | static PyObject* frame_header_size(PyObject* self, PyObject* args, PyObject* kwargs) { |
|
89 | 89 | static char* kwlist[] = { |
|
90 | 90 | "source", |
|
91 | 91 | NULL |
|
92 | 92 | }; |
|
93 | 93 | |
|
94 | 94 | Py_buffer source; |
|
95 | 95 | PyObject* result = NULL; |
|
96 | 96 | size_t zresult; |
|
97 | 97 | |
|
98 | 98 | #if PY_MAJOR_VERSION >= 3 |
|
99 | 99 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_header_size", |
|
100 | 100 | #else |
|
101 | 101 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_header_size", |
|
102 | 102 | #endif |
|
103 | 103 | kwlist, &source)) { |
|
104 | 104 | return NULL; |
|
105 | 105 | } |
|
106 | 106 | |
|
107 | 107 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { |
|
108 | 108 | PyErr_SetString(PyExc_ValueError, |
|
109 | 109 | "data buffer should be contiguous and have at most one dimension"); |
|
110 | 110 | goto finally; |
|
111 | 111 | } |
|
112 | 112 | |
|
113 | 113 | zresult = ZSTD_frameHeaderSize(source.buf, source.len); |
|
114 | 114 | if (ZSTD_isError(zresult)) { |
|
115 | 115 | PyErr_Format(ZstdError, "could not determine frame header size: %s", |
|
116 | 116 | ZSTD_getErrorName(zresult)); |
|
117 | 117 | } |
|
118 | 118 | else { |
|
119 | 119 | result = PyLong_FromSize_t(zresult); |
|
120 | 120 | } |
|
121 | 121 | |
|
122 | 122 | finally: |
|
123 | 123 | |
|
124 | 124 | PyBuffer_Release(&source); |
|
125 | 125 | |
|
126 | 126 | return result; |
|
127 | 127 | } |
|
128 | 128 | |
|
129 | 129 | PyDoc_STRVAR(get_frame_parameters__doc__, |
|
130 | 130 | "get_frame_parameters(data)\n" |
|
131 | 131 | "\n" |
|
132 | 132 | "Obtains a ``FrameParameters`` instance by parsing data.\n"); |
|
133 | 133 | |
|
134 | 134 | PyDoc_STRVAR(train_dictionary__doc__, |
|
135 | 135 | "train_dictionary(dict_size, samples, k=None, d=None, steps=None,\n" |
|
136 | 136 | " threads=None,notifications=0, dict_id=0, level=0)\n" |
|
137 | 137 | "\n" |
|
138 | 138 | "Train a dictionary from sample data using the COVER algorithm.\n" |
|
139 | 139 | "\n" |
|
140 | 140 | "A compression dictionary of size ``dict_size`` will be created from the\n" |
|
141 | 141 | "iterable of ``samples``. The raw dictionary bytes will be returned.\n" |
|
142 | 142 | "\n" |
|
143 | 143 | "The COVER algorithm has 2 parameters: ``k`` and ``d``. These control the\n" |
|
144 | 144 | "*segment size* and *dmer size*. A reasonable range for ``k`` is\n" |
|
145 | 145 | "``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n" |
|
146 | 146 | "``d`` must be less than or equal to ``k``.\n" |
|
147 | 147 | "\n" |
|
148 | 148 | "``steps`` can be specified to control the number of steps through potential\n" |
|
149 | 149 | "values of ``k`` and ``d`` to try. ``k`` and ``d`` will only be varied if\n" |
|
150 | 150 | "those arguments are not defined. i.e. if ``d`` is ``8``, then only ``k``\n" |
|
151 | 151 | "will be varied in this mode.\n" |
|
152 | 152 | "\n" |
|
153 | 153 | "``threads`` can specify how many threads to use to test various ``k`` and\n" |
|
154 | 154 | "``d`` values. ``-1`` will use as many threads as available CPUs. By default,\n" |
|
155 | 155 | "a single thread is used.\n" |
|
156 | 156 | "\n" |
|
157 | 157 | "When ``k`` and ``d`` are not defined, default values are used and the\n" |
|
158 | 158 | "algorithm will perform multiple iterations - or steps - to try to find\n" |
|
159 | 159 | "ideal parameters. If both ``k`` and ``d`` are specified, then those values\n" |
|
160 | 160 | "will be used. ``steps`` or ``threads`` triggers optimization mode to test\n" |
|
161 | 161 | "multiple ``k`` and ``d`` variations.\n" |
|
162 | 162 | ); |
|
163 | 163 | |
|
164 | 164 | static char zstd_doc[] = "Interface to zstandard"; |
|
165 | 165 | |
|
166 | 166 | static PyMethodDef zstd_methods[] = { |
|
167 | 167 | { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size, |
|
168 | 168 | METH_NOARGS, estimate_decompression_context_size__doc__ }, |
|
169 | 169 | { "frame_content_size", (PyCFunction)frame_content_size, |
|
170 | 170 | METH_VARARGS | METH_KEYWORDS, frame_content_size__doc__ }, |
|
171 | 171 | { "frame_header_size", (PyCFunction)frame_header_size, |
|
172 | 172 | METH_VARARGS | METH_KEYWORDS, frame_header_size__doc__ }, |
|
173 | 173 | { "get_frame_parameters", (PyCFunction)get_frame_parameters, |
|
174 | 174 | METH_VARARGS | METH_KEYWORDS, get_frame_parameters__doc__ }, |
|
175 | 175 | { "train_dictionary", (PyCFunction)train_dictionary, |
|
176 | 176 | METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ }, |
|
177 | 177 | { NULL, NULL } |
|
178 | 178 | }; |
|
179 | 179 | |
|
180 | 180 | void bufferutil_module_init(PyObject* mod); |
|
181 | 181 | void compressobj_module_init(PyObject* mod); |
|
182 | 182 | void compressor_module_init(PyObject* mod); |
|
183 | 183 | void compressionparams_module_init(PyObject* mod); |
|
184 | 184 | void constants_module_init(PyObject* mod); |
|
185 | void compressionchunker_module_init(PyObject* mod); | |
|
185 | 186 | void compressiondict_module_init(PyObject* mod); |
|
186 | 187 | void compressionreader_module_init(PyObject* mod); |
|
187 | 188 | void compressionwriter_module_init(PyObject* mod); |
|
188 | 189 | void compressoriterator_module_init(PyObject* mod); |
|
189 | 190 | void decompressor_module_init(PyObject* mod); |
|
190 | 191 | void decompressobj_module_init(PyObject* mod); |
|
191 | 192 | void decompressionreader_module_init(PyObject *mod); |
|
192 | 193 | void decompressionwriter_module_init(PyObject* mod); |
|
193 | 194 | void decompressoriterator_module_init(PyObject* mod); |
|
194 | 195 | void frameparams_module_init(PyObject* mod); |
|
195 | 196 | |
|
196 | 197 | void zstd_module_init(PyObject* m) { |
|
197 | 198 | /* python-zstandard relies on unstable zstd C API features. This means |
|
198 | 199 | that changes in zstd may break expectations in python-zstandard. |
|
199 | 200 | |
|
200 | 201 | python-zstandard is distributed with a copy of the zstd sources. |
|
201 | 202 | python-zstandard is only guaranteed to work with the bundled version |
|
202 | 203 | of zstd. |
|
203 | 204 | |
|
204 | 205 | However, downstream redistributors or packagers may unbundle zstd |
|
205 | 206 | from python-zstandard. This can result in a mismatch between zstd |
|
206 | 207 | versions and API semantics. This essentially "voids the warranty" |
|
207 | 208 | of python-zstandard and may cause undefined behavior. |
|
208 | 209 | |
|
209 | 210 | We detect this mismatch here and refuse to load the module if this |
|
210 | 211 | scenario is detected. |
|
211 | 212 | */ |
|
212 | if (ZSTD_VERSION_NUMBER != 1030

|
213 | 214 | PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version"); |
|
214 | 215 | return; |
|
215 | 216 | } |
|
216 | 217 | |
|
217 | 218 | bufferutil_module_init(m); |
|
218 | 219 | compressionparams_module_init(m); |
|
219 | 220 | compressiondict_module_init(m); |
|
220 | 221 | compressobj_module_init(m); |
|
221 | 222 | compressor_module_init(m); |
|
223 | compressionchunker_module_init(m); | |
|
222 | 224 | compressionreader_module_init(m); |
|
223 | 225 | compressionwriter_module_init(m); |
|
224 | 226 | compressoriterator_module_init(m); |
|
225 | 227 | constants_module_init(m); |
|
226 | 228 | decompressor_module_init(m); |
|
227 | 229 | decompressobj_module_init(m); |
|
228 | 230 | decompressionreader_module_init(m); |
|
229 | 231 | decompressionwriter_module_init(m); |
|
230 | 232 | decompressoriterator_module_init(m); |
|
231 | 233 | frameparams_module_init(m); |
|
232 | 234 | } |
|
233 | 235 | |
|
234 | 236 | #if defined(__GNUC__) && (__GNUC__ >= 4) |
|
235 | 237 | # define PYTHON_ZSTD_VISIBILITY __attribute__ ((visibility ("default"))) |
|
236 | 238 | #else |
|
237 | 239 | # define PYTHON_ZSTD_VISIBILITY |
|
238 | 240 | #endif |
|
239 | 241 | |
|
240 | 242 | #if PY_MAJOR_VERSION >= 3 |
|
241 | 243 | static struct PyModuleDef zstd_module = { |
|
242 | 244 | PyModuleDef_HEAD_INIT, |
|
243 | 245 | "zstd", |
|
244 | 246 | zstd_doc, |
|
245 | 247 | -1, |
|
246 | 248 | zstd_methods |
|
247 | 249 | }; |
|
248 | 250 | |
|
249 | 251 | PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC PyInit_zstd(void) { |
|
250 | 252 | PyObject *m = PyModule_Create(&zstd_module); |
|
251 | 253 | if (m) { |
|
252 | 254 | zstd_module_init(m); |
|
253 | 255 | if (PyErr_Occurred()) { |
|
254 | 256 | Py_DECREF(m); |
|
255 | 257 | m = NULL; |
|
256 | 258 | } |
|
257 | 259 | } |
|
258 | 260 | return m; |
|
259 | 261 | } |
|
260 | 262 | #else |
|
261 | 263 | PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC initzstd(void) { |
|
262 | 264 | PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc); |
|
263 | 265 | if (m) { |
|
264 | 266 | zstd_module_init(m); |
|
265 | 267 | } |
|
266 | 268 | } |
|
267 | 269 | #endif |
|
268 | 270 | |
|
269 | 271 | /* Attempt to resolve the number of CPUs in the system. */ |
|
270 | 272 | int cpu_count() { |
|
271 | 273 | int count = 0; |
|
272 | 274 | |
|
273 | 275 | #if defined(_WIN32) |
|
274 | 276 | SYSTEM_INFO si; |
|
275 | 277 | si.dwNumberOfProcessors = 0; |
|
276 | 278 | GetSystemInfo(&si); |
|
277 | 279 | count = si.dwNumberOfProcessors; |
|
278 | 280 | #elif defined(__APPLE__) |
|
279 | 281 | int num; |
|
280 | 282 | size_t size = sizeof(int); |
|
281 | 283 | |
|
282 | 284 | if (0 == sysctlbyname("hw.logicalcpu", &num, &size, NULL, 0)) { |
|
283 | 285 | count = num; |
|
284 | 286 | } |
|
285 | 287 | #elif defined(__linux__) |
|
286 | 288 | count = sysconf(_SC_NPROCESSORS_ONLN); |
|
287 | 289 | #elif defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) |
|
288 | 290 | int mib[2]; |
|
289 | 291 | size_t len = sizeof(count); |
|
290 | 292 | mib[0] = CTL_HW; |
|
291 | 293 | mib[1] = HW_NCPU; |
|
292 | 294 | if (0 != sysctl(mib, 2, &count, &len, NULL, 0)) { |
|
293 | 295 | count = 0; |
|
294 | 296 | } |
|
295 | 297 | #elif defined(__hpux) |
|
296 | 298 | count = mpctl(MPC_GETNUMSPUS, NULL, NULL); |
|
297 | 299 | #endif |
|
298 | 300 | |
|
299 | 301 | return count; |
|
300 | 302 | } |
|
301 | 303 | |
|
302 | 304 | size_t roundpow2(size_t i) { |
|
303 | 305 | i--; |
|
304 | 306 | i |= i >> 1; |
|
305 | 307 | i |= i >> 2; |
|
306 | 308 | i |= i >> 4; |
|
307 | 309 | i |= i >> 8; |
|
308 | 310 | i |= i >> 16; |
|
309 | 311 | i++; |
|
310 | 312 | |
|
311 | 313 | return i; |
|
312 | 314 | } |
|
313 | 315 | |
|
314 | 316 | /* Safer version of _PyBytes_Resize(). |
|
315 | 317 | * |
|
316 | 318 | * _PyBytes_Resize() only works if the refcount is 1. In some scenarios, |
|
317 | 319 | * we can get an object with a refcount > 1, even if it was just created |
|
318 | 320 | * with PyBytes_FromStringAndSize()! That's because (at least) CPython |
|
319 | 321 | * pre-allocates PyBytes instances of size 1 for every possible byte value. |
|
320 | 322 | * |
|
321 | 323 | * If non-0 is returned, obj may or may not be NULL. |
|
322 | 324 | */ |
|
323 | 325 | int safe_pybytes_resize(PyObject** obj, Py_ssize_t size) { |
|
324 | 326 | PyObject* tmp; |
|
325 | 327 | |
|
326 | 328 | if ((*obj)->ob_refcnt == 1) { |
|
327 | 329 | return _PyBytes_Resize(obj, size); |
|
328 | 330 | } |
|
329 | 331 | |
|
330 | 332 | tmp = PyBytes_FromStringAndSize(NULL, size); |
|
331 | 333 | if (!tmp) { |
|
332 | 334 | return -1; |
|
333 | 335 | } |
|
334 | 336 | |
|
335 | 337 | memcpy(PyBytes_AS_STRING(tmp), PyBytes_AS_STRING(*obj), |
|
336 | 338 | PyBytes_GET_SIZE(*obj)); |
|
337 | 339 | |
|
338 | 340 | Py_DECREF(*obj); |
|
339 | 341 | *obj = tmp; |
|
340 | 342 | |
|
341 | 343 | return 0; |
|
342 | 344 | }
\ No newline at end of file
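For reference, a minimal sketch of the call pattern this helper is designed for; `output` and `used` are hypothetical names for a PyBytes object and the byte count actually produced, not identifiers from this file:

    /* Sketch: shrink `output` to the `used` bytes actually produced. */
    if (safe_pybytes_resize(&output, used)) {
        Py_XDECREF(output);   /* on failure, output may or may not be NULL */
        output = NULL;
        /* an exception is already set; propagate the error to the caller */
    }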
@@ -1,471 +1,458 | |||
|
1 | 1 | /* ****************************************************************** |
|
2 | 2 | bitstream |
|
3 | 3 | Part of FSE library |
|
4 | header file (to include) | |
|
5 | Copyright (C) 2013-2017, Yann Collet. | |
|
4 | Copyright (C) 2013-present, Yann Collet. | |
|
6 | 5 | |
|
7 | 6 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
|
8 | 7 | |
|
9 | 8 | Redistribution and use in source and binary forms, with or without |
|
10 | 9 | modification, are permitted provided that the following conditions are |
|
11 | 10 | met: |
|
12 | 11 | |
|
13 | 12 | * Redistributions of source code must retain the above copyright |
|
14 | 13 | notice, this list of conditions and the following disclaimer. |
|
15 | 14 | * Redistributions in binary form must reproduce the above |
|
16 | 15 | copyright notice, this list of conditions and the following disclaimer |
|
17 | 16 | in the documentation and/or other materials provided with the |
|
18 | 17 | distribution. |
|
19 | 18 | |
|
20 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
21 | 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
22 | 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
23 | 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
24 | 23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
25 | 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
26 | 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
27 | 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
28 | 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
29 | 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
30 | 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
31 | 30 | |
|
32 | 31 | You can contact the author at : |
|
33 | 32 | - Source repository : https://github.com/Cyan4973/FiniteStateEntropy |
|
34 | 33 | ****************************************************************** */ |
|
35 | 34 | #ifndef BITSTREAM_H_MODULE |
|
36 | 35 | #define BITSTREAM_H_MODULE |
|
37 | 36 | |
|
38 | 37 | #if defined (__cplusplus) |
|
39 | 38 | extern "C" { |
|
40 | 39 | #endif |
|
41 | 40 | |
|
42 | 41 | /* |
|
43 | 42 | * This API consists of small unitary functions, which must be inlined for best performance. |
|
44 | 43 | * Since link-time-optimization is not available for all compilers, |
|
45 | 44 | * these functions are defined into a .h to be included. |
|
46 | 45 | */ |
|
47 | 46 | |
|
48 | 47 | /*-**************************************** |
|
49 | 48 | * Dependencies |
|
50 | 49 | ******************************************/ |
|
51 | 50 | #include "mem.h" /* unaligned access routines */ |
|
51 | #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ | |
|
52 | 52 | #include "error_private.h" /* error codes and messages */ |
|
53 | 53 | |
|
54 | 54 | |
|
55 | /*-************************************* | |
|
56 | * Debug | |
|
57 | ***************************************/ | |
|
58 | #if defined(BIT_DEBUG) && (BIT_DEBUG>=1) | |
|
59 | # include <assert.h> | |
|
60 | #else | |
|
61 | # ifndef assert | |
|
62 | # define assert(condition) ((void)0) | |
|
63 | # endif | |
|
64 | #endif | |
|
65 | ||
|
66 | ||
|
67 | 55 | /*========================================= |
|
68 | 56 | * Target specific |
|
69 | 57 | =========================================*/ |
|
70 | 58 | #if defined(__BMI__) && defined(__GNUC__) |
|
71 | 59 | # include <immintrin.h> /* support for bextr (experimental) */ |
|
72 | 60 | #endif |
|
73 | 61 | |
|
74 | 62 | #define STREAM_ACCUMULATOR_MIN_32 25 |
|
75 | 63 | #define STREAM_ACCUMULATOR_MIN_64 57 |
|
76 | 64 | #define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) |
|
77 | 65 | |
|
78 | 66 | |
|
79 | 67 | /*-****************************************** |
|
80 | 68 | * bitStream encoding API (write forward) |
|
81 | 69 | ********************************************/ |
|
82 | 70 | /* bitStream can mix input from multiple sources. |
|
83 | 71 | * A critical property of these streams is that they encode and decode in **reverse** direction. |
|
84 | 72 | * So the first bit sequence you add will be the last to be read, like a LIFO stack. |
|
85 | 73 | */ |
|
86 | typedef struct | |
|
87 | { | |
|
74 | typedef struct { | |
|
88 | 75 | size_t bitContainer; |
|
89 | 76 | unsigned bitPos; |
|
90 | 77 | char* startPtr; |
|
91 | 78 | char* ptr; |
|
92 | 79 | char* endPtr; |
|
93 | 80 | } BIT_CStream_t; |
|
94 | 81 | |
|
95 | 82 | MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); |
|
96 | 83 | MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); |
|
97 | 84 | MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); |
|
98 | 85 | MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); |
|
99 | 86 | |
|
100 | 87 | /* Start with initCStream, providing the size of buffer to write into. |
|
101 | 88 | * bitStream will never write outside of this buffer. |
|
102 | 89 | * `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. |
|
103 | 90 | * |
|
104 | 91 | * bits are first added to a local register. |
|
105 | 92 | * Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. |
|
106 | 93 | * Writing data into memory is an explicit operation, performed by the flushBits function. |
|
107 | 94 | * Hence keep track how many bits are potentially stored into local register to avoid register overflow. |
|
108 | 95 | * After a flushBits, a maximum of 7 bits might still be stored into local register. |
|
109 | 96 | * |
|
110 | 97 | * Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. |
|
111 | 98 | * |
|
112 | 99 | * Last operation is to close the bitStream. |
|
113 | 100 | * The function returns the final size of CStream in bytes. |
|
114 | 101 | * If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) |
|
115 | 102 | */ |
|
116 | 103 | |
|
117 | 104 | |
|
118 | 105 | /*-******************************************** |
|
119 | 106 | * bitStream decoding API (read backward) |
|
120 | 107 | **********************************************/ |
|
121 | typedef struct | |
|
122 | { | |
|
108 | typedef struct { | |
|
123 | 109 | size_t bitContainer; |
|
124 | 110 | unsigned bitsConsumed; |
|
125 | 111 | const char* ptr; |
|
126 | 112 | const char* start; |
|
127 | 113 | const char* limitPtr; |
|
128 | 114 | } BIT_DStream_t; |
|
129 | 115 | |
|
130 | 116 | typedef enum { BIT_DStream_unfinished = 0, |
|
131 | 117 | BIT_DStream_endOfBuffer = 1, |
|
132 | 118 | BIT_DStream_completed = 2, |
|
133 | 119 | BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ |
|
134 | 120 | /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ |
|
135 | 121 | |
|
136 | 122 | MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); |
|
137 | 123 | MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); |
|
138 | 124 | MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); |
|
139 | 125 | MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); |
|
140 | 126 | |
|
141 | 127 | |
|
142 | 128 | /* Start by invoking BIT_initDStream(). |
|
143 | 129 | * A chunk of the bitStream is then stored into a local register. |
|
144 | 130 | * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). |
|
145 | 131 | * You can then retrieve bitFields stored into the local register, **in reverse order**. |
|
146 | 132 | * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. |
|
147 | 133 | * A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. 
|
148 | 134 | * Otherwise, it can be less than that, so proceed accordingly. |
|
149 | 135 | * Checking if DStream has reached its end can be performed with BIT_endOfDStream(). |
|
150 | 136 | */ |
|
151 | 137 | |
|
152 | 138 | |
|
153 | 139 | /*-**************************************** |
|
154 | 140 | * unsafe API |
|
155 | 141 | ******************************************/ |
|
156 | 142 | MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); |
|
157 | 143 | /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ |
|
158 | 144 | |
|
159 | 145 | MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); |
|
160 | 146 | /* unsafe version; does not check buffer overflow */ |
|
161 | 147 | |
|
162 | 148 | MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); |
|
163 | 149 | /* faster, but works only if nbBits >= 1 */ |
|
164 | 150 | |
|
165 | 151 | |
|
166 | 152 | |
|
167 | 153 | /*-************************************************************** |
|
168 | 154 | * Internal functions |
|
169 | 155 | ****************************************************************/ |
|
170 | 156 | MEM_STATIC unsigned BIT_highbit32 (U32 val) |
|
171 | 157 | { |
|
172 | 158 | assert(val != 0); |
|
173 | 159 | { |
|
174 | 160 | # if defined(_MSC_VER) /* Visual */ |
|
175 | 161 | unsigned long r=0; |
|
176 | 162 | _BitScanReverse ( &r, val ); |
|
177 | 163 | return (unsigned) r; |
|
178 | 164 | # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ |
|
179 | 165 | return 31 - __builtin_clz (val); |
|
180 | 166 | # else /* Software version */ |
|
181 | 167 | static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, |
|
182 | 168 | 11, 14, 16, 18, 22, 25, 3, 30, |
|
183 | 169 | 8, 12, 20, 28, 15, 17, 24, 7, |
|
184 | 170 | 19, 27, 23, 6, 26, 5, 4, 31 }; |
|
185 | 171 | U32 v = val; |
|
186 | 172 | v |= v >> 1; |
|
187 | 173 | v |= v >> 2; |
|
188 | 174 | v |= v >> 4; |
|
189 | 175 | v |= v >> 8; |
|
190 | 176 | v |= v >> 16; |
|
191 | 177 | return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; |
|
192 | 178 | # endif |
|
193 | 179 | } |
|
194 | 180 | } |
|
195 | 181 | |
|
196 | 182 | /*===== Local Constants =====*/ |
|
197 | 183 | static const unsigned BIT_mask[] = { |
|
198 | 184 | 0, 1, 3, 7, 0xF, 0x1F, |
|
199 | 185 | 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, |
|
200 | 186 | 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, |
|
201 | 187 | 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, |
|
202 | 188 | 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, |
|
203 | 189 | 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ |
|
204 | 190 | #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) |
|
205 | 191 | |
|
206 | 192 | /*-************************************************************** |
|
207 | 193 | * bitStream encoding |
|
208 | 194 | ****************************************************************/ |
|
209 | 195 | /*! BIT_initCStream() : |
|
210 | 196 | * `dstCapacity` must be > sizeof(size_t) |
|
211 | 197 | * @return : 0 if success, |
|
212 | 198 | * otherwise an error code (can be tested using ERR_isError()) */ |
|
213 | 199 | MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, |
|
214 | 200 | void* startPtr, size_t dstCapacity) |
|
215 | 201 | { |
|
216 | 202 | bitC->bitContainer = 0; |
|
217 | 203 | bitC->bitPos = 0; |
|
218 | 204 | bitC->startPtr = (char*)startPtr; |
|
219 | 205 | bitC->ptr = bitC->startPtr; |
|
220 | 206 | bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); |
|
221 | 207 | if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); |
|
222 | 208 | return 0; |
|
223 | 209 | } |
|
224 | 210 | |
|
225 | 211 | /*! BIT_addBits() : |
|
226 | 212 | * can add up to 31 bits into `bitC`. |
|
227 | 213 | * Note : does not check for register overflow ! */ |
|
228 | 214 | MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, |
|
229 | 215 | size_t value, unsigned nbBits) |
|
230 | 216 | { |
|
231 | 217 | MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32); |
|
232 | 218 | assert(nbBits < BIT_MASK_SIZE); |
|
233 | 219 | assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); |
|
234 | 220 | bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; |
|
235 | 221 | bitC->bitPos += nbBits; |
|
236 | 222 | } |
|
237 | 223 | |
|
238 | 224 | /*! BIT_addBitsFast() : |
|
239 | * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ |
225 | * works only if `value` is _clean_, | |
|
226 | * meaning all high bits above nbBits are 0 */ | |
|
240 | 227 | MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, |
|
241 | 228 | size_t value, unsigned nbBits) |
|
242 | 229 | { |
|
243 | 230 | assert((value>>nbBits) == 0); |
|
244 | 231 | assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); |
|
245 | 232 | bitC->bitContainer |= value << bitC->bitPos; |
|
246 | 233 | bitC->bitPos += nbBits; |
|
247 | 234 | } |
|
248 | 235 | |
|
249 | 236 | /*! BIT_flushBitsFast() : |
|
250 | 237 | * assumption : bitContainer has not overflowed |
|
251 | 238 | * unsafe version; does not check buffer overflow */ |
|
252 | 239 | MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) |
|
253 | 240 | { |
|
254 | 241 | size_t const nbBytes = bitC->bitPos >> 3; |
|
255 | 242 | assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); |
|
256 | 243 | MEM_writeLEST(bitC->ptr, bitC->bitContainer); |
|
257 | 244 | bitC->ptr += nbBytes; |
|
258 | 245 | assert(bitC->ptr <= bitC->endPtr); |
|
259 | 246 | bitC->bitPos &= 7; |
|
260 | 247 | bitC->bitContainer >>= nbBytes*8; |
|
261 | 248 | } |
|
262 | 249 | |
|
263 | 250 | /*! BIT_flushBits() : |
|
264 | 251 | * assumption : bitContainer has not overflowed |
|
265 | 252 | * safe version; check for buffer overflow, and prevents it. |
|
266 | 253 | * note : does not signal buffer overflow. |
|
267 | 254 | * overflow will be revealed later on using BIT_closeCStream() */ |
|
268 | 255 | MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) |
|
269 | 256 | { |
|
270 | 257 | size_t const nbBytes = bitC->bitPos >> 3; |
|
271 | 258 | assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); |
|
272 | 259 | MEM_writeLEST(bitC->ptr, bitC->bitContainer); |
|
273 | 260 | bitC->ptr += nbBytes; |
|
274 | 261 | if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; |
|
275 | 262 | bitC->bitPos &= 7; |
|
276 | 263 | bitC->bitContainer >>= nbBytes*8; |
|
277 | 264 | } |
|
278 | 265 | |
|
279 | 266 | /*! BIT_closeCStream() : |
|
280 | 267 | * @return : size of CStream, in bytes, |
|
281 | 268 | * or 0 if it could not fit into dstBuffer */ |
|
282 | 269 | MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) |
|
283 | 270 | { |
|
284 | 271 | BIT_addBitsFast(bitC, 1, 1); /* endMark */ |
|
285 | 272 | BIT_flushBits(bitC); |
|
286 | 273 | if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ |
|
287 | 274 | return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); |
|
288 | 275 | } |
|
289 | 276 | |
|
290 | 277 | |
|
291 | 278 | /*-******************************************************** |
|
292 | 279 | * bitStream decoding |
|
293 | 280 | **********************************************************/ |
|
294 | 281 | /*! BIT_initDStream() : |
|
295 | 282 | * Initialize a BIT_DStream_t. |
|
296 | 283 | * `bitD` : a pointer to an already allocated BIT_DStream_t structure. |
|
297 | 284 | * `srcSize` must be the *exact* size of the bitStream, in bytes. |
|
298 | 285 | * @return : size of stream (== srcSize), or an errorCode if a problem is detected |
|
299 | 286 | */ |
|
300 | 287 | MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) |
|
301 | 288 | { |
|
302 | 289 | if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } |
|
303 | 290 | |
|
304 | 291 | bitD->start = (const char*)srcBuffer; |
|
305 | 292 | bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); |
|
306 | 293 | |
|
307 | 294 | if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ |
|
308 | 295 | bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); |
|
309 | 296 | bitD->bitContainer = MEM_readLEST(bitD->ptr); |
|
310 | 297 | { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; |
|
311 | 298 | bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ |
|
312 | 299 | if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } |
|
313 | 300 | } else { |
|
314 | 301 | bitD->ptr = bitD->start; |
|
315 | 302 | bitD->bitContainer = *(const BYTE*)(bitD->start); |
|
316 | 303 | switch(srcSize) |
|
317 | 304 | { |
|
318 | 305 | case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); |
|
319 | 306 | /* fall-through */ |
|
320 | 307 | |
|
321 | 308 | case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); |
|
322 | 309 | /* fall-through */ |
|
323 | 310 | |
|
324 | 311 | case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); |
|
325 | 312 | /* fall-through */ |
|
326 | 313 | |
|
327 | 314 | case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; |
|
328 | 315 | /* fall-through */ |
|
329 | 316 | |
|
330 | 317 | case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; |
|
331 | 318 | /* fall-through */ |
|
332 | 319 | |
|
333 | 320 | case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; |
|
334 | 321 | /* fall-through */ |
|
335 | 322 | |
|
336 | 323 | default: break; |
|
337 | 324 | } |
|
338 | 325 | { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; |
|
339 | 326 | bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; |
|
340 | 327 | if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ |
|
341 | 328 | } |
|
342 | 329 | bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; |
|
343 | 330 | } |
|
344 | 331 | |
|
345 | 332 | return srcSize; |
|
346 | 333 | } |
|
347 | 334 | |
|
348 | 335 | MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) |
|
349 | 336 | { |
|
350 | 337 | return bitContainer >> start; |
|
351 | 338 | } |
|
352 | 339 | |
|
353 | 340 | MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) |
|
354 | 341 | { |
|
355 | 342 | #if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */ |
|
356 | 343 | # if defined(__x86_64__) |
|
357 | 344 | if (sizeof(bitContainer)==8) |
|
358 | 345 | return _bextr_u64(bitContainer, start, nbBits); |
|
359 | 346 | else |
|
360 | 347 | # endif |
|
361 | 348 | return _bextr_u32(bitContainer, start, nbBits); |
|
362 | 349 | #else |
|
363 | 350 | assert(nbBits < BIT_MASK_SIZE); |
|
364 | 351 | return (bitContainer >> start) & BIT_mask[nbBits]; |
|
365 | 352 | #endif |
|
366 | 353 | } |
|
367 | 354 | |
|
368 | 355 | MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) |
|
369 | 356 | { |
|
370 | 357 | assert(nbBits < BIT_MASK_SIZE); |
|
371 | 358 | return bitContainer & BIT_mask[nbBits]; |
|
372 | 359 | } |
|
373 | 360 | |
|
374 | 361 | /*! BIT_lookBits() : |
|
375 | 362 | * Provides next n bits from local register. |
|
376 | 363 | * local register is not modified. |
|
377 | 364 | * On 32-bits, maxNbBits==24. |
|
378 | 365 | * On 64-bits, maxNbBits==56. |
|
379 | 366 | * @return : value extracted */ |
|
380 | 367 | MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) |
|
381 | 368 | { |
|
382 | 369 | #if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */ |
|
383 | 370 | return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); |
|
384 | 371 | #else |
|
385 | 372 | U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; |
|
386 | 373 | return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); |
|
387 | 374 | #endif |
|
388 | 375 | } |
|
389 | 376 | |
|
390 | 377 | /*! BIT_lookBitsFast() : |
|
391 | 378 | * unsafe version; only works if nbBits >= 1 */ |
|
392 | 379 | MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) |
|
393 | 380 | { |
|
394 | 381 | U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; |
|
395 | 382 | assert(nbBits >= 1); |
|
396 | 383 | return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); |
|
397 | 384 | } |
|
398 | 385 | |
|
399 | 386 | MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) |
|
400 | 387 | { |
|
401 | 388 | bitD->bitsConsumed += nbBits; |
|
402 | 389 | } |
|
403 | 390 | |
|
404 | 391 | /*! BIT_readBits() : |
|
405 | 392 | * Read (consume) next n bits from local register and update. |
|
406 | 393 | * Pay attention to not read more than nbBits contained into local register. |
|
407 | 394 | * @return : extracted value. */ |
|
408 | 395 | MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) |
|
409 | 396 | { |
|
410 | 397 | size_t const value = BIT_lookBits(bitD, nbBits); |
|
411 | 398 | BIT_skipBits(bitD, nbBits); |
|
412 | 399 | return value; |
|
413 | 400 | } |
|
414 | 401 | |
|
415 | 402 | /*! BIT_readBitsFast() : |
|
416 | 403 | * unsafe version; works only if nbBits >= 1 */ 
|
417 | 404 | MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) |
|
418 | 405 | { |
|
419 | 406 | size_t const value = BIT_lookBitsFast(bitD, nbBits); |
|
420 | 407 | assert(nbBits >= 1); |
|
421 | 408 | BIT_skipBits(bitD, nbBits); |
|
422 | 409 | return value; |
|
423 | 410 | } |
|
424 | 411 | |
|
425 | 412 | /*! BIT_reloadDStream() : |
|
426 | 413 | * Refill `bitD` from buffer previously set in BIT_initDStream() . |
|
427 | 414 | * This function is safe, it guarantees it will not read beyond src buffer. |
|
428 | 415 | * @return : status of `BIT_DStream_t` internal register. |
|
429 | 416 | * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ |
|
430 | 417 | MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) |
|
431 | 418 | { |
|
432 | 419 | if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ |
|
433 | 420 | return BIT_DStream_overflow; |
|
434 | 421 | |
|
435 | 422 | if (bitD->ptr >= bitD->limitPtr) { |
|
436 | 423 | bitD->ptr -= bitD->bitsConsumed >> 3; |
|
437 | 424 | bitD->bitsConsumed &= 7; |
|
438 | 425 | bitD->bitContainer = MEM_readLEST(bitD->ptr); |
|
439 | 426 | return BIT_DStream_unfinished; |
|
440 | 427 | } |
|
441 | 428 | if (bitD->ptr == bitD->start) { |
|
442 | 429 | if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; |
|
443 | 430 | return BIT_DStream_completed; |
|
444 | 431 | } |
|
445 | 432 | /* start < ptr < limitPtr */ |
|
446 | 433 | { U32 nbBytes = bitD->bitsConsumed >> 3; |
|
447 | 434 | BIT_DStream_status result = BIT_DStream_unfinished; |
|
448 | 435 | if (bitD->ptr - nbBytes < bitD->start) { |
|
449 | 436 | nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ |
|
450 | 437 | result = BIT_DStream_endOfBuffer; |
|
451 | 438 | } |
|
452 | 439 | bitD->ptr -= nbBytes; |
|
453 | 440 | bitD->bitsConsumed -= nbBytes*8; |
|
454 | 441 | bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ |
|
455 | 442 | return result; |
|
456 | 443 | } |
|
457 | 444 | } |
|
458 | 445 | |
|
459 | 446 | /*! BIT_endOfDStream() : |
|
460 | 447 | * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). |
|
461 | 448 | */ |
|
462 | 449 | MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) |
|
463 | 450 | { |
|
464 | 451 | return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); |
|
465 | 452 | } |
|
466 | 453 | |
|
467 | 454 | #if defined (__cplusplus) |
|
468 | 455 | } |
|
469 | 456 | #endif |
|
470 | 457 | |
|
471 | 458 | #endif /* BITSTREAM_H_MODULE */ |
@@ -1,111 +1,133 | |||
|
1 | 1 | /* |
|
2 | 2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. |
|
3 | 3 | * All rights reserved. |
|
4 | 4 | * |
|
5 | 5 | * This source code is licensed under both the BSD-style license (found in the |
|
6 | 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
|
7 | 7 | * in the COPYING file in the root directory of this source tree). |
|
8 | 8 | * You may select, at your option, one of the above-listed licenses. |
|
9 | 9 | */ |
|
10 | 10 | |
|
11 | 11 | #ifndef ZSTD_COMPILER_H |
|
12 | 12 | #define ZSTD_COMPILER_H |
|
13 | 13 | |
|
14 | 14 | /*-******************************************************* |
|
15 | 15 | * Compiler specifics |
|
16 | 16 | *********************************************************/ |
|
17 | 17 | /* force inlining */ |
|
18 | 18 | #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ |
|
19 | 19 | # define INLINE_KEYWORD inline |
|
20 | 20 | #else |
|
21 | 21 | # define INLINE_KEYWORD |
|
22 | 22 | #endif |
|
23 | 23 | |
|
24 | 24 | #if defined(__GNUC__) |
|
25 | 25 | # define FORCE_INLINE_ATTR __attribute__((always_inline)) |
|
26 | 26 | #elif defined(_MSC_VER) |
|
27 | 27 | # define FORCE_INLINE_ATTR __forceinline |
|
28 | 28 | #else |
|
29 | 29 | # define FORCE_INLINE_ATTR |
|
30 | 30 | #endif |
|
31 | 31 | |
|
32 | 32 | /** |
|
33 | 33 | * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant |
|
34 | 34 | * parameters. They must be inlined for the compiler to elimininate the constant |
|
35 | 35 | * branches. |
|
36 | 36 | */ |
|
37 | 37 | #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR |
|
38 | 38 | /** |
|
39 | 39 | * HINT_INLINE is used to help the compiler generate better code. It is *not* |
|
40 | 40 | * used for "templates", so it can be tweaked based on the compilers |
|
41 | 41 | * performance. |
|
42 | 42 | * |
|
43 | 43 | * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the |
|
44 | 44 | * always_inline attribute. |
|
45 | 45 | * |
|
46 | 46 | * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline |
|
47 | 47 | * attribute. |
|
48 | 48 | */ |
|
49 | 49 | #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 |
|
50 | 50 | # define HINT_INLINE static INLINE_KEYWORD |
|
51 | 51 | #else |
|
52 | 52 | # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR |
|
53 | 53 | #endif |
|
54 | 54 | |
|
55 | 55 | /* force no inlining */ |
|
56 | 56 | #ifdef _MSC_VER |
|
57 | 57 | # define FORCE_NOINLINE static __declspec(noinline) |
|
58 | 58 | #else |
|
59 | 59 | # ifdef __GNUC__ |
|
60 | 60 | # define FORCE_NOINLINE static __attribute__((__noinline__)) |
|
61 | 61 | # else |
|
62 | 62 | # define FORCE_NOINLINE static |
|
63 | 63 | # endif |
|
64 | 64 | #endif |
|
65 | 65 | |
|
66 | 66 | /* target attribute */ |
|
67 | 67 | #ifndef __has_attribute |
|
68 | 68 | #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ |
|
69 | 69 | #endif |
|
70 | 70 | #if defined(__GNUC__) |
|
71 | 71 | # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) |
|
72 | 72 | #else |
|
73 | 73 | # define TARGET_ATTRIBUTE(target) |
|
74 | 74 | #endif |
|
75 | 75 | |
|
76 | 76 | /* Enable runtime BMI2 dispatch based on the CPU. |
|
77 | 77 | * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. |
|
78 | 78 | */ |
|
79 | 79 | #ifndef DYNAMIC_BMI2 |
|
80 | #if (defined(__clang__) && __has_attribute(__target__)) \ | |
|
80 | #if ((defined(__clang__) && __has_attribute(__target__)) \ | |
|
81 | 81 | || (defined(__GNUC__) \ |
|
82 | && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) \ | |
|
82 | && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ | |
|
83 | 83 | && (defined(__x86_64__) || defined(_M_X86)) \ |
|
84 | 84 | && !defined(__BMI2__) |
|
85 | 85 | # define DYNAMIC_BMI2 1 |
|
86 | 86 | #else |
|
87 | 87 | # define DYNAMIC_BMI2 0 |
|
88 | 88 | #endif |
|
89 | 89 | #endif |
|
90 | 90 | |
|
91 | /* prefetch */ |
91 | /* prefetch | |
|
92 | * can be disabled, by declaring NO_PREFETCH macro | |
|
93 | * All prefetch invocations use a single default locality 2, | |
|
94 | * generating instruction prefetcht1, | |
|
95 | * which, according to Intel, means "load data into L2 cache". | |
|
96 | * This is a good enough "middle ground" for the time being, | |
|
97 | * though in theory, it would be better to specialize locality depending on data being prefetched. | |
|
98 | * Tests could not determine any sensible difference based on locality value. */ | |
|
99 | #if defined(NO_PREFETCH) | |
|
100 | # define PREFETCH(ptr) (void)(ptr) /* disabled */ | |
|
101 | #else | |
|
92 | 102 | #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ |
|
93 | 103 | # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ |
|
94 | # define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T |

95 | #elif defined(__GNUC__) |

96 | # define PREFETCH(ptr) __builtin_prefetch( |
104 | # define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) | |
|
105 | # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) | |
|
106 | # define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) | |
|
97 | 107 | #else |
|
98 | # define PREFETCH(ptr) /* disabled */ | |
|
108 | # define PREFETCH(ptr) (void)(ptr) /* disabled */ | |
|
99 | 109 | #endif |
|
110 | #endif /* NO_PREFETCH */ | |
|
111 | ||
|
112 | #define CACHELINE_SIZE 64 | |
|
113 | ||
|
114 | #define PREFETCH_AREA(p, s) { \ | |
|
115 | const char* const _ptr = (const char*)(p); \ | |
|
116 | size_t const _size = (size_t)(s); \ | |
|
117 | size_t _pos; \ | |
|
118 | for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ | |
|
119 | PREFETCH(_ptr + _pos); \ | |
|
120 | } \ | |
|
121 | } | |
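A sketch of how these new helpers might be used; `table` and `tableBytes` are assumed names for a data region touched by a subsequent hot loop, not identifiers from the patch:

    /* Sketch: warm a hypothetical table region before scanning it. */
    static void example_warm(const void* table, size_t tableBytes)
    {
        PREFETCH(table);                     /* one cache line */
        PREFETCH_AREA(table, tableBytes);    /* whole region, CACHELINE_SIZE at a time */
    }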
|
100 | 122 | |
|
101 | 123 | /* disable warnings */ |
|
102 | 124 | #ifdef _MSC_VER /* Visual Studio */ |
|
103 | 125 | # include <intrin.h> /* For Visual 2005 */ |
|
104 | 126 | # pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ |
|
105 | 127 | # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ |
|
106 | 128 | # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ |
|
107 | 129 | # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ |
|
108 | 130 | # pragma warning(disable : 4324) /* disable: C4324: padded structure */ |
|
109 | 131 | #endif |
|
110 | 132 | |
|
111 | 133 | #endif /* ZSTD_COMPILER_H */ |
@@ -1,216 +1,215 | |||
|
1 | 1 | /* |
|
2 | 2 | * Copyright (c) 2018-present, Facebook, Inc. |
|
3 | 3 | * All rights reserved. |
|
4 | 4 | * |
|
5 | 5 | * This source code is licensed under both the BSD-style license (found in the |
|
6 | 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
|
7 | 7 | * in the COPYING file in the root directory of this source tree). |
|
8 | 8 | * You may select, at your option, one of the above-listed licenses. |
|
9 | 9 | */ |
|
10 | 10 | |
|
11 | 11 | #ifndef ZSTD_COMMON_CPU_H |
|
12 | 12 | #define ZSTD_COMMON_CPU_H |
|
13 | 13 | |
|
14 | 14 | /** |
|
15 | 15 | * Implementation taken from folly/CpuId.h |
|
16 | 16 | * https://github.com/facebook/folly/blob/master/folly/CpuId.h |
|
17 | 17 | */ |
|
18 | 18 | |
|
19 | 19 | #include <string.h> |
|
20 | 20 | |
|
21 | 21 | #include "mem.h" |
|
22 | 22 | |
|
23 | 23 | #ifdef _MSC_VER |
|
24 | 24 | #include <intrin.h> |
|
25 | 25 | #endif |
|
26 | 26 | |
|
27 | 27 | typedef struct { |
|
28 | 28 | U32 f1c; |
|
29 | 29 | U32 f1d; |
|
30 | 30 | U32 f7b; |
|
31 | 31 | U32 f7c; |
|
32 | 32 | } ZSTD_cpuid_t; |
|
33 | 33 | |
|
34 | 34 | MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { |
|
35 | 35 | U32 f1c = 0; |
|
36 | 36 | U32 f1d = 0; |
|
37 | 37 | U32 f7b = 0; |
|
38 | 38 | U32 f7c = 0; |
|
39 | #ifdef _MSC_VER | |
|
39 | #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) | |
|
40 | 40 | int reg[4]; |
|
41 | 41 | __cpuid((int*)reg, 0); |
|
42 | 42 | { |
|
43 | 43 | int const n = reg[0]; |
|
44 | 44 | if (n >= 1) { |
|
45 | 45 | __cpuid((int*)reg, 1); |
|
46 | 46 | f1c = (U32)reg[2]; |
|
47 | 47 | f1d = (U32)reg[3]; |
|
48 | 48 | } |
|
49 | 49 | if (n >= 7) { |
|
50 | 50 | __cpuidex((int*)reg, 7, 0); |
|
51 | 51 | f7b = (U32)reg[1]; |
|
52 | 52 | f7c = (U32)reg[2]; |
|
53 | 53 | } |
|
54 | 54 | } |
|
55 | 55 | #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) |
|
56 | 56 | /* The following block is like the normal cpuid branch below, but gcc 
|
57 | 57 | * reserves ebx for use of its pic register so we must specially |
|
58 | 58 | * handle the save and restore to avoid clobbering the register |
|
59 | 59 | */ |
|
60 | 60 | U32 n; |
|
61 | 61 | __asm__( |
|
62 | 62 | "pushl %%ebx\n\t" |
|
63 | 63 | "cpuid\n\t" |
|
64 | 64 | "popl %%ebx\n\t" |
|
65 | 65 | : "=a"(n) |
|
66 | 66 | : "a"(0) |
|
67 | 67 | : "ecx", "edx"); |
|
68 | 68 | if (n >= 1) { |
|
69 | 69 | U32 f1a; |
|
70 | 70 | __asm__( |
|
71 | 71 | "pushl %%ebx\n\t" |
|
72 | 72 | "cpuid\n\t" |
|
73 | 73 | "popl %%ebx\n\t" |
|
74 | 74 | : "=a"(f1a), "=c"(f1c), "=d"(f1d) |
|
75 | : "a"(1) | |
|
76 | :); | |
|
75 | : "a"(1)); | |
|
77 | 76 | } |
|
78 | 77 | if (n >= 7) { |
|
79 | 78 | __asm__( |
|
80 | 79 | "pushl %%ebx\n\t" |
|
81 | 80 | "cpuid\n\t" |
|
82 | 81 | "movl %%ebx, %%eax\n\r" |
|
83 | 82 | "popl %%ebx" |
|
84 | 83 | : "=a"(f7b), "=c"(f7c) |
|
85 | 84 | : "a"(7), "c"(0) |
|
86 | 85 | : "edx"); |
|
87 | 86 | } |
|
88 | 87 | #elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) |
|
89 | 88 | U32 n; |
|
90 | 89 | __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); |
|
91 | 90 | if (n >= 1) { |
|
92 | 91 | U32 f1a; |
|
93 | 92 | __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); |
|
94 | 93 | } |
|
95 | 94 | if (n >= 7) { |
|
96 | 95 | U32 f7a; |
|
97 | 96 | __asm__("cpuid" |
|
98 | 97 | : "=a"(f7a), "=b"(f7b), "=c"(f7c) |
|
99 | 98 | : "a"(7), "c"(0) |
|
100 | 99 | : "edx"); |
|
101 | 100 | } |
|
102 | 101 | #endif |
|
103 | 102 | { |
|
104 | 103 | ZSTD_cpuid_t cpuid; |
|
105 | 104 | cpuid.f1c = f1c; |
|
106 | 105 | cpuid.f1d = f1d; |
|
107 | 106 | cpuid.f7b = f7b; |
|
108 | 107 | cpuid.f7c = f7c; |
|
109 | 108 | return cpuid; |
|
110 | 109 | } |
|
111 | 110 | } |
|
112 | 111 | |
|
113 | 112 | #define X(name, r, bit) \ |
|
114 | 113 | MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ |
|
115 | 114 | return ((cpuid.r) & (1U << bit)) != 0; \ |
|
116 | 115 | } |
|
117 | 116 | |
|
118 | 117 | /* cpuid(1): Processor Info and Feature Bits. */ |
|
119 | 118 | #define C(name, bit) X(name, f1c, bit) |
|
120 | 119 | C(sse3, 0) |
|
121 | 120 | C(pclmuldq, 1) |
|
122 | 121 | C(dtes64, 2) |
|
123 | 122 | C(monitor, 3) |
|
124 | 123 | C(dscpl, 4) |
|
125 | 124 | C(vmx, 5) |
|
126 | 125 | C(smx, 6) |
|
127 | 126 | C(eist, 7) |
|
128 | 127 | C(tm2, 8) |
|
129 | 128 | C(ssse3, 9) |
|
130 | 129 | C(cnxtid, 10) |
|
131 | 130 | C(fma, 12) |
|
132 | 131 | C(cx16, 13) |
|
133 | 132 | C(xtpr, 14) |
|
134 | 133 | C(pdcm, 15) |
|
135 | 134 | C(pcid, 17) |
|
136 | 135 | C(dca, 18) |
|
137 | 136 | C(sse41, 19) |
|
138 | 137 | C(sse42, 20) |
|
139 | 138 | C(x2apic, 21) |
|
140 | 139 | C(movbe, 22) |
|
141 | 140 | C(popcnt, 23) |
|
142 | 141 | C(tscdeadline, 24) |
|
143 | 142 | C(aes, 25) |
|
144 | 143 | C(xsave, 26) |
|
145 | 144 | C(osxsave, 27) |
|
146 | 145 | C(avx, 28) |
|
147 | 146 | C(f16c, 29) |
|
148 | 147 | C(rdrand, 30) |
|
149 | 148 | #undef C |
|
150 | 149 | #define D(name, bit) X(name, f1d, bit) |
|
151 | 150 | D(fpu, 0) |
|
152 | 151 | D(vme, 1) |
|
153 | 152 | D(de, 2) |
|
154 | 153 | D(pse, 3) |
|
155 | 154 | D(tsc, 4) |
|
156 | 155 | D(msr, 5) |
|
157 | 156 | D(pae, 6) |
|
158 | 157 | D(mce, 7) |
|
159 | 158 | D(cx8, 8) |
|
160 | 159 | D(apic, 9) |
|
161 | 160 | D(sep, 11) |
|
162 | 161 | D(mtrr, 12) |
|
163 | 162 | D(pge, 13) |
|
164 | 163 | D(mca, 14) |
|
165 | 164 | D(cmov, 15) |
|
166 | 165 | D(pat, 16) |
|
167 | 166 | D(pse36, 17) |
|
168 | 167 | D(psn, 18) |
|
169 | 168 | D(clfsh, 19) |
|
170 | 169 | D(ds, 21) |
|
171 | 170 | D(acpi, 22) |
|
172 | 171 | D(mmx, 23) |
|
173 | 172 | D(fxsr, 24) |
|
174 | 173 | D(sse, 25) |
|
175 | 174 | D(sse2, 26) |
|
176 | 175 | D(ss, 27) |
|
177 | 176 | D(htt, 28) |
|
178 | 177 | D(tm, 29) |
|
179 | 178 | D(pbe, 31) |
|
180 | 179 | #undef D |
|
181 | 180 | |
|
182 | 181 | /* cpuid(7): Extended Features. */ |
|
183 | 182 | #define B(name, bit) X(name, f7b, bit) |
|
184 | 183 | B(bmi1, 3) |
|
185 | 184 | B(hle, 4) |
|
186 | 185 | B(avx2, 5) |
|
187 | 186 | B(smep, 7) |
|
188 | 187 | B(bmi2, 8) |
|
189 | 188 | B(erms, 9) |
|
190 | 189 | B(invpcid, 10) |
|
191 | 190 | B(rtm, 11) |
|
192 | 191 | B(mpx, 14) |
|
193 | 192 | B(avx512f, 16) |
|
194 | 193 | B(avx512dq, 17) |
|
195 | 194 | B(rdseed, 18) |
|
196 | 195 | B(adx, 19) |
|
197 | 196 | B(smap, 20) |
|
198 | 197 | B(avx512ifma, 21) |
|
199 | 198 | B(pcommit, 22) |
|
200 | 199 | B(clflushopt, 23) |
|
201 | 200 | B(clwb, 24) |
|
202 | 201 | B(avx512pf, 26) |
|
203 | 202 | B(avx512er, 27) |
|
204 | 203 | B(avx512cd, 28) |
|
205 | 204 | B(sha, 29) |
|
206 | 205 | B(avx512bw, 30) |
|
207 | 206 | B(avx512vl, 31) |
|
208 | 207 | #undef B |
|
209 | 208 | #define C(name, bit) X(name, f7c, bit) |
|
210 | 209 | C(prefetchwt1, 0) |
|
211 | 210 | C(avx512vbmi, 1) |
|
212 | 211 | #undef C |
|
213 | 212 | |
|
214 | 213 | #undef X |
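For reference, a sketch of how the generated predicates are consumed, for example by the DYNAMIC_BMI2 dispatch mentioned in compiler.h; the wrapper name is illustrative:

    /* Sketch: query the CPU once, then branch on a generated feature test. */
    static int example_supports_bmi2(void)
    {
        ZSTD_cpuid_t const cpuid = ZSTD_cpuid();
        return ZSTD_cpuid_bmi2(cpuid);   /* 1 if bit 8 of cpuid(7).EBX is set */
    }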
|
215 | 214 | |
|
216 | 215 | #endif /* ZSTD_COMMON_CPU_H */ |
@@ -1,221 +1,236 | |||
|
1 | 1 | /* |
|
2 | 2 | Common functions of New Generation Entropy library |
|
3 | 3 | Copyright (C) 2016, Yann Collet. |
|
4 | 4 | |
|
5 | 5 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
|
6 | 6 | |
|
7 | 7 | Redistribution and use in source and binary forms, with or without |
|
8 | 8 | modification, are permitted provided that the following conditions are |
|
9 | 9 | met: |
|
10 | 10 | |
|
11 | 11 | * Redistributions of source code must retain the above copyright |
|
12 | 12 | notice, this list of conditions and the following disclaimer. |
|
13 | 13 | * Redistributions in binary form must reproduce the above |
|
14 | 14 | copyright notice, this list of conditions and the following disclaimer |
|
15 | 15 | in the documentation and/or other materials provided with the |
|
16 | 16 | distribution. |
|
17 | 17 | |
|
18 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
19 | 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
20 | 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
21 | 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
22 | 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
23 | 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
24 | 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
25 | 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
26 | 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
27 | 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
28 | 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
29 | 29 | |
|
30 | 30 | You can contact the author at : |
|
31 | 31 | - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy |
|
32 | 32 | - Public forum : https://groups.google.com/forum/#!forum/lz4c |
|
33 | 33 | *************************************************************************** */ |
|
34 | 34 | |
|
35 | 35 | /* ************************************* |
|
36 | 36 | * Dependencies |
|
37 | 37 | ***************************************/ |
|
38 | 38 | #include "mem.h" |
|
39 | 39 | #include "error_private.h" /* ERR_*, ERROR */ |
|
40 | 40 | #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ |
|
41 | 41 | #include "fse.h" |
|
42 | 42 | #define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ |
|
43 | 43 | #include "huf.h" |
|
44 | 44 | |
|
45 | 45 | |
|
46 | 46 | /*=== Version ===*/ |
|
47 | 47 | unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } |
|
48 | 48 | |
|
49 | 49 | |
|
50 | 50 | /*=== Error Management ===*/ |
|
51 | 51 | unsigned FSE_isError(size_t code) { return ERR_isError(code); } |
|
52 | 52 | const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } |
|
53 | 53 | |
|
54 | 54 | unsigned HUF_isError(size_t code) { return ERR_isError(code); } |
|
55 | 55 | const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } |
|
56 | 56 | |
|
57 | 57 | |
|
58 | 58 | /*-************************************************************** |
|
59 | 59 | * FSE NCount encoding-decoding |
|
60 | 60 | ****************************************************************/ |
|
61 | 61 | size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, |
|
62 | 62 | const void* headerBuffer, size_t hbSize) |
|
63 | 63 | { |
|
64 | 64 | const BYTE* const istart = (const BYTE*) headerBuffer; |
|
65 | 65 | const BYTE* const iend = istart + hbSize; |
|
66 | 66 | const BYTE* ip = istart; |
|
67 | 67 | int nbBits; |
|
68 | 68 | int remaining; |
|
69 | 69 | int threshold; |
|
70 | 70 | U32 bitStream; |
|
71 | 71 | int bitCount; |
|
72 | 72 | unsigned charnum = 0; |
|
73 | 73 | int previous0 = 0; |
|
74 | 74 | |
|
75 | if (hbSize < 4) return ERROR(srcSize_wrong); | |
|
75 | if (hbSize < 4) { | |
|
76 | /* This function only works when hbSize >= 4 */ | |
|
77 | char buffer[4]; | |
|
78 | memset(buffer, 0, sizeof(buffer)); | |
|
79 | memcpy(buffer, headerBuffer, hbSize); | |
|
80 | { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, | |
|
81 | buffer, sizeof(buffer)); | |
|
82 | if (FSE_isError(countSize)) return countSize; | |
|
83 | if (countSize > hbSize) return ERROR(corruption_detected); | |
|
84 | return countSize; | |
|
85 | } } | |
|
86 | assert(hbSize >= 4); | |
|
87 | ||
|
88 | /* init */ | |
|
89 | memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ | |
|
76 | 90 | bitStream = MEM_readLE32(ip); |
|
77 | 91 | nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ |
|
78 | 92 | if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); |
|
79 | 93 | bitStream >>= 4; |
|
80 | 94 | bitCount = 4; |
|
81 | 95 | *tableLogPtr = nbBits; |
|
82 | 96 | remaining = (1<<nbBits)+1; |
|
83 | 97 | threshold = 1<<nbBits; |
|
84 | 98 | nbBits++; |
|
85 | 99 | |
|
86 | 100 | while ((remaining>1) & (charnum<=*maxSVPtr)) { |
|
87 | 101 | if (previous0) { |
|
88 | 102 | unsigned n0 = charnum; |
|
89 | 103 | while ((bitStream & 0xFFFF) == 0xFFFF) { |
|
90 | 104 | n0 += 24; |
|
91 | 105 | if (ip < iend-5) { |
|
92 | 106 | ip += 2; |
|
93 | 107 | bitStream = MEM_readLE32(ip) >> bitCount; |
|
94 | 108 | } else { |
|
95 | 109 | bitStream >>= 16; |
|
96 | 110 | bitCount += 16; |
|
97 | 111 | } } |
|
98 | 112 | while ((bitStream & 3) == 3) { |
|
99 | 113 | n0 += 3; |
|
100 | 114 | bitStream >>= 2; |
|
101 | 115 | bitCount += 2; |
|
102 | 116 | } |
|
103 | 117 | n0 += bitStream & 3; |
|
104 | 118 | bitCount += 2; |
|
105 | 119 | if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); |
|
106 | 120 | while (charnum < n0) normalizedCounter[charnum++] = 0; |
|
107 | 121 | if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { |
|
122 | assert((bitCount >> 3) <= 3); /* For first condition to work */ | |
|
108 | 123 | ip += bitCount>>3; |
|
109 | 124 | bitCount &= 7; |
|
110 | 125 | bitStream = MEM_readLE32(ip) >> bitCount; |
|
111 | 126 | } else { |
|
112 | 127 | bitStream >>= 2; |
|
113 | 128 | } } |
|
114 | 129 | { int const max = (2*threshold-1) - remaining; |
|
115 | 130 | int count; |
|
116 | 131 | |
|
117 | 132 | if ((bitStream & (threshold-1)) < (U32)max) { |
|
118 | 133 | count = bitStream & (threshold-1); |
|
119 | 134 | bitCount += nbBits-1; |
|
120 | 135 | } else { |
|
121 | 136 | count = bitStream & (2*threshold-1); |
|
122 | 137 | if (count >= threshold) count -= max; |
|
123 | 138 | bitCount += nbBits; |
|
124 | 139 | } |
|
125 | 140 | |
|
126 | 141 | count--; /* extra accuracy */ |
|
127 | 142 | remaining -= count < 0 ? -count : count; /* -1 means +1 */ |
|
128 | 143 | normalizedCounter[charnum++] = (short)count; |
|
129 | 144 | previous0 = !count; |
|
130 | 145 | while (remaining < threshold) { |
|
131 | 146 | nbBits--; |
|
132 | 147 | threshold >>= 1; |
|
133 | 148 | } |
|
134 | 149 | |
|
135 | 150 | if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { |
|
136 | 151 | ip += bitCount>>3; |
|
137 | 152 | bitCount &= 7; |
|
138 | 153 | } else { |
|
139 | 154 | bitCount -= (int)(8 * (iend - 4 - ip)); |
|
140 | 155 | ip = iend - 4; |
|
141 | 156 | } |
|
142 | 157 | bitStream = MEM_readLE32(ip) >> (bitCount & 31); |
|
143 | 158 | } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ |
|
144 | 159 | if (remaining != 1) return ERROR(corruption_detected); |
|
145 | 160 | if (bitCount > 32) return ERROR(corruption_detected); |
|
146 | 161 | *maxSVPtr = charnum-1; |
|
147 | 162 | |
|
148 | 163 | ip += (bitCount+7)>>3; |
|
149 | 164 | return ip-istart; |
|
150 | 165 | } |
|
151 | 166 | |
|
152 | 167 | |
|
153 | 168 | /*! HUF_readStats() : |
|
154 | 169 | Read compact Huffman tree, saved by HUF_writeCTable(). |
|
155 | 170 | `huffWeight` is destination buffer. |
|
156 | 171 | `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. |
|
157 | 172 | @return : size read from `src` , or an error Code . |
|
158 | 173 | Note : Needed by HUF_readCTable() and HUF_readDTableX?() . |
|
159 | 174 | */ |
|
160 | 175 | size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, |
|
161 | 176 | U32* nbSymbolsPtr, U32* tableLogPtr, |
|
162 | 177 | const void* src, size_t srcSize) |
|
163 | 178 | { |
|
164 | 179 | U32 weightTotal; |
|
165 | 180 | const BYTE* ip = (const BYTE*) src; |
|
166 | 181 | size_t iSize; |
|
167 | 182 | size_t oSize; |
|
168 | 183 | |
|
169 | 184 | if (!srcSize) return ERROR(srcSize_wrong); |
|
170 | 185 | iSize = ip[0]; |
|
171 | 186 | /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ |
|
172 | 187 | |
|
173 | 188 | if (iSize >= 128) { /* special header */ |
|
174 | 189 | oSize = iSize - 127; |
|
175 | 190 | iSize = ((oSize+1)/2); |
|
176 | 191 | if (iSize+1 > srcSize) return ERROR(srcSize_wrong); |
|
177 | 192 | if (oSize >= hwSize) return ERROR(corruption_detected); |
|
178 | 193 | ip += 1; |
|
179 | 194 | { U32 n; |
|
180 | 195 | for (n=0; n<oSize; n+=2) { |
|
181 | 196 | huffWeight[n] = ip[n/2] >> 4; |
|
182 | 197 | huffWeight[n+1] = ip[n/2] & 15; |
|
183 | 198 | } } } |
|
184 | 199 | else { /* header compressed with FSE (normal case) */ |
|
185 | 200 | FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ |
|
186 | 201 | if (iSize+1 > srcSize) return ERROR(srcSize_wrong); |
|
187 | 202 | oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ |
|
188 | 203 | if (FSE_isError(oSize)) return oSize; |
|
189 | 204 | } |
|
190 | 205 | |
|
191 | 206 | /* collect weight stats */ |
|
192 | 207 | memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); |
|
193 | 208 | weightTotal = 0; |
|
194 | 209 | { U32 n; for (n=0; n<oSize; n++) { |
|
195 | 210 | if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected); |
|
196 | 211 | rankStats[huffWeight[n]]++; |
|
197 | 212 | weightTotal += (1 << huffWeight[n]) >> 1; |
|
198 | 213 | } } |
|
199 | 214 | if (weightTotal == 0) return ERROR(corruption_detected); |
|
200 | 215 | |
|
201 | 216 | /* get last non-null symbol weight (implied, total must be 2^n) */ |
|
202 | 217 | { U32 const tableLog = BIT_highbit32(weightTotal) + 1; |
|
203 | 218 | if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); |
|
204 | 219 | *tableLogPtr = tableLog; |
|
205 | 220 | /* determine last weight */ |
|
206 | 221 | { U32 const total = 1 << tableLog; |
|
207 | 222 | U32 const rest = total - weightTotal; |
|
208 | 223 | U32 const verif = 1 << BIT_highbit32(rest); |
|
209 | 224 | U32 const lastWeight = BIT_highbit32(rest) + 1; |
|
210 | 225 | if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ |
|
211 | 226 | huffWeight[oSize] = (BYTE)lastWeight; |
|
212 | 227 | rankStats[lastWeight]++; |
|
213 | 228 | } } |
|
214 | 229 | |
|
215 | 230 | /* check tree construction validity */ |
|
216 | 231 | if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ |
|
217 | 232 | |
|
218 | 233 | /* results */ |
|
219 | 234 | *nbSymbolsPtr = (U32)(oSize+1); |
|
220 | 235 | return iSize+1; |
|
221 | 236 | } |
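For reference, a sketch of a typical HUF_readStats() call; `src`/`srcSize` are assumed inputs, and the local buffer sizes follow the HUF_SYMBOLVALUE_MAX / HUF_TABLELOG_MAX constants from huf.h rather than anything introduced by this patch:

    /* Sketch: read the weight-table header that precedes Huffman-compressed data. */
    static size_t example_read_header(const void* src, size_t srcSize)
    {
        BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
        U32  rankStats[HUF_TABLELOG_MAX + 1];
        U32  nbSymbols, tableLog;
        size_t const hSize = HUF_readStats(huffWeight, sizeof(huffWeight), rankStats,
                                           &nbSymbols, &tableLog, src, srcSize);
        return hSize;   /* bytes consumed from src, or an error code (test with HUF_isError()) */
    }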
|
1 | NO CONTENT: modified file |
The requested commit or file is too big and content was truncated.