@@ -0,0 +1,405 b''
/**
 * Copyright (c) 2017-present, Gregory Szorc
 * All rights reserved.
 *
 * This software may be modified and distributed under the terms
 * of the BSD license. See the LICENSE file for details.
 */

#include "python-zstandard.h"

extern PyObject* ZstdError;

static void set_unsupported_operation(void) {
    PyObject* iomod;
    PyObject* exc;

    iomod = PyImport_ImportModule("io");
    if (NULL == iomod) {
        return;
    }

    exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
    if (NULL == exc) {
        Py_DECREF(iomod);
        return;
    }

    PyErr_SetNone(exc);
    Py_DECREF(exc);
    Py_DECREF(iomod);
}

static void reader_dealloc(ZstdCompressionReader* self) {
    Py_XDECREF(self->compressor);
    Py_XDECREF(self->reader);

    if (self->buffer.buf) {
        PyBuffer_Release(&self->buffer);
        memset(&self->buffer, 0, sizeof(self->buffer));
    }

    PyObject_Del(self);
}

static ZstdCompressionReader* reader_enter(ZstdCompressionReader* self) {
    size_t zresult;

    if (self->entered) {
        PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
        return NULL;
    }

    zresult = ZSTD_CCtx_setPledgedSrcSize(self->compressor->cctx, self->sourceSize);
    if (ZSTD_isError(zresult)) {
        PyErr_Format(ZstdError, "error setting source size: %s",
            ZSTD_getErrorName(zresult));
        return NULL;
    }

    self->entered = 1;

    Py_INCREF(self);
    return self;
}

static PyObject* reader_exit(ZstdCompressionReader* self, PyObject* args) {
    PyObject* exc_type;
    PyObject* exc_value;
    PyObject* exc_tb;

    if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
        return NULL;
    }

    self->entered = 0;
    self->closed = 1;

    /* Release resources associated with source. */
    Py_CLEAR(self->reader);
    if (self->buffer.buf) {
        PyBuffer_Release(&self->buffer);
        memset(&self->buffer, 0, sizeof(self->buffer));
    }

    Py_CLEAR(self->compressor);

    Py_RETURN_FALSE;
}

static PyObject* reader_readable(ZstdCompressionReader* self) {
    Py_RETURN_TRUE;
}

static PyObject* reader_writable(ZstdCompressionReader* self) {
    Py_RETURN_FALSE;
}

static PyObject* reader_seekable(ZstdCompressionReader* self) {
    Py_RETURN_FALSE;
}

static PyObject* reader_readline(PyObject* self, PyObject* args) {
    set_unsupported_operation();
    return NULL;
}

static PyObject* reader_readlines(PyObject* self, PyObject* args) {
    set_unsupported_operation();
    return NULL;
}

static PyObject* reader_write(PyObject* self, PyObject* args) {
    PyErr_SetString(PyExc_OSError, "stream is not writable");
    return NULL;
}

static PyObject* reader_writelines(PyObject* self, PyObject* args) {
    PyErr_SetString(PyExc_OSError, "stream is not writable");
    return NULL;
}

static PyObject* reader_isatty(PyObject* self) {
    Py_RETURN_FALSE;
}

static PyObject* reader_flush(PyObject* self) {
    Py_RETURN_NONE;
}

static PyObject* reader_close(ZstdCompressionReader* self) {
    self->closed = 1;
    Py_RETURN_NONE;
}

static PyObject* reader_closed(ZstdCompressionReader* self) {
    if (self->closed) {
        Py_RETURN_TRUE;
    }
    else {
        Py_RETURN_FALSE;
    }
}

static PyObject* reader_tell(ZstdCompressionReader* self) {
    /* TODO should this raise OSError since stream isn't seekable? */
    return PyLong_FromUnsignedLongLong(self->bytesCompressed);
}

static PyObject* reader_read(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
    static char* kwlist[] = {
        "size",
        NULL
    };

    Py_ssize_t size = -1;
    PyObject* result = NULL;
    char* resultBuffer;
    Py_ssize_t resultSize;
    size_t zresult;
    size_t oldPos;

    if (!self->entered) {
        PyErr_SetString(ZstdError, "read() must be called from an active context manager");
        return NULL;
    }

    if (self->closed) {
        PyErr_SetString(PyExc_ValueError, "stream is closed");
        return NULL;
    }

    if (self->finishedOutput) {
        return PyBytes_FromStringAndSize("", 0);
    }

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "n", kwlist, &size)) {
        return NULL;
    }

    if (size < 1) {
        PyErr_SetString(PyExc_ValueError, "cannot read negative or size 0 amounts");
        return NULL;
    }

    result = PyBytes_FromStringAndSize(NULL, size);
    if (NULL == result) {
        return NULL;
    }

    PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);

    self->output.dst = resultBuffer;
    self->output.size = resultSize;
    self->output.pos = 0;

readinput:

    /* If we have data left over, consume it. */
    if (self->input.pos < self->input.size) {
        oldPos = self->output.pos;

        Py_BEGIN_ALLOW_THREADS
        zresult = ZSTD_compress_generic(self->compressor->cctx,
            &self->output, &self->input, ZSTD_e_continue);

        Py_END_ALLOW_THREADS

        self->bytesCompressed += self->output.pos - oldPos;

        /* Input exhausted. Clear out state tracking. */
        if (self->input.pos == self->input.size) {
            memset(&self->input, 0, sizeof(self->input));
            Py_CLEAR(self->readResult);

            if (self->buffer.buf) {
                self->finishedInput = 1;
            }
        }

        if (ZSTD_isError(zresult)) {
            PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
            return NULL;
        }

        if (self->output.pos) {
            /* If no more room in output, emit it. */
            if (self->output.pos == self->output.size) {
                memset(&self->output, 0, sizeof(self->output));
                return result;
            }

            /*
             * There is room in the output. We fall through to below, which will either
             * get more input for us or will attempt to end the stream.
             */
        }

        /* Fall through to gather more input. */
    }

    if (!self->finishedInput) {
        if (self->reader) {
            Py_buffer buffer;

            assert(self->readResult == NULL);
            self->readResult = PyObject_CallMethod(self->reader, "read",
                "k", self->readSize);
            if (self->readResult == NULL) {
                return NULL;
            }

            memset(&buffer, 0, sizeof(buffer));

            if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
                return NULL;
            }

            /* EOF */
            if (0 == buffer.len) {
                self->finishedInput = 1;
                Py_CLEAR(self->readResult);
            }
            else {
                self->input.src = buffer.buf;
                self->input.size = buffer.len;
                self->input.pos = 0;
            }

            PyBuffer_Release(&buffer);
        }
        else {
            assert(self->buffer.buf);

            self->input.src = self->buffer.buf;
            self->input.size = self->buffer.len;
            self->input.pos = 0;
        }
    }

    if (self->input.size) {
        goto readinput;
    }

    /* Else EOF */
    oldPos = self->output.pos;

    zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
        &self->input, ZSTD_e_end);

    self->bytesCompressed += self->output.pos - oldPos;

    if (ZSTD_isError(zresult)) {
        PyErr_Format(ZstdError, "error ending compression stream: %s",
            ZSTD_getErrorName(zresult));
        return NULL;
    }

    assert(self->output.pos);

    if (0 == zresult) {
        self->finishedOutput = 1;
    }

    if (safe_pybytes_resize(&result, self->output.pos)) {
        Py_XDECREF(result);
        return NULL;
    }

    memset(&self->output, 0, sizeof(self->output));

    return result;
}

static PyObject* reader_readall(PyObject* self) {
    PyErr_SetNone(PyExc_NotImplementedError);
    return NULL;
}

static PyObject* reader_iter(PyObject* self) {
    set_unsupported_operation();
    return NULL;
}

static PyObject* reader_iternext(PyObject* self) {
    set_unsupported_operation();
    return NULL;
}

static PyMethodDef reader_methods[] = {
    { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
        PyDoc_STR("Enter a compression context") },
    { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
        PyDoc_STR("Exit a compression context") },
    { "close", (PyCFunction)reader_close, METH_NOARGS,
        PyDoc_STR("Close the stream so it cannot perform any more operations") },
    { "closed", (PyCFunction)reader_closed, METH_NOARGS,
        PyDoc_STR("Whether stream is closed") },
    { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
    { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
    { "readable", (PyCFunction)reader_readable, METH_NOARGS,
        PyDoc_STR("Returns True") },
    { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data") },
    { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
    { "readline", (PyCFunction)reader_readline, METH_VARARGS, PyDoc_STR("Not implemented") },
    { "readlines", (PyCFunction)reader_readlines, METH_VARARGS, PyDoc_STR("Not implemented") },
    { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
        PyDoc_STR("Returns False") },
    { "tell", (PyCFunction)reader_tell, METH_NOARGS,
        PyDoc_STR("Returns current number of bytes compressed") },
    { "writable", (PyCFunction)reader_writable, METH_NOARGS,
        PyDoc_STR("Returns False") },
    { "write", reader_write, METH_VARARGS, PyDoc_STR("Raises OSError") },
    { "writelines", reader_writelines, METH_VARARGS, PyDoc_STR("Not implemented") },
    { NULL, NULL }
};

PyTypeObject ZstdCompressionReaderType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "zstd.ZstdCompressionReader", /* tp_name */
    sizeof(ZstdCompressionReader), /* tp_basicsize */
    0, /* tp_itemsize */
    (destructor)reader_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT, /* tp_flags */
    0, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    reader_iter, /* tp_iter */
    reader_iternext, /* tp_iternext */
    reader_methods, /* tp_methods */
    0, /* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    0, /* tp_init */
    0, /* tp_alloc */
    PyType_GenericNew, /* tp_new */
};

void compressionreader_module_init(PyObject* mod) {
    /* TODO make reader a sub-class of io.RawIOBase */

    Py_TYPE(&ZstdCompressionReaderType) = &PyType_Type;
    if (PyType_Ready(&ZstdCompressionReaderType) < 0) {
        return;
    }
}
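The type above is a file-object-style wrapper: read() drives the readinput loop (drain leftover input, pull more data from the source object or buffer, compress with ZSTD_e_continue, and finish the frame with ZSTD_e_end), and it must be called from inside the context manager. A minimal usage sketch, assuming the package imports as `zstandard` and that the compressor hands out this reader through a `stream_reader()` method; the constructor side is not part of this hunk, so those names are assumptions:

```python
# Sketch only: `zstandard` package name and stream_reader() are assumed here.
import io
import zstandard as zstd

source = io.BytesIO(b'data to compress' * 1000)
cctx = zstd.ZstdCompressor()

compressed = bytearray()
# read() must be called from an active context manager (see reader_read()).
with cctx.stream_reader(source) as reader:
    while True:
        chunk = reader.read(16384)  # requested size must be >= 1
        if not chunk:               # b'' once the frame is finished
            break
        compressed.extend(chunk)
    # tell() reports how many compressed bytes have been emitted so far.
    print('compressed bytes emitted:', reader.tell())
```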
@@ -0,0 +1,459 b''
/**
 * Copyright (c) 2017-present, Gregory Szorc
 * All rights reserved.
 *
 * This software may be modified and distributed under the terms
 * of the BSD license. See the LICENSE file for details.
 */

#include "python-zstandard.h"

extern PyObject* ZstdError;

static void set_unsupported_operation(void) {
    PyObject* iomod;
    PyObject* exc;

    iomod = PyImport_ImportModule("io");
    if (NULL == iomod) {
        return;
    }

    exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
    if (NULL == exc) {
        Py_DECREF(iomod);
        return;
    }

    PyErr_SetNone(exc);
    Py_DECREF(exc);
    Py_DECREF(iomod);
}

static void reader_dealloc(ZstdDecompressionReader* self) {
    Py_XDECREF(self->decompressor);
    Py_XDECREF(self->reader);

    if (self->buffer.buf) {
        PyBuffer_Release(&self->buffer);
    }

    PyObject_Del(self);
}

static ZstdDecompressionReader* reader_enter(ZstdDecompressionReader* self) {
    if (self->entered) {
        PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
        return NULL;
    }

    if (ensure_dctx(self->decompressor, 1)) {
        return NULL;
    }

    self->entered = 1;

    Py_INCREF(self);
    return self;
}

static PyObject* reader_exit(ZstdDecompressionReader* self, PyObject* args) {
    PyObject* exc_type;
    PyObject* exc_value;
    PyObject* exc_tb;

    if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
        return NULL;
    }

    self->entered = 0;
    self->closed = 1;

    /* Release resources. */
    Py_CLEAR(self->reader);
    if (self->buffer.buf) {
        PyBuffer_Release(&self->buffer);
        memset(&self->buffer, 0, sizeof(self->buffer));
    }

    Py_CLEAR(self->decompressor);

    Py_RETURN_FALSE;
}

static PyObject* reader_readable(PyObject* self) {
    Py_RETURN_TRUE;
}

static PyObject* reader_writable(PyObject* self) {
    Py_RETURN_FALSE;
}

static PyObject* reader_seekable(PyObject* self) {
    Py_RETURN_TRUE;
}

static PyObject* reader_close(ZstdDecompressionReader* self) {
    self->closed = 1;
    Py_RETURN_NONE;
}

static PyObject* reader_closed(ZstdDecompressionReader* self) {
    if (self->closed) {
        Py_RETURN_TRUE;
    }
    else {
        Py_RETURN_FALSE;
    }
}

static PyObject* reader_flush(PyObject* self) {
    Py_RETURN_NONE;
}

static PyObject* reader_isatty(PyObject* self) {
    Py_RETURN_FALSE;
}

static PyObject* reader_read(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
    static char* kwlist[] = {
        "size",
        NULL
    };

    Py_ssize_t size = -1;
    PyObject* result = NULL;
    char* resultBuffer;
    Py_ssize_t resultSize;
    ZSTD_outBuffer output;
    size_t zresult;

    if (!self->entered) {
        PyErr_SetString(ZstdError, "read() must be called from an active context manager");
        return NULL;
    }

    if (self->closed) {
        PyErr_SetString(PyExc_ValueError, "stream is closed");
        return NULL;
    }

    if (self->finishedOutput) {
        return PyBytes_FromStringAndSize("", 0);
    }

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "n", kwlist, &size)) {
        return NULL;
    }

    if (size < 1) {
        PyErr_SetString(PyExc_ValueError, "cannot read negative or size 0 amounts");
        return NULL;
    }

    result = PyBytes_FromStringAndSize(NULL, size);
    if (NULL == result) {
        return NULL;
    }

    PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);

    output.dst = resultBuffer;
    output.size = resultSize;
    output.pos = 0;

readinput:

    /* Consume input data left over from last time. */
    if (self->input.pos < self->input.size) {
        Py_BEGIN_ALLOW_THREADS
        zresult = ZSTD_decompress_generic(self->decompressor->dctx,
            &output, &self->input);
        Py_END_ALLOW_THREADS

        /* Input exhausted. Clear our state tracking. */
        if (self->input.pos == self->input.size) {
            memset(&self->input, 0, sizeof(self->input));
            Py_CLEAR(self->readResult);

            if (self->buffer.buf) {
                self->finishedInput = 1;
            }
        }

        if (ZSTD_isError(zresult)) {
            PyErr_Format(ZstdError, "zstd decompress error: %s", ZSTD_getErrorName(zresult));
            return NULL;
        }
        else if (0 == zresult) {
            self->finishedOutput = 1;
        }

        /* We fulfilled the full read request. Emit it. */
        if (output.pos && output.pos == output.size) {
            self->bytesDecompressed += output.size;
            return result;
        }

        /*
         * There is more room in the output. Fall through to try to collect
         * more data so we can try to fill the output.
         */
    }

    if (!self->finishedInput) {
        if (self->reader) {
            Py_buffer buffer;

            assert(self->readResult == NULL);
            self->readResult = PyObject_CallMethod(self->reader, "read",
                "k", self->readSize);
            if (NULL == self->readResult) {
                return NULL;
            }

            memset(&buffer, 0, sizeof(buffer));

            if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
                return NULL;
            }

            /* EOF */
            if (0 == buffer.len) {
                self->finishedInput = 1;
                Py_CLEAR(self->readResult);
            }
            else {
                self->input.src = buffer.buf;
                self->input.size = buffer.len;
                self->input.pos = 0;
            }

            PyBuffer_Release(&buffer);
        }
        else {
            assert(self->buffer.buf);
            /*
             * We should only get here once since above block will exhaust
             * source buffer until finishedInput is set.
             */
            assert(self->input.src == NULL);

            self->input.src = self->buffer.buf;
            self->input.size = self->buffer.len;
            self->input.pos = 0;
        }
    }

    if (self->input.size) {
        goto readinput;
    }

    /* EOF */
    self->bytesDecompressed += output.pos;

    if (safe_pybytes_resize(&result, output.pos)) {
        Py_XDECREF(result);
        return NULL;
    }

    return result;
}

static PyObject* reader_readall(PyObject* self) {
    PyErr_SetNone(PyExc_NotImplementedError);
    return NULL;
}

static PyObject* reader_readline(PyObject* self) {
    PyErr_SetNone(PyExc_NotImplementedError);
    return NULL;
}

static PyObject* reader_readlines(PyObject* self) {
    PyErr_SetNone(PyExc_NotImplementedError);
    return NULL;
}

static PyObject* reader_seek(ZstdDecompressionReader* self, PyObject* args) {
    Py_ssize_t pos;
    int whence = 0;
    unsigned long long readAmount = 0;
    size_t defaultOutSize = ZSTD_DStreamOutSize();

    if (!self->entered) {
        PyErr_SetString(ZstdError, "seek() must be called from an active context manager");
        return NULL;
    }

    if (self->closed) {
        PyErr_SetString(PyExc_ValueError, "stream is closed");
        return NULL;
    }

    if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &whence)) {
        return NULL;
    }

    if (whence == SEEK_SET) {
        if (pos < 0) {
            PyErr_SetString(PyExc_ValueError,
                "cannot seek to negative position with SEEK_SET");
            return NULL;
        }

        if ((unsigned long long)pos < self->bytesDecompressed) {
            PyErr_SetString(PyExc_ValueError,
                "cannot seek zstd decompression stream backwards");
            return NULL;
        }

        readAmount = pos - self->bytesDecompressed;
    }
    else if (whence == SEEK_CUR) {
        if (pos < 0) {
            PyErr_SetString(PyExc_ValueError,
                "cannot seek zstd decompression stream backwards");
            return NULL;
        }

        readAmount = pos;
    }
    else if (whence == SEEK_END) {
        /* We /could/ support this with pos==0. But let's not do that until someone
           needs it. */
        PyErr_SetString(PyExc_ValueError,
            "zstd decompression streams cannot be seeked with SEEK_END");
        return NULL;
    }

    /* It is a bit inefficient to do this via the Python API. But since there
       is a bit of state tracking involved to read from this type, it is the
       easiest to implement. */
    while (readAmount) {
        Py_ssize_t readSize;
        PyObject* readResult = PyObject_CallMethod((PyObject*)self, "read", "K",
            readAmount < defaultOutSize ? readAmount : defaultOutSize);

        if (!readResult) {
            return NULL;
        }

        readSize = PyBytes_GET_SIZE(readResult);

        /* Empty read means EOF. */
        if (!readSize) {
            break;
        }

        readAmount -= readSize;
    }

    return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
}

static PyObject* reader_tell(ZstdDecompressionReader* self) {
    /* TODO should this raise OSError since stream isn't seekable? */
    return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
}

static PyObject* reader_write(PyObject* self, PyObject* args) {
    set_unsupported_operation();
    return NULL;
}

static PyObject* reader_writelines(PyObject* self, PyObject* args) {
    set_unsupported_operation();
    return NULL;
}

static PyObject* reader_iter(PyObject* self) {
    PyErr_SetNone(PyExc_NotImplementedError);
    return NULL;
}

static PyObject* reader_iternext(PyObject* self) {
    PyErr_SetNone(PyExc_NotImplementedError);
    return NULL;
}

static PyMethodDef reader_methods[] = {
    { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
        PyDoc_STR("Enter a compression context") },
    { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
        PyDoc_STR("Exit a compression context") },
    { "close", (PyCFunction)reader_close, METH_NOARGS,
        PyDoc_STR("Close the stream so it cannot perform any more operations") },
    { "closed", (PyCFunction)reader_closed, METH_NOARGS,
        PyDoc_STR("Whether stream is closed") },
    { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
    { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
    { "readable", (PyCFunction)reader_readable, METH_NOARGS,
        PyDoc_STR("Returns True") },
    { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS,
        PyDoc_STR("read compressed data") },
    { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
    { "readline", (PyCFunction)reader_readline, METH_NOARGS, PyDoc_STR("Not implemented") },
    { "readlines", (PyCFunction)reader_readlines, METH_NOARGS, PyDoc_STR("Not implemented") },
    { "seek", (PyCFunction)reader_seek, METH_VARARGS, PyDoc_STR("Seek the stream") },
    { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
        PyDoc_STR("Returns True") },
    { "tell", (PyCFunction)reader_tell, METH_NOARGS,
        PyDoc_STR("Returns current number of bytes compressed") },
    { "writable", (PyCFunction)reader_writable, METH_NOARGS,
        PyDoc_STR("Returns False") },
    { "write", (PyCFunction)reader_write, METH_VARARGS, PyDoc_STR("unsupported operation") },
    { "writelines", (PyCFunction)reader_writelines, METH_VARARGS, PyDoc_STR("unsupported operation") },
    { NULL, NULL }
};

PyTypeObject ZstdDecompressionReaderType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "zstd.ZstdDecompressionReader", /* tp_name */
    sizeof(ZstdDecompressionReader), /* tp_basicsize */
    0, /* tp_itemsize */
    (destructor)reader_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT, /* tp_flags */
    0, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    reader_iter, /* tp_iter */
    reader_iternext, /* tp_iternext */
    reader_methods, /* tp_methods */
    0, /* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    0, /* tp_init */
    0, /* tp_alloc */
    PyType_GenericNew, /* tp_new */
};


void decompressionreader_module_init(PyObject* mod) {
    /* TODO make reader a sub-class of io.RawIOBase */

    Py_TYPE(&ZstdDecompressionReaderType) = &PyType_Type;
    if (PyType_Ready(&ZstdDecompressionReaderType) < 0) {
        return;
    }
}
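The decompressing reader mirrors the compressing one but reports itself as seekable in the forward direction only: reader_seek() emulates a seek by repeatedly calling read() and discarding the decompressed output until the target offset is reached, so backwards seeks and SEEK_END raise ValueError. A sketch of that behavior, again assuming the `zstandard` package name and a `stream_reader()` constructor on the decompressor, neither of which appears in this hunk:

```python
# Sketch only: zstandard / stream_reader() / compress() names are assumed.
import io
import zstandard as zstd

cctx = zstd.ZstdCompressor()
frame = cctx.compress(b'0123456789' * 1000)

dctx = zstd.ZstdDecompressor()
with dctx.stream_reader(io.BytesIO(frame)) as reader:
    head = reader.read(10)  # first 10 decompressed bytes
    # seek() only moves forward; internally it read()s and throws away
    # decompressed data until the requested offset (see reader_seek()).
    reader.seek(100)
    print(head, reader.tell())  # tell() counts decompressed bytes: 100
```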
@@ -0,0 +1,62 b''
# Copyright (c) 2017-present, Gregory Szorc
# All rights reserved.
#
# This software may be modified and distributed under the terms
# of the BSD license. See the LICENSE file for details.

"""Python interface to the Zstandard (zstd) compression library."""

from __future__ import absolute_import, unicode_literals

# This module serves 2 roles:
#
# 1) Export the C or CFFI "backend" through a central module.
# 2) Implement additional functionality built on top of C or CFFI backend.

import os
import platform

# Some Python implementations don't support C extensions. That's why we have
# a CFFI implementation in the first place. The code here imports one of our
# "backends" then re-exports the symbols through this module. For convenience,
# we support falling back to the CFFI backend if the C extension can't be
# imported. But for performance reasons, we only do this on unknown Python
# implementations. Notably, for CPython we require the C extension by default.
# Because someone will inevitably want special behavior, the behavior is
# configurable via an environment variable. A potentially better way to handle
# this is to import a special ``__importpolicy__`` module or something
# defining a variable and `setup.py` could write the file with whatever
# policy was specified at build time. Until someone needs it, we go with
# the hacky but simple environment variable approach.
_module_policy = os.environ.get('PYTHON_ZSTANDARD_IMPORT_POLICY', 'default')

if _module_policy == 'default':
    if platform.python_implementation() in ('CPython',):
        from zstd import *
        backend = 'cext'
    elif platform.python_implementation() in ('PyPy',):
        from zstd_cffi import *
        backend = 'cffi'
    else:
        try:
            from zstd import *
            backend = 'cext'
        except ImportError:
            from zstd_cffi import *
            backend = 'cffi'
elif _module_policy == 'cffi_fallback':
    try:
        from zstd import *
        backend = 'cext'
    except ImportError:
        from zstd_cffi import *
        backend = 'cffi'
elif _module_policy == 'cext':
    from zstd import *
    backend = 'cext'
elif _module_policy == 'cffi':
    from zstd_cffi import *
    backend = 'cffi'
else:
    raise ImportError('unknown module import policy: %s; use default, cffi_fallback, '
                      'cext, or cffi' % _module_policy)
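Because the backend is chosen at import time, PYTHON_ZSTANDARD_IMPORT_POLICY has to be set before the package is first imported; the chosen backend is then visible as the module-level `backend` attribute. A small sketch of that, with the `zstandard` package name assumed (it is not named in this hunk):

```python
# Sketch only: the import policy must be decided before the first import of
# the package, e.g. via the environment or at the top of the entry point.
import os
os.environ['PYTHON_ZSTANDARD_IMPORT_POLICY'] = 'cffi_fallback'

import zstandard  # package name assumed

# `backend` records which implementation actually got imported.
print(zstandard.backend)  # 'cext' if the C extension loaded, else 'cffi'
```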
@@ -0,0 +1,339 b''
                    GNU GENERAL PUBLIC LICENSE
                       Version 2, June 1991

 Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

  The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.

  When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.

  To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.

  For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.

  We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.

  Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.

  Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.

  The precise terms and conditions for copying, distribution and
modification follow.

                    GNU GENERAL PUBLIC LICENSE
   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION

  0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".

  Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.

  1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.

  You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.

  2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:

    a) You must cause the modified files to carry prominent notices
    stating that you changed the files and the date of any change.

    b) You must cause any work that you distribute or publish, that in
    whole or in part contains or is derived from the Program or any
    part thereof, to be licensed as a whole at no charge to all third
    parties under the terms of this License.

    c) If the modified program normally reads commands interactively
    when run, you must cause it, when started running for such
    interactive use in the most ordinary way, to print or display an
    announcement including an appropriate copyright notice and a
    notice that there is no warranty (or else, saying that you provide
    a warranty) and that users may redistribute the program under
    these conditions, and telling the user how to view a copy of this
    License. (Exception: if the Program itself is interactive but
    does not normally print such an announcement, your work based on
    the Program is not required to print an announcement.)

  These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.

  Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.

  In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.

  3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:

    a) Accompany it with the complete corresponding machine-readable
    source code, which must be distributed under the terms of Sections
    1 and 2 above on a medium customarily used for software interchange; or,

    b) Accompany it with a written offer, valid for at least three
    years, to give any third party, for a charge no more than your
    cost of physically performing source distribution, a complete
    machine-readable copy of the corresponding source code, to be
    distributed under the terms of Sections 1 and 2 above on a medium
    customarily used for software interchange; or,

    c) Accompany it with the information you received as to the offer
    to distribute corresponding source code. (This alternative is
    allowed only for noncommercial distribution and only if you
    received the program in object code or executable form with such
    an offer, in accord with Subsection b above.)

  The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.

  If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.

  4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.

  5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.

  6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.

  7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.

  If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.

  It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.

  This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.

  8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.

  9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

  Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.

  10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.

                            NO WARRANTY

  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.

  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year> <name of author>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

Also add information on how to contact you by electronic and paper mail.

If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:

    Gnomovision version 69, Copyright (C) year name of author
    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.

You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:

    Yoyodyne, Inc., hereby disclaims all copyright interest in the program
    `Gnomovision' (which makes passes at compilers) written by James Hacker.

    <signature of Ty Coon>, 1 April 1989
    Ty Coon, President of Vice

This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.
\ No newline at end of file
@@ -0,0 +1,111 b'' | |||||
|
1 | /* | |||
|
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
|
3 | * All rights reserved. | |||
|
4 | * | |||
|
5 | * This source code is licensed under both the BSD-style license (found in the | |||
|
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
|
7 | * in the COPYING file in the root directory of this source tree). | |||
|
8 | * You may select, at your option, one of the above-listed licenses. | |||
|
9 | */ | |||
|
10 | ||||
|
11 | #ifndef ZSTD_COMPILER_H | |||
|
12 | #define ZSTD_COMPILER_H | |||
|
13 | ||||
|
14 | /*-******************************************************* | |||
|
15 | * Compiler specifics | |||
|
16 | *********************************************************/ | |||
|
17 | /* force inlining */ | |||
|
18 | #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ | |||
|
19 | # define INLINE_KEYWORD inline | |||
|
20 | #else | |||
|
21 | # define INLINE_KEYWORD | |||
|
22 | #endif | |||
|
23 | ||||
|
24 | #if defined(__GNUC__) | |||
|
25 | # define FORCE_INLINE_ATTR __attribute__((always_inline)) | |||
|
26 | #elif defined(_MSC_VER) | |||
|
27 | # define FORCE_INLINE_ATTR __forceinline | |||
|
28 | #else | |||
|
29 | # define FORCE_INLINE_ATTR | |||
|
30 | #endif | |||
|
31 | ||||
|
32 | /** | |||
|
33 | * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant | |||
|
34 |  * parameters. They must be inlined for the compiler to eliminate the constant | |||
|
35 | * branches. | |||
|
36 | */ | |||
|
37 | #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR | |||
|
38 | /** | |||
|
39 | * HINT_INLINE is used to help the compiler generate better code. It is *not* | |||
|
40 | * used for "templates", so it can be tweaked based on the compilers | |||
|
41 | * performance. | |||
|
42 | * | |||
|
43 | * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the | |||
|
44 | * always_inline attribute. | |||
|
45 | * | |||
|
46 | * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline | |||
|
47 | * attribute. | |||
|
48 | */ | |||
|
49 | #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 | |||
|
50 | # define HINT_INLINE static INLINE_KEYWORD | |||
|
51 | #else | |||
|
52 | # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR | |||
|
53 | #endif | |||
|
54 | ||||
|
55 | /* force no inlining */ | |||
|
56 | #ifdef _MSC_VER | |||
|
57 | # define FORCE_NOINLINE static __declspec(noinline) | |||
|
58 | #else | |||
|
59 | # ifdef __GNUC__ | |||
|
60 | # define FORCE_NOINLINE static __attribute__((__noinline__)) | |||
|
61 | # else | |||
|
62 | # define FORCE_NOINLINE static | |||
|
63 | # endif | |||
|
64 | #endif | |||
|
65 | ||||
|
66 | /* target attribute */ | |||
|
67 | #ifndef __has_attribute | |||
|
68 | #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ | |||
|
69 | #endif | |||
|
70 | #if defined(__GNUC__) | |||
|
71 | # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) | |||
|
72 | #else | |||
|
73 | # define TARGET_ATTRIBUTE(target) | |||
|
74 | #endif | |||
|
75 | ||||
|
76 | /* Enable runtime BMI2 dispatch based on the CPU. | |||
|
77 | * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. | |||
|
78 | */ | |||
|
79 | #ifndef DYNAMIC_BMI2 | |||
|
80 | #if (defined(__clang__) && __has_attribute(__target__)) \ | |||
|
81 | || (defined(__GNUC__) \ | |||
|
82 | && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) \ | |||
|
83 | && (defined(__x86_64__) || defined(_M_X86)) \ | |||
|
84 | && !defined(__BMI2__) | |||
|
85 | # define DYNAMIC_BMI2 1 | |||
|
86 | #else | |||
|
87 | # define DYNAMIC_BMI2 0 | |||
|
88 | #endif | |||
|
89 | #endif | |||
|
90 | ||||
|
91 | /* prefetch */ | |||
|
92 | #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ | |||
|
93 | # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ | |||
|
94 | # define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) | |||
|
95 | #elif defined(__GNUC__) | |||
|
96 | # define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) | |||
|
97 | #else | |||
|
98 | # define PREFETCH(ptr) /* disabled */ | |||
|
99 | #endif | |||
|
100 | ||||
|
101 | /* disable warnings */ | |||
|
102 | #ifdef _MSC_VER /* Visual Studio */ | |||
|
103 | # include <intrin.h> /* For Visual 2005 */ | |||
|
104 | # pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ | |||
|
105 | # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ | |||
|
106 | # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ | |||
|
107 | # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ | |||
|
108 | # pragma warning(disable : 4324) /* disable: C4324: padded structure */ | |||
|
109 | #endif | |||
|
110 | ||||
|
111 | #endif /* ZSTD_COMPILER_H */ |
@@ -0,0 +1,216 b'' | |||||
|
1 | /* | |||
|
2 | * Copyright (c) 2018-present, Facebook, Inc. | |||
|
3 | * All rights reserved. | |||
|
4 | * | |||
|
5 | * This source code is licensed under both the BSD-style license (found in the | |||
|
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
|
7 | * in the COPYING file in the root directory of this source tree). | |||
|
8 | * You may select, at your option, one of the above-listed licenses. | |||
|
9 | */ | |||
|
10 | ||||
|
11 | #ifndef ZSTD_COMMON_CPU_H | |||
|
12 | #define ZSTD_COMMON_CPU_H | |||
|
13 | ||||
|
14 | /** | |||
|
15 | * Implementation taken from folly/CpuId.h | |||
|
16 | * https://github.com/facebook/folly/blob/master/folly/CpuId.h | |||
|
17 | */ | |||
|
18 | ||||
|
19 | #include <string.h> | |||
|
20 | ||||
|
21 | #include "mem.h" | |||
|
22 | ||||
|
23 | #ifdef _MSC_VER | |||
|
24 | #include <intrin.h> | |||
|
25 | #endif | |||
|
26 | ||||
|
27 | typedef struct { | |||
|
28 | U32 f1c; | |||
|
29 | U32 f1d; | |||
|
30 | U32 f7b; | |||
|
31 | U32 f7c; | |||
|
32 | } ZSTD_cpuid_t; | |||
|
33 | ||||
|
34 | MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { | |||
|
35 | U32 f1c = 0; | |||
|
36 | U32 f1d = 0; | |||
|
37 | U32 f7b = 0; | |||
|
38 | U32 f7c = 0; | |||
|
39 | #ifdef _MSC_VER | |||
|
40 | int reg[4]; | |||
|
41 | __cpuid((int*)reg, 0); | |||
|
42 | { | |||
|
43 | int const n = reg[0]; | |||
|
44 | if (n >= 1) { | |||
|
45 | __cpuid((int*)reg, 1); | |||
|
46 | f1c = (U32)reg[2]; | |||
|
47 | f1d = (U32)reg[3]; | |||
|
48 | } | |||
|
49 | if (n >= 7) { | |||
|
50 | __cpuidex((int*)reg, 7, 0); | |||
|
51 | f7b = (U32)reg[1]; | |||
|
52 | f7c = (U32)reg[2]; | |||
|
53 | } | |||
|
54 | } | |||
|
55 | #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) | |||
|
56 |   /* The following block is like the normal cpuid branch below, but gcc | |||
|
57 | * reserves ebx for use of its pic register so we must specially | |||
|
58 | * handle the save and restore to avoid clobbering the register | |||
|
59 | */ | |||
|
60 | U32 n; | |||
|
61 | __asm__( | |||
|
62 | "pushl %%ebx\n\t" | |||
|
63 | "cpuid\n\t" | |||
|
64 | "popl %%ebx\n\t" | |||
|
65 | : "=a"(n) | |||
|
66 | : "a"(0) | |||
|
67 | : "ecx", "edx"); | |||
|
68 | if (n >= 1) { | |||
|
69 | U32 f1a; | |||
|
70 | __asm__( | |||
|
71 | "pushl %%ebx\n\t" | |||
|
72 | "cpuid\n\t" | |||
|
73 | "popl %%ebx\n\t" | |||
|
74 | : "=a"(f1a), "=c"(f1c), "=d"(f1d) | |||
|
75 | : "a"(1) | |||
|
76 | :); | |||
|
77 | } | |||
|
78 | if (n >= 7) { | |||
|
79 | __asm__( | |||
|
80 | "pushl %%ebx\n\t" | |||
|
81 | "cpuid\n\t" | |||
|
82 | "movl %%ebx, %%eax\n\r" | |||
|
83 | "popl %%ebx" | |||
|
84 | : "=a"(f7b), "=c"(f7c) | |||
|
85 | : "a"(7), "c"(0) | |||
|
86 | : "edx"); | |||
|
87 | } | |||
|
88 | #elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) | |||
|
89 | U32 n; | |||
|
90 | __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); | |||
|
91 | if (n >= 1) { | |||
|
92 | U32 f1a; | |||
|
93 | __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); | |||
|
94 | } | |||
|
95 | if (n >= 7) { | |||
|
96 | U32 f7a; | |||
|
97 | __asm__("cpuid" | |||
|
98 | : "=a"(f7a), "=b"(f7b), "=c"(f7c) | |||
|
99 | : "a"(7), "c"(0) | |||
|
100 | : "edx"); | |||
|
101 | } | |||
|
102 | #endif | |||
|
103 | { | |||
|
104 | ZSTD_cpuid_t cpuid; | |||
|
105 | cpuid.f1c = f1c; | |||
|
106 | cpuid.f1d = f1d; | |||
|
107 | cpuid.f7b = f7b; | |||
|
108 | cpuid.f7c = f7c; | |||
|
109 | return cpuid; | |||
|
110 | } | |||
|
111 | } | |||
|
112 | ||||
|
113 | #define X(name, r, bit) \ | |||
|
114 | MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ | |||
|
115 | return ((cpuid.r) & (1U << bit)) != 0; \ | |||
|
116 | } | |||
|
117 | ||||
|
118 | /* cpuid(1): Processor Info and Feature Bits. */ | |||
|
119 | #define C(name, bit) X(name, f1c, bit) | |||
|
120 | C(sse3, 0) | |||
|
121 | C(pclmuldq, 1) | |||
|
122 | C(dtes64, 2) | |||
|
123 | C(monitor, 3) | |||
|
124 | C(dscpl, 4) | |||
|
125 | C(vmx, 5) | |||
|
126 | C(smx, 6) | |||
|
127 | C(eist, 7) | |||
|
128 | C(tm2, 8) | |||
|
129 | C(ssse3, 9) | |||
|
130 | C(cnxtid, 10) | |||
|
131 | C(fma, 12) | |||
|
132 | C(cx16, 13) | |||
|
133 | C(xtpr, 14) | |||
|
134 | C(pdcm, 15) | |||
|
135 | C(pcid, 17) | |||
|
136 | C(dca, 18) | |||
|
137 | C(sse41, 19) | |||
|
138 | C(sse42, 20) | |||
|
139 | C(x2apic, 21) | |||
|
140 | C(movbe, 22) | |||
|
141 | C(popcnt, 23) | |||
|
142 | C(tscdeadline, 24) | |||
|
143 | C(aes, 25) | |||
|
144 | C(xsave, 26) | |||
|
145 | C(osxsave, 27) | |||
|
146 | C(avx, 28) | |||
|
147 | C(f16c, 29) | |||
|
148 | C(rdrand, 30) | |||
|
149 | #undef C | |||
|
150 | #define D(name, bit) X(name, f1d, bit) | |||
|
151 | D(fpu, 0) | |||
|
152 | D(vme, 1) | |||
|
153 | D(de, 2) | |||
|
154 | D(pse, 3) | |||
|
155 | D(tsc, 4) | |||
|
156 | D(msr, 5) | |||
|
157 | D(pae, 6) | |||
|
158 | D(mce, 7) | |||
|
159 | D(cx8, 8) | |||
|
160 | D(apic, 9) | |||
|
161 | D(sep, 11) | |||
|
162 | D(mtrr, 12) | |||
|
163 | D(pge, 13) | |||
|
164 | D(mca, 14) | |||
|
165 | D(cmov, 15) | |||
|
166 | D(pat, 16) | |||
|
167 | D(pse36, 17) | |||
|
168 | D(psn, 18) | |||
|
169 | D(clfsh, 19) | |||
|
170 | D(ds, 21) | |||
|
171 | D(acpi, 22) | |||
|
172 | D(mmx, 23) | |||
|
173 | D(fxsr, 24) | |||
|
174 | D(sse, 25) | |||
|
175 | D(sse2, 26) | |||
|
176 | D(ss, 27) | |||
|
177 | D(htt, 28) | |||
|
178 | D(tm, 29) | |||
|
179 | D(pbe, 31) | |||
|
180 | #undef D | |||
|
181 | ||||
|
182 | /* cpuid(7): Extended Features. */ | |||
|
183 | #define B(name, bit) X(name, f7b, bit) | |||
|
184 | B(bmi1, 3) | |||
|
185 | B(hle, 4) | |||
|
186 | B(avx2, 5) | |||
|
187 | B(smep, 7) | |||
|
188 | B(bmi2, 8) | |||
|
189 | B(erms, 9) | |||
|
190 | B(invpcid, 10) | |||
|
191 | B(rtm, 11) | |||
|
192 | B(mpx, 14) | |||
|
193 | B(avx512f, 16) | |||
|
194 | B(avx512dq, 17) | |||
|
195 | B(rdseed, 18) | |||
|
196 | B(adx, 19) | |||
|
197 | B(smap, 20) | |||
|
198 | B(avx512ifma, 21) | |||
|
199 | B(pcommit, 22) | |||
|
200 | B(clflushopt, 23) | |||
|
201 | B(clwb, 24) | |||
|
202 | B(avx512pf, 26) | |||
|
203 | B(avx512er, 27) | |||
|
204 | B(avx512cd, 28) | |||
|
205 | B(sha, 29) | |||
|
206 | B(avx512bw, 30) | |||
|
207 | B(avx512vl, 31) | |||
|
208 | #undef B | |||
|
209 | #define C(name, bit) X(name, f7c, bit) | |||
|
210 | C(prefetchwt1, 0) | |||
|
211 | C(avx512vbmi, 1) | |||
|
212 | #undef C | |||
|
213 | ||||
|
214 | #undef X | |||
|
215 | ||||
|
216 | #endif /* ZSTD_COMMON_CPU_H */ |
(Several additional new files in this changeset were truncated by the diff viewer; their contents are not shown.)
@@ -8,11 +8,13 b' mercurial/cext/revlog.c' | |||||
8 | contrib/python-zstandard/c-ext/bufferutil.c |
|
8 | contrib/python-zstandard/c-ext/bufferutil.c | |
9 | contrib/python-zstandard/c-ext/compressiondict.c |
|
9 | contrib/python-zstandard/c-ext/compressiondict.c | |
10 | contrib/python-zstandard/c-ext/compressionparams.c |
|
10 | contrib/python-zstandard/c-ext/compressionparams.c | |
|
11 | contrib/python-zstandard/c-ext/compressionreader.c | |||
11 | contrib/python-zstandard/c-ext/compressionwriter.c |
|
12 | contrib/python-zstandard/c-ext/compressionwriter.c | |
12 | contrib/python-zstandard/c-ext/compressobj.c |
|
13 | contrib/python-zstandard/c-ext/compressobj.c | |
13 | contrib/python-zstandard/c-ext/compressor.c |
|
14 | contrib/python-zstandard/c-ext/compressor.c | |
14 | contrib/python-zstandard/c-ext/compressoriterator.c |
|
15 | contrib/python-zstandard/c-ext/compressoriterator.c | |
15 | contrib/python-zstandard/c-ext/constants.c |
|
16 | contrib/python-zstandard/c-ext/constants.c | |
|
17 | contrib/python-zstandard/c-ext/decompressionreader.c | |||
16 | contrib/python-zstandard/c-ext/decompressionwriter.c |
|
18 | contrib/python-zstandard/c-ext/decompressionwriter.c | |
17 | contrib/python-zstandard/c-ext/decompressobj.c |
|
19 | contrib/python-zstandard/c-ext/decompressobj.c | |
18 | contrib/python-zstandard/c-ext/decompressor.c |
|
20 | contrib/python-zstandard/c-ext/decompressor.c | |
@@ -21,11 +23,13 b' contrib/python-zstandard/c-ext/framepara' | |||||
21 | contrib/python-zstandard/c-ext/python-zstandard.h |
|
23 | contrib/python-zstandard/c-ext/python-zstandard.h | |
22 | contrib/python-zstandard/zstd.c |
|
24 | contrib/python-zstandard/zstd.c | |
23 | contrib/python-zstandard/zstd/common/bitstream.h |
|
25 | contrib/python-zstandard/zstd/common/bitstream.h | |
|
26 | contrib/python-zstandard/zstd/common/compiler.h | |||
|
27 | contrib/python-zstandard/zstd/common/cpu.h | |||
24 | contrib/python-zstandard/zstd/common/entropy_common.c |
|
28 | contrib/python-zstandard/zstd/common/entropy_common.c | |
25 | contrib/python-zstandard/zstd/common/error_private.c |
|
29 | contrib/python-zstandard/zstd/common/error_private.c | |
26 | contrib/python-zstandard/zstd/common/error_private.h |
|
30 | contrib/python-zstandard/zstd/common/error_private.h | |
|
31 | contrib/python-zstandard/zstd/common/fse_decompress.c | |||
27 | contrib/python-zstandard/zstd/common/fse.h |
|
32 | contrib/python-zstandard/zstd/common/fse.h | |
28 | contrib/python-zstandard/zstd/common/fse_decompress.c |
|
|||
29 | contrib/python-zstandard/zstd/common/huf.h |
|
33 | contrib/python-zstandard/zstd/common/huf.h | |
30 | contrib/python-zstandard/zstd/common/mem.h |
|
34 | contrib/python-zstandard/zstd/common/mem.h | |
31 | contrib/python-zstandard/zstd/common/pool.c |
|
35 | contrib/python-zstandard/zstd/common/pool.c | |
@@ -40,11 +44,25 b' contrib/python-zstandard/zstd/common/zst' | |||||
40 | contrib/python-zstandard/zstd/compress/fse_compress.c |
|
44 | contrib/python-zstandard/zstd/compress/fse_compress.c | |
41 | contrib/python-zstandard/zstd/compress/huf_compress.c |
|
45 | contrib/python-zstandard/zstd/compress/huf_compress.c | |
42 | contrib/python-zstandard/zstd/compress/zstd_compress.c |
|
46 | contrib/python-zstandard/zstd/compress/zstd_compress.c | |
43 | contrib/python-zstandard/zstd/compress/zstd_opt.h |
|
47 | contrib/python-zstandard/zstd/compress/zstd_compress_internal.h | |
|
48 | contrib/python-zstandard/zstd/compress/zstd_double_fast.c | |||
|
49 | contrib/python-zstandard/zstd/compress/zstd_double_fast.h | |||
|
50 | contrib/python-zstandard/zstd/compress/zstd_fast.c | |||
|
51 | contrib/python-zstandard/zstd/compress/zstd_fast.h | |||
|
52 | contrib/python-zstandard/zstd/compress/zstd_lazy.c | |||
|
53 | contrib/python-zstandard/zstd/compress/zstd_lazy.h | |||
|
54 | contrib/python-zstandard/zstd/compress/zstd_ldm.c | |||
|
55 | contrib/python-zstandard/zstd/compress/zstd_ldm.h | |||
44 | contrib/python-zstandard/zstd/compress/zstdmt_compress.c |
|
56 | contrib/python-zstandard/zstd/compress/zstdmt_compress.c | |
45 | contrib/python-zstandard/zstd/compress/zstdmt_compress.h |
|
57 | contrib/python-zstandard/zstd/compress/zstdmt_compress.h | |
|
58 | contrib/python-zstandard/zstd/compress/zstd_opt.c | |||
|
59 | contrib/python-zstandard/zstd/compress/zstd_opt.h | |||
46 | contrib/python-zstandard/zstd/decompress/huf_decompress.c |
|
60 | contrib/python-zstandard/zstd/decompress/huf_decompress.c | |
47 | contrib/python-zstandard/zstd/decompress/zstd_decompress.c |
|
61 | contrib/python-zstandard/zstd/decompress/zstd_decompress.c | |
|
62 | contrib/python-zstandard/zstd/deprecated/zbuff_common.c | |||
|
63 | contrib/python-zstandard/zstd/deprecated/zbuff_compress.c | |||
|
64 | contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c | |||
|
65 | contrib/python-zstandard/zstd/deprecated/zbuff.h | |||
48 | contrib/python-zstandard/zstd/dictBuilder/cover.c |
|
66 | contrib/python-zstandard/zstd/dictBuilder/cover.c | |
49 | contrib/python-zstandard/zstd/dictBuilder/divsufsort.c |
|
67 | contrib/python-zstandard/zstd/dictBuilder/divsufsort.c | |
50 | contrib/python-zstandard/zstd/dictBuilder/divsufsort.h |
|
68 | contrib/python-zstandard/zstd/dictBuilder/divsufsort.h |
@@ -1,5 +1,7 b'' | |||||
1 | graft c-ext |
|
1 | graft c-ext | |
2 | graft zstd |
|
2 | graft zstd | |
|
3 | graft tests | |||
3 | include make_cffi.py |
|
4 | include make_cffi.py | |
4 | include setup_zstd.py |
|
5 | include setup_zstd.py | |
5 | include zstd.c |
|
6 | include zstd.c | |
|
7 | include LICENSE |
@@ -1,13 +1,201 b'' | |||||
|
1 | =============== | |||
1 | Version History |
|
2 | Version History | |
2 | =============== |
|
3 | =============== | |
3 |
|
4 | |||
|
5 | 1.0.0 (not yet released) | |||
|
6 | ======================== | |||
|
7 | ||||
|
8 | Actions Blocking Release | |||
|
9 | ------------------------ | |||
|
10 | ||||
|
11 | * compression and decompression APIs that support ``io.RawIOBase`` interface | |||
|
12 | (#13). | |||
|
13 | * Refactor module names so C and CFFI extensions live under ``zstandard`` | |||
|
14 | package. | |||
|
15 | * Overall API design review. | |||
|
16 | * Use Python allocator where possible. | |||
|
17 | * Figure out what to do about experimental APIs not implemented by CFFI. | |||
|
18 | * APIs for auto adjusting compression parameters based on input size. e.g. | |||
|
19 | clamping the window log so it isn't too large for input. | |||
|
20 | * Consider allowing compressor and decompressor instances to be thread safe, | |||
|
21 | support concurrent operations. Or track when an operation is in progress and | |||
|
22 | refuse to let concurrent operations use the same instance. | |||
|
23 | * Support for magic-less frames for all decompression operations (``decompress()`` | |||
|
24 | doesn't work due to sniffing the content size and the lack of a ZSTD API to | |||
|
25 | sniff magic-less frames - this should be fixed in 1.3.5.). | |||
|
26 | * Audit for complete flushing when ending compression streams. | |||
|
27 | * Deprecate legacy APIs. | |||
|
28 | * Audit for ability to control read/write sizes on all APIs. | |||
|
29 | * Detect memory leaks via bench.py. | |||
|
30 | * Remove low-level compression parameters from ``ZstdCompressor.__init__`` and | |||
|
31 | require use of ``CompressionParameters``. | |||
|
32 | * Expose ``ZSTD_getFrameProgression()`` from more compressor types. | |||
|
33 | ||||
|
34 | Other Actions Not Blocking Release | |||
|
35 | --------------------------------------- | |||
|
36 | ||||
|
37 | * Support for block compression APIs. | |||
|
38 | * API for ensuring max memory ceiling isn't exceeded. | |||
|
39 | * Move off nose for testing. | |||
|
40 | ||||
|
41 | 0.9.0 (released 2018-04-08) | |||
|
42 | =========================== | |||
|
43 | ||||
|
44 | Backwards Compatibility Notes | |||
|
45 | ----------------------------- | |||
|
46 | ||||
|
47 | * CFFI 1.11 or newer is now required (previous requirement was 1.8). | |||
|
48 | * The primary module is now ``zstandard``. Please change imports of ``zstd`` | |||
|
49 | and ``zstd_cffi`` to ``import zstandard``. See the README for more. Support | |||
|
50 | for importing the old names will be dropped in the next release. | |||
|
51 | * ``ZstdCompressor.read_from()`` and ``ZstdDecompressor.read_from()`` have | |||
|
52 | been renamed to ``read_to_iter()``. ``read_from()`` is aliased to the new | |||
|
53 | name and will be deleted in a future release. | |||
|
54 | * Support for Python 2.6 has been removed. | |||
|
55 | * Support for Python 3.3 has been removed. | |||
|
56 | * The ``selectivity`` argument to ``train_dictionary()`` has been removed, as | |||
|
57 | the feature disappeared from zstd 1.3. | |||
|
58 | * Support for legacy dictionaries has been removed. Cover dictionaries are now | |||
|
59 | the default. ``train_cover_dictionary()`` has effectively been renamed to | |||
|
60 | ``train_dictionary()``. | |||
|
61 | * The ``allow_empty`` argument from ``ZstdCompressor.compress()`` has been | |||
|
62 | deleted and the method now allows empty inputs to be compressed by default. | |||
|
63 | * ``estimate_compression_context_size()`` has been removed. Use | |||
|
64 | ``CompressionParameters.estimated_compression_context_size()`` instead. | |||
|
65 | * ``get_compression_parameters()`` has been removed. Use | |||
|
66 | ``CompressionParameters.from_level()`` instead. | |||
|
67 | * The arguments to ``CompressionParameters.__init__()`` have changed. If you | |||
|
68 | were using positional arguments before, the positions now map to different | |||
|
69 | arguments. It is recommended to use keyword arguments to construct | |||
|
70 | ``CompressionParameters`` instances. | |||
|
71 | * ``TARGETLENGTH_MAX`` constant has been removed (it disappeared from zstandard | |||
|
72 | 1.3.4). | |||
|
73 | * ``ZstdCompressor.write_to()`` and ``ZstdDecompressor.write_to()`` have been | |||
|
74 | renamed to ``ZstdCompressor.stream_writer()`` and | |||
|
75 | ``ZstdDecompressor.stream_writer()``, respectively. The old names are still | |||
|
76 | aliased, but will be removed in the next major release. | |||
|
77 | * Content sizes are written into frame headers by default | |||
|
78 | (``ZstdCompressor(write_content_size=True)`` is now the default). | |||
|
79 | * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters`` | |||
|
80 | for consistency with other types. The old name is an alias and will be removed | |||
|
81 | in the next major release. | |||
|
82 | ||||
|
83 | Bug Fixes | |||
|
84 | --------- | |||
|
85 | ||||
|
86 | * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40) (from 0.8.2). | |||
|
87 | * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35) (from 0.8.2). | |||
|
88 | * Fixed memory leak of ``ZSTD_DDict`` instances in CFFI's ``ZstdDecompressor``. | |||
|
89 | ||||
|
90 | New Features | |||
|
91 | ------------ | |||
|
92 | ||||
|
93 | * Bundled zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various | |||
|
94 | bug fixes and performance improvements. It also gives us access to newer | |||
|
95 | features. | |||
|
96 | * Support for negative compression levels. | |||
|
97 | * Support for *long distance matching* (facilitates compression ratios that approach | |||
|
98 | LZMA). | |||
|
99 | * Support for reading empty zstandard frames (with an embedded content size | |||
|
100 | of 0). | |||
|
101 | * Support for writing and partial support for reading zstandard frames without a | |||
|
102 | magic header. | |||
|
103 | * New ``stream_reader()`` API that exposes the ``io.RawIOBase`` interface (allows | |||
|
104 | you to ``.read()`` from a file-like object). | |||
|
105 | * Several minor features, bug fixes, and performance enhancements. | |||
|
106 | * Wheels for Linux and macOS are now provided with releases. | |||
|
107 | ||||
|
108 | Changes | |||
|
109 | ------- | |||
|
110 | ||||
|
111 | * Functions accepting bytes data now use the buffer protocol and can accept | |||
|
112 | more types (like ``memoryview`` and ``bytearray``) (#26). | |||
|
113 | * Add #includes so compilation on OS X and BSDs works (#20). | |||
|
114 | * New ``ZstdDecompressor.stream_reader()`` API to obtain a read-only i/o stream | |||
|
115 | of decompressed data for a source. | |||
|
116 | * New ``ZstdCompressor.stream_reader()`` API to obtain a read-only i/o stream of | |||
|
117 | compressed data for a source. | |||
|
118 | * Renamed ``ZstdDecompressor.read_from()`` to ``ZstdDecompressor.read_to_iter()``. | |||
|
119 | The old name is still available. | |||
|
120 | * Renamed ``ZstdCompressor.read_from()`` to ``ZstdCompressor.read_to_iter()``. | |||
|
121 | ``read_from()`` is still available at its old location. | |||
|
122 | * Introduce the ``zstandard`` module to import and re-export the C or CFFI | |||
|
123 | *backend* as appropriate. Behavior can be controlled via the | |||
|
124 | ``PYTHON_ZSTANDARD_IMPORT_POLICY`` environment variable. See README for | |||
|
125 | usage info. | |||
|
126 | * Vendored version of zstd upgraded to 1.3.4. | |||
|
127 | * Added module constants ``CONTENTSIZE_UNKNOWN`` and ``CONTENTSIZE_ERROR``. | |||
|
128 | * Add ``STRATEGY_BTULTRA`` compression strategy constant. | |||
|
129 | * Switch from deprecated ``ZSTD_getDecompressedSize()`` to | |||
|
130 | ``ZSTD_getFrameContentSize()`` replacement. | |||
|
131 | * ``ZstdCompressor.compress()`` can now compress empty inputs without requiring | |||
|
132 | special handling. | |||
|
133 | * ``ZstdCompressor`` and ``ZstdDecompressor`` now have a ``memory_size()`` | |||
|
134 | method for determining the current memory utilization of the underlying zstd | |||
|
135 | primitive. | |||
|
136 | * ``train_dictionary()`` has new arguments and functionality for trying multiple | |||
|
137 | variations of COVER parameters and selecting the best one. | |||
|
138 | * Added module constants ``LDM_MINMATCH_MIN``, ``LDM_MINMATCH_MAX``, and | |||
|
139 | ``LDM_BUCKETSIZELOG_MAX``. | |||
|
140 | * Converted all consumers to the zstandard *new advanced API*, which uses | |||
|
141 | ``ZSTD_compress_generic()`` | |||
|
142 | * ``CompressionParameters.__init__`` now accepts several more arguments, | |||
|
143 | including support for *long distance matching*. | |||
|
144 | * ``ZstdCompressionDict.__init__`` now accepts a ``dict_type`` argument that | |||
|
145 | controls how the dictionary should be interpreted. This can be used to | |||
|
146 | force the use of *content-only* dictionaries or to require the presence | |||
|
147 | of the dictionary magic header. | |||
|
148 | * ``ZstdCompressionDict.precompute_compress()`` can be used to precompute the | |||
|
149 | compression dictionary so it can efficiently be used with multiple | |||
|
150 | ``ZstdCompressor`` instances. | |||
|
151 | * Digested dictionaries are now stored in ``ZstdCompressionDict`` instances, | |||
|
152 | created automatically on first use, and automatically reused by all | |||
|
153 | ``ZstdDecompressor`` instances bound to that dictionary. | |||
|
154 | * All meaningful functions now accept keyword arguments. | |||
|
155 | * ``ZstdDecompressor.decompressobj()`` now accepts a ``write_size`` argument | |||
|
156 | to control how much work to perform on every decompressor invocation. | |||
|
157 | * ``ZstdCompressor.write_to()`` now exposes a ``tell()``, which exposes the | |||
|
158 | total number of bytes written so far. | |||
|
159 | * ``ZstdDecompressor.stream_reader()`` now supports ``seek()`` when moving | |||
|
160 | forward in the stream. | |||
|
161 | * Removed ``TARGETLENGTH_MAX`` constant. | |||
|
162 | * Added ``frame_header_size(data)`` function. | |||
|
163 | * Added ``frame_content_size(data)`` function. | |||
|
164 | * Consumers of ``ZSTD_decompress*`` have been switched to the new *advanced | |||
|
165 | decompression* API. | |||
|
166 | * ``ZstdCompressor`` and ``ZstdCompressionParams`` can now be constructed with | |||
|
167 | negative compression levels. | |||
|
168 | * ``ZstdDecompressor`` now accepts a ``max_window_size`` argument to limit the | |||
|
169 | amount of memory required for decompression operations. | |||
|
170 | * ``FORMAT_ZSTD1`` and ``FORMAT_ZSTD1_MAGICLESS`` constants to be used with | |||
|
171 | the ``format`` compression parameter to control whether the frame magic | |||
|
172 | header is written. | |||
|
173 | * ``ZstdDecompressor`` now accepts a ``format`` argument to control the | |||
|
174 | expected frame format. | |||
|
175 | * ``ZstdCompressor`` now has a ``frame_progression()`` method to return | |||
|
176 | information about the current compression operation. | |||
|
177 | * Error messages in CFFI no longer have ``b''`` literals. | |||
|
178 | * Compiler warnings and underlying overflow issues on 32-bit platforms have been | |||
|
179 | fixed. | |||
|
180 | * Builds in CI now build with compiler warnings as errors. This should hopefully | |||
|
181 | fix new compiler warnings from being introduced. | |||
|
182 | * Make ``ZstdCompressor(write_content_size=True)`` and | |||
|
183 | ``CompressionParameters(write_content_size=True)`` the default. | |||
|
184 | * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``. | |||
|
185 | ||||
|
186 | 0.8.2 (released 2018-02-22) | |||
|
187 | --------------------------- | |||
|
188 | ||||
|
189 | * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40). | |||
|
190 | * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35). | |||
|
191 | ||||
4 | 0.8.1 (released 2017-04-08) |
|
192 | 0.8.1 (released 2017-04-08) | |
5 | --------------------------- |
|
193 | --------------------------- | |
6 |
|
194 | |||
7 | * Add #includes so compilation on OS X and BSDs works (#20). |
|
195 | * Add #includes so compilation on OS X and BSDs works (#20). | |
8 |
|
196 | |||
9 | 0.8.0 (released 2017-03-08) |
|
197 | 0.8.0 (released 2017-03-08) | |
10 | --------------------------- |
|
198 | =========================== | |
11 |
|
199 | |||
12 | * CompressionParameters now has a estimated_compression_context_size() method. |
|
200 | * CompressionParameters now has a estimated_compression_context_size() method. | |
13 | zstd.estimate_compression_context_size() is now deprecated and slated for |
|
201 | zstd.estimate_compression_context_size() is now deprecated and slated for | |
@@ -35,7 +223,7 b' 0.8.0 (released 2017-03-08)' | |||||
35 | DictParameters instance to control dictionary generation. |
|
223 | DictParameters instance to control dictionary generation. | |
36 |
|
224 | |||
37 | 0.7.0 (released 2017-02-07) |
|
225 | 0.7.0 (released 2017-02-07) | |
38 | --------------------------- |
|
226 | =========================== | |
39 |
|
227 | |||
40 | * Added zstd.get_frame_parameters() to obtain info about a zstd frame. |
|
228 | * Added zstd.get_frame_parameters() to obtain info about a zstd frame. | |
41 | * Added ZstdDecompressor.decompress_content_dict_chain() for efficient |
|
229 | * Added ZstdDecompressor.decompress_content_dict_chain() for efficient | |
@@ -62,7 +250,7 b' 0.7.0 (released 2017-02-07)' | |||||
62 | * DictParameters instances now expose their values as attributes. |
|
250 | * DictParameters instances now expose their values as attributes. | |
63 |
|
251 | |||
64 | 0.6.0 (released 2017-01-14) |
|
252 | 0.6.0 (released 2017-01-14) | |
65 | --------------------------- |
|
253 | =========================== | |
66 |
|
254 | |||
67 | * Support for legacy zstd protocols (build time opt in feature). |
|
255 | * Support for legacy zstd protocols (build time opt in feature). | |
68 | * Automation improvements to test against Python 3.6, latest versions |
|
256 | * Automation improvements to test against Python 3.6, latest versions | |
@@ -79,17 +267,17 b' 0.6.0 (released 2017-01-14)' | |||||
79 | * Disallow compress(b'') when writing content sizes by default (issue #11). |
|
267 | * Disallow compress(b'') when writing content sizes by default (issue #11). | |
80 |
|
268 | |||
81 | 0.5.2 (released 2016-11-12) |
|
269 | 0.5.2 (released 2016-11-12) | |
82 | --------------------------- |
|
270 | =========================== | |
83 |
|
271 | |||
84 | * more packaging fixes for source distribution |
|
272 | * more packaging fixes for source distribution | |
85 |
|
273 | |||
86 | 0.5.1 (released 2016-11-12) |
|
274 | 0.5.1 (released 2016-11-12) | |
87 | --------------------------- |
|
275 | =========================== | |
88 |
|
276 | |||
89 | * setup_zstd.py is included in the source distribution |
|
277 | * setup_zstd.py is included in the source distribution | |
90 |
|
278 | |||
91 | 0.5.0 (released 2016-11-10) |
|
279 | 0.5.0 (released 2016-11-10) | |
92 | --------------------------- |
|
280 | =========================== | |
93 |
|
281 | |||
94 | * Vendored version of zstd updated to 1.1.1. |
|
282 | * Vendored version of zstd updated to 1.1.1. | |
95 | * Continuous integration for Python 3.6 and 3.7 |
|
283 | * Continuous integration for Python 3.6 and 3.7 | |
@@ -114,8 +302,8 b' 0.5.0 (released 2016-11-10)' | |||||
114 | * The monolithic ``zstd.c`` file has been split into a header file defining |
|
302 | * The monolithic ``zstd.c`` file has been split into a header file defining | |
115 | types and separate ``.c`` source files for the implementation. |
|
303 | types and separate ``.c`` source files for the implementation. | |
116 |
|
304 | |||
117 | History of the Project |
|
305 | Older History | |
118 |
============= |
|
306 | ============= | |
119 |
|
307 | |||
120 | 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a |
|
308 | 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a | |
121 | Python extension for use by the Mercurial project. A very hacky prototype |
|
309 | Python extension for use by the Mercurial project. A very hacky prototype |
This diff has been collapsed as it changes many lines (971 lines changed).
@@ -11,69 +11,18 b' underlying C API through a Pythonic inte' | |||||
11 | performance. This means exposing most of the features and flexibility |
|
11 | performance. This means exposing most of the features and flexibility | |
12 | of the C API while not sacrificing usability or safety that Python provides. |
|
12 | of the C API while not sacrificing usability or safety that Python provides. | |
13 |
|
13 | |||
14 | The canonical home for this project is |
|
14 | The canonical home for this project lives in a Mercurial repository run by | |
|
15 | the author. For convenience, that repository is frequently synchronized to | |||
15 | https://github.com/indygreg/python-zstandard. |
|
16 | https://github.com/indygreg/python-zstandard. | |
16 |
|
17 | |||
17 | | |ci-status| |win-ci-status| |
|
18 | | |ci-status| |win-ci-status| | |
18 |
|
19 | |||
19 | State of Project |
|
|||
20 | ================ |
|
|||
21 |
|
||||
22 | The project is officially in beta state. The author is reasonably satisfied |
|
|||
23 | that functionality works as advertised. **There will be some backwards |
|
|||
24 | incompatible changes before 1.0, probably in the 0.9 release.** This may |
|
|||
25 | involve renaming the main module from *zstd* to *zstandard* and renaming |
|
|||
26 | various types and methods. Pin the package version to prevent unwanted |
|
|||
27 | breakage when this change occurs! |
|
|||
28 |
|
||||
29 | This project is vendored and distributed with Mercurial 4.1, where it is |
|
|||
30 | used in a production capacity. |
|
|||
31 |
|
||||
32 | There is continuous integration for Python versions 2.6, 2.7, and 3.3+ |
|
|||
33 | on Linux x86_x64 and Windows x86 and x86_64. The author is reasonably |
|
|||
34 | confident the extension is stable and works as advertised on these |
|
|||
35 | platforms. |
|
|||
36 |
|
||||
37 | The CFFI bindings are mostly feature complete. Where a feature is implemented |
|
|||
38 | in CFFI, unit tests run against both C extension and CFFI implementation to |
|
|||
39 | ensure behavior parity. |
|
|||
40 |
|
||||
41 | Expected Changes |
|
|||
42 | ---------------- |
|
|||
43 |
|
||||
44 | The author is reasonably confident in the current state of what's |
|
|||
45 | implemented on the ``ZstdCompressor`` and ``ZstdDecompressor`` types. |
|
|||
46 | Those APIs likely won't change significantly. Some low-level behavior |
|
|||
47 | (such as naming and types expected by arguments) may change. |
|
|||
48 |
|
||||
49 | There will likely be arguments added to control the input and output |
|
|||
50 | buffer sizes (currently, certain operations read and write in chunk |
|
|||
51 | sizes using zstd's preferred defaults). |
|
|||
52 |
|
||||
53 | There should be an API that accepts an object that conforms to the buffer |
|
|||
54 | interface and returns an iterator over compressed or decompressed output. |
|
|||
55 |
|
||||
56 | There should be an API that exposes an ``io.RawIOBase`` interface to |
|
|||
57 | compressor and decompressor streams, like how ``gzip.GzipFile`` from |
|
|||
58 | the standard library works (issue 13). |
|
|||
59 |
|
||||
60 | The author is on the fence as to whether to support the extremely |
|
|||
61 | low level compression and decompression APIs. It could be useful to |
|
|||
62 | support compression without the framing headers. But the author doesn't |
|
|||
63 | believe it a high priority at this time. |
|
|||
64 |
|
||||
65 | There will likely be a refactoring of the module names. Currently, |
|
|||
66 | ``zstd`` is a C extension and ``zstd_cffi`` is the CFFI interface. |
|
|||
67 | This means that all code for the C extension must be implemented in |
|
|||
68 | C. ``zstd`` may be converted to a Python module so code can be reused |
|
|||
69 | between CFFI and C and so not all code in the C extension has to be C. |
|
|||
70 |
|
||||
71 | Requirements |
|
20 | Requirements | |
72 | ============ |
|
21 | ============ | |
73 |
|
22 | |||
74 |
This extension is designed to run with Python 2. |
|
23 | This extension is designed to run with Python 2.7, 3.4, 3.5, and 3.6 | |
75 |
|
|
24 | on common platforms (Linux, Windows, and OS X). x86 and x86_64 are well-tested | |
76 | currently well-tested as an architecture. |
|
25 | on Windows. Only x86_64 is well-tested on Linux and macOS. | |
77 |
|
26 | |||
78 | Installing |
|
27 | Installing | |
79 | ========== |
|
28 | ========== | |
@@ -96,114 +45,82 b' this package with ``conda``.' | |||||
96 | Performance |
|
45 | Performance | |
97 | =========== |
|
46 | =========== | |
98 |
|
47 | |||
99 | Very crude and non-scientific benchmarking (most benchmarks fall in this |
|
48 | zstandard is a highly tunable compression algorithm. In its default settings | |
100 | category because proper benchmarking is hard) show that the Python bindings |
|
49 | (compression level 3), it will be faster at compression and decompression and | |
101 | perform within 10% of the native C implementation. |
|
50 | will have better compression ratios than zlib on most data sets. When tuned | |
102 |
|
51 | for speed, it approaches lz4's speed and ratios. When tuned for compression | ||
103 | The following table compares the performance of compressing and decompressing |
|
52 | ratio, it approaches lzma ratios and compression speed, but decompression | |
104 | a 1.1 GB tar file comprised of the files in a Firefox source checkout. Values |
|
53 | speed is much faster. See the official zstandard documentation for more. | |
105 | obtained with the ``zstd`` program are on the left. The remaining columns detail |
|
|||
106 | performance of various compression APIs in the Python bindings. |
|
|||
107 |
|
54 | |||
108 | +-------+-----------------+-----------------+-----------------+---------------+ |
|
55 | zstandard and this library support multi-threaded compression. There is a | |
109 | | Level | Native | Simple | Stream In | Stream Out | |
|
56 | mechanism to compress large inputs using multiple threads. | |
110 | | | Comp / Decomp | Comp / Decomp | Comp / Decomp | Comp | |
|
|||
111 | +=======+=================+=================+=================+===============+ |
|
|||
112 | | 1 | 490 / 1338 MB/s | 458 / 1266 MB/s | 407 / 1156 MB/s | 405 MB/s | |
|
|||
113 | +-------+-----------------+-----------------+-----------------+---------------+ |
|
|||
114 | | 2 | 412 / 1288 MB/s | 381 / 1203 MB/s | 345 / 1128 MB/s | 349 MB/s | |
|
|||
115 | +-------+-----------------+-----------------+-----------------+---------------+ |
|
|||
116 | | 3 | 342 / 1312 MB/s | 319 / 1182 MB/s | 285 / 1165 MB/s | 287 MB/s | |
|
|||
117 | +-------+-----------------+-----------------+-----------------+---------------+ |
|
|||
118 | | 11 | 64 / 1506 MB/s | 66 / 1436 MB/s | 56 / 1342 MB/s | 57 MB/s | |
|
|||
119 | +-------+-----------------+-----------------+-----------------+---------------+ |
|
|||
120 |
|
||||
121 | Again, these are very unscientific. But it shows that Python is capable of |
|
|||
122 | compressing at several hundred MB/s and decompressing at over 1 GB/s. |
|
|||
123 |
|
||||
124 | Comparison to Other Python Bindings |
|
|||
125 | =================================== |
|
|||
126 |
|
||||
127 | https://pypi.python.org/pypi/zstd is an alternate Python binding to |
|
|||
128 | Zstandard. At the time this was written, the latest release of that |
|
|||
129 | package (1.1.2) only exposed the simple APIs for compression and decompression. |
|
|||
130 | This package exposes much more of the zstd API, including streaming and |
|
|||
131 | dictionary compression. This package also has CFFI support. |
|
|||
132 |
|
||||
133 | Bundling of Zstandard Source Code |
|
|||
134 | ================================= |
|
|||
135 |
|
||||
136 | The source repository for this project contains a vendored copy of the |
|
|||
137 | Zstandard source code. This is done for a few reasons. |
|
|||
138 |
|
57 | |||
139 | First, Zstandard is relatively new and not yet widely available as a system |
|
58 | The performance of this library is usually very similar to what the zstandard | |
140 | package. Providing a copy of the source code enables the Python C extension |
|
59 | C API can deliver. Overhead in this library is due to general Python overhead | |
141 | to be compiled without requiring the user to obtain the Zstandard source code |
|
60 | and can't easily be avoided by *any* zstandard Python binding. This library | |
142 | separately. |
|
61 | exposes multiple APIs for performing compression and decompression so callers | |
143 |
|
62 | can pick an API suitable for their need. Contrast with the compression | ||
144 | Second, Zstandard has both a stable *public* API and an *experimental* API. |
|
63 | modules in Python's standard library (like ``zlib``), which only offer limited | |
145 | The *experimental* API is actually quite useful (contains functionality for |
|
64 | mechanisms for performing operations. The API flexibility means consumers can | |
146 | training dictionaries for example), so it is something we wish to expose to |
|
65 | choose to use APIs that facilitate zero copying or minimize Python object | |
147 | Python. However, the *experimental* API is only available via static linking. |
|
66 | creation and garbage collection overhead. | |
148 | Furthermore, the *experimental* API can change at any time. So, control over |
|
|||
149 | the exact version of the Zstandard library linked against is important to |
|
|||
150 | ensure known behavior. |
|
|||
151 |
|
||||
152 | Instructions for Building and Testing |
|
|||
153 | ===================================== |
|
|||
154 |
|
||||
155 | Once you have the source code, the extension can be built via setup.py:: |
|
|||
156 |
|
||||
157 | $ python setup.py build_ext |
|
|||
158 |
|
||||
159 | We recommend testing with ``nose``:: |
|
|||
160 |
|
||||
161 | $ nosetests |
|
|||
162 |
|
67 | |||
163 | A Tox configuration is present to test against multiple Python versions:: |
|
68 | This library is capable of single-threaded throughputs well over 1 GB/s. For | |
164 |
|
69 | exact numbers, measure yourself. The source code repository has a ``bench.py`` | ||
165 | $ tox |
|
70 | script that can be used to measure things. | |
166 |
|
||||
167 | Tests use the ``hypothesis`` Python package to perform fuzzing. If you |
|
|||
168 | don't have it, those tests won't run. Since the fuzzing tests take longer |
|
|||
169 | to execute than normal tests, you'll need to opt in to running them by |
|
|||
170 | setting the ``ZSTD_SLOW_TESTS`` environment variable. This is set |
|
|||
171 | automatically when using ``tox``. |
|
|||
172 |
|
||||
173 | The ``cffi`` Python package needs to be installed in order to build the CFFI |
|
|||
174 | bindings. If it isn't present, the CFFI bindings won't be built. |
|
|||
175 |
|
||||
176 | To create a virtualenv with all development dependencies, do something |
|
|||
177 | like the following:: |
|
|||
178 |
|
||||
179 | # Python 2 |
|
|||
180 | $ virtualenv venv |
|
|||
181 |
|
||||
182 | # Python 3 |
|
|||
183 | $ python3 -m venv venv |
|
|||
184 |
|
||||
185 | $ source venv/bin/activate |
|
|||
186 | $ pip install cffi hypothesis nose tox |
|
|||
187 |
|
71 | |||
188 | API |
|
72 | API | |
189 | === |
|
73 | === | |
190 |
|
74 | |||
191 | The compiled C extension provides a ``zstd`` Python module. The CFFI |
|
75 | To interface with Zstandard, simply import the ``zstandard`` module:: | |
192 | bindings provide a ``zstd_cffi`` module. Both provide an identical API |
|
76 | ||
193 | interface. The types, functions, and attributes exposed by these modules |
|
77 | import zstandard | |
|
78 | ||||
|
79 | It is a popular convention to alias the module as a different name for | |||
|
80 | brevity:: | |||
|
81 | ||||
|
82 | import zstandard as zstd | |||
|
83 | ||||
|
84 | This module attempts to import and use either the C extension or CFFI | |||
|
85 | implementation. On Python platforms known to support C extensions (like | |||
|
86 | CPython), it raises an ImportError if the C extension cannot be imported. | |||
|
87 | On Python platforms known to not support C extensions (like PyPy), it only | |||
|
88 | attempts to import the CFFI implementation and raises ImportError if that | |||
|
89 | can't be done. On other platforms, it first tries to import the C extension | |||
|
90 | then falls back to CFFI if that fails and raises ImportError if CFFI fails. | |||
|
91 | ||||
|
92 | To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY`` | |||
|
93 | environment variable can be set. The following values are accepted: | |||
|
94 | ||||
|
95 | default | |||
|
96 | The behavior described above. | |||
|
97 | cffi_fallback | |||
|
98 | Always try to import the C extension then fall back to CFFI if that | |||
|
99 | fails. | |||
|
100 | cext | |||
|
101 | Only attempt to import the C extension. | |||
|
102 | cffi | |||
|
103 | Only attempt to import the CFFI implementation. | |||
|
104 | ||||
|
105 | In addition, the ``zstandard`` module exports a ``backend`` attribute | |||
|
106 | containing the string name of the backend being used. It will be one | |||
|
107 | of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively). | |||
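
For illustration, a rough sketch combining the ``PYTHON_ZSTANDARD_IMPORT_POLICY``
environment variable and the ``backend`` attribute described above (forcing the
CFFI backend is only an example choice and assumes CFFI is installed)::

    import os

    # The policy must be set before ``zstandard`` is first imported.
    os.environ['PYTHON_ZSTANDARD_IMPORT_POLICY'] = 'cffi'

    import zstandard

    print(zstandard.backend)  # 'cffi' here; 'cext' is typical under the default policy
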
|
108 | ||||
|
109 | The types, functions, and attributes exposed by the ``zstandard`` module | |||
194 | are documented in the sections below. |
|
110 | are documented in the sections below. | |
195 |
|
111 | |||
196 | .. note:: |
|
112 | .. note:: | |
197 |
|
113 | |||
198 | The documentation in this section makes references to various zstd |
|
114 | The documentation in this section makes references to various zstd | |
199 |
concepts and functionality. The |
|
115 | concepts and functionality. The source repository contains a | |
200 | these concepts in more detail. |
|
116 | ``docs/concepts.rst`` file explaining these in more detail. | |
201 |
|
117 | |||
202 | ZstdCompressor |
|
118 | ZstdCompressor | |
203 | -------------- |
|
119 | -------------- | |
204 |
|
120 | |||
205 | The ``ZstdCompressor`` class provides an interface for performing |
|
121 | The ``ZstdCompressor`` class provides an interface for performing | |
206 | compression operations. |
|
122 | compression operations. Each instance is essentially a wrapper around a | |
|
123 | ``ZSTD_CCtx`` from the C API. | |||
207 |
|
124 | |||
208 | Each instance is associated with parameters that control compression |
|
125 | Each instance is associated with parameters that control compression | |
209 | behavior. These come from the following named arguments (all optional): |
|
126 | behavior. These come from the following named arguments (all optional): | |
@@ -214,21 +131,21 b' dict_data' | |||||
214 | Compression dictionary to use. |
|
131 | Compression dictionary to use. | |
215 |
|
132 | |||
216 | Note: When using dictionary data and ``compress()`` is called multiple |
|
133 | Note: When using dictionary data and ``compress()`` is called multiple | |
217 |
times, the ``CompressionParameters`` derived from an integer |
|
134 | times, the ``ZstdCompressionParameters`` derived from an integer | |
218 |
``level`` and the first compressed data's size will be reused |
|
135 | compression ``level`` and the first compressed data's size will be reused | |
219 |
subsequent operations. This may not be desirable if source data |
|
136 | for all subsequent operations. This may not be desirable if source data | |
220 | varies significantly. |
|
137 | size varies significantly. | |
221 | compression_params |
|
138 | compression_params | |
222 |
A ``CompressionParameters`` instance |
|
139 | A ``ZstdCompressionParameters`` instance defining compression settings. | |
223 | write_checksum |
|
140 | write_checksum | |
224 | Whether a 4 byte checksum should be written with the compressed data. |
|
141 | Whether a 4 byte checksum should be written with the compressed data. | |
225 | Defaults to False. If True, the decompressor can verify that decompressed |
|
142 | Defaults to False. If True, the decompressor can verify that decompressed | |
226 | data matches the original input data. |
|
143 | data matches the original input data. | |
227 | write_content_size |
|
144 | write_content_size | |
228 | Whether the size of the uncompressed data will be written into the |
|
145 | Whether the size of the uncompressed data will be written into the | |
229 |
header of compressed data. Defaults to |
|
146 | header of compressed data. Defaults to True. The data will only be | |
230 | written if the compressor knows the size of the input data. This is |
|
147 | written if the compressor knows the size of the input data. This is | |
231 |
|
|
148 | often not true for streaming compression. | |
232 | write_dict_id |
|
149 | write_dict_id | |
233 | Whether to write the dictionary ID into the compressed data. |
|
150 | Whether to write the dictionary ID into the compressed data. | |
234 | Defaults to True. The dictionary ID is only written if a dictionary |
|
151 | Defaults to True. The dictionary ID is only written if a dictionary | |
@@ -242,10 +159,25 b' threads' | |||||
242 | data. APIs that spawn multiple threads for working on multiple pieces of |
|
159 | data. APIs that spawn multiple threads for working on multiple pieces of | |
243 | data have their own ``threads`` argument. |
|
160 | data have their own ``threads`` argument. | |
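
As an illustrative sketch of these constructor arguments (the specific level,
thread count, and input data are arbitrary choices, not recommendations)::

    cctx = zstd.ZstdCompressor(level=10, threads=4, write_checksum=True)
    compressed = cctx.compress(b'data to compress' * 1000)
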
244 |
|
161 | |||
|
162 | ``compression_params`` is mutually exclusive with ``level``, ``write_checksum``, | |||
|
163 | ``write_content_size``, ``write_dict_id``, and ``threads``. | |||
|
164 | ||||
245 | Unless specified otherwise, assume that no two methods of ``ZstdCompressor`` |
|
165 | Unless specified otherwise, assume that no two methods of ``ZstdCompressor`` | |
246 | instances can be called from multiple Python threads simultaneously. In other |
|
166 | instances can be called from multiple Python threads simultaneously. In other | |
247 | words, assume instances are not thread safe unless stated otherwise. |
|
167 | words, assume instances are not thread safe unless stated otherwise. | |
248 |
|
168 | |||
|
169 | Utility Methods | |||
|
170 | ^^^^^^^^^^^^^^^ | |||
|
171 | ||||
|
172 | ``frame_progression()`` returns a 3-tuple containing the number of bytes | |||
|
173 | ingested, consumed, and produced by the current compression operation. | |||
|
174 | ||||
|
175 | ``memory_size()`` obtains the memory utilization of the underlying zstd | |||
|
176 | compression context, in bytes.:: | |||
|
177 | ||||
|
178 | cctx = zstd.ZstdCompressor() | |||
|
179 | memory = cctx.memory_size() | |||
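
Similarly, a rough sketch of ``frame_progression()`` during an in-progress
operation (``io.BytesIO`` is just a stand-in sink; the exact numbers depend on
how much data has been written so far)::

    import io

    cctx = zstd.ZstdCompressor()
    with cctx.stream_writer(io.BytesIO()) as compressor:
        compressor.write(b'data' * 16384)
        ingested, consumed, produced = cctx.frame_progression()
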
|
180 | ||||
249 | Simple API |
|
181 | Simple API | |
250 | ^^^^^^^^^^ |
|
182 | ^^^^^^^^^^ | |
251 |
|
183 | |||
@@ -256,40 +188,75 b' Simple API' | |||||
256 |
|
188 | |||
257 | The ``data`` argument can be any object that implements the *buffer protocol*. |
|
189 | The ``data`` argument can be any object that implements the *buffer protocol*. | |
258 |
|
190 | |||
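
To illustrate the *buffer protocol* point with a rough sketch (``bytearray`` and
``memoryview`` are simply examples of buffer-providing types)::

    cctx = zstd.ZstdCompressor()
    raw = bytearray(b'data to compress' * 100)
    compressed = cctx.compress(memoryview(raw))

    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress(compressed) == bytes(raw)
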
259 | Unless ``compression_params`` or ``dict_data`` are passed to the |
|
191 | Stream Reader API | |
260 | ``ZstdCompressor``, each invocation of ``compress()`` will calculate the |
|
192 | ^^^^^^^^^^^^^^^^^ | |
261 | optimal compression parameters for the configured compression ``level`` and |
|
193 | ||
262 | input data size (some parameters are fine-tuned for small input sizes). |
|
194 | ``stream_reader(source)`` can be used to obtain an object conforming to the | |
|
195 | ``io.RawIOBase`` interface for reading compressed output as a stream:: | |||
|
196 | ||||
|
197 | with open(path, 'rb') as fh: | |||
|
198 | cctx = zstd.ZstdCompressor() | |||
|
199 | with cctx.stream_reader(fh) as reader: | |||
|
200 | while True: | |||
|
201 | chunk = reader.read(16384) | |||
|
202 | if not chunk: | |||
|
203 | break | |||
|
204 | ||||
|
205 | # Do something with compressed chunk. | |||
|
206 | ||||
|
207 | The stream can only be read within a context manager. When the context | |||
|
208 | manager exits, the stream is closed and the underlying resource is | |||
|
209 | released and future operations against the compression stream will fail. | |||
|
210 | ||||
|
211 | The ``source`` argument to ``stream_reader()`` can be any object with a | |||
|
212 | ``read(size)`` method or any object implementing the *buffer protocol*. | |||
263 |
|
213 | |||
264 | If a compression dictionary is being used, the compression parameters |
|
214 | ``stream_reader()`` accepts a ``size`` argument specifying how large the input | |
265 | determined from the first input's size will be reused for subsequent |
|
215 | stream is. This is used to adjust compression parameters so they are | |
266 | operations. |
|
216 | tailored to the source size.:: | |
|
217 | ||||
|
218 | with open(path, 'rb') as fh: | |||
|
219 | cctx = zstd.ZstdCompressor() | |||
|
220 | with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader: | |||
|
221 | ... | |||
|
222 | ||||
|
223 | If the ``source`` is a stream, you can specify how large ``read()`` requests | |||
|
224 | to that stream should be via the ``read_size`` argument. It defaults to | |||
|
225 | ``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.:: | |||
267 |
|
|
226 | ||
268 | There is currently a deficiency in zstd's C APIs that makes it difficult |
|
227 | with open(path, 'rb') as fh: | |
269 | to round trip empty inputs when ``write_content_size=True``. Attempting |
|
228 | cctx = zstd.ZstdCompressor() | |
270 | this will raise a ``ValueError`` unless ``allow_empty=True`` is passed |
|
229 | # Will perform fh.read(8192) when obtaining data to feed into the | |
271 | to ``compress()``. |
|
230 | # compressor. | |
|
231 | with cctx.stream_reader(fh, read_size=8192) as reader: | |||
|
232 | ... | |||
|
233 | ||||
|
234 | The stream returned by ``stream_reader()`` is neither writable nor seekable | |||
|
235 | (even if the underlying source is seekable). ``readline()`` and | |||
|
236 | ``readlines()`` are not implemented because they don't make sense for | |||
|
237 | compressed data. ``tell()`` returns the number of compressed bytes | |||
|
238 | emitted so far. | |||
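A small sketch of using ``tell()`` to observe progress (``path`` is assumed to name an existing file)::

    with open(path, 'rb') as fh:
        cctx = zstd.ZstdCompressor()
        with cctx.stream_reader(fh) as reader:
            while reader.read(16384):
                pass
            print('compressed bytes emitted: %d' % reader.tell())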
272 |
|
239 | |||
273 | Streaming Input API |
|
240 | Streaming Input API | |
274 | ^^^^^^^^^^^^^^^^^^^ |
|
241 | ^^^^^^^^^^^^^^^^^^^ | |
275 |
|
242 | |||
276 |
`` |
|
243 | ``stream_writer(fh)`` (which behaves as a context manager) allows you to *stream* | |
277 | data into a compressor.:: |
|
244 | data into a compressor.:: | |
278 |
|
245 | |||
279 | cctx = zstd.ZstdCompressor(level=10) |
|
246 | cctx = zstd.ZstdCompressor(level=10) | |
280 |
with cctx. |
|
247 | with cctx.stream_writer(fh) as compressor: | |
281 | compressor.write(b'chunk 0') |
|
248 | compressor.write(b'chunk 0') | |
282 | compressor.write(b'chunk 1') |
|
249 | compressor.write(b'chunk 1') | |
283 | ... |
|
250 | ... | |
284 |
|
251 | |||
285 |
The argument to `` |
|
252 | The argument to ``stream_writer()`` must have a ``write(data)`` method. As | |
286 | compressed data is available, ``write()`` will be called with the compressed |
|
253 | compressed data is available, ``write()`` will be called with the compressed | |
287 | data as its argument. Many common Python types implement ``write()``, including |
|
254 | data as its argument. Many common Python types implement ``write()``, including | |
288 | open file handles and ``io.BytesIO``. |
|
255 | open file handles and ``io.BytesIO``. | |
289 |
|
256 | |||
290 |
`` |
|
257 | ``stream_writer()`` returns an object representing a streaming compressor | |
291 |
It **must** be used as a context manager. That object's |
|
258 | instance. It **must** be used as a context manager. That object's | |
292 | is used to feed data into the compressor. |
|
259 | ``write(data)`` method is used to feed data into the compressor. | |
293 |
|
260 | |||
294 | A ``flush()`` method can be called to evict whatever data remains within the |
|
261 | A ``flush()`` method can be called to evict whatever data remains within the | |
295 | compressor's internal state into the output object. This may result in 0 or |
|
262 | compressor's internal state into the output object. This may result in 0 or | |
@@ -303,7 +270,7 b' If the size of the data being fed to thi' | |||||
303 | you can declare it before compression begins:: |
|
270 | you can declare it before compression begins:: | |
304 |
|
271 | |||
305 | cctx = zstd.ZstdCompressor() |
|
272 | cctx = zstd.ZstdCompressor() | |
306 |
with cctx. |
|
273 | with cctx.stream_writer(fh, size=data_len) as compressor: | |
307 | compressor.write(chunk0) |
|
274 | compressor.write(chunk0) | |
308 | compressor.write(chunk1) |
|
275 | compressor.write(chunk1) | |
309 | ... |
|
276 | ... | |
@@ -315,29 +282,35 b' content size being written into the fram' | |||||
315 | The size of chunks being ``write()`` to the destination can be specified:: |
|
282 | The size of chunks being ``write()`` to the destination can be specified:: | |
316 |
|
283 | |||
317 | cctx = zstd.ZstdCompressor() |
|
284 | cctx = zstd.ZstdCompressor() | |
318 |
with cctx. |
|
285 | with cctx.stream_writer(fh, write_size=32768) as compressor: | |
319 | ... |
|
286 | ... | |
320 |
|
287 | |||
321 | To see how much memory is being used by the streaming compressor:: |
|
288 | To see how much memory is being used by the streaming compressor:: | |
322 |
|
289 | |||
323 | cctx = zstd.ZstdCompressor() |
|
290 | cctx = zstd.ZstdCompressor() | |
324 |
with cctx. |
|
291 | with cctx.stream_writer(fh) as compressor: | |
325 | ... |
|
292 | ... | |
326 | byte_size = compressor.memory_size() |
|
293 | byte_size = compressor.memory_size() | |
327 |
|
294 | |||
|
295 | The total number of bytes written so far is exposed via ``tell()``:: | |||
|
296 | ||||
|
297 | cctx = zstd.ZstdCompressor() | |||
|
298 | with cctx.stream_writer(fh) as compressor: | |||
|
299 | ... | |||
|
300 | total_written = compressor.tell() | |||
|
301 | ||||
328 | Streaming Output API |
|
302 | Streaming Output API | |
329 | ^^^^^^^^^^^^^^^^^^^^ |
|
303 | ^^^^^^^^^^^^^^^^^^^^ | |
330 |
|
304 | |||
331 |
``read_ |
|
305 | ``read_to_iter(reader)`` provides a mechanism to stream data out of a | |
332 | as an iterator of data chunks.:: |
|
306 | compressor as an iterator of data chunks.:: | |
333 |
|
307 | |||
334 | cctx = zstd.ZstdCompressor() |
|
308 | cctx = zstd.ZstdCompressor() | |
335 |
for chunk in cctx.read_ |
|
309 | for chunk in cctx.read_to_iter(fh): | |
336 | # Do something with emitted data. |
|
310 | # Do something with emitted data. | |
337 |
|
311 | |||
338 |
``read_ |
|
312 | ``read_to_iter()`` accepts an object that has a ``read(size)`` method or | |
339 | to the buffer protocol. (``bytes`` and ``memoryview`` are 2 common types that |
|
313 | conforms to the buffer protocol. | |
340 | provide the buffer protocol.) |
|
|||
341 |
|
314 | |||
342 | Uncompressed data is fetched from the source either by calling ``read(size)`` |
|
315 | Uncompressed data is fetched from the source either by calling ``read(size)`` | |
343 | or by fetching a slice of data from the object directly (in the case where |
|
316 | or by fetching a slice of data from the object directly (in the case where | |
@@ -348,23 +321,24 b' If reading from the source via ``read()`' | |||||
348 | it raises or returns an empty bytes (``b''``). It is perfectly valid for |
|
321 | it raises or returns an empty bytes (``b''``). It is perfectly valid for | |
349 | the source to deliver fewer bytes than were requested by ``read(size)``. |
|
322 | the source to deliver fewer bytes than were requested by ``read(size)``. | |
350 |
|
323 | |||
351 |
Like `` |
|
324 | Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument | |
352 | declaring the size of the input stream:: |
|
325 | declaring the size of the input stream:: | |
353 |
|
326 | |||
354 | cctx = zstd.ZstdCompressor() |
|
327 | cctx = zstd.ZstdCompressor() | |
355 |
for chunk in cctx.read_ |
|
328 | for chunk in cctx.read_to_iter(fh, size=some_int): | |
356 | pass |
|
329 | pass | |
357 |
|
330 | |||
358 | You can also control the size that data is ``read()`` from the source and |
|
331 | You can also control the size that data is ``read()`` from the source and | |
359 | the ideal size of output chunks:: |
|
332 | the ideal size of output chunks:: | |
360 |
|
333 | |||
361 | cctx = zstd.ZstdCompressor() |
|
334 | cctx = zstd.ZstdCompressor() | |
362 |
for chunk in cctx.read_ |
|
335 | for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192): | |
363 | pass |
|
336 | pass | |
364 |
|
337 | |||
365 |
Unlike `` |
|
338 | Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control | |
366 |
sizes of chunks fed into the compressor. Instead, chunk sizes will |
|
339 | over the sizes of chunks fed into the compressor. Instead, chunk sizes will | |
367 |
the object being read from delivers. These will often be of a |
|
340 | be whatever the object being read from delivers. These will often be of a | |
|
341 | uniform size. | |||
368 |
|
342 | |||
369 | Stream Copying API |
|
343 | Stream Copying API | |
370 | ^^^^^^^^^^^^^^^^^^ |
|
344 | ^^^^^^^^^^^^^^^^^^ | |
@@ -404,7 +378,7 b' Compressor API' | |||||
404 | ``flush()`` methods. Each returns compressed data or an empty bytes. |
|
378 | ``flush()`` methods. Each returns compressed data or an empty bytes. | |
405 |
|
379 | |||
406 | The purpose of ``compressobj()`` is to provide an API-compatible interface |
|
380 | The purpose of ``compressobj()`` is to provide an API-compatible interface | |
407 |
with ``zlib.compressobj`` |
|
381 | with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to | |
408 | swap in different compressor objects while using the same API. |
|
382 | swap in different compressor objects while using the same API. | |
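An illustrative sketch of the ``compressobj()`` workflow, mirroring how ``zlib.compressobj`` is typically used::

    cctx = zstd.ZstdCompressor()
    cobj = cctx.compressobj()
    data = cobj.compress(b'chunk 0')
    data += cobj.compress(b'chunk 1')
    data += cobj.flush()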
409 |
|
383 | |||
410 | ``flush()`` accepts an optional argument indicating how to end the stream. |
|
384 | ``flush()`` accepts an optional argument indicating how to end the stream. | |
@@ -485,13 +459,23 b' ZstdDecompressor' | |||||
485 | ---------------- |
|
459 | ---------------- | |
486 |
|
460 | |||
487 | The ``ZstdDecompressor`` class provides an interface for performing |
|
461 | The ``ZstdDecompressor`` class provides an interface for performing | |
488 | decompression. |
|
462 | decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from | |
|
463 | the C API. | |||
489 |
|
464 | |||
490 | Each instance is associated with parameters that control decompression. These |
|
465 | Each instance is associated with parameters that control decompression. These | |
491 | come from the following named arguments (all optional): |
|
466 | come from the following named arguments (all optional): | |
492 |
|
467 | |||
493 | dict_data |
|
468 | dict_data | |
494 | Compression dictionary to use. |
|
469 | Compression dictionary to use. | |
|
470 | max_window_size | |||
|
471 | Sets an upper limit on the window size for decompression operations in | |||
|
472 | kibibytes. This setting can be used to prevent large memory allocations | |||
|
473 | for inputs using large compression windows. | |||
|
474 | format | |||
|
475 | Set the format of data for the decoder. By default, this is | |||
|
476 | ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to | |||
|
477 | allow decoding frames without the 4 byte magic header. Not all decompression | |||
|
478 | APIs support this mode. | |||
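A hypothetical sketch exercising these arguments (the values are illustrative only)::

    # Cap the decompression window to guard against very large allocations.
    dctx = zstd.ZstdDecompressor(max_window_size=2097152)

    # Decode frames written without the 4 byte magic header.
    magicless_dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS)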
495 |
|
479 | |||
496 | The interface of this class is very similar to ``ZstdCompressor`` (by design). |
|
480 | The interface of this class is very similar to ``ZstdCompressor`` (by design). | |
497 |
|
481 | |||
@@ -499,6 +483,15 b' Unless specified otherwise, assume that ' | |||||
499 | instances can be called from multiple Python threads simultaneously. In other |
|
483 | instances can be called from multiple Python threads simultaneously. In other | |
500 | words, assume instances are not thread safe unless stated otherwise. |
|
484 | words, assume instances are not thread safe unless stated otherwise. | |
501 |
|
485 | |||
|
486 | Utility Methods | |||
|
487 | ^^^^^^^^^^^^^^^ | |||
|
488 | ||||
|
489 | ``memory_size()`` obtains the size of the underlying zstd decompression context, | |||
|
490 | in bytes.:: | |||
|
491 | ||||
|
492 | dctx = zstd.ZstdDecompressor() | |||
|
493 | size = dctx.memory_size() | |||
|
494 | ||||
502 | Simple API |
|
495 | Simple API | |
503 | ^^^^^^^^^^ |
|
496 | ^^^^^^^^^^ | |
504 |
|
497 | |||
@@ -509,9 +502,10 b' frame in a single operation.::' | |||||
509 | decompressed = dctx.decompress(data) |
|
502 | decompressed = dctx.decompress(data) | |
510 |
|
503 | |||
511 | By default, ``decompress(data)`` will only work on data written with the content |
|
504 | By default, ``decompress(data)`` will only work on data written with the content | |
512 | size encoded in its header. This can be achieved by creating a |
|
505 | size encoded in its header (this is the default behavior of | |
513 | ``ZstdCompressor`` with ``write_content_size=True``. If compressed data without |
|
506 | ``ZstdCompressor().compress()`` but may not be true for streaming compression). If | |
514 |
an embedded content size is seen, ``zstd.ZstdError`` will |
|
507 | compressed data without an embedded content size is seen, ``zstd.ZstdError`` will | |
|
508 | be raised. | |||
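If the frame doesn't carry its content size, a cap on the output can be supplied instead. A sketch using the ``max_output_size`` argument described below::

    dctx = zstd.ZstdDecompressor()
    decompressed = dctx.decompress(data, max_output_size=1048576)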
515 |
|
509 | |||
516 | If the compressed data doesn't have its content size embedded within it, |
|
510 | If the compressed data doesn't have its content size embedded within it, | |
517 | decompression can be attempted by specifying the ``max_output_size`` |
|
511 | decompression can be attempted by specifying the ``max_output_size`` | |
@@ -534,17 +528,67 b' performed every time the method is calle' | |||||
534 | result in a lot of work for the memory allocator and may result in |
|
528 | result in a lot of work for the memory allocator and may result in | |
535 | ``MemoryError`` being raised if the allocation fails. |
|
529 | ``MemoryError`` being raised if the allocation fails. | |
536 |
|
530 | |||
537 | If the exact size of decompressed data is unknown, it is **strongly** |
|
531 | .. important:: | |
538 | recommended to use a streaming API. |
|
532 | ||
|
533 | If the exact size of decompressed data is unknown (not passed in explicitly | |||
|
534 | and not stored in the zstandard frame), for performance reasons it is | |||
|
535 | encouraged to use a streaming API. | |||
|
536 | ||||
|
537 | Stream Reader API | |||
|
538 | ^^^^^^^^^^^^^^^^^ | |||
|
539 | ||||
|
540 | ``stream_reader(source)`` can be used to obtain an object conforming to the | |||
|
541 | ``io.RawIOBase`` interface for reading decompressed output as a stream:: | |||
|
542 | ||||
|
543 | with open(path, 'rb') as fh: | |||
|
544 | dctx = zstd.ZstdDecompressor() | |||
|
545 | with dctx.stream_reader(fh) as reader: | |||
|
546 | while True: | |||
|
547 | chunk = reader.read(16384) | |||
|
548 | if not chunk: | |||
|
549 | break | |||
|
550 | ||||
|
551 | # Do something with decompressed chunk. | |||
|
552 | ||||
|
553 | The stream can only be read within a context manager. When the context | |||
|
554 | manager exits, the stream is closed and the underlying resource is | |||
|
555 | released and future operations against the stream will fail. | |||
|
556 | ||||
|
557 | The ``source`` argument to ``stream_reader()`` can be any object with a | |||
|
558 | ``read(size)`` method or any object implementing the *buffer protocol*. | |||
|
559 | ||||
|
560 | If the ``source`` is a stream, you can specify how large ``read()`` requests | |||
|
561 | to that stream should be via the ``read_size`` argument. It defaults to | |||
|
562 | ``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.:: | |||
|
563 | ||||
|
564 | with open(path, 'rb') as fh: | |||
|
565 | dctx = zstd.ZstdDecompressor() | |||
|
566 | # Will perform fh.read(8192) when obtaining data for the decompressor. | |||
|
567 | with dctx.stream_reader(fh, read_size=8192) as reader: | |||
|
568 | ... | |||
|
569 | ||||
|
570 | The stream returned by ``stream_reader()`` is not writable. | |||
|
571 | ||||
|
572 | The stream returned by ``stream_reader()`` is *partially* seekable. | |||
|
573 | Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward | |||
|
574 | of the current position are allowed. Offsets behind the current read | |||
|
575 | position and offsets relative to the end of stream are not allowed and | |||
|
576 | will raise ``ValueError`` if attempted. | |||
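A short sketch of seeking forward in the decompressed stream (the offsets are arbitrary)::

    with open(path, 'rb') as fh:
        dctx = zstd.ZstdDecompressor()
        with dctx.stream_reader(fh) as reader:
            # Skip the first 4096 decompressed bytes, then read.
            reader.seek(4096)
            chunk = reader.read(16384)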
|
577 | ||||
|
578 | ``tell()`` returns the number of decompressed bytes read so far. | |||
|
579 | ||||
|
580 | Not all I/O methods are implemented. Notably missing is support for | |||
|
581 | ``readline()``, ``readlines()``, and linewise iteration. Support for | |||
|
582 | these is planned for a future release. | |||
539 |
|
583 | |||
540 | Streaming Input API |
|
584 | Streaming Input API | |
541 | ^^^^^^^^^^^^^^^^^^^ |
|
585 | ^^^^^^^^^^^^^^^^^^^ | |
542 |
|
586 | |||
543 |
`` |
|
587 | ``stream_writer(fh)`` can be used to incrementally send compressed data to a | |
544 | decompressor.:: |
|
588 | decompressor.:: | |
545 |
|
589 | |||
546 | dctx = zstd.ZstdDecompressor() |
|
590 | dctx = zstd.ZstdDecompressor() | |
547 |
with dctx. |
|
591 | with dctx.stream_writer(fh) as decompressor: | |
548 | decompressor.write(compressed_data) |
|
592 | decompressor.write(compressed_data) | |
549 |
|
593 | |||
550 | This behaves similarly to ``zstd.ZstdCompressor``: compressed data is written to |
|
594 | This behaves similarly to ``zstd.ZstdCompressor``: compressed data is written to | |
@@ -558,54 +602,56 b' of ``0`` are possible.' | |||||
558 | The size of chunks being ``write()`` to the destination can be specified:: |
|
602 | The size of chunks being ``write()`` to the destination can be specified:: | |
559 |
|
603 | |||
560 | dctx = zstd.ZstdDecompressor() |
|
604 | dctx = zstd.ZstdDecompressor() | |
561 |
with dctx. |
|
605 | with dctx.stream_writer(fh, write_size=16384) as decompressor: | |
562 | pass |
|
606 | pass | |
563 |
|
607 | |||
564 | You can see how much memory is being used by the decompressor:: |
|
608 | You can see how much memory is being used by the decompressor:: | |
565 |
|
609 | |||
566 | dctx = zstd.ZstdDecompressor() |
|
610 | dctx = zstd.ZstdDecompressor() | |
567 |
with dctx. |
|
611 | with dctx.stream_writer(fh) as decompressor: | |
568 | byte_size = decompressor.memory_size() |
|
612 | byte_size = decompressor.memory_size() | |
569 |
|
613 | |||
570 | Streaming Output API |
|
614 | Streaming Output API | |
571 | ^^^^^^^^^^^^^^^^^^^^ |
|
615 | ^^^^^^^^^^^^^^^^^^^^ | |
572 |
|
616 | |||
573 |
``read_ |
|
617 | ``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a | |
574 | compressed source as an iterator of data chunks.:: |
|
618 | compressed source as an iterator of data chunks.:: | |
575 |
|
619 | |||
576 | dctx = zstd.ZstdDecompressor() |
|
620 | dctx = zstd.ZstdDecompressor() | |
577 |
for chunk in dctx.read_ |
|
621 | for chunk in dctx.read_to_iter(fh): | |
578 | # Do something with original data. |
|
622 | # Do something with original data. | |
579 |
|
623 | |||
580 |
``read_ |
|
624 | ``read_to_iter()`` accepts an object with a ``read(size)`` method that will | |
581 |
return compressed bytes |
|
625 | return compressed bytes or an object conforming to the buffer protocol that | |
582 |
can expose its data as a contiguous range of bytes. |
|
626 | can expose its data as a contiguous range of bytes. | |
583 | ``memoryview`` types expose this buffer protocol. |
|
|||
584 |
|
627 | |||
585 |
``read_ |
|
628 | ``read_to_iter()`` returns an iterator whose elements are chunks of the | |
586 | decompressed data. |
|
629 | decompressed data. | |
587 |
|
630 | |||
588 | The size of requested ``read()`` from the source can be specified:: |
|
631 | The size of requested ``read()`` from the source can be specified:: | |
589 |
|
632 | |||
590 | dctx = zstd.ZstdDecompressor() |
|
633 | dctx = zstd.ZstdDecompressor() | |
591 |
for chunk in dctx.read_ |
|
634 | for chunk in dctx.read_to_iter(fh, read_size=16384): | |
592 | pass |
|
635 | pass | |
593 |
|
636 | |||
594 | It is also possible to skip leading bytes in the input data:: |
|
637 | It is also possible to skip leading bytes in the input data:: | |
595 |
|
638 | |||
596 | dctx = zstd.ZstdDecompressor() |
|
639 | dctx = zstd.ZstdDecompressor() | |
597 |
for chunk in dctx.read_ |
|
640 | for chunk in dctx.read_to_iter(fh, skip_bytes=1): | |
598 | pass |
|
641 | pass | |
599 |
|
642 | |||
600 | Skipping leading bytes is useful if the source data contains extra |
|
643 | .. tip:: | |
601 | *header* data but you want to avoid the overhead of making a buffer copy |
|
|||
602 | or allocating a new ``memoryview`` object in order to decompress the data. |
|
|||
603 |
|
644 | |||
604 | Similarly to ``ZstdCompressor.read_from()``, the consumer of the iterator |
|
645 | Skipping leading bytes is useful if the source data contains extra | |
|
646 | *header* data. Traditionally, you would need to create a slice or | |||
|
647 | ``memoryview`` of the data you want to decompress. This would create | |||
|
648 | overhead. It is more efficient to pass the offset into this API. | |||
|
649 | ||||
|
650 | Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator | |||
605 | controls when data is decompressed. If the iterator isn't consumed, |
|
651 | controls when data is decompressed. If the iterator isn't consumed, | |
606 | decompression is put on hold. |
|
652 | decompression is put on hold. | |
607 |
|
653 | |||
608 |
When ``read_ |
|
654 | When ``read_to_iter()`` is passed an object conforming to the buffer protocol, | |
609 | the behavior may seem similar to what occurs when the simple decompression |
|
655 | the behavior may seem similar to what occurs when the simple decompression | |
610 | API is used. However, this API works when the decompressed size is unknown. |
|
656 | API is used. However, this API works when the decompressed size is unknown. | |
611 | Furthermore, if feeding large inputs, the decompressor will work in chunks |
|
657 | Furthermore, if feeding large inputs, the decompressor will work in chunks | |
@@ -636,7 +682,7 b' Decompressor API' | |||||
636 | ^^^^^^^^^^^^^^^^ |
|
682 | ^^^^^^^^^^^^^^^^ | |
637 |
|
683 | |||
638 | ``decompressobj()`` returns an object that exposes a ``decompress(data)`` |
|
684 | ``decompressobj()`` returns an object that exposes a ``decompress(data)`` | |
639 |
method |
|
685 | method. Compressed data chunks are fed into ``decompress(data)`` and | |
640 | uncompressed output (or an empty bytes) is returned. Output from subsequent |
|
686 | uncompressed output (or an empty bytes) is returned. Output from subsequent | |
641 | calls needs to be concatenated to reassemble the full decompressed byte |
|
687 | calls needs to be concatenated to reassemble the full decompressed byte | |
642 | sequence. |
|
688 | sequence. | |
@@ -650,11 +696,25 b' can no longer be called.' | |||||
650 |
|
696 | |||
651 | Here is how this API should be used:: |
|
697 | Here is how this API should be used:: | |
652 |
|
698 | |||
653 | dctx = zstd.ZstdDeompressor() |
|
699 | dctx = zstd.ZstdDecompressor() | |
654 |
dobj = |
|
700 | dobj = dctx.decompressobj() | |
655 | data = dobj.decompress(compressed_chunk_0) |
|
701 | data = dobj.decompress(compressed_chunk_0) | |
656 | data = dobj.decompress(compressed_chunk_1) |
|
702 | data = dobj.decompress(compressed_chunk_1) | |
657 |
|
703 | |||
|
704 | By default, calls to ``decompress()`` write output data in chunks of size | |||
|
705 | ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated | |||
|
706 | before being returned to the caller. It is possible to define the size of | |||
|
707 | these temporary chunks by passing ``write_size`` to ``decompressobj()``:: | |||
|
708 | ||||
|
709 | dctx = zstd.ZstdDecompressor() | |||
|
710 | dobj = dctx.decompressobj(write_size=1048576) | |||
|
711 | ||||
|
712 | .. note:: | |||
|
713 | ||||
|
714 | Because calls to ``decompress()`` may need to perform multiple | |||
|
715 | memory (re)allocations, this streaming decompression API isn't as | |||
|
716 | efficient as other APIs. | |||
|
717 | ||||
658 | Batch Decompression API |
|
718 | Batch Decompression API | |
659 | ^^^^^^^^^^^^^^^^^^^^^^^ |
|
719 | ^^^^^^^^^^^^^^^^^^^^^^^ | |
660 |
|
720 | |||
@@ -671,9 +731,12 b' conform to the buffer protocol. For best' | |||||
671 | minimal input validation will be done for that type. If calling from |
|
731 | minimal input validation will be done for that type. If calling from | |
672 | Python (as opposed to C), constructing one of these instances may add |
|
732 | Python (as opposed to C), constructing one of these instances may add | |
673 | overhead cancelling out the performance overhead of validation for list |
|
733 | overhead cancelling out the performance overhead of validation for list | |
674 | inputs. |
|
734 | inputs.:: | |
675 |
|
|
735 | ||
676 | The decompressed size of each frame must be discoverable. It can either be |
|
736 | dctx = zstd.ZstdDecompressor() | |
|
737 | results = dctx.multi_decompress_to_buffer([b'...', b'...']) | |||
|
738 | ||||
|
739 | The decompressed size of each frame MUST be discoverable. It can either be | |||
677 | embedded within the zstd frame (``write_content_size=True`` argument to |
|
740 | embedded within the zstd frame (``write_content_size=True`` argument to | |
678 | ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument. |
|
741 | ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument. | |
679 |
|
742 | |||
@@ -681,7 +744,13 b' The ``decompressed_sizes`` argument is a' | |||||
681 | protocol which holds an array of 64-bit unsigned integers in the machine's |
|
744 | protocol which holds an array of 64-bit unsigned integers in the machine's | |
682 | native format defining the decompressed sizes of each frame. If this argument |
|
745 | native format defining the decompressed sizes of each frame. If this argument | |
683 | is passed, it avoids having to scan each frame for its decompressed size. |
|
746 | is passed, it avoids having to scan each frame for its decompressed size. | |
684 | This frame scanning can add noticeable overhead in some scenarios. |
|
747 | This frame scanning can add noticeable overhead in some scenarios.:: | |
|
748 | ||||
|
749 | frames = [...] | |||
|
750 | sizes = struct.pack('=QQQQ', len0, len1, len2, len3) | |||
|
751 | ||||
|
752 | dctx = zstd.ZstdDecompressor() | |||
|
753 | results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes) | |||
685 |
|
|
754 | ||
686 | The ``threads`` argument controls the number of threads to use to perform |
|
755 | The ``threads`` argument controls the number of threads to use to perform | |
687 | decompression operations. The default (``0``) or the value ``1`` means to |
|
756 | decompression operations. The default (``0``) or the value ``1`` means to | |
@@ -701,22 +770,23 b' This function exists to perform decompre' | |||||
701 | as possible by having as little overhead as possible. Since decompression is |
|
770 | as possible by having as little overhead as possible. Since decompression is | |
702 | performed as a single operation and since the decompressed output is stored in |
|
771 | performed as a single operation and since the decompressed output is stored in | |
703 | a single buffer, extra memory allocations, Python objects, and Python function |
|
772 | a single buffer, extra memory allocations, Python objects, and Python function | |
704 |
calls are avoided. This is ideal for scenarios where callers |
|
773 | calls are avoided. This is ideal for scenarios where callers know up front that | |
705 | decompressed data for multiple frames. |
|
774 | they need to access data for multiple frames, such as when *delta chains* are | |
|
775 | being used. | |||
706 |
|
776 | |||
707 | Currently, the implementation always spawns multiple threads when requested, |
|
777 | Currently, the implementation always spawns multiple threads when requested, | |
708 | even if the amount of work to do is small. In the future, it will be smarter |
|
778 | even if the amount of work to do is small. In the future, it will be smarter | |
709 | about avoiding threads and their associated overhead when the amount of |
|
779 | about avoiding threads and their associated overhead when the amount of | |
710 | work to do is small. |
|
780 | work to do is small. | |
711 |
|
781 | |||
712 |
|
|
782 | Prefix Dictionary Chain Decompression | |
713 |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
783 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
714 |
|
784 | |||
715 | ``decompress_content_dict_chain(frames)`` performs decompression of a list of |
|
785 | ``decompress_content_dict_chain(frames)`` performs decompression of a list of | |
716 |
zstd frames produced using chained * |
|
786 | zstd frames produced using chained *prefix* dictionary compression. Such | |
717 | a list of frames is produced by compressing discrete inputs where each |
|
787 | a list of frames is produced by compressing discrete inputs where each | |
718 |
non-initial input is compressed with a * |
|
788 | non-initial input is compressed with a *prefix* dictionary consisting of the | |
719 |
|
|
789 | content of the previous input. | |
720 |
|
790 | |||
721 | For example, say you have the following inputs:: |
|
791 | For example, say you have the following inputs:: | |
722 |
|
792 | |||
@@ -725,25 +795,25 b' For example, say you have the following ' | |||||
725 | The zstd frame chain consists of: |
|
795 | The zstd frame chain consists of: | |
726 |
|
796 | |||
727 | 1. ``b'input 1'`` compressed in standalone/discrete mode |
|
797 | 1. ``b'input 1'`` compressed in standalone/discrete mode | |
728 |
2. ``b'input 2'`` compressed using ``b'input 1'`` as a * |
|
798 | 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary | |
729 |
3. ``b'input 3'`` compressed using ``b'input 2'`` as a * |
|
799 | 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary | |
730 |
|
800 | |||
731 | Each zstd frame **must** have the content size written. |
|
801 | Each zstd frame **must** have the content size written. | |
732 |
|
802 | |||
733 |
The following Python code can be used to produce a * |
|
803 | The following Python code can be used to produce a *prefix dictionary chain*:: | |
734 | chain*:: |
|
|||
735 |
|
804 | |||
736 | def make_chain(inputs): |
|
805 | def make_chain(inputs): | |
737 | frames = [] |
|
806 | frames = [] | |
738 |
|
807 | |||
739 | # First frame is compressed in standalone/discrete mode. |
|
808 | # First frame is compressed in standalone/discrete mode. | |
740 |
zctx = zstd.ZstdCompressor( |
|
809 | zctx = zstd.ZstdCompressor() | |
741 | frames.append(zctx.compress(inputs[0])) |
|
810 | frames.append(zctx.compress(inputs[0])) | |
742 |
|
811 | |||
743 |
# Subsequent frames use the previous fulltext as a |
|
812 | # Subsequent frames use the previous fulltext as a prefix dictionary | |
744 | for i, raw in enumerate(inputs[1:]): |
|
813 | for i, raw in enumerate(inputs[1:]): | |
745 |
dict_data = zstd.ZstdCompressionDict( |
|
814 | dict_data = zstd.ZstdCompressionDict( | |
746 | zctx = zstd.ZstdCompressor(write_content_size=True, dict_data=dict_data) |
|
815 | inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT) | |
|
816 | zctx = zstd.ZstdCompressor(dict_data=dict_data) | |||
747 | frames.append(zctx.compress(raw)) |
|
817 | frames.append(zctx.compress(raw)) | |
748 |
|
818 | |||
749 | return frames |
|
819 | return frames | |
@@ -751,10 +821,13 b' chain*::' | |||||
751 | ``decompress_content_dict_chain()`` returns the uncompressed data of the last |
|
821 | ``decompress_content_dict_chain()`` returns the uncompressed data of the last | |
752 | element in the input chain. |
|
822 | element in the input chain. | |
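For instance, a sketch building on the ``make_chain()`` helper above::

    frames = make_chain([b'input 1', b'input 2', b'input 3'])

    dctx = zstd.ZstdDecompressor()
    # Returns the fulltext of the final input (b'input 3' here).
    data = dctx.decompress_content_dict_chain(frames)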
753 |
|
823 | |||
754 | It is possible to implement *content-only dictionary chain* decompression |
|
824 | ||
755 | on top of other Python APIs. However, this function will likely be significantly |
|
825 | .. note:: | |
756 | faster, especially for long input chains, as it avoids the overhead of |
|
826 | ||
757 | instantiating and passing around intermediate objects between C and Python. |
|
827 | It is possible to implement *prefix dictionary chain* decompression | |
|
828 | on top of other APIs. However, this function will likely be faster - | |||
|
829 | especially for long input chains - as it avoids the overhead of instantiating | |||
|
830 | and passing around intermediate objects between C and Python. | |||
758 |
|
831 | |||
759 | Multi-Threaded Compression |
|
832 | Multi-Threaded Compression | |
760 | -------------------------- |
|
833 | -------------------------- | |
@@ -764,9 +837,15 b' of threads to use for compression. The w' | |||||
764 | into segments and each segment is fed into a worker pool for compression. Once |
|
837 | into segments and each segment is fed into a worker pool for compression. Once | |
765 | a segment is compressed, it is flushed/appended to the output. |
|
838 | a segment is compressed, it is flushed/appended to the output. | |
766 |
|
839 | |||
|
840 | .. note:: | |||
|
841 | ||||
|
842 | These threads are created at the C layer and are not Python threads. So they | |||
|
843 | work outside the GIL. It is therefore possible to CPU saturate multiple cores | |||
|
844 | from Python. | |||
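A hypothetical sketch of requesting multi-threaded compression via the ``threads`` argument to ``ZstdCompressor`` (a negative value is assumed to mean one thread per detected logical CPU; ``large_input`` is an assumed large ``bytes`` object)::

    cctx = zstd.ZstdCompressor(level=3, threads=-1)
    compressed = cctx.compress(large_input)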
|
845 | ||||
767 | The segment size for multi-threaded compression is chosen from the window size |
|
846 | The segment size for multi-threaded compression is chosen from the window size | |
768 | of the compressor. This is derived from the ``window_log`` attribute of a |
|
847 | of the compressor. This is derived from the ``window_log`` attribute of a | |
769 | ``CompressionParameters`` instance. By default, segment sizes are in the 1+MB |
|
848 | ``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB | |
770 | range. |
|
849 | range. | |
771 |
|
850 | |||
772 | If multi-threaded compression is requested and the input is smaller than the |
|
851 | If multi-threaded compression is requested and the input is smaller than the | |
@@ -785,31 +864,33 b' than non-multi-threaded compression. The' | |||||
785 | there is a CPU/wall time versus size trade off that may warrant investigation. |
|
864 | there is a CPU/wall time versus size trade off that may warrant investigation. | |
786 |
|
865 | |||
787 | Output from multi-threaded compression does not require any special handling |
|
866 | Output from multi-threaded compression does not require any special handling | |
788 |
on the decompression side. |
|
867 | on the decompression side. To the decompressor, data generated with single | |
789 | to consume data produced with multi-threaded compression. |
|
868 | threaded compressor looks the same as data generated by a multi-threaded | |
|
869 | compressor and does not require any special handling or additional resource | |||
|
870 | requirements. | |||
790 |
|
871 | |||
791 | Dictionary Creation and Management |
|
872 | Dictionary Creation and Management | |
792 | ---------------------------------- |
|
873 | ---------------------------------- | |
793 |
|
874 | |||
794 |
Compression dictionaries are represented |
|
875 | Compression dictionaries are represented with the ``ZstdCompressionDict`` type. | |
795 |
|
876 | |||
796 | Instances can be constructed from bytes:: |
|
877 | Instances can be constructed from bytes:: | |
797 |
|
878 | |||
798 | dict_data = zstd.ZstdCompressionDict(data) |
|
879 | dict_data = zstd.ZstdCompressionDict(data) | |
799 |
|
880 | |||
800 |
It is possible to construct a dictionary from *any* data. |
|
881 | It is possible to construct a dictionary from *any* data. If the data doesn't | |
801 |
|
|
882 | begin with a magic header, it will be treated as a *prefix* dictionary. | |
802 |
* |
|
883 | *Prefix* dictionaries allow compression operations to reference raw data | |
803 | that follow to reference raw data within the content. For one use of |
|
884 | within the dictionary. | |
804 | *content-only* dictionaries, see |
|
|||
805 | ``ZstdDecompressor.decompress_content_dict_chain()``. |
|
|||
806 |
|
885 | |||
807 | More interestingly, instances can be created by *training* on sample data:: |
|
886 | It is possible to force the use of *prefix* dictionaries or to require a | |
|
887 | dictionary header: | |||
808 |
|
888 | |||
809 |
dict_data = zstd. |
|
889 | dict_data = zstd.ZstdCompressionDict(data, | |
|
890 | dict_type=zstd.DICT_TYPE_RAWCONTENT) | |||
810 |
|
891 | |||
811 | This takes a list of bytes instances and creates and returns a |
|
892 | dict_data = zstd.ZstdCompressionDict(data, | |
812 | ``ZstdCompressionDict``. |
|
893 | dict_type=zstd.DICT_TYPE_FULLDICT) | |
813 |
|
894 | |||
814 | You can see how many bytes are in the dictionary by calling ``len()``:: |
|
895 | You can see how many bytes are in the dictionary by calling ``len()``:: | |
815 |
|
896 | |||
@@ -819,7 +900,7 b' You can see how many bytes are in the di' | |||||
819 | Once you have a dictionary, you can pass it to the objects performing |
|
900 | Once you have a dictionary, you can pass it to the objects performing | |
820 | compression and decompression:: |
|
901 | compression and decompression:: | |
821 |
|
902 | |||
822 |
dict_data = zstd.train_dictionary(1 |
|
903 | dict_data = zstd.train_dictionary(131072, samples) | |
823 |
|
904 | |||
824 | cctx = zstd.ZstdCompressor(dict_data=dict_data) |
|
905 | cctx = zstd.ZstdCompressor(dict_data=dict_data) | |
825 | for source_data in input_data: |
|
906 | for source_data in input_data: | |
@@ -829,7 +910,7 b' compression and decompression::' | |||||
829 | dctx = zstd.ZstdDecompressor(dict_data=dict_data) |
|
910 | dctx = zstd.ZstdDecompressor(dict_data=dict_data) | |
830 | for compressed_data in input_data: |
|
911 | for compressed_data in input_data: | |
831 | buffer = io.BytesIO() |
|
912 | buffer = io.BytesIO() | |
832 |
with dctx. |
|
913 | with dctx.stream_writer(buffer) as decompressor: | |
833 | decompressor.write(compressed_data) |
|
914 | decompressor.write(compressed_data) | |
834 | # Do something with raw data in ``buffer``. |
|
915 | # Do something with raw data in ``buffer``. | |
835 |
|
916 | |||
@@ -843,56 +924,69 b' a ``ZstdCompressionDict`` later) via ``a' | |||||
843 | dict_data = zstd.train_dictionary(size, samples) |
|
924 | dict_data = zstd.train_dictionary(size, samples) | |
844 | raw_data = dict_data.as_bytes() |
|
925 | raw_data = dict_data.as_bytes() | |
845 |
|
926 | |||
846 | The following named arguments to ``train_dictionary`` can also be used |
|
927 | By default, when a ``ZstdCompressionDict`` is *attached* to a | |
847 | to further control dictionary generation. |
|
928 | ``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the | |
|
929 | dictionary for use. This is fine if only 1 compression operation is being | |||
|
930 | performed or if the ``ZstdCompressor`` is being reused for multiple operations. | |||
|
931 | But if multiple ``ZstdCompressor`` instances are being used with the dictionary, | |||
|
932 | this can add overhead. | |||
848 |
|
933 | |||
849 | selectivity |
|
934 | It is possible to *precompute* the dictionary so it can readily be consumed | |
850 | Integer selectivity level. Default is 9. Larger values yield more data in |
|
935 | by multiple ``ZstdCompressor`` instances:: | |
851 | dictionary. |
|
936 | ||
852 | level |
|
937 | d = zstd.ZstdCompressionDict(data) | |
853 | Integer compression level. Default is 6. |
|
|||
854 | dict_id |
|
|||
855 | Integer dictionary ID for the produced dictionary. Default is 0, which |
|
|||
856 | means to use a random value. |
|
|||
857 | notifications |
|
|||
858 | Controls writing of informational messages to ``stderr``. ``0`` (the |
|
|||
859 | default) means to write nothing. ``1`` writes errors. ``2`` writes |
|
|||
860 | progression info. ``3`` writes more details. And ``4`` writes all info. |
|
|||
861 |
|
938 | |||
862 | Cover Dictionaries |
|
939 | # Precompute for compression level 3. | |
863 | ^^^^^^^^^^^^^^^^^^ |
|
940 | d.precompute_compress(level=3) | |
864 |
|
|
941 | ||
865 | An alternate dictionary training mechanism named *cover* is also available. |
|
942 | # Precompute with specific compression parameters. | |
866 | More details about this training mechanism are available in the paper |
|
943 | params = zstd.ZstdCompressionParameters(...) | |
867 | *Effective Construction of Relative Lempel-Ziv Dictionaries* (authors: |
|
944 | d.precompute_compress(compression_params=params) | |
868 | Liao, Petri, Moffat, Wirth). |
|
|||
869 |
|
||||
870 | To use this mechanism, use ``zstd.train_cover_dictionary()`` instead of |
|
|||
871 | ``zstd.train_dictionary()``. The function behaves nearly the same except |
|
|||
872 | its arguments are different and the returned dictionary will contain ``k`` |
|
|||
873 | and ``d`` attributes reflecting the parameters to the cover algorithm. |
|
|||
874 |
|
|
945 | ||
875 | .. note:: |
|
946 | .. note:: | |
876 |
|
947 | |||
877 | The ``k`` and ``d`` attributes are only populated on dictionary |
|
948 | When a dictionary is precomputed, the compression parameters used to | |
878 | instances created by this function. If a ``ZstdCompressionDict`` is |
|
949 | precompute the dictionary overwrite some of the compression parameters | |
879 | constructed from raw bytes data, the ``k`` and ``d`` attributes will |
|
950 | specified to ``ZstdCompressor.__init__``. | |
880 | be ``0``. |
|
951 | ||
|
952 | Training Dictionaries | |||
|
953 | ^^^^^^^^^^^^^^^^^^^^^ | |||
|
954 | ||||
|
955 | Unless using *prefix* dictionaries, dictionary data is produced by *training* | |||
|
956 | on existing data:: | |||
|
957 | ||||
|
958 | dict_data = zstd.train_dictionary(size, samples) | |||
|
959 | ||||
|
960 | This takes a target dictionary size and list of bytes instances and creates and | |||
|
961 | returns a ``ZstdCompressionDict``. | |||
|
962 | ||||
|
963 | The dictionary training mechanism is known as *cover*. More details about it are | |||
|
964 | available in the paper *Effective Construction of Relative Lempel-Ziv | |||
|
965 | Dictionaries* (authors: Liao, Petri, Moffat, Wirth). | |||
|
966 | ||||
|
967 | The cover algorithm takes parameters ``k`` and ``d``. These are the | |||
|
968 | *segment size* and *dmer size*, respectively. The returned dictionary | |||
|
969 | instance created by this function has ``k`` and ``d`` attributes | |||
|
970 | containing the values for these parameters. If a ``ZstdCompressionDict`` | |||
|
971 | is constructed from raw bytes data (a content-only dictionary), the | |||
|
972 | ``k`` and ``d`` attributes will be ``0``. | |||
881 |
|
973 | |||
882 | The segment and dmer size parameters to the cover algorithm can either be |
|
974 | The segment and dmer size parameters to the cover algorithm can either be | |
883 |
specified manually or |
|
975 | specified manually or ``train_dictionary()`` can try multiple values | |
884 |
|
|
976 | and pick the best one, where *best* means the smallest compressed data size. | |
885 | compressed data size. |
|
977 | This later mode is called *optimization* mode. | |
886 |
|
||||
887 | In manual mode, the ``k`` and ``d`` arguments must be specified or a |
|
|||
888 | ``ZstdError`` will be raised. |
|
|||
889 |
|
978 | |||
890 | In automatic mode (triggered by specifying ``optimize=True``), ``k`` |
|
979 | If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``, | |
891 | and ``d`` are optional. If a value isn't specified, then default values for |
|
980 | or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t`` | |
892 | both are tested. The ``steps`` argument can control the number of steps |
|
981 | struct) are defined, *optimization* mode is used with default parameter | |
893 | through ``k`` values. The ``level`` argument defines the compression level |
|
982 | values. | |
894 | that will be used when testing the compressed size. And ``threads`` can |
|
983 | ||
895 | specify the number of threads to use for concurrent operation. |
|
984 | If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged | |
|
985 | with explicit control over those parameters. Specifying ``threads=0`` or | |||
|
986 | ``threads=1`` can be used to engage *optimization* mode if other parameters | |||
|
987 | are not defined. | |||
|
988 | ||||
|
989 | Otherwise, non-*optimization* mode is used with the parameters specified. | |||
896 |
|
990 | |||
897 | This function takes the following arguments: |
|
991 | This function takes the following arguments: | |
898 |
|
992 | |||
@@ -909,64 +1003,92 b' d' | |||||
909 | dict_id |
|
1003 | dict_id | |
910 | Integer dictionary ID for the produced dictionary. Default is 0, which uses |
|
1004 | Integer dictionary ID for the produced dictionary. Default is 0, which uses | |
911 | a random value. |
|
1005 | a random value. | |
912 | optimize |
|
1006 | steps | |
913 | When true, test dictionary generation with multiple parameters. |
|
1007 | Number of steps through ``k`` values to perform when trying parameter | |
|
1008 | variations. | |||
|
1009 | threads | |||
|
1010 | Number of threads to use when trying parameter variations. Default is 0, | |||
|
1011 | which means to use a single thread. A negative value can be specified to | |||
|
1012 | use as many threads as there are detected logical CPUs. | |||
914 | level |
|
1013 | level | |
915 |
Integer target compression level when t |
|
1014 | Integer target compression level when trying parameter variations. | |
916 | ``optimize=True``. Default is 1. |
|
|||
917 | steps |
|
|||
918 | Number of steps through ``k`` values to perform when ``optimize=True``. |
|
|||
919 | Default is 32. |
|
|||
920 | threads |
|
|||
921 | Number of threads to use when ``optimize=True``. Default is 0, which means |
|
|||
922 | to use a single thread. A negative value can be specified to use as many |
|
|||
923 | threads as there are detected logical CPUs. |
|
|||
924 | notifications |
|
1015 | notifications | |
925 |
Controls writing of informational messages to ``stderr``. |
|
1016 | Controls writing of informational messages to ``stderr``. ``0`` (the | |
926 | documentation for ``train_dictionary()`` for more. |
|
1017 | default) means to write nothing. ``1`` writes errors. ``2`` writes | |
|
1018 | progression info. ``3`` writes more details. And ``4`` writes all info. | |||
927 |
|
1019 | |||
928 | Explicit Compression Parameters |
|
1020 | Explicit Compression Parameters | |
929 | ------------------------------- |
|
1021 | ------------------------------- | |
930 |
|
1022 | |||
931 | Zstandard's integer compression levels along with the input size and dictionary |
|
1023 | Zstandard offers a high-level *compression level* that maps to lower-level | |
932 | size are converted into a data structure defining multiple parameters to tune |
|
1024 | compression parameters. For many consumers, this numeric level is the only | |
933 | behavior of the compression algorithm. It is possible to use define this |
|
1025 | compression setting you'll need to touch. | |
934 | data structure explicitly to have lower-level control over compression behavior. |
|
1026 | ||
|
1027 | But for advanced use cases, it might be desirable to tweak these lower-level | |||
|
1028 | settings. | |||
935 |
|
1029 | |||
936 |
The `` |
|
1030 | The ``ZstdCompressionParameters`` type represents these low-level compression | |
937 | You can see how Zstandard converts compression levels to this data structure |
|
1031 | settings. | |
938 | by calling ``zstd.get_compression_parameters()``. e.g.:: |
|
|||
939 |
|
1032 | |||
940 | params = zstd.get_compression_parameters(5) |
|
1033 | Instances of this type can be constructed from a myriad of keyword arguments | |
|
1034 | (defined below) for complete low-level control over each adjustable | |||
|
1035 | compression setting. | |||
|
1036 | ||||
|
1037 | From a higher level, one can construct a ``ZstdCompressionParameters`` instance | |||
|
1038 | given a desired compression level and target input and dictionary size | |||
|
1039 | using ``ZstdCompressionParameters.from_level()``. e.g.:: | |||
941 |
|
|
1040 | ||
942 | This function also accepts the uncompressed data size and dictionary size |
|
1041 | # Derive compression settings for compression level 7. | |
943 | to adjust parameters:: |
|
1042 | params = zstd.ZstdCompressionParameters.from_level(7) | |
944 |
|
1043 | |||
945 | params = zstd.get_compression_parameters(3, source_size=len(data), dict_size=len(dict_data)) |
|
1044 | # With an input size of 1MB | |
|
1045 | params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576) | |||
|
1046 | ||||
|
1047 | Using ``from_level()``, it is also possible to override individual compression | |||
|
1048 | parameters or to define additional settings that aren't automatically derived. | |||
|
1049 | e.g.:: | |||
946 |
|
|
1050 | ||
947 | You can also construct compression parameters from their low-level components:: |
|
1051 | params = zstd.ZstdCompressionParameters.from_level(4, window_log=10) | |
|
1052 | params = zstd.ZstdCompressionParameters.from_level(5, threads=4) | |||
|
1053 | ||||
|
1054 | Or you can define low-level compression settings directly:: | |||
948 |
|
1055 | |||
949 |
params = zstd.CompressionParameters( |
|
1056 | params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True) | |
950 |
|
1057 | |||
951 | You can then configure a compressor to use the custom parameters:: |
|
1058 | Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to | |
|
1059 | configure a compressor:: | |||
952 |
|
1060 | |||
953 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
1061 | cctx = zstd.ZstdCompressor(compression_params=params) | |
954 |
|
1062 | |||
955 |
The |
|
1063 | The named arguments and attributes of ``ZstdCompressionParameters`` are as | |
|
1064 | follows: | |||
956 |
|
1065 | |||
|
1066 | * format | |||
|
1067 | * compression_level | |||
957 | * window_log |
|
1068 | * window_log | |
|
1069 | * hash_log | |||
958 | * chain_log |
|
1070 | * chain_log | |
959 | * hash_log |
|
|||
960 | * search_log |
|
1071 | * search_log | |
961 | * search_length |
|
1072 | * min_match | |
962 | * target_length |
|
1073 | * target_length | |
963 | * strategy |
|
1074 | * compression_strategy | |
|
1075 | * write_content_size | |||
|
1076 | * write_checksum | |||
|
1077 | * write_dict_id | |||
|
1078 | * job_size | |||
|
1079 | * overlap_size_log | |||
|
1080 | * compress_literals | |||
|
1081 | * force_max_window | |||
|
1082 | * enable_ldm | |||
|
1083 | * ldm_hash_log | |||
|
1084 | * ldm_min_match | |||
|
1085 | * ldm_bucket_size_log | |||
|
1086 | * ldm_hash_every_log | |||
|
1087 | * threads | |||
964 |
|
1088 | |||
965 | This is the order the arguments are passed to the constructor if not using |
|
1089 | Some of these are very low-level settings. It may help to consult the official | |
966 | named arguments. |
|
1090 | zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants | |
967 |
|
1091 | in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h). | ||
968 | You'll need to read the Zstandard documentation for what these parameters |
|
|||
969 | do. |
|
|||
970 |
|
1092 | |||
971 | Frame Inspection |
|
1093 | Frame Inspection | |
972 | ---------------- |
|
1094 | ---------------- | |
@@ -1003,15 +1125,17 b' has_checksum' | |||||
1003 | Bool indicating whether a 4 byte content checksum is stored at the end |
|
1125 | Bool indicating whether a 4 byte content checksum is stored at the end | |
1004 | of the frame. |
|
1126 | of the frame. | |
1005 |
|
1127 | |||
|
1128 | ``zstd.frame_header_size(data)`` returns the size of the zstandard frame | |||
|
1129 | header. | |||
|
1130 | ||||
|
1131 | ``zstd.frame_content_size(data)`` returns the content size as parsed from | |||
|
1132 | the frame header. ``-1`` means the content size is unknown. ``0`` means | |||
|
1133 | an empty frame. The content size is usually correct. However, it may not | |||
|
1134 | be accurate. | |||
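A brief sketch of both helpers::

    frame = zstd.ZstdCompressor().compress(b'data to compress')

    header_size = zstd.frame_header_size(frame)
    content_size = zstd.frame_content_size(frame)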
|
1135 | ||||
1006 | Misc Functionality |
|
1136 | Misc Functionality | |
1007 | ------------------ |
|
1137 | ------------------ | |
1008 |
|
1138 | |||
1009 | estimate_compression_context_size(CompressionParameters) |
|
|||
1010 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
|||
1011 |
|
||||
1012 | Given a ``CompressionParameters`` struct, estimate the memory size required |
|
|||
1013 | to perform compression. |
|
|||
1014 |
|
||||
1015 | estimate_decompression_context_size() |
|
1139 | estimate_decompression_context_size() | |
1016 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
1140 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
1017 |
|
1141 | |||
@@ -1041,6 +1165,11 b' FRAME_HEADER' | |||||
1041 | MAGIC_NUMBER |
|
1165 | MAGIC_NUMBER | |
1042 | Frame header as an integer |
|
1166 | Frame header as an integer | |
1043 |
|
1167 | |||
|
1168 | CONTENTSIZE_UNKNOWN | |||
|
1169 | Value for content size when the content size is unknown. | |||
|
1170 | CONTENTSIZE_ERROR | |||
|
1171 | Value for content size when content size couldn't be determined. | |||
|
1172 | ||||
1044 | WINDOWLOG_MIN |
|
1173 | WINDOWLOG_MIN | |
1045 | Minimum value for compression parameter |
|
1174 | Minimum value for compression parameter | |
1046 | WINDOWLOG_MAX |
|
1175 | WINDOWLOG_MAX | |
@@ -1063,8 +1192,6 b' SEARCHLENGTH_MAX' | |||||
1063 | Maximum value for compression parameter |
|
1192 | Maximum value for compression parameter | |
1064 | TARGETLENGTH_MIN |
|
1193 | TARGETLENGTH_MIN | |
1065 | Minimum value for compression parameter |
|
1194 | Minimum value for compression parameter | |
1066 | TARGETLENGTH_MAX |
|
|||
1067 | Maximum value for compression parameter |
|
|||
1068 | STRATEGY_FAST |
|
1195 | STRATEGY_FAST | |
1069 | Compression strategy |
|
1196 | Compression strategy | |
1070 | STRATEGY_DFAST |
|
1197 | STRATEGY_DFAST | |
@@ -1079,6 +1206,13 b' STRATEGY_BTLAZY2' | |||||
1079 | Compression strategy |
|
1206 | Compression strategy | |
1080 | STRATEGY_BTOPT |
|
1207 | STRATEGY_BTOPT | |
1081 | Compression strategy |
|
1208 | Compression strategy | |
|
1209 | STRATEGY_BTULTRA | |||
|
1210 | Compression strategy | |||
|
1211 | ||||
|
1212 | FORMAT_ZSTD1 | |||
|
1213 | Zstandard frame format | |||
|
1214 | FORMAT_ZSTD1_MAGICLESS | |||
|
1215 | Zstandard frame format without magic header | |||
1082 |
|
1216 | |||
1083 | Performance Considerations |
|
1217 | Performance Considerations | |
1084 | -------------------------- |
|
1218 | -------------------------- | |
@@ -1090,7 +1224,7 b' instantiating a new ``ZstdCompressor`` o' | |||||
1090 | operation. The differences are magnified as the size of data decreases. For |
|
1224 | operation. The differences are magnified as the size of data decreases. For | |
1091 | example, the difference between *context* reuse and non-reuse for 100,000 |
|
1225 | example, the difference between *context* reuse and non-reuse for 100,000 | |
1092 | 100 byte inputs will be significant (possibly over 10x faster to reuse contexts) |
|
1226 | 100 byte inputs will be significant (possibly over 10x faster to reuse contexts) | |
1093 | whereas 10 1,000,000 byte inputs will be more similar in speed (because the |
|
1227 | whereas 10 100,000,000 byte inputs will be more similar in speed (because the | |
1094 | time spent doing compression dwarfs time spent creating new *contexts*). |
|
1228 | time spent doing compression dwarfs time spent creating new *contexts*). | |
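For example, a sketch of reusing one *context* for many small inputs instead of constructing a new ``ZstdCompressor`` per input (``inputs`` is an assumed iterable of small ``bytes`` objects)::

    cctx = zstd.ZstdCompressor(level=3)
    compressed = [cctx.compress(chunk) for chunk in inputs]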
1095 |
|
1229 | |||
1096 | Buffer Types |
|
1230 | Buffer Types | |
@@ -1187,9 +1321,8 b' There are multiple APIs for performing c' | |||||
1187 | because different applications have different needs and the library wants to |
|
1321 | because different applications have different needs and the library wants to | |
1188 | facilitate optimal use in as many use cases as possible. |
|
1322 | facilitate optimal use in as many use cases as possible. | |
1189 |
|
1323 | |||
1190 |
From a high-level, APIs are divided into *one-shot* and *streaming* |
|
1324 | At a high level, APIs are divided into *one-shot* and *streaming*: either you | |
1191 | the ``Concepts`` section for a description of how these are different at |
|
1325 | are operating on all data at once or you operate on it piecemeal. | |
1192 | the C layer. |
|
|||
1193 |
|
1326 | |||
1194 | The *one-shot* APIs are useful for small data, where the input or output |
|
1327 | The *one-shot* APIs are useful for small data, where the input or output | |
1195 | size is known. (The size can come from a buffer length, file size, or |
|
1328 | size is known. (The size can come from a buffer length, file size, or | |
@@ -1222,145 +1355,39 b' There is potential for long pauses as da' | |||||
1222 | underlying stream (say from interacting with a filesystem or network). This |
|
1355 | underlying stream (say from interacting with a filesystem or network). This | |
1223 | could add considerable overhead. |
|
1356 | could add considerable overhead. | |
1224 |
|
1357 | |||
1225 | Concepts |
|
1358 | Thread Safety | |
1226 | ======== |
|
1359 | ============= | |
1227 |
|
||||
1228 | It is important to have a basic understanding of how Zstandard works in order |
|
|||
1229 | to optimally use this library. In addition, there are some low-level Python |
|
|||
1230 | concepts that are worth explaining to aid understanding. This section aims to |
|
|||
1231 | provide that knowledge. |
|
|||
1232 |
|
||||
1233 | Zstandard Frames and Compression Format |
|
|||
1234 | --------------------------------------- |
|
|||
1235 |
|
||||
1236 | Compressed zstandard data almost always exists within a container called a |
|
|||
1237 | *frame*. (For the technically curious, see the |
|
|||
1238 | `specification <https://github.com/facebook/zstd/blob/3bee41a70eaf343fbcae3637b3f6edbe52f35ed8/doc/zstd_compression_format.md>_.) |
|
|||
1239 |
|
||||
1240 | The frame contains a header and optional trailer. The header contains a |
|
|||
1241 | magic number to self-identify as a zstd frame and a description of the |
|
|||
1242 | compressed data that follows. |
|
|||
1243 |
|
||||
1244 | Among other things, the frame *optionally* contains the size of the |
|
|||
1245 | decompressed data the frame represents, a 32-bit checksum of the |
|
|||
1246 | decompressed data (to facilitate verification during decompression), |
|
|||
1247 | and the ID of the dictionary used to compress the data. |
|
|||
1248 |
|
||||
1249 | Storing the original content size in the frame (``write_content_size=True`` |
|
|||
1250 | to ``ZstdCompressor``) is important for performance in some scenarios. Having |
|
|||
1251 | the decompressed size stored there (or storing it elsewhere) allows |
|
|||
1252 | decompression to perform a single memory allocation that is exactly sized to |
|
|||
1253 | the output. This is faster than continuously growing a memory buffer to hold |
|
|||
1254 | output. |
|
|||
1255 |
|
1360 | |||
1256 | Compression and Decompression Contexts |
|
1361 | ``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees | |
1257 | -------------------------------------- |
|
1362 | about thread safety. Do not operate on the same ``ZstdCompressor`` and | |
1258 |
|
1363 | ``ZstdDecompressor`` instance simultaneously from different threads. It is | ||
1259 | In order to perform a compression or decompression operation with the zstd |
|
1364 | fine to have different threads call into a single instance, just not at the | |
1260 | C API, you need what's called a *context*. A context essentially holds |
|
1365 | same time. | |
1261 | configuration and state for a compression or decompression operation. For |
|
|||
1262 | example, a compression context holds the configured compression level. |
|
|||
1263 |
|
||||
1264 | Contexts can be reused for multiple operations. Since creating and |
|
|||
1265 | destroying contexts is not free, there are performance advantages to |
|
|||
1266 | reusing contexts. |
|
|||
1267 |
|
||||
1268 | The ``ZstdCompressor`` and ``ZstdDecompressor`` types are essentially |
|
|||
1269 | wrappers around these contexts in the zstd C API. |
|
|||
1270 |
|
1366 | |||
1271 | One-shot And Streaming Operations |
|
1367 | Some operations require multiple function calls to complete, e.g. streaming | |
1272 | --------------------------------- |
|
1368 | operations. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used | |
1273 |
|
1369 | for simultaneously active operations, e.g. you must not start a streaming | ||
1274 | A compression or decompression operation can either be performed as a |
|
1370 | operation when another streaming operation is already active. | |
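
A sketch of the safe pattern implied above: give each thread its own instance and only run one operation on an instance at a time::

    import threading
    import zstandard as zstd

    def worker(payload, results, index):
        # Each thread constructs its own compressor; a single instance
        # must not be used from two threads at the same time.
        cctx = zstd.ZstdCompressor()
        results[index] = cctx.compress(payload)

    payloads = [b"a" * 4096, b"b" * 4096]
    results = [None] * len(payloads)
    threads = [threading.Thread(target=worker, args=(p, results, i))
               for i, p in enumerate(payloads)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
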
1275 | single *one-shot* operation or as a continuous *streaming* operation. |
|
|||
1276 |
|
||||
1277 | In one-shot mode (the *simple* APIs provided by the Python interface), |
|
|||
1278 | **all** input is handed to the compressor or decompressor as a single buffer |
|
|||
1279 | and **all** output is returned as a single buffer. |
|
|||
1280 |
|
||||
1281 | In streaming mode, input is delivered to the compressor or decompressor as |
|
|||
1282 | a series of chunks via multiple function calls. Likewise, output is |
|
|||
1283 | obtained in chunks as well. |
|
|||
1284 |
|
||||
1285 | Streaming operations require an additional *stream* object to be created |
|
|||
1286 | to track the operation. These are logical extensions of *context* |
|
|||
1287 | instances. |
|
|||
1288 |
|
1371 | |||
1289 | There are advantages and disadvantages to each mode of operation. There |
|
1372 | The C extension releases the GIL during non-trivial calls into the zstd C | |
1290 | are scenarios where certain modes can't be used. See the |
|
1373 | API. Non-trivial calls are notably compression and decompression. Trivial | |
1291 | ``Choosing an API`` section for more. |
|
1374 | calls are things like parsing frame parameters. Where the GIL is released | |
1292 |
|
1375 | is considered an implementation detail and can change in any release. | ||
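
Since the GIL is released around compression and decompression, thread pools can get real parallelism as long as each task uses its own instance; a hedged sketch::

    from concurrent.futures import ThreadPoolExecutor

    import zstandard as zstd

    def compress_one(data):
        # A per-task compressor avoids sharing an instance across threads.
        return zstd.ZstdCompressor(level=6).compress(data)

    inputs = [bytes([i]) * (1 << 20) for i in range(8)]
    with ThreadPoolExecutor(max_workers=4) as pool:
        frames = list(pool.map(compress_one, inputs))
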
1293 | Dictionaries |
|
|||
1294 | ------------ |
|
|||
1295 |
|
||||
1296 | A compression *dictionary* is essentially data used to seed the compressor |
|
|||
1297 | state so it can achieve better compression. The idea is that if you are |
|
|||
1298 | compressing a lot of similar pieces of data (e.g. JSON documents or anything |
|
|||
1299 | sharing similar structure), then you can find common patterns across multiple |
|
|||
1300 | objects then leverage those common patterns during compression and |
|
|||
1301 | decompression operations to achieve better compression ratios. |
|
|||
1302 |
|
||||
1303 | Dictionary compression is generally only useful for small inputs - data no |
|
|||
1304 | larger than a few kilobytes. The upper bound on this range is highly dependent |
|
|||
1305 | on the input data and the dictionary. |
|
|||
1306 |
|
||||
1307 | Python Buffer Protocol |
|
|||
1308 | ---------------------- |
|
|||
1309 |
|
||||
1310 | Many functions in the library operate on objects that implement Python's |
|
|||
1311 | `buffer protocol <https://docs.python.org/3.6/c-api/buffer.html>`_. |
|
|||
1312 |
|
||||
1313 | The *buffer protocol* is an internal implementation detail of a Python |
|
|||
1314 | type that allows instances of that type (objects) to be exposed as a raw |
|
|||
1315 | pointer (or buffer) in the C API. In other words, it allows objects to be |
|
|||
1316 | exposed as an array of bytes. |
|
|||
1317 |
|
1376 | |||
1318 | From the perspective of the C API, objects implementing the *buffer protocol* |
|
1377 | APIs that accept bytes-like objects don't enforce that the underlying object | |
1319 | all look the same: they are just a pointer to a memory address of a defined |
|
1378 | is read-only. However, it is assumed that the passed object is read-only for | |
1320 | length. This allows the C API to be largely type agnostic when accessing their |
|
1379 | the duration of the function call. It is possible to pass a mutable object | |
1321 | data. This allows custom types to be passed in without first converting them |
|
1380 | (like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL | |
1322 | to a specific type. |
|
1381 | released, and mutate the object from another thread. Such a race condition | |
1323 |
|
1382 | is a bug in the consumer of python-zstandard. Most Python data types are | ||
1324 | Many Python types implement the buffer protocol. These include ``bytes`` |
|
1383 | immutable, so unless you are doing something fancy, you don't need to | |
1325 | (``str`` on Python 2), ``bytearray``, ``array.array``, ``io.BytesIO``, |
|
1384 | worry about this. | |
1326 | ``mmap.mmap``, and ``memoryview``. |
|
|||
1327 |
|
||||
1328 | ``python-zstandard`` APIs that accept objects conforming to the buffer |
|
|||
1329 | protocol require that the buffer is *C contiguous* and has a single |
|
|||
1330 | dimension (``ndim==1``). This is usually the case. An example of where it |
|
|||
1331 | is not is a Numpy matrix type. |
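
For illustration, any C contiguous, one-dimensional bytes-like object can be handed to the APIs directly (a sketch; the read-only caveat above still applies to mutable objects like ``bytearray``)::

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()

    # bytes, bytearray and memoryview all expose a C contiguous,
    # single-dimension buffer.
    cctx.compress(b"raw bytes")
    cctx.compress(bytearray(b"mutable buffer"))
    cctx.compress(memoryview(b"a larger buffer")[2:10])

    # Do not mutate a passed object (e.g. from another thread) while the
    # call is in progress.
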
|
|||
1332 |
|
||||
1333 | Requiring Output Sizes for Non-Streaming Decompression APIs |
|
|||
1334 | ----------------------------------------------------------- |
|
|||
1335 |
|
||||
1336 | Non-streaming decompression APIs require that either the output size is |
|
|||
1337 | explicitly defined (either in the zstd frame header or passed into the |
|
|||
1338 | function) or that a max output size is specified. This restriction is for |
|
|||
1339 | your safety. |
|
|||
1340 |
|
||||
1341 | The *one-shot* decompression APIs store the decompressed result in a |
|
|||
1342 | single buffer. This means that a buffer needs to be pre-allocated to hold |
|
|||
1343 | the result. If the decompressed size is not known, then there is no universal |
|
|||
1344 | good default size to use. Any default will fail or will be highly sub-optimal |
|
|||
1345 | in some scenarios (it will either be too small or will put stress on the |
|
|||
1346 | memory allocator to allocate a too large block). |
|
|||
1347 |
|
||||
1348 | A *helpful* API may retry decompression with buffers of increasing size. |
|
|||
1349 | While useful, there are obvious performance disadvantages, namely redoing |
|
|||
1350 | decompression N times until it works. In addition, there is a security |
|
|||
1351 | concern. Say the input came from highly compressible data, like 1 GB of the |
|
|||
1352 | same byte value. The output size could be several magnitudes larger than the |
|
|||
1353 | input size. An input of <100KB could decompress to >1GB. Without a bounds |
|
|||
1354 | restriction on the decompressed size, certain inputs could exhaust all system |
|
|||
1355 | memory. That's not good and is why the maximum output size is limited. |
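
As an illustration of that restriction, the one-shot decompression API needs either a frame with an embedded content size or an explicit bound (a sketch assuming ``ZstdDecompressor.decompress()`` and its ``max_output_size`` argument)::

    import zstandard as zstd

    cctx = zstd.ZstdCompressor(write_content_size=False)
    frame = cctx.compress(b"\x00" * 65536)

    dctx = zstd.ZstdDecompressor()

    # The frame does not record its decompressed size, so a maximum output
    # size must be supplied; it bounds how much memory the call may allocate.
    data = dctx.decompress(frame, max_output_size=1 << 20)
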
|
|||
1356 |
|
1385 | |||
1357 | Note on Zstandard's *Experimental* API |
|
1386 | Note on Zstandard's *Experimental* API | |
1358 | ====================================== |
|
1387 | ====================================== | |
1359 |
|
1388 | |||
1360 | Many of the Zstandard APIs used by this module are marked as *experimental* |
|
1389 | Many of the Zstandard APIs used by this module are marked as *experimental* | |
1361 | within the Zstandard project. This includes a large number of useful |
|
1390 | within the Zstandard project. | |
1362 | features, such as compression and frame parameters and parts of dictionary |
|
|||
1363 | compression. |
|
|||
1364 |
|
1391 | |||
1365 | It is unclear how Zstandard's C API will evolve over time, especially with |
|
1392 | It is unclear how Zstandard's C API will evolve over time, especially with | |
1366 | regards to this *experimental* functionality. We will try to maintain |
|
1393 | regards to this *experimental* functionality. We will try to maintain | |
@@ -1371,7 +1398,7 b' Since a copy of the Zstandard source cod' | |||||
1371 | module and since we compile against it, the behavior of a specific |
|
1398 | module and since we compile against it, the behavior of a specific | |
1372 | version of this module should be constant for all of time. So if you |
|
1399 | version of this module should be constant for all of time. So if you | |
1373 | pin the version of this module used in your projects (which is a Python |
|
1400 | pin the version of this module used in your projects (which is a Python | |
1374 | best practice), you should be |
|
1401 | best practice), you should be shielded from unwanted future changes. | |
1375 |
|
1402 | |||
1376 | Donate |
|
1403 | Donate | |
1377 | ====== |
|
1404 | ====== |
@@ -83,7 +83,7 b' static int BufferWithSegments_init(ZstdB' | |||||
83 | } |
|
83 | } | |
84 |
|
84 | |||
85 | if (segments.len % sizeof(BufferSegment)) { |
|
85 | if (segments.len % sizeof(BufferSegment)) { | |
86 | PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of % |
|
86 | PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of %zu", | |
87 | sizeof(BufferSegment)); |
|
87 | sizeof(BufferSegment)); | |
88 | goto except; |
|
88 | goto except; | |
89 | } |
|
89 | } | |
@@ -123,7 +123,7 b' except:' | |||||
123 | PyBuffer_Release(&self->parent); |
|
123 | PyBuffer_Release(&self->parent); | |
124 | PyBuffer_Release(&segments); |
|
124 | PyBuffer_Release(&segments); | |
125 | return -1; |
|
125 | return -1; | |
126 | } |
|
126 | } | |
127 |
|
127 | |||
128 | /** |
|
128 | /** | |
129 | * Construct a BufferWithSegments from existing memory and offsets. |
|
129 | * Construct a BufferWithSegments from existing memory and offsets. | |
@@ -188,6 +188,12 b' static ZstdBufferSegment* BufferWithSegm' | |||||
188 | return NULL; |
|
188 | return NULL; | |
189 | } |
|
189 | } | |
190 |
|
190 | |||
|
191 | if (self->segments[i].length > PY_SSIZE_T_MAX) { | |||
|
192 | PyErr_Format(PyExc_ValueError, | |||
|
193 | "item at offset %zd is too large for this platform", i); | |||
|
194 | return NULL; | |||
|
195 | } | |||
|
196 | ||||
191 | result = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL); |
|
197 | result = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL); | |
192 | if (NULL == result) { |
|
198 | if (NULL == result) { | |
193 | return NULL; |
|
199 | return NULL; | |
@@ -197,7 +203,7 b' static ZstdBufferSegment* BufferWithSegm' | |||||
197 | Py_INCREF(self); |
|
203 | Py_INCREF(self); | |
198 |
|
204 | |||
199 | result->data = (char*)self->data + self->segments[i].offset; |
|
205 | result->data = (char*)self->data + self->segments[i].offset; | |
200 | result->dataSize = self->segments[i].length; |
|
206 | result->dataSize = (Py_ssize_t)self->segments[i].length; | |
201 | result->offset = self->segments[i].offset; |
|
207 | result->offset = self->segments[i].offset; | |
202 |
|
208 | |||
203 | return result; |
|
209 | return result; | |
@@ -205,7 +211,13 b' static ZstdBufferSegment* BufferWithSegm' | |||||
205 |
|
211 | |||
206 | #if PY_MAJOR_VERSION >= 3 |
|
212 | #if PY_MAJOR_VERSION >= 3 | |
207 | static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) { |
|
213 | static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) { | |
208 | return PyBuffer_FillInfo(view, (PyObject*)self, self->data, self->dataSize, 1, flags); |
|
214 | if (self->dataSize > PY_SSIZE_T_MAX) { | |
|
215 | view->obj = NULL; | |||
|
216 | PyErr_SetString(PyExc_BufferError, "buffer is too large for this platform"); | |||
|
217 | return -1; | |||
|
218 | } | |||
|
219 | ||||
|
220 | return PyBuffer_FillInfo(view, (PyObject*)self, self->data, (Py_ssize_t)self->dataSize, 1, flags); | |||
209 | } |
|
221 | } | |
210 | #else |
|
222 | #else | |
211 | static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) { |
|
223 | static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) { | |
@@ -214,8 +226,13 b' static Py_ssize_t BufferWithSegments_get' | |||||
214 | return -1; |
|
226 | return -1; | |
215 | } |
|
227 | } | |
216 |
|
228 | |||
|
229 | if (self->dataSize > PY_SSIZE_T_MAX) { | |||
|
230 | PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform"); | |||
|
231 | return -1; | |||
|
232 | } | |||
|
233 | ||||
217 | *ptrptr = self->data; |
|
234 | *ptrptr = self->data; | |
218 | return self->dataSize; |
|
235 | return (Py_ssize_t)self->dataSize; | |
219 | } |
|
236 | } | |
220 |
|
237 | |||
221 | static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) { |
|
238 | static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) { | |
@@ -232,7 +249,12 b' PyDoc_STRVAR(BufferWithSegments_tobytes_' | |||||
232 | ); |
|
249 | ); | |
233 |
|
250 | |||
234 | static PyObject* BufferWithSegments_tobytes(ZstdBufferWithSegments* self) { |
|
251 | static PyObject* BufferWithSegments_tobytes(ZstdBufferWithSegments* self) { | |
235 | return PyBytes_FromStringAndSize(self->data, self->dataSize); |
|
252 | if (self->dataSize > PY_SSIZE_T_MAX) { | |
|
253 | PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform"); | |||
|
254 | return NULL; | |||
|
255 | } | |||
|
256 | ||||
|
257 | return PyBytes_FromStringAndSize(self->data, (Py_ssize_t)self->dataSize); | |||
236 | } |
|
258 | } | |
237 |
|
259 | |||
238 | PyDoc_STRVAR(BufferWithSegments_segments__doc__, |
|
260 | PyDoc_STRVAR(BufferWithSegments_segments__doc__, |
@@ -14,125 +14,11 b' ZstdCompressionDict* train_dictionary(Py' | |||||
14 | static char* kwlist[] = { |
|
14 | static char* kwlist[] = { | |
15 | "dict_size", |
|
15 | "dict_size", | |
16 | "samples", |
|
16 | "samples", | |
17 | "selectivity", |
|
|||
18 | "level", |
|
|||
19 | "notifications", |
|
|||
20 | "dict_id", |
|
|||
21 | NULL |
|
|||
22 | }; |
|
|||
23 | size_t capacity; |
|
|||
24 | PyObject* samples; |
|
|||
25 | Py_ssize_t samplesLen; |
|
|||
26 | unsigned selectivity = 0; |
|
|||
27 | int level = 0; |
|
|||
28 | unsigned notifications = 0; |
|
|||
29 | unsigned dictID = 0; |
|
|||
30 | ZDICT_params_t zparams; |
|
|||
31 | Py_ssize_t sampleIndex; |
|
|||
32 | Py_ssize_t sampleSize; |
|
|||
33 | PyObject* sampleItem; |
|
|||
34 | size_t zresult; |
|
|||
35 | void* sampleBuffer = NULL; |
|
|||
36 | void* sampleOffset; |
|
|||
37 | size_t samplesSize = 0; |
|
|||
38 | size_t* sampleSizes = NULL; |
|
|||
39 | void* dict = NULL; |
|
|||
40 | ZstdCompressionDict* result = NULL; |
|
|||
41 |
|
||||
42 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IiII:train_dictionary", |
|
|||
43 | kwlist, |
|
|||
44 | &capacity, |
|
|||
45 | &PyList_Type, &samples, |
|
|||
46 | &selectivity, &level, ¬ifications, &dictID)) { |
|
|||
47 | return NULL; |
|
|||
48 | } |
|
|||
49 |
|
||||
50 | memset(&zparams, 0, sizeof(zparams)); |
|
|||
51 |
|
||||
52 | zparams.selectivityLevel = selectivity; |
|
|||
53 | zparams.compressionLevel = level; |
|
|||
54 | zparams.notificationLevel = notifications; |
|
|||
55 | zparams.dictID = dictID; |
|
|||
56 |
|
||||
57 | /* Figure out the size of the raw samples */ |
|
|||
58 | samplesLen = PyList_Size(samples); |
|
|||
59 | for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) { |
|
|||
60 | sampleItem = PyList_GetItem(samples, sampleIndex); |
|
|||
61 | if (!PyBytes_Check(sampleItem)) { |
|
|||
62 | PyErr_SetString(PyExc_ValueError, "samples must be bytes"); |
|
|||
63 | return NULL; |
|
|||
64 | } |
|
|||
65 | samplesSize += PyBytes_GET_SIZE(sampleItem); |
|
|||
66 | } |
|
|||
67 |
|
||||
68 | /* Now that we know the total size of the raw simples, we can allocate |
|
|||
69 | a buffer for the raw data */ |
|
|||
70 | sampleBuffer = PyMem_Malloc(samplesSize); |
|
|||
71 | if (!sampleBuffer) { |
|
|||
72 | PyErr_NoMemory(); |
|
|||
73 | goto finally; |
|
|||
74 | } |
|
|||
75 | sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t)); |
|
|||
76 | if (!sampleSizes) { |
|
|||
77 | PyErr_NoMemory(); |
|
|||
78 | goto finally; |
|
|||
79 | } |
|
|||
80 |
|
||||
81 | sampleOffset = sampleBuffer; |
|
|||
82 | /* Now iterate again and assemble the samples in the buffer */ |
|
|||
83 | for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) { |
|
|||
84 | sampleItem = PyList_GetItem(samples, sampleIndex); |
|
|||
85 | sampleSize = PyBytes_GET_SIZE(sampleItem); |
|
|||
86 | sampleSizes[sampleIndex] = sampleSize; |
|
|||
87 | memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize); |
|
|||
88 | sampleOffset = (char*)sampleOffset + sampleSize; |
|
|||
89 | } |
|
|||
90 |
|
||||
91 | dict = PyMem_Malloc(capacity); |
|
|||
92 | if (!dict) { |
|
|||
93 | PyErr_NoMemory(); |
|
|||
94 | goto finally; |
|
|||
95 | } |
|
|||
96 |
|
||||
97 | /* TODO consider using dup2() to redirect zstd's stderr writing to a buffer */ |
|
|||
98 | Py_BEGIN_ALLOW_THREADS |
|
|||
99 | zresult = ZDICT_trainFromBuffer_advanced(dict, capacity, |
|
|||
100 | sampleBuffer, sampleSizes, (unsigned int)samplesLen, |
|
|||
101 | zparams); |
|
|||
102 | Py_END_ALLOW_THREADS |
|
|||
103 | if (ZDICT_isError(zresult)) { |
|
|||
104 | PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult)); |
|
|||
105 | PyMem_Free(dict); |
|
|||
106 | goto finally; |
|
|||
107 | } |
|
|||
108 |
|
||||
109 | result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType); |
|
|||
110 | if (!result) { |
|
|||
111 | goto finally; |
|
|||
112 | } |
|
|||
113 |
|
||||
114 | result->dictData = dict; |
|
|||
115 | result->dictSize = zresult; |
|
|||
116 | result->d = 0; |
|
|||
117 | result->k = 0; |
|
|||
118 |
|
||||
119 | finally: |
|
|||
120 | PyMem_Free(sampleBuffer); |
|
|||
121 | PyMem_Free(sampleSizes); |
|
|||
122 |
|
||||
123 | return result; |
|
|||
124 | } |
|
|||
125 |
|
||||
126 | ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) { |
|
|||
127 | static char* kwlist[] = { |
|
|||
128 | "dict_size", |
|
|||
129 | "samples", |
|
|||
130 | "k", |
|
17 | "k", | |
131 | "d", |
|
18 | "d", | |
132 | "notifications", |
|
19 | "notifications", | |
133 | "dict_id", |
|
20 | "dict_id", | |
134 | "level", |
|
21 | "level", | |
135 | "optimize", |
|
|||
136 | "steps", |
|
22 | "steps", | |
137 | "threads", |
|
23 | "threads", | |
138 | NULL |
|
24 | NULL | |
@@ -145,10 +31,9 b' ZstdCompressionDict* train_cover_diction' | |||||
145 | unsigned notifications = 0; |
|
31 | unsigned notifications = 0; | |
146 | unsigned dictID = 0; |
|
32 | unsigned dictID = 0; | |
147 | int level = 0; |
|
33 | int level = 0; | |
148 | PyObject* optimize = NULL; |
|
|||
149 | unsigned steps = 0; |
|
34 | unsigned steps = 0; | |
150 | int threads = 0; |
|
35 | int threads = 0; | |
151 |
|
|
36 | ZDICT_cover_params_t params; | |
152 | Py_ssize_t samplesLen; |
|
37 | Py_ssize_t samplesLen; | |
153 | Py_ssize_t i; |
|
38 | Py_ssize_t i; | |
154 | size_t samplesSize = 0; |
|
39 | size_t samplesSize = 0; | |
@@ -160,9 +45,9 b' ZstdCompressionDict* train_cover_diction' | |||||
160 | size_t zresult; |
|
45 | size_t zresult; | |
161 | ZstdCompressionDict* result = NULL; |
|
46 | ZstdCompressionDict* result = NULL; | |
162 |
|
47 | |||
163 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIi |
|
48 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIiIi:train_dictionary", | |
164 | kwlist, &capacity, &PyList_Type, &samples, |
|
49 | kwlist, &capacity, &PyList_Type, &samples, | |
165 |
&k, &d, ¬ifications, &dictID, &level, & |
|
50 | &k, &d, ¬ifications, &dictID, &level, &steps, &threads)) { | |
166 | return NULL; |
|
51 | return NULL; | |
167 | } |
|
52 | } | |
168 |
|
53 | |||
@@ -175,9 +60,9 b' ZstdCompressionDict* train_cover_diction' | |||||
175 | params.d = d; |
|
60 | params.d = d; | |
176 | params.steps = steps; |
|
61 | params.steps = steps; | |
177 | params.nbThreads = threads; |
|
62 | params.nbThreads = threads; | |
178 | params.notificationLevel = notifications; |
|
63 | params.zParams.notificationLevel = notifications; | |
179 | params.dictID = dictID; |
|
64 | params.zParams.dictID = dictID; | |
180 | params.compressionLevel = level; |
|
65 | params.zParams.compressionLevel = level; | |
181 |
|
66 | |||
182 | /* Figure out total size of input samples. */ |
|
67 | /* Figure out total size of input samples. */ | |
183 | samplesLen = PyList_Size(samples); |
|
68 | samplesLen = PyList_Size(samples); | |
@@ -219,12 +104,21 b' ZstdCompressionDict* train_cover_diction' | |||||
219 | } |
|
104 | } | |
220 |
|
105 | |||
221 | Py_BEGIN_ALLOW_THREADS |
|
106 | Py_BEGIN_ALLOW_THREADS | |
222 | if (optimize && PyObject_IsTrue(optimize)) { |
|
107 | /* No parameters uses the default function, which will use default params | |
223 | zresult = COVER_optimizeTrainFromBuffer(dict, capacity, |
|
108 | and call ZDICT_optimizeTrainFromBuffer_cover under the hood. */ | |
|
109 | if (!params.k && !params.d && !params.zParams.compressionLevel | |||
|
110 | && !params.zParams.notificationLevel && !params.zParams.dictID) { | |||
|
111 | zresult = ZDICT_trainFromBuffer(dict, capacity, sampleBuffer, | |||
|
112 | sampleSizes, (unsigned)samplesLen); | |||
|
113 | } | |||
|
114 | /* Use optimize mode if user controlled steps or threads explicitly. */ | |||
|
115 | else if (params.steps || params.nbThreads) { | |||
|
116 | zresult = ZDICT_optimizeTrainFromBuffer_cover(dict, capacity, | |||
224 | sampleBuffer, sampleSizes, (unsigned)samplesLen, ¶ms); |
|
117 | sampleBuffer, sampleSizes, (unsigned)samplesLen, ¶ms); | |
225 | } |
|
118 | } | |
|
119 | /* Non-optimize mode with explicit control. */ | |||
226 | else { |
|
120 | else { | |
227 |
zresult = |
|
121 | zresult = ZDICT_trainFromBuffer_cover(dict, capacity, | |
228 | sampleBuffer, sampleSizes, (unsigned)samplesLen, params); |
|
122 | sampleBuffer, sampleSizes, (unsigned)samplesLen, params); | |
229 | } |
|
123 | } | |
230 | Py_END_ALLOW_THREADS |
|
124 | Py_END_ALLOW_THREADS | |
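
In Python terms, the dispatch above makes ``train_dictionary()`` behave roughly as follows (a hedged sketch using the keyword names from the argument list parsed earlier; ``samples`` must be a list of ``bytes``)::

    import zstandard as zstd

    samples = [b"record %d: some repetitive sample content" % i
               for i in range(1000)]

    # No cover parameters: zstd picks defaults (plain trainFromBuffer).
    d1 = zstd.train_dictionary(8192, samples)

    # Explicit k/d: cover training with exactly those parameters.
    d2 = zstd.train_dictionary(8192, samples, k=64, d=8)

    # steps (or threads): the optimizing cover trainer searches for good
    # k/d values itself.
    d3 = zstd.train_dictionary(8192, samples, steps=40)
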
@@ -243,8 +137,11 b' ZstdCompressionDict* train_cover_diction' | |||||
243 |
|
137 | |||
244 | result->dictData = dict; |
|
138 | result->dictData = dict; | |
245 | result->dictSize = zresult; |
|
139 | result->dictSize = zresult; | |
|
140 | result->dictType = ZSTD_dct_fullDict; | |||
246 | result->d = params.d; |
|
141 | result->d = params.d; | |
247 | result->k = params.k; |
|
142 | result->k = params.k; | |
|
143 | result->cdict = NULL; | |||
|
144 | result->ddict = NULL; | |||
248 |
|
145 | |||
249 | finally: |
|
146 | finally: | |
250 | PyMem_Free(sampleBuffer); |
|
147 | PyMem_Free(sampleBuffer); | |
@@ -253,43 +150,99 b' finally:' | |||||
253 | return result; |
|
150 | return result; | |
254 | } |
|
151 | } | |
255 |
|
152 | |||
|
153 | int ensure_ddict(ZstdCompressionDict* dict) { | |||
|
154 | if (dict->ddict) { | |||
|
155 | return 0; | |||
|
156 | } | |||
|
157 | ||||
|
158 | Py_BEGIN_ALLOW_THREADS | |||
|
159 | dict->ddict = ZSTD_createDDict_advanced(dict->dictData, dict->dictSize, | |||
|
160 | ZSTD_dlm_byRef, dict->dictType, ZSTD_defaultCMem); | |||
|
161 | Py_END_ALLOW_THREADS | |||
|
162 | if (!dict->ddict) { | |||
|
163 | PyErr_SetString(ZstdError, "could not create decompression dict"); | |||
|
164 | return 1; | |||
|
165 | } | |||
|
166 | ||||
|
167 | return 0; | |||
|
168 | } | |||
|
169 | ||||
256 | PyDoc_STRVAR(ZstdCompressionDict__doc__, |
|
170 | PyDoc_STRVAR(ZstdCompressionDict__doc__, | |
257 | "ZstdCompressionDict(data) - Represents a computed compression dictionary\n" |
|
171 | "ZstdCompressionDict(data) - Represents a computed compression dictionary\n" | |
258 | "\n" |
|
172 | "\n" | |
259 | "This type holds the results of a computed Zstandard compression dictionary.\n" |
|
173 | "This type holds the results of a computed Zstandard compression dictionary.\n" | |
260 | "Instances are obtained by calling ``train_dictionary()`` or by passing |
|
174 | "Instances are obtained by calling ``train_dictionary()`` or by passing\n" | |
261 | "obtained from another source into the constructor.\n" |
|
175 | "bytes obtained from another source into the constructor.\n" | |
262 | ); |
|
176 | ); | |
263 |
|
177 | |||
264 | static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args) { |
|
178 | static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) { | |
265 | const char* source; |
|
179 | static char* kwlist[] = { | |
266 | Py_ssize_t sourceSize; |
|
180 | "data", | |
|
181 | "dict_type", | |||
|
182 | NULL | |||
|
183 | }; | |||
|
184 | ||||
|
185 | int result = -1; | |||
|
186 | Py_buffer source; | |||
|
187 | unsigned dictType = ZSTD_dct_auto; | |||
267 |
|
188 | |||
268 | self->dictData = NULL; |
|
189 | self->dictData = NULL; | |
269 | self->dictSize = 0; |
|
190 | self->dictSize = 0; | |
|
191 | self->cdict = NULL; | |||
|
192 | self->ddict = NULL; | |||
270 |
|
193 | |||
271 | #if PY_MAJOR_VERSION >= 3 |
|
194 | #if PY_MAJOR_VERSION >= 3 | |
272 |
if (!PyArg_ParseTuple(args, "y |
|
195 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|I:ZstdCompressionDict", | |
273 | #else |
|
196 | #else | |
274 |
if (!PyArg_ParseTuple(args, "s |
|
197 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|I:ZstdCompressionDict", | |
275 | #endif |
|
198 | #endif | |
276 |
&source, & |
|
199 | kwlist, &source, &dictType)) { | |
277 | return -1; |
|
200 | return -1; | |
278 | } |
|
201 | } | |
279 |
|
202 | |||
280 | self->dictData = PyMem_Malloc(sourceSize); |
|
203 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |
|
204 | PyErr_SetString(PyExc_ValueError, | |||
|
205 | "data buffer should be contiguous and have at most one dimension"); | |||
|
206 | goto finally; | |||
|
207 | } | |||
|
208 | ||||
|
209 | if (dictType != ZSTD_dct_auto && dictType != ZSTD_dct_rawContent | |||
|
210 | && dictType != ZSTD_dct_fullDict) { | |||
|
211 | PyErr_Format(PyExc_ValueError, | |||
|
212 | "invalid dictionary load mode: %d; must use DICT_TYPE_* constants", | |||
|
213 | dictType); | |||
|
214 | goto finally; | |||
|
215 | } | |||
|
216 | ||||
|
217 | self->dictType = dictType; | |||
|
218 | ||||
|
219 | self->dictData = PyMem_Malloc(source.len); | |||
281 | if (!self->dictData) { |
|
220 | if (!self->dictData) { | |
282 | PyErr_NoMemory(); |
|
221 | PyErr_NoMemory(); | |
283 | return -1; |
|
222 | goto finally; | |
284 | } |
|
223 | } | |
285 |
|
224 | |||
286 |
memcpy(self->dictData, source, source |
|
225 | memcpy(self->dictData, source.buf, source.len); | |
287 |
self->dictSize = source |
|
226 | self->dictSize = source.len; | |
|
227 | ||||
|
228 | result = 0; | |||
288 |
|
229 | |||
289 | return 0; |
|
230 | finally: | |
|
231 | PyBuffer_Release(&source); | |||
|
232 | return result; | |||
|
233 | } | |||
|
234 | ||||
|
235 | static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) { | |||
|
236 | if (self->cdict) { | |||
|
237 | ZSTD_freeCDict(self->cdict); | |||
|
238 | self->cdict = NULL; | |||
290 | } |
|
239 | } | |
291 |
|
240 | |||
292 | static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) { |
|
241 | if (self->ddict) { | |
|
242 | ZSTD_freeDDict(self->ddict); | |||
|
243 | self->ddict = NULL; | |||
|
244 | } | |||
|
245 | ||||
293 | if (self->dictData) { |
|
246 | if (self->dictData) { | |
294 | PyMem_Free(self->dictData); |
|
247 | PyMem_Free(self->dictData); | |
295 | self->dictData = NULL; |
|
248 | self->dictData = NULL; | |
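
At the Python level the constructor above takes the raw dictionary bytes plus an optional load mode; a sketch assuming the module exposes the ``DICT_TYPE_*`` constants referenced in the error message (``dict_data`` here stands in for real dictionary content)::

    import zstandard as zstd

    dict_data = b"..."  # placeholder: raw dictionary bytes from training or elsewhere

    # Default mode lets zstd detect whether this is a full dictionary or
    # raw content.
    d_auto = zstd.ZstdCompressionDict(dict_data)

    # Force raw content mode, e.g. for prefix-style dictionaries.
    d_raw = zstd.ZstdCompressionDict(dict_data,
                                     dict_type=zstd.DICT_TYPE_RAWCONTENT)
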
@@ -298,6 +251,74 b' static void ZstdCompressionDict_dealloc(' | |||||
298 | PyObject_Del(self); |
|
251 | PyObject_Del(self); | |
299 | } |
|
252 | } | |
300 |
|
253 | |||
|
254 | PyDoc_STRVAR(ZstdCompressionDict_precompute_compress__doc__, | |||
|
255 | "Precompute a dictionary so it can be used by multiple compressors.\n" | |||
|
256 | ); | |||
|
257 | ||||
|
258 | static PyObject* ZstdCompressionDict_precompute_compress(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) { | |||
|
259 | static char* kwlist[] = { | |||
|
260 | "level", | |||
|
261 | "compression_params", | |||
|
262 | NULL | |||
|
263 | }; | |||
|
264 | ||||
|
265 | int level = 0; | |||
|
266 | ZstdCompressionParametersObject* compressionParams = NULL; | |||
|
267 | ZSTD_compressionParameters cParams; | |||
|
268 | size_t zresult; | |||
|
269 | ||||
|
270 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!:precompute_compress", kwlist, | |||
|
271 | &level, &ZstdCompressionParametersType, &compressionParams)) { | |||
|
272 | return NULL; | |||
|
273 | } | |||
|
274 | ||||
|
275 | if (level && compressionParams) { | |||
|
276 | PyErr_SetString(PyExc_ValueError, | |||
|
277 | "must only specify one of level or compression_params"); | |||
|
278 | return NULL; | |||
|
279 | } | |||
|
280 | ||||
|
281 | if (!level && !compressionParams) { | |||
|
282 | PyErr_SetString(PyExc_ValueError, | |||
|
283 | "must specify one of level or compression_params"); | |||
|
284 | return NULL; | |||
|
285 | } | |||
|
286 | ||||
|
287 | if (self->cdict) { | |||
|
288 | zresult = ZSTD_freeCDict(self->cdict); | |||
|
289 | self->cdict = NULL; | |||
|
290 | if (ZSTD_isError(zresult)) { | |||
|
291 | PyErr_Format(ZstdError, "unable to free CDict: %s", | |||
|
292 | ZSTD_getErrorName(zresult)); | |||
|
293 | return NULL; | |||
|
294 | } | |||
|
295 | } | |||
|
296 | ||||
|
297 | if (level) { | |||
|
298 | cParams = ZSTD_getCParams(level, 0, self->dictSize); | |||
|
299 | } | |||
|
300 | else { | |||
|
301 | cParams.chainLog = compressionParams->chainLog; | |||
|
302 | cParams.hashLog = compressionParams->hashLog; | |||
|
303 | cParams.searchLength = compressionParams->minMatch; | |||
|
304 | cParams.searchLog = compressionParams->searchLog; | |||
|
305 | cParams.strategy = compressionParams->compressionStrategy; | |||
|
306 | cParams.targetLength = compressionParams->targetLength; | |||
|
307 | cParams.windowLog = compressionParams->windowLog; | |||
|
308 | } | |||
|
309 | ||||
|
310 | assert(!self->cdict); | |||
|
311 | self->cdict = ZSTD_createCDict_advanced(self->dictData, self->dictSize, | |||
|
312 | ZSTD_dlm_byRef, self->dictType, cParams, ZSTD_defaultCMem); | |||
|
313 | ||||
|
314 | if (!self->cdict) { | |||
|
315 | PyErr_SetString(ZstdError, "unable to precompute dictionary"); | |||
|
316 | return NULL; | |||
|
317 | } | |||
|
318 | ||||
|
319 | Py_RETURN_NONE; | |||
|
320 | } | |||
|
321 | ||||
301 | static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) { |
|
322 | static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) { | |
302 | unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize); |
|
323 | unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize); | |
303 |
|
324 | |||
@@ -313,6 +334,8 b' static PyMethodDef ZstdCompressionDict_m' | |||||
313 | PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") }, |
|
334 | PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") }, | |
314 | { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS, |
|
335 | { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS, | |
315 | PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") }, |
|
336 | PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") }, | |
|
337 | { "precompute_compress", (PyCFunction)ZstdCompressionDict_precompute_compress, | |||
|
338 | METH_VARARGS | METH_KEYWORDS, ZstdCompressionDict_precompute_compress__doc__ }, | |||
316 | { NULL, NULL } |
|
339 | { NULL, NULL } | |
317 | }; |
|
340 | }; | |
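
The newly registered ``precompute_compress()`` method converts the dictionary into a precomputed digest that many compressors can then share; a minimal sketch (``samples`` is again a list of ``bytes``)::

    import zstandard as zstd

    samples = [b"sample record %d" % i for i in range(1000)]
    d = zstd.train_dictionary(8192, samples)

    # Do the dictionary processing once, up front, for this level.
    d.precompute_compress(level=3)

    # Subsequent compressors reuse the precomputed dictionary state.
    cctx = zstd.ZstdCompressor(level=3, dict_data=d)
    frame = cctx.compress(b"sample record 42")
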
318 |
|
341 |
This diff has been collapsed as it changes many lines (525 lines changed).
@@ -8,204 +8,448 b'' | |||||
8 |
|
8 | |||
9 | #include "python-zstandard.h" |
|
9 | #include "python-zstandard.h" | |
10 |
|
10 | |||
11 | void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams) { |
|
11 | extern PyObject* ZstdError; | |
12 | zparams->windowLog = params->windowLog; |
|
|||
13 | zparams->chainLog = params->chainLog; |
|
|||
14 | zparams->hashLog = params->hashLog; |
|
|||
15 | zparams->searchLog = params->searchLog; |
|
|||
16 | zparams->searchLength = params->searchLength; |
|
|||
17 | zparams->targetLength = params->targetLength; |
|
|||
18 | zparams->strategy = params->strategy; |
|
|||
19 | } |
|
|||
20 |
|
12 | |||
21 | CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args) { |
|
13 | int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value) { | |
22 | int compressionLevel; |
|
14 | size_t zresult = ZSTD_CCtxParam_setParameter(params, param, value); | |
23 | unsigned PY_LONG_LONG sourceSize = 0; |
|
15 | if (ZSTD_isError(zresult)) { | |
24 | Py_ssize_t dictSize = 0; |
|
16 | PyErr_Format(ZstdError, "unable to set compression context parameter: %s", | |
25 | ZSTD_compressionParameters params; |
|
17 | ZSTD_getErrorName(zresult)); | |
26 | CompressionParametersObject* result; |
|
18 | return 1; | |
27 |
|
||||
28 | if (!PyArg_ParseTuple(args, "i|Kn:get_compression_parameters", |
|
|||
29 | &compressionLevel, &sourceSize, &dictSize)) { |
|
|||
30 | return NULL; |
|
|||
31 | } |
|
19 | } | |
32 |
|
20 | |||
33 | params = ZSTD_getCParams(compressionLevel, sourceSize, dictSize); |
|
21 | return 0; | |
|
22 | } | |||
|
23 | ||||
|
24 | #define TRY_SET_PARAMETER(params, param, value) if (set_parameter(params, param, value)) return -1; | |||
34 |
|
25 | |||
35 | result = PyObject_New(CompressionParametersObject, &CompressionParametersType); |
|
26 | int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) { | |
36 | if (!result) { |
|
27 | TRY_SET_PARAMETER(params, ZSTD_p_format, obj->format); | |
37 | return NULL; |
|
28 | TRY_SET_PARAMETER(params, ZSTD_p_compressionLevel, (unsigned)obj->compressionLevel); | |
|
29 | TRY_SET_PARAMETER(params, ZSTD_p_windowLog, obj->windowLog); | |||
|
30 | TRY_SET_PARAMETER(params, ZSTD_p_hashLog, obj->hashLog); | |||
|
31 | TRY_SET_PARAMETER(params, ZSTD_p_chainLog, obj->chainLog); | |||
|
32 | TRY_SET_PARAMETER(params, ZSTD_p_searchLog, obj->searchLog); | |||
|
33 | TRY_SET_PARAMETER(params, ZSTD_p_minMatch, obj->minMatch); | |||
|
34 | TRY_SET_PARAMETER(params, ZSTD_p_targetLength, obj->targetLength); | |||
|
35 | TRY_SET_PARAMETER(params, ZSTD_p_compressionStrategy, obj->compressionStrategy); | |||
|
36 | TRY_SET_PARAMETER(params, ZSTD_p_contentSizeFlag, obj->contentSizeFlag); | |||
|
37 | TRY_SET_PARAMETER(params, ZSTD_p_checksumFlag, obj->checksumFlag); | |||
|
38 | TRY_SET_PARAMETER(params, ZSTD_p_dictIDFlag, obj->dictIDFlag); | |||
|
39 | TRY_SET_PARAMETER(params, ZSTD_p_nbWorkers, obj->threads); | |||
|
40 | TRY_SET_PARAMETER(params, ZSTD_p_jobSize, obj->jobSize); | |||
|
41 | TRY_SET_PARAMETER(params, ZSTD_p_overlapSizeLog, obj->overlapSizeLog); | |||
|
42 | TRY_SET_PARAMETER(params, ZSTD_p_compressLiterals, obj->compressLiterals); | |||
|
43 | TRY_SET_PARAMETER(params, ZSTD_p_forceMaxWindow, obj->forceMaxWindow); | |||
|
44 | TRY_SET_PARAMETER(params, ZSTD_p_enableLongDistanceMatching, obj->enableLongDistanceMatching); | |||
|
45 | TRY_SET_PARAMETER(params, ZSTD_p_ldmHashLog, obj->ldmHashLog); | |||
|
46 | TRY_SET_PARAMETER(params, ZSTD_p_ldmMinMatch, obj->ldmMinMatch); | |||
|
47 | TRY_SET_PARAMETER(params, ZSTD_p_ldmBucketSizeLog, obj->ldmBucketSizeLog); | |||
|
48 | TRY_SET_PARAMETER(params, ZSTD_p_ldmHashEveryLog, obj->ldmHashEveryLog); | |||
|
49 | ||||
|
50 | return 0; | |||
|
51 | } | |||
|
52 | ||||
|
53 | int reset_params(ZstdCompressionParametersObject* params) { | |||
|
54 | if (params->params) { | |||
|
55 | ZSTD_CCtxParams_reset(params->params); | |||
|
56 | } | |||
|
57 | else { | |||
|
58 | params->params = ZSTD_createCCtxParams(); | |||
|
59 | if (!params->params) { | |||
|
60 | PyErr_NoMemory(); | |||
|
61 | return 1; | |||
|
62 | } | |||
38 | } |
|
63 | } | |
39 |
|
64 | |||
40 | result->windowLog = params.windowLog; |
|
65 | return set_parameters(params->params, params); | |
41 | result->chainLog = params.chainLog; |
|
|||
42 | result->hashLog = params.hashLog; |
|
|||
43 | result->searchLog = params.searchLog; |
|
|||
44 | result->searchLength = params.searchLength; |
|
|||
45 | result->targetLength = params.targetLength; |
|
|||
46 | result->strategy = params.strategy; |
|
|||
47 |
|
||||
48 | return result; |
|
|||
49 | } |
|
66 | } | |
50 |
|
67 | |||
51 | static int CompressionParameters_init(CompressionParametersObject* self, PyObject* args, PyObject* kwargs) { |
|
68 | static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) { | |
52 | static char* kwlist[] = { |
|
69 | static char* kwlist[] = { | |
|
70 | "format", | |||
|
71 | "compression_level", | |||
53 | "window_log", |
|
72 | "window_log", | |
54 | "chain_log", |
|
|||
55 | "hash_log", |
|
73 | "hash_log", | |
|
74 | "chain_log", | |||
56 | "search_log", |
|
75 | "search_log", | |
57 | "search_length", |
|
76 | "min_match", | |
58 | "target_length", |
|
77 | "target_length", | |
59 | "strategy", |
|
78 | "compression_strategy", | |
|
79 | "write_content_size", | |||
|
80 | "write_checksum", | |||
|
81 | "write_dict_id", | |||
|
82 | "job_size", | |||
|
83 | "overlap_size_log", | |||
|
84 | "force_max_window", | |||
|
85 | "enable_ldm", | |||
|
86 | "ldm_hash_log", | |||
|
87 | "ldm_min_match", | |||
|
88 | "ldm_bucket_size_log", | |||
|
89 | "ldm_hash_every_log", | |||
|
90 | "threads", | |||
|
91 | "compress_literals", | |||
60 | NULL |
|
92 | NULL | |
61 | }; |
|
93 | }; | |
62 |
|
94 | |||
63 | unsigned windowLog; |
|
95 | unsigned format = 0; | |
64 | unsigned chainLog; |
|
96 | int compressionLevel = 0; | |
65 |
unsigned |
|
97 | unsigned windowLog = 0; | |
66 |
unsigned |
|
98 | unsigned hashLog = 0; | |
67 |
unsigned |
|
99 | unsigned chainLog = 0; | |
68 | unsigned targetLength; |
|
100 | unsigned searchLog = 0; | |
69 |
unsigned |
|
101 | unsigned minMatch = 0; | |
70 | ZSTD_compressionParameters params; |
|
102 | unsigned targetLength = 0; | |
71 | size_t zresult; |
|
103 | unsigned compressionStrategy = 0; | |
|
104 | unsigned contentSizeFlag = 1; | |||
|
105 | unsigned checksumFlag = 0; | |||
|
106 | unsigned dictIDFlag = 0; | |||
|
107 | unsigned jobSize = 0; | |||
|
108 | unsigned overlapSizeLog = 0; | |||
|
109 | unsigned forceMaxWindow = 0; | |||
|
110 | unsigned enableLDM = 0; | |||
|
111 | unsigned ldmHashLog = 0; | |||
|
112 | unsigned ldmMinMatch = 0; | |||
|
113 | unsigned ldmBucketSizeLog = 0; | |||
|
114 | unsigned ldmHashEveryLog = 0; | |||
|
115 | int threads = 0; | |||
72 |
|
116 | |||
73 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "IIIIIII:CompressionParameters", |
|
117 | /* Setting value 0 has the effect of disabling. So we use -1 as a default | |
74 | kwlist, &windowLog, &chainLog, &hashLog, &searchLog, &searchLength, |
|
118 | * to detect whether to set. Then we automatically derive the expected value | |
75 | &targetLength, &strategy)) { |
|
119 | * based on the level, just like zstandard does itself. */ | |
76 | return -1; |
|
120 | int compressLiterals = -1; | |
77 | } |
|
|||
78 |
|
121 | |||
79 | if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) { |
|
122 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, | |
80 | PyErr_SetString(PyExc_ValueError, "invalid window log value"); |
|
123 | "|IiIIIIIIIIIIIIIIIIIIii:CompressionParameters", | |
81 | return -1; |
|
124 | kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog, | |
82 | } |
|
125 | &searchLog, &minMatch, &targetLength, &compressionStrategy, | |
83 |
|
126 | &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapSizeLog, | ||
84 | if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) { |
|
127 | &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch, &ldmBucketSizeLog, | |
85 | PyErr_SetString(PyExc_ValueError, "invalid chain log value"); |
|
128 | &ldmHashEveryLog, &threads, &compressLiterals)) { | |
86 | return -1; |
|
|||
87 | } |
|
|||
88 |
|
||||
89 | if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) { |
|
|||
90 | PyErr_SetString(PyExc_ValueError, "invalid hash log value"); |
|
|||
91 | return -1; |
|
129 | return -1; | |
92 | } |
|
130 | } | |
93 |
|
131 | |||
94 | if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) { |
|
132 | if (threads < 0) { | |
95 | PyErr_SetString(PyExc_ValueError, "invalid search log value"); |
|
133 | threads = cpu_count(); | |
96 | return -1; |
|
|||
97 | } |
|
134 | } | |
98 |
|
135 | |||
99 | if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) { |
|
136 | if (compressLiterals < 0) { | |
100 | PyErr_SetString(PyExc_ValueError, "invalid search length value"); |
|
137 | compressLiterals = compressionLevel >= 0; | |
101 | return -1; |
|
|||
102 | } |
|
|||
103 |
|
||||
104 | if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) { |
|
|||
105 | PyErr_SetString(PyExc_ValueError, "invalid target length value"); |
|
|||
106 | return -1; |
|
|||
107 | } |
|
138 | } | |
108 |
|
139 | |||
109 | if (strategy < ZSTD_fast || strategy > ZSTD_btopt) { |
|
140 | self->format = format; | |
110 | PyErr_SetString(PyExc_ValueError, "invalid strategy value"); |
|
141 | self->compressionLevel = compressionLevel; | |
111 | return -1; |
|
|||
112 | } |
|
|||
113 |
|
||||
114 | self->windowLog = windowLog; |
|
142 | self->windowLog = windowLog; | |
|
143 | self->hashLog = hashLog; | |||
115 | self->chainLog = chainLog; |
|
144 | self->chainLog = chainLog; | |
116 | self->hashLog = hashLog; |
|
|||
117 | self->searchLog = searchLog; |
|
145 | self->searchLog = searchLog; | |
118 | self->searchLength = searchLength; |
|
146 | self->minMatch = minMatch; | |
119 | self->targetLength = targetLength; |
|
147 | self->targetLength = targetLength; | |
120 | self->strategy = strategy; |
|
148 | self->compressionStrategy = compressionStrategy; | |
|
149 | self->contentSizeFlag = contentSizeFlag; | |||
|
150 | self->checksumFlag = checksumFlag; | |||
|
151 | self->dictIDFlag = dictIDFlag; | |||
|
152 | self->threads = threads; | |||
|
153 | self->jobSize = jobSize; | |||
|
154 | self->overlapSizeLog = overlapSizeLog; | |||
|
155 | self->compressLiterals = compressLiterals; | |||
|
156 | self->forceMaxWindow = forceMaxWindow; | |||
|
157 | self->enableLongDistanceMatching = enableLDM; | |||
|
158 | self->ldmHashLog = ldmHashLog; | |||
|
159 | self->ldmMinMatch = ldmMinMatch; | |||
|
160 | self->ldmBucketSizeLog = ldmBucketSizeLog; | |||
|
161 | self->ldmHashEveryLog = ldmHashEveryLog; | |||
121 |
|
162 | |||
122 | ztopy_compression_parameters(self, ¶ms); |
|
163 | if (reset_params(self)) { | |
123 | zresult = ZSTD_checkCParams(params); |
|
|||
124 |
|
||||
125 | if (ZSTD_isError(zresult)) { |
|
|||
126 | PyErr_Format(PyExc_ValueError, "invalid compression parameters: %s", |
|
|||
127 | ZSTD_getErrorName(zresult)); |
|
|||
128 | return -1; |
|
164 | return -1; | |
129 | } |
|
165 | } | |
130 |
|
166 | |||
131 | return 0; |
|
167 | return 0; | |
132 | } |
|
168 | } | |
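
From Python, the keyword arguments parsed above map directly onto these fields; a hedged sketch of explicit construction (constant and argument names as exposed by the module are assumed)::

    import zstandard as zstd

    params = zstd.ZstdCompressionParameters(
        window_log=22,
        chain_log=21,
        hash_log=21,
        search_log=5,
        min_match=4,
        target_length=0,
        compression_strategy=zstd.STRATEGY_LAZY2,
        write_checksum=1,
        threads=2,
    )

    cctx = zstd.ZstdCompressor(compression_params=params)
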
133 |
|
169 | |||
134 | PyDoc_STRVAR(CompressionParameters_ |
|
170 | PyDoc_STRVAR(ZstdCompressionParameters_from_level__doc__, | |
|
171 | "Create a CompressionParameters from a compression level and target sizes\n" | |||
|
172 | ); | |||
|
173 | ||||
|
174 | ZstdCompressionParametersObject* CompressionParameters_from_level(PyObject* undef, PyObject* args, PyObject* kwargs) { | |||
|
175 | int managedKwargs = 0; | |||
|
176 | int level; | |||
|
177 | PyObject* sourceSize = NULL; | |||
|
178 | PyObject* dictSize = NULL; | |||
|
179 | unsigned PY_LONG_LONG iSourceSize = 0; | |||
|
180 | Py_ssize_t iDictSize = 0; | |||
|
181 | PyObject* val; | |||
|
182 | ZSTD_compressionParameters params; | |||
|
183 | ZstdCompressionParametersObject* result = NULL; | |||
|
184 | int res; | |||
|
185 | ||||
|
186 | if (!PyArg_ParseTuple(args, "i:from_level", | |||
|
187 | &level)) { | |||
|
188 | return NULL; | |||
|
189 | } | |||
|
190 | ||||
|
191 | if (!kwargs) { | |||
|
192 | kwargs = PyDict_New(); | |||
|
193 | if (!kwargs) { | |||
|
194 | return NULL; | |||
|
195 | } | |||
|
196 | managedKwargs = 1; | |||
|
197 | } | |||
|
198 | ||||
|
199 | sourceSize = PyDict_GetItemString(kwargs, "source_size"); | |||
|
200 | if (sourceSize) { | |||
|
201 | #if PY_MAJOR_VERSION >= 3 | |||
|
202 | iSourceSize = PyLong_AsUnsignedLongLong(sourceSize); | |||
|
203 | if (iSourceSize == (unsigned PY_LONG_LONG)(-1)) { | |||
|
204 | goto cleanup; | |||
|
205 | } | |||
|
206 | #else | |||
|
207 | iSourceSize = PyInt_AsUnsignedLongLongMask(sourceSize); | |||
|
208 | #endif | |||
|
209 | ||||
|
210 | PyDict_DelItemString(kwargs, "source_size"); | |||
|
211 | } | |||
|
212 | ||||
|
213 | dictSize = PyDict_GetItemString(kwargs, "dict_size"); | |||
|
214 | if (dictSize) { | |||
|
215 | #if PY_MAJOR_VERSION >= 3 | |||
|
216 | iDictSize = PyLong_AsSsize_t(dictSize); | |||
|
217 | #else | |||
|
218 | iDictSize = PyInt_AsSsize_t(dictSize); | |||
|
219 | #endif | |||
|
220 | if (iDictSize == -1) { | |||
|
221 | goto cleanup; | |||
|
222 | } | |||
|
223 | ||||
|
224 | PyDict_DelItemString(kwargs, "dict_size"); | |||
|
225 | } | |||
|
226 | ||||
|
227 | ||||
|
228 | params = ZSTD_getCParams(level, iSourceSize, iDictSize); | |||
|
229 | ||||
|
230 | /* Values derived from the input level and sizes are passed along to the | |||
|
231 | constructor. But only if a value doesn't already exist. */ | |||
|
232 | val = PyDict_GetItemString(kwargs, "window_log"); | |||
|
233 | if (!val) { | |||
|
234 | val = PyLong_FromUnsignedLong(params.windowLog); | |||
|
235 | if (!val) { | |||
|
236 | goto cleanup; | |||
|
237 | } | |||
|
238 | PyDict_SetItemString(kwargs, "window_log", val); | |||
|
239 | Py_DECREF(val); | |||
|
240 | } | |||
|
241 | ||||
|
242 | val = PyDict_GetItemString(kwargs, "chain_log"); | |||
|
243 | if (!val) { | |||
|
244 | val = PyLong_FromUnsignedLong(params.chainLog); | |||
|
245 | if (!val) { | |||
|
246 | goto cleanup; | |||
|
247 | } | |||
|
248 | PyDict_SetItemString(kwargs, "chain_log", val); | |||
|
249 | Py_DECREF(val); | |||
|
250 | } | |||
|
251 | ||||
|
252 | val = PyDict_GetItemString(kwargs, "hash_log"); | |||
|
253 | if (!val) { | |||
|
254 | val = PyLong_FromUnsignedLong(params.hashLog); | |||
|
255 | if (!val) { | |||
|
256 | goto cleanup; | |||
|
257 | } | |||
|
258 | PyDict_SetItemString(kwargs, "hash_log", val); | |||
|
259 | Py_DECREF(val); | |||
|
260 | } | |||
|
261 | ||||
|
262 | val = PyDict_GetItemString(kwargs, "search_log"); | |||
|
263 | if (!val) { | |||
|
264 | val = PyLong_FromUnsignedLong(params.searchLog); | |||
|
265 | if (!val) { | |||
|
266 | goto cleanup; | |||
|
267 | } | |||
|
268 | PyDict_SetItemString(kwargs, "search_log", val); | |||
|
269 | Py_DECREF(val); | |||
|
270 | } | |||
|
271 | ||||
|
272 | val = PyDict_GetItemString(kwargs, "min_match"); | |||
|
273 | if (!val) { | |||
|
274 | val = PyLong_FromUnsignedLong(params.searchLength); | |||
|
275 | if (!val) { | |||
|
276 | goto cleanup; | |||
|
277 | } | |||
|
278 | PyDict_SetItemString(kwargs, "min_match", val); | |||
|
279 | Py_DECREF(val); | |||
|
280 | } | |||
|
281 | ||||
|
282 | val = PyDict_GetItemString(kwargs, "target_length"); | |||
|
283 | if (!val) { | |||
|
284 | val = PyLong_FromUnsignedLong(params.targetLength); | |||
|
285 | if (!val) { | |||
|
286 | goto cleanup; | |||
|
287 | } | |||
|
288 | PyDict_SetItemString(kwargs, "target_length", val); | |||
|
289 | Py_DECREF(val); | |||
|
290 | } | |||
|
291 | ||||
|
292 | val = PyDict_GetItemString(kwargs, "compression_strategy"); | |||
|
293 | if (!val) { | |||
|
294 | val = PyLong_FromUnsignedLong(params.strategy); | |||
|
295 | if (!val) { | |||
|
296 | goto cleanup; | |||
|
297 | } | |||
|
298 | PyDict_SetItemString(kwargs, "compression_strategy", val); | |||
|
299 | Py_DECREF(val); | |||
|
300 | } | |||
|
301 | ||||
|
302 | val = PyDict_GetItemString(kwargs, "compress_literals"); | |||
|
303 | if (!val) { | |||
|
304 | val = PyLong_FromLong(level >= 0 ? 1 : 0); | |||
|
305 | if (!val) { | |||
|
306 | goto cleanup; | |||
|
307 | } | |||
|
308 | PyDict_SetItemString(kwargs, "compress_literals", val); | |||
|
309 | Py_DECREF(val); | |||
|
310 | } | |||
|
311 | ||||
|
312 | result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType); | |||
|
313 | if (!result) { | |||
|
314 | goto cleanup; | |||
|
315 | } | |||
|
316 | ||||
|
317 | result->params = NULL; | |||
|
318 | ||||
|
319 | val = PyTuple_New(0); | |||
|
320 | if (!val) { | |||
|
321 | Py_CLEAR(result); | |||
|
322 | goto cleanup; | |||
|
323 | } | |||
|
324 | ||||
|
325 | res = ZstdCompressionParameters_init(result, val, kwargs); | |||
|
326 | Py_DECREF(val); | |||
|
327 | ||||
|
328 | if (res) { | |||
|
329 | Py_CLEAR(result); | |||
|
330 | goto cleanup; | |||
|
331 | } | |||
|
332 | ||||
|
333 | cleanup: | |||
|
334 | if (managedKwargs) { | |||
|
335 | Py_DECREF(kwargs); | |||
|
336 | } | |||
|
337 | ||||
|
338 | return result; | |||
|
339 | } | |||
|
340 | ||||
|
341 | PyDoc_STRVAR(ZstdCompressionParameters_estimated_compression_context_size__doc__, | |||
135 | "Estimate the size in bytes of a compression context for compression parameters\n" |
|
342 | "Estimate the size in bytes of a compression context for compression parameters\n" | |
136 | ); |
|
343 | ); | |
137 |
|
344 | |||
138 | PyObject* CompressionParameters_estimated_compression_context_size(CompressionParametersObject* self) { |
|
345 | PyObject* ZstdCompressionParameters_estimated_compression_context_size(ZstdCompressionParametersObject* self) { | |
139 | ZSTD_compressionParameters params; |
|
346 | return PyLong_FromSize_t(ZSTD_estimateCCtxSize_usingCCtxParams(self->params)); | |
140 |
|
||||
141 | ztopy_compression_parameters(self, ¶ms); |
|
|||
142 |
|
||||
143 | return PyLong_FromSize_t(ZSTD_estimateCCtxSize(params)); |
|
|||
144 | } |
|
347 | } | |
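
The ``from_level()`` helper above fills in whatever was not given explicitly from the level-derived defaults; a sketch::

    import zstandard as zstd

    params = zstd.ZstdCompressionParameters.from_level(
        3, source_size=1000000, dict_size=0)

    # Derived values are readable via the read-only members.
    print(params.window_log, params.compression_strategy)

    # Approximate memory needed by a compression context using these
    # parameters.
    print(params.estimated_compression_context_size())
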
145 |
|
348 | |||
146 | PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) { |
|
349 | PyDoc_STRVAR(ZstdCompressionParameters__doc__, | |
147 | CompressionParametersObject* params; |
|
350 | "ZstdCompressionParameters: low-level control over zstd compression"); | |
148 | ZSTD_compressionParameters zparams; |
|
|||
149 | PyObject* result; |
|
|||
150 |
|
351 | |||
151 | if (!PyArg_ParseTuple(args, "O!:estimate_compression_context_size", |
|
352 | static void ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject* self) { | |
152 | &CompressionParametersType, ¶ms)) { |
|
353 | if (self->params) { | |
153 | return NULL; |
|
354 | ZSTD_freeCCtxParams(self->params); | |
|
355 | self->params = NULL; | |||
154 | } |
|
356 | } | |
155 |
|
357 | |||
156 | ztopy_compression_parameters(params, &zparams); |
|
|||
157 | result = PyLong_FromSize_t(ZSTD_estimateCCtxSize(zparams)); |
|
|||
158 | return result; |
|
|||
159 | } |
|
|||
160 |
|
||||
161 | PyDoc_STRVAR(CompressionParameters__doc__, |
|
|||
162 | "CompressionParameters: low-level control over zstd compression"); |
|
|||
163 |
|
||||
164 | static void CompressionParameters_dealloc(PyObject* self) { |
|
|||
165 | PyObject_Del(self); |
|
358 | PyObject_Del(self); | |
166 | } |
|
359 | } | |
167 |
|
360 | |||
168 | static PyMethodDef CompressionParameters_methods[] = { |
|
+static PyMethodDef ZstdCompressionParameters_methods[] = {
+    {
+        "from_level",
+        (PyCFunction)CompressionParameters_from_level,
+        METH_VARARGS | METH_KEYWORDS | METH_STATIC,
+        ZstdCompressionParameters_from_level__doc__
+    },
     {
         "estimated_compression_context_size",
-        (PyCFunction)CompressionParameters_estimated_compression_context_size,
+        (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size,
         METH_NOARGS,
-        CompressionParameters_estimated_compression_context_size__doc__
+        ZstdCompressionParameters_estimated_compression_context_size__doc__
     },
     { NULL, NULL }
 };
 
-static PyMemberDef CompressionParameters_members[] = {
+static PyMemberDef ZstdCompressionParameters_members[] = {
+    { "format", T_UINT,
+      offsetof(ZstdCompressionParametersObject, format), READONLY,
+      "compression format" },
+    { "compression_level", T_INT,
+      offsetof(ZstdCompressionParametersObject, compressionLevel), READONLY,
+      "compression level" },
     { "window_log", T_UINT,
-      offsetof(CompressionParametersObject, windowLog), READONLY,
+      offsetof(ZstdCompressionParametersObject, windowLog), READONLY,
       "window log" },
-    { "chain_log", T_UINT,
-      offsetof(CompressionParametersObject, chainLog), READONLY,
-      "chain log" },
     { "hash_log", T_UINT,
-      offsetof(CompressionParametersObject, hashLog), READONLY,
+      offsetof(ZstdCompressionParametersObject, hashLog), READONLY,
       "hash log" },
+    { "chain_log", T_UINT,
+      offsetof(ZstdCompressionParametersObject, chainLog), READONLY,
+      "chain log" },
     { "search_log", T_UINT,
-      offsetof(CompressionParametersObject, searchLog), READONLY,
+      offsetof(ZstdCompressionParametersObject, searchLog), READONLY,
       "search log" },
-    { "search_length", T_UINT,
-      offsetof(CompressionParametersObject, searchLength), READONLY,
+    { "min_match", T_UINT,
+      offsetof(ZstdCompressionParametersObject, minMatch), READONLY,
       "search length" },
     { "target_length", T_UINT,
-      offsetof(CompressionParametersObject, targetLength), READONLY,
+      offsetof(ZstdCompressionParametersObject, targetLength), READONLY,
       "target length" },
-    { "strategy", T_INT,
-      offsetof(CompressionParametersObject, strategy), READONLY,
-      "strategy" },
+    { "compression_strategy", T_UINT,
+      offsetof(ZstdCompressionParametersObject, compressionStrategy), READONLY,
+      "compression strategy" },
+    { "write_content_size", T_UINT,
+      offsetof(ZstdCompressionParametersObject, contentSizeFlag), READONLY,
+      "whether to write content size in frames" },
+    { "write_checksum", T_UINT,
+      offsetof(ZstdCompressionParametersObject, checksumFlag), READONLY,
+      "whether to write checksum in frames" },
+    { "write_dict_id", T_UINT,
+      offsetof(ZstdCompressionParametersObject, dictIDFlag), READONLY,
+      "whether to write dictionary ID in frames" },
+    { "threads", T_UINT,
+      offsetof(ZstdCompressionParametersObject, threads), READONLY,
+      "number of threads to use" },
+    { "job_size", T_UINT,
+      offsetof(ZstdCompressionParametersObject, jobSize), READONLY,
+      "size of compression job when using multiple threads" },
+    { "overlap_size_log", T_UINT,
+      offsetof(ZstdCompressionParametersObject, overlapSizeLog), READONLY,
+      "Size of previous input reloaded at the beginning of each job" },
+    { "compress_literals", T_UINT,
+      offsetof(ZstdCompressionParametersObject, compressLiterals), READONLY,
+      "whether Huffman compression of literals is in use" },
+    { "force_max_window", T_UINT,
+      offsetof(ZstdCompressionParametersObject, forceMaxWindow), READONLY,
+      "force back references to remain smaller than window size" },
+    { "enable_ldm", T_UINT,
+      offsetof(ZstdCompressionParametersObject, enableLongDistanceMatching), READONLY,
+      "whether to enable long distance matching" },
+    { "ldm_hash_log", T_UINT,
+      offsetof(ZstdCompressionParametersObject, ldmHashLog), READONLY,
+      "Size of the table for long distance matching, as a power of 2" },
+    { "ldm_min_match", T_UINT,
+      offsetof(ZstdCompressionParametersObject, ldmMinMatch), READONLY,
+      "minimum size of searched matches for long distance matcher" },
+    { "ldm_bucket_size_log", T_UINT,
+      offsetof(ZstdCompressionParametersObject, ldmBucketSizeLog), READONLY,
+      "log size of each bucket in the LDM hash table for collision resolution" },
+    { "ldm_hash_every_log", T_UINT,
+      offsetof(ZstdCompressionParametersObject, ldmHashEveryLog), READONLY,
+      "frequency of inserting/looking up entries in the LDM hash table" },
     { NULL }
 };
 
-PyTypeObject CompressionParametersType = {
+PyTypeObject ZstdCompressionParametersType = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    "CompressionParameters", /* tp_name */
-    sizeof(CompressionParametersObject), /* tp_basicsize */
+    "ZstdCompressionParameters", /* tp_name */
+    sizeof(ZstdCompressionParametersObject), /* tp_basicsize */
     0, /* tp_itemsize */
-    (destructor)CompressionParameters_dealloc, /* tp_dealloc */
+    (destructor)ZstdCompressionParameters_dealloc, /* tp_dealloc */
     0, /* tp_print */
     0, /* tp_getattr */
     0, /* tp_setattr */
@@ -221,33 +465,38 @@ PyTypeObject CompressionParametersType =
     0, /* tp_setattro */
     0, /* tp_as_buffer */
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
-    CompressionParameters__doc__, /* tp_doc */
+    ZstdCompressionParameters__doc__, /* tp_doc */
     0, /* tp_traverse */
     0, /* tp_clear */
     0, /* tp_richcompare */
     0, /* tp_weaklistoffset */
     0, /* tp_iter */
     0, /* tp_iternext */
-    CompressionParameters_methods, /* tp_methods */
-    CompressionParameters_members, /* tp_members */
+    ZstdCompressionParameters_methods, /* tp_methods */
+    ZstdCompressionParameters_members, /* tp_members */
     0, /* tp_getset */
     0, /* tp_base */
     0, /* tp_dict */
     0, /* tp_descr_get */
     0, /* tp_descr_set */
     0, /* tp_dictoffset */
-    (initproc)CompressionParameters_init, /* tp_init */
+    (initproc)ZstdCompressionParameters_init, /* tp_init */
     0, /* tp_alloc */
     PyType_GenericNew, /* tp_new */
 };
 
 void compressionparams_module_init(PyObject* mod) {
-    Py_TYPE(&CompressionParametersType) = &PyType_Type;
-    if (PyType_Ready(&CompressionParametersType) < 0) {
+    Py_TYPE(&ZstdCompressionParametersType) = &PyType_Type;
+    if (PyType_Ready(&ZstdCompressionParametersType) < 0) {
         return;
     }
 
-    Py_INCREF(&CompressionParametersType);
+    Py_INCREF(&ZstdCompressionParametersType);
+    PyModule_AddObject(mod, "ZstdCompressionParameters",
+        (PyObject*)&ZstdCompressionParametersType);
+
+    /* TODO remove deprecated alias. */
+    Py_INCREF(&ZstdCompressionParametersType);
     PyModule_AddObject(mod, "CompressionParameters",
-        (PyObject*)&CompressionParametersType);
+        (PyObject*)&ZstdCompressionParametersType);
 }
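
A note on how these parameter objects get consumed: later hunks in this series have ZstdCompressor stage its configuration on a ZSTD_CCtx_params object (through the project's set_parameter()/set_parameters() helpers) and apply it with ZSTD_CCtx_setParametersUsingCCtxParams() inside ensure_cctx(). The following is only a rough sketch of that flow with the CPython glue stripped away; it assumes the zstd 1.3.x advanced-API spellings ZSTD_createCCtxParams() and ZSTD_CCtxParam_setParameter(), which do not themselves appear in this diff.

/* Hedged sketch, not part of this patch: stage parameters on a
 * ZSTD_CCtx_params object and apply them to a context in one call,
 * mirroring what ensure_cctx() does after this series. */
#define ZSTD_STATIC_LINKING_ONLY /* advanced API was still "experimental" in this zstd era */
#include <zstd.h>

static int apply_params_sketch(ZSTD_CCtx* cctx, unsigned level, unsigned threads) {
    ZSTD_CCtx_params* params = ZSTD_createCCtxParams();
    size_t zresult;

    if (!params) {
        return 1;
    }

    /* The enum names below are the ones ZstdCompressor_init() uses later in this diff. */
    ZSTD_CCtxParam_setParameter(params, ZSTD_p_compressionLevel, level);
    ZSTD_CCtxParam_setParameter(params, ZSTD_p_checksumFlag, 1);
    if (threads) {
        ZSTD_CCtxParam_setParameter(params, ZSTD_p_nbWorkers, threads);
    }

    /* Reset the context, then apply the staged configuration in one call. */
    ZSTD_CCtx_reset(cctx);
    zresult = ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);

    ZSTD_freeCCtxParams(params);
    return ZSTD_isError(zresult) ? 1 : 0;
}

Keeping the durable copy on the params object is what lets ensure_cctx() reset the ZSTD_CCtx before every operation and then restore the same configuration.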
@@ -22,20 +22,18 @@ static void ZstdCompressionWriter_deallo
 }
 
 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
+    size_t zresult;
+
     if (self->entered) {
         PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
         return NULL;
     }
 
-    if (self->compressor->mtcctx) {
-        if (init_mtcstream(self->compressor, self->sourceSize)) {
-            return NULL;
-        }
-    }
-    else {
-        if (0 != init_cstream(self->compressor, self->sourceSize)) {
-            return NULL;
-        }
+    zresult = ZSTD_CCtx_setPledgedSrcSize(self->compressor->cctx, self->sourceSize);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error setting source size: %s",
+            ZSTD_getErrorName(zresult));
+        return NULL;
     }
 
     self->entered = 1;
@@ -59,8 +57,12 @@ static PyObject* ZstdCompressionWriter_e
 
     self->entered = 0;
 
-    if ((self->compressor->cstream || self->compressor->mtcctx) && exc_type == Py_None
-        && exc_value == Py_None && exc_tb == Py_None) {
+    if (exc_type == Py_None && exc_value == Py_None && exc_tb == Py_None) {
+        ZSTD_inBuffer inBuffer;
+
+        inBuffer.src = NULL;
+        inBuffer.size = 0;
+        inBuffer.pos = 0;
 
         output.dst = PyMem_Malloc(self->outSize);
         if (!output.dst) {
@@ -70,12 +72,7 @@ static PyObject* ZstdCompressionWriter_e
         output.pos = 0;
 
         while (1) {
-            if (self->compressor->mtcctx) {
-                zresult = ZSTDMT_endStream(self->compressor->mtcctx, &output);
-            }
-            else {
-                zresult = ZSTD_endStream(self->compressor->cstream, &output);
-            }
+            zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &inBuffer, ZSTD_e_end);
             if (ZSTD_isError(zresult)) {
                 PyErr_Format(ZstdError, "error ending compression stream: %s",
                     ZSTD_getErrorName(zresult));
@@ -107,18 +104,17 @@ static PyObject* ZstdCompressionWriter_e
 }
 
 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
-    if (!self->compressor->cstream) {
-        PyErr_SetString(ZstdError, "cannot determine size of an inactive compressor; "
-            "call when a context manager is active");
-        return NULL;
-    }
-
-    return PyLong_FromSize_t(ZSTD_sizeof_CStream(self->compressor->cstream));
+    return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->compressor->cctx));
 }
 
-static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args) {
-    const char* source;
-    Py_ssize_t sourceSize;
+static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "data",
+        NULL
+    };
+
+    PyObject* result = NULL;
+    Py_buffer source;
     size_t zresult;
     ZSTD_inBuffer input;
     ZSTD_outBuffer output;
@@ -126,44 +122,46 @@ static PyObject* ZstdCompressionWriter_w
     Py_ssize_t totalWrite = 0;
 
 #if PY_MAJOR_VERSION >= 3
-    if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write",
 #else
-    if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write",
 #endif
+        kwlist, &source)) {
         return NULL;
     }
 
     if (!self->entered) {
         PyErr_SetString(ZstdError, "compress must be called from an active context manager");
-        return NULL;
+        goto finally;
+    }
+
+    if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "data buffer should be contiguous and have at most one dimension");
+        goto finally;
     }
 
     output.dst = PyMem_Malloc(self->outSize);
     if (!output.dst) {
-        return PyErr_NoMemory();
+        PyErr_NoMemory();
+        goto finally;
     }
     output.size = self->outSize;
     output.pos = 0;
 
-    input.src = source;
-    input.size = sourceSize;
+    input.src = source.buf;
+    input.size = source.len;
     input.pos = 0;
 
-    while ((ssize_t)input.pos < sourceSize) {
+    while ((ssize_t)input.pos < source.len) {
         Py_BEGIN_ALLOW_THREADS
-        if (self->compressor->mtcctx) {
-            zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
-                &output, &input);
-        }
-        else {
-            zresult = ZSTD_compressStream(self->compressor->cstream, &output, &input);
-        }
+        zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &input, ZSTD_e_continue);
         Py_END_ALLOW_THREADS
 
         if (ZSTD_isError(zresult)) {
             PyMem_Free(output.dst);
             PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
-            return NULL;
+            goto finally;
         }
 
         /* Copy data from output buffer to writer. */
@@ -176,18 +174,24 @@ static PyObject* ZstdCompressionWriter_w
                 output.dst, output.pos);
             Py_XDECREF(res);
             totalWrite += output.pos;
+            self->bytesCompressed += output.pos;
         }
         output.pos = 0;
     }
 
     PyMem_Free(output.dst);
 
-    return PyLong_FromSsize_t(totalWrite);
+    result = PyLong_FromSsize_t(totalWrite);
+
+finally:
+    PyBuffer_Release(&source);
+    return result;
 }
 
 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
     size_t zresult;
     ZSTD_outBuffer output;
+    ZSTD_inBuffer input;
     PyObject* res;
     Py_ssize_t totalWrite = 0;
 
@@ -196,6 +200,10 @@ static PyObject* ZstdCompressionWriter_f
         return NULL;
     }
 
+    input.src = NULL;
+    input.size = 0;
+    input.pos = 0;
+
     output.dst = PyMem_Malloc(self->outSize);
     if (!output.dst) {
         return PyErr_NoMemory();
@@ -205,12 +213,7 @@ static PyObject* ZstdCompressionWriter_f
 
     while (1) {
         Py_BEGIN_ALLOW_THREADS
-        if (self->compressor->mtcctx) {
-            zresult = ZSTDMT_flushStream(self->compressor->mtcctx, &output);
-        }
-        else {
-            zresult = ZSTD_flushStream(self->compressor->cstream, &output);
-        }
+        zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &input, ZSTD_e_flush);
         Py_END_ALLOW_THREADS
 
         if (ZSTD_isError(zresult)) {
@@ -233,6 +236,7 @@ static PyObject* ZstdCompressionWriter_f
                 output.dst, output.pos);
             Py_XDECREF(res);
             totalWrite += output.pos;
+            self->bytesCompressed += output.pos;
         }
         output.pos = 0;
     }
@@ -242,6 +246,10 @@ static PyObject* ZstdCompressionWriter_f
     return PyLong_FromSsize_t(totalWrite);
 }
 
+static PyObject* ZstdCompressionWriter_tell(ZstdCompressionWriter* self) {
+    return PyLong_FromUnsignedLongLong(self->bytesCompressed);
+}
+
 static PyMethodDef ZstdCompressionWriter_methods[] = {
     { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
     PyDoc_STR("Enter a compression context.") },
@@ -249,10 +257,12 @@ static PyMethodDef ZstdCompressionWriter
     PyDoc_STR("Exit a compression context.") },
     { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
     PyDoc_STR("Obtain the memory size of the underlying compressor") },
-    { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS,
+    { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
     PyDoc_STR("Compress data") },
    { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_NOARGS,
     PyDoc_STR("Flush data and finish a zstd frame") },
+    { "tell", (PyCFunction)ZstdCompressionWriter_tell, METH_NOARGS,
+    PyDoc_STR("Returns current number of bytes compressed") },
     { NULL, NULL }
 };
 
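
The write()/flush()/__exit__ changes above all funnel into ZSTD_compress_generic() with a different end directive (ZSTD_e_continue, ZSTD_e_flush, ZSTD_e_end), replacing the separate ZSTD_*Stream and ZSTDMT_* code paths. A minimal standalone sketch of the drive-to-completion loop, with the Python object plumbing removed (assumes the zstd 1.3.x advanced API; not code from this patch):

/* Hedged sketch: compress one buffer and finish the frame on a single
 * context, looping on ZSTD_e_end until zstd reports nothing left to flush.
 * Assumes dstCapacity >= ZSTD_compressBound(srcSize) so the loop terminates
 * without having to drain dst in between. */
#define ZSTD_STATIC_LINKING_ONLY
#include <stdio.h>
#include <zstd.h>

static size_t stream_one_frame(ZSTD_CCtx* cctx,
    const void* src, size_t srcSize, void* dst, size_t dstCapacity) {
    ZSTD_inBuffer input = { src, srcSize, 0 };
    ZSTD_outBuffer output = { dst, dstCapacity, 0 };
    size_t zresult;

    do {
        /* ZSTD_e_end compresses the remaining input and writes the frame
         * epilogue; a non-zero return means more flushing is needed. */
        zresult = ZSTD_compress_generic(cctx, &output, &input, ZSTD_e_end);
        if (ZSTD_isError(zresult)) {
            fprintf(stderr, "compress error: %s\n", ZSTD_getErrorName(zresult));
            return 0;
        }
    } while (zresult != 0);

    return output.pos; /* number of compressed bytes written to dst */
}

write() above passes ZSTD_e_continue because it only wants to push more input into the frame; the epilogue is emitted once, with ZSTD_e_end, when the context manager exits.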
@@ -23,9 +23,13 @@ static void ZstdCompressionObj_dealloc(Z
     PyObject_Del(self);
 }
 
-static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args) {
-    const char* source;
-    Py_ssize_t sourceSize;
+static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "data",
+        NULL
+    };
+
+    Py_buffer source;
     ZSTD_inBuffer input;
     size_t zresult;
     PyObject* result = NULL;
@@ -37,38 +41,43 @@ static PyObject* ZstdCompressionObj_comp
     }
 
 #if PY_MAJOR_VERSION >= 3
-    if (!PyArg_ParseTuple(args, "y#:compress", &source, &sourceSize)) {
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
 #else
-    if (!PyArg_ParseTuple(args, "s#:compress", &source, &sourceSize)) {
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
 #endif
+        kwlist, &source)) {
         return NULL;
     }
 
-    input.src = source;
-    input.size = sourceSize;
+    if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "data buffer should be contiguous and have at most one dimension");
+        goto finally;
+    }
+
+    input.src = source.buf;
+    input.size = source.len;
     input.pos = 0;
 
-    while ((ssize_t)input.pos < sourceSize) {
+    while ((ssize_t)input.pos < source.len) {
         Py_BEGIN_ALLOW_THREADS
-        if (self->compressor->mtcctx) {
-            zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
-                &self->output, &input);
-        }
-        else {
-            zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, &input);
-        }
+        zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
+            &input, ZSTD_e_continue);
         Py_END_ALLOW_THREADS
 
         if (ZSTD_isError(zresult)) {
             PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
-            return NULL;
+            Py_CLEAR(result);
+            goto finally;
         }
 
         if (self->output.pos) {
             if (result) {
                 resultSize = PyBytes_GET_SIZE(result);
-                if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
-                    return NULL;
+
+                if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
+                    Py_CLEAR(result);
+                    goto finally;
                 }
 
                 memcpy(PyBytes_AS_STRING(result) + resultSize,
@@ -77,7 +86,7 @@ static PyObject* ZstdCompressionObj_comp
             else {
                 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
                 if (!result) {
-                    return NULL;
+                    goto finally;
                 }
             }
 
@@ -85,21 +94,29 @@ static PyObject* ZstdCompressionObj_comp
         }
     }
 
-    if (result) {
-        return result;
-    }
-    else {
-        return PyBytes_FromString("");
-    }
+    if (NULL == result) {
+        result = PyBytes_FromString("");
+    }
+
+finally:
+    PyBuffer_Release(&source);
+
+    return result;
 }
 
-static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args) {
+static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "flush_mode",
+        NULL
+    };
+
     int flushMode = compressorobj_flush_finish;
     size_t zresult;
     PyObject* result = NULL;
     Py_ssize_t resultSize = 0;
+    ZSTD_inBuffer input;
 
-    if (!PyArg_ParseTuple(args, "|i:flush", &flushMode)) {
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist, &flushMode)) {
         return NULL;
     }
 
@@ -115,16 +132,16 @@ static PyObject* ZstdCompressionObj_flus
 
     assert(self->output.pos == 0);
 
+    input.src = NULL;
+    input.size = 0;
+    input.pos = 0;
+
     if (flushMode == compressorobj_flush_block) {
         /* The output buffer is of size ZSTD_CStreamOutSize(), which is
            guaranteed to hold a full block. */
         Py_BEGIN_ALLOW_THREADS
-        if (self->compressor->mtcctx) {
-            zresult = ZSTDMT_flushStream(self->compressor->mtcctx, &self->output);
-        }
-        else {
-            zresult = ZSTD_flushStream(self->compressor->cstream, &self->output);
-        }
+        zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
+            &input, ZSTD_e_flush);
         Py_END_ALLOW_THREADS
 
         if (ZSTD_isError(zresult)) {
@@ -156,12 +173,8 @@ static PyObject* ZstdCompressionObj_flus
     self->finished = 1;
 
     while (1) {
-        if (self->compressor->mtcctx) {
-            zresult = ZSTDMT_endStream(self->compressor->mtcctx, &self->output);
-        }
-        else {
-            zresult = ZSTD_endStream(self->compressor->cstream, &self->output);
-        }
+        zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
+            &input, ZSTD_e_end);
         if (ZSTD_isError(zresult)) {
             PyErr_Format(ZstdError, "error ending compression stream: %s",
                 ZSTD_getErrorName(zresult));
@@ -171,7 +184,9 @@ static PyObject* ZstdCompressionObj_flus
         if (self->output.pos) {
             if (result) {
                 resultSize = PyBytes_GET_SIZE(result);
-                if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
+
+                if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
+                    Py_XDECREF(result);
                     return NULL;
                 }
 
@@ -202,9 +217,9 @@ static PyObject* ZstdCompressionObj_flus
 }
 
 static PyMethodDef ZstdCompressionObj_methods[] = {
-    { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS,
+    { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS | METH_KEYWORDS,
     PyDoc_STR("compress data") },
-    { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS,
+    { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS | METH_KEYWORDS,
     PyDoc_STR("finish compression operation") },
     { NULL, NULL }
 };
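
The compress()/flush() conversions above follow the same recipe as the writer: accept the argument as a Py_buffer ("y*"/"s*" instead of "y#"/"s#"), verify it is C-contiguous with at most one dimension, and release it on every exit path through a shared finally label. Stripped of the zstd work, the skeleton looks like this (hypothetical function name, Python 3 format string only; not code from the patch):

/* Hedged sketch of the Py_buffer handling pattern used by compress()/write()
 * in this patch. The buffer must be released exactly once, hence the single
 * exit path through the finally label. */
#include <Python.h>

static PyObject* example_accepting_data(PyObject* self, PyObject* args, PyObject* kwargs) {
    static char* kwlist[] = { "data", NULL };
    Py_buffer source;
    PyObject* result = NULL;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:example",
        kwlist, &source)) {
        return NULL; /* buffer never acquired; nothing to release */
    }

    if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
        PyErr_SetString(PyExc_ValueError,
            "data buffer should be contiguous and have at most one dimension");
        goto finally;
    }

    /* ... operate on source.buf / source.len here ... */
    result = PyLong_FromSsize_t(source.len);

finally:
    PyBuffer_Release(&source);
    return result;
}

The buffer form is what lets callers pass any buffer-protocol object (bytes, bytearray, memoryview, mmap) without an intermediate copy.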
@@ -11,118 +11,78 @@
 
 extern PyObject* ZstdError;
 
-int populate_cdict(ZstdCompressor* compressor, ZSTD_parameters* zparams) {
-    ZSTD_customMem zmem;
+int ensure_cctx(ZstdCompressor* compressor) {
+    size_t zresult;
+
+    assert(compressor);
+    assert(compressor->cctx);
+    assert(compressor->params);
 
-    if (compressor->cdict || !compressor->dict || !compressor->dict->dictData) {
-        return 0;
+    ZSTD_CCtx_reset(compressor->cctx);
+
+    zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, compressor->params);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "could not set compression parameters: %s",
+            ZSTD_getErrorName(zresult));
+        return 1;
     }
 
-    Py_BEGIN_ALLOW_THREADS
-    memset(&zmem, 0, sizeof(zmem));
-    compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData,
-        compressor->dict->dictSize, 1, *zparams, zmem);
-    Py_END_ALLOW_THREADS
-
-    if (!compressor->cdict) {
-        PyErr_SetString(ZstdError, "could not create compression dictionary");
-        return 1;
+    if (compressor->dict) {
+        if (compressor->dict->cdict) {
+            zresult = ZSTD_CCtx_refCDict(compressor->cctx, compressor->dict->cdict);
+        }
+        else {
+            zresult = ZSTD_CCtx_loadDictionary_advanced(compressor->cctx,
+                compressor->dict->dictData, compressor->dict->dictSize,
+                ZSTD_dlm_byRef, compressor->dict->dictType);
+        }
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "could not load compression dictionary: %s",
+                ZSTD_getErrorName(zresult));
+            return 1;
+        }
     }
 
     return 0;
 }
 
-/**
- * Ensure the ZSTD_CStream on a ZstdCompressor instance is initialized.
- *
- * Returns 0 on success. Other value on failure. Will set a Python exception
- * on failure.
- */
-int init_cstream(ZstdCompressor* compressor, unsigned long long sourceSize) {
-    ZSTD_parameters zparams;
-    void* dictData = NULL;
-    size_t dictSize = 0;
-    size_t zresult;
+static PyObject* frame_progression(ZSTD_CCtx* cctx) {
+    PyObject* result = NULL;
+    PyObject* value;
+    ZSTD_frameProgression progression;
 
-    if (compressor->cstream) {
-        zresult = ZSTD_resetCStream(compressor->cstream, sourceSize);
-        if (ZSTD_isError(zresult)) {
-            PyErr_Format(ZstdError, "could not reset CStream: %s",
-                ZSTD_getErrorName(zresult));
-            return -1;
-        }
-
-        return 0;
+    result = PyTuple_New(3);
+    if (!result) {
+        return NULL;
     }
 
-    compressor->cstream = ZSTD_createCStream();
-    if (!compressor->cstream) {
-        PyErr_SetString(ZstdError, "could not create CStream");
-        return -1;
-    }
+    progression = ZSTD_getFrameProgression(cctx);
 
-    if (compressor->dict) {
-        dictData = compressor->dict->dictData;
-        dictSize = compressor->dict->dictSize;
-    }
-
-    memset(&zparams, 0, sizeof(zparams));
-    if (compressor->cparams) {
-        ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
-        /* Do NOT call ZSTD_adjustCParams() here because the compression params
-        come from the user. */
-    }
-    else {
-        zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize);
-    }
+    value = PyLong_FromUnsignedLongLong(progression.ingested);
+    if (!value) {
+        Py_DECREF(result);
+        return NULL;
+    }
 
-    zparams.fParams = compressor->fparams;
-
-    zresult = ZSTD_initCStream_advanced(compressor->cstream, dictData, dictSize,
-        zparams, sourceSize);
-
-    if (ZSTD_isError(zresult)) {
-        ZSTD_freeCStream(compressor->cstream);
-        compressor->cstream = NULL;
-        PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
-        return -1;
-    }
+    PyTuple_SET_ITEM(result, 0, value);
 
-    return 0;;
-}
-
-int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize) {
-    size_t zresult;
-    void* dictData = NULL;
-    size_t dictSize = 0;
-    ZSTD_parameters zparams;
-
-    assert(compressor->mtcctx);
-
-    if (compressor->dict) {
-        dictData = compressor->dict->dictData;
-        dictSize = compressor->dict->dictSize;
-    }
+    value = PyLong_FromUnsignedLongLong(progression.consumed);
+    if (!value) {
+        Py_DECREF(result);
+        return NULL;
+    }
 
-    memset(&zparams, 0, sizeof(zparams));
-    if (compressor->cparams) {
-        ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
-    }
-    else {
-        zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize);
-    }
-
-    zparams.fParams = compressor->fparams;
-
-    zresult = ZSTDMT_initCStream_advanced(compressor->mtcctx, dictData, dictSize,
-        zparams, sourceSize);
-
-    if (ZSTD_isError(zresult)) {
-        PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
-        return -1;
-    }
-
-    return 0;
-}
+    PyTuple_SET_ITEM(result, 1, value);
+
+    value = PyLong_FromUnsignedLongLong(progression.produced);
+    if (!value) {
+        Py_DECREF(result);
+        return NULL;
+    }
+
+    PyTuple_SET_ITEM(result, 2, value);
+
+    return result;
+}
 
 PyDoc_STRVAR(ZstdCompressor__doc__,
@@ -147,9 +107,9 @@ PyDoc_STRVAR(ZstdCompressor__doc__,
 "   If True, a 4 byte content checksum will be written with the compressed\n"
 "   data, allowing the decompressor to perform content verification.\n"
 "write_content_size\n"
-"   If True, the decompressed content size will be included in\n"
-"   the compressed data. This data will only be written if the\n"
-"   knows the size of the input data.\n"
+"   If True (the default), the decompressed content size will be included in\n"
+"   the header of the compressed data. This data will only be written if the\n"
+"   compressor knows the size of the input data.\n"
 "write_dict_id\n"
 "   Determines whether the dictionary ID will be written into the compressed\n"
 "   data. Defaults to True. Only adds content to the compressed data if\n"
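
The reworded docstring above hinges on the compressor knowing the input size. In the rewritten code that knowledge is supplied with ZSTD_CCtx_setPledgedSrcSize() before any data is compressed; only then can the frame header carry a content size. A hedged sketch of the relationship, assuming the zstd 1.3.x advanced API and an output buffer of at least ZSTD_compressBound(srcSize) bytes (not code from this patch):

/* Hedged sketch: pledge the source size so the frame header records the
 * content size (with the content size flag left at its default of enabled). */
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

static size_t compress_with_content_size(ZSTD_CCtx* cctx,
    const void* src, size_t srcSize, void* dst, size_t dstCapacity) {
    ZSTD_inBuffer in = { src, srcSize, 0 };
    ZSTD_outBuffer out = { dst, dstCapacity, 0 };
    size_t zresult;

    /* Without this call the size is ZSTD_CONTENTSIZE_UNKNOWN and the header
     * omits the field, which is what the docstring is warning about. */
    zresult = ZSTD_CCtx_setPledgedSrcSize(cctx, (unsigned long long)srcSize);
    if (ZSTD_isError(zresult)) {
        return 0;
    }

    zresult = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
    if (ZSTD_isError(zresult) || zresult != 0) {
        return 0; /* error, or frame unexpectedly not fully flushed */
    }

    /* A decompressor can now read srcSize back from the frame header,
     * e.g. via ZSTD_getFrameContentSize(dst, out.pos). */
    return out.pos;
}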
@@ -175,7 +135,7 @@ static int ZstdCompressor_init(ZstdCompr
 
     int level = 3;
     ZstdCompressionDict* dict = NULL;
-    CompressionParametersObject* params = NULL;
+    ZstdCompressionParametersObject* params = NULL;
     PyObject* writeChecksum = NULL;
     PyObject* writeContentSize = NULL;
     PyObject* writeDictID = NULL;
@@ -183,16 +143,11 @@ static int ZstdCompressor_init(ZstdCompr
 
     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor",
         kwlist, &level, &ZstdCompressionDictType, &dict,
-        &CompressionParametersType, &params,
+        &ZstdCompressionParametersType, &params,
         &writeChecksum, &writeContentSize, &writeDictID, &threads)) {
         return -1;
     }
 
-    if (level < 1) {
-        PyErr_SetString(PyExc_ValueError, "level must be greater than 0");
-        return -1;
-    }
-
     if (level > ZSTD_maxCLevel()) {
         PyErr_Format(PyExc_ValueError, "level must be less than %d",
             ZSTD_maxCLevel() + 1);
@@ -203,79 +158,135 @@ static int ZstdCompressor_init(ZstdCompr
         threads = cpu_count();
     }
 
-    self->threads = threads;
-
     /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
        overhead of each compression operation. */
-    if (threads) {
-        self->mtcctx = ZSTDMT_createCCtx(threads);
-        if (!self->mtcctx) {
-            PyErr_NoMemory();
+    self->cctx = ZSTD_createCCtx();
+    if (!self->cctx) {
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    /* TODO stuff the original parameters away somewhere so we can reset later. This
+       will allow us to do things like automatically adjust cparams based on input
+       size (assuming zstd isn't doing that internally). */
+
+    self->params = ZSTD_createCCtxParams();
+    if (!self->params) {
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    if (params && writeChecksum) {
+        PyErr_SetString(PyExc_ValueError,
+            "cannot define compression_params and write_checksum");
+        return -1;
+    }
+
+    if (params && writeContentSize) {
+        PyErr_SetString(PyExc_ValueError,
+            "cannot define compression_params and write_content_size");
+        return -1;
+    }
+
+    if (params && writeDictID) {
+        PyErr_SetString(PyExc_ValueError,
+            "cannot define compression_params and write_dict_id");
+        return -1;
+    }
+
+    if (params && threads) {
+        PyErr_SetString(PyExc_ValueError,
+            "cannot define compression_params and threads");
+        return -1;
+    }
+
+    if (params) {
+        if (set_parameters(self->params, params)) {
             return -1;
         }
     }
     else {
-        self->cctx = ZSTD_createCCtx();
-        if (!self->cctx) {
-            PyErr_NoMemory();
+        if (set_parameter(self->params, ZSTD_p_compressionLevel, level)) {
+            return -1;
+        }
+
+        if (set_parameter(self->params, ZSTD_p_contentSizeFlag,
+            writeContentSize ? PyObject_IsTrue(writeContentSize) : 1)) {
+            return -1;
+        }
+
+        if (set_parameter(self->params, ZSTD_p_checksumFlag,
+            writeChecksum ? PyObject_IsTrue(writeChecksum) : 0)) {
             return -1;
         }
-    }
 
-    self->compressionLevel = level;
+        if (set_parameter(self->params, ZSTD_p_dictIDFlag,
+            writeDictID ? PyObject_IsTrue(writeDictID) : 1)) {
+            return -1;
+        }
+
+        if (threads) {
+            if (set_parameter(self->params, ZSTD_p_nbWorkers, threads)) {
+                return -1;
+            }
+        }
+    }
 
     if (dict) {
         self->dict = dict;
         Py_INCREF(dict);
     }
 
-    if (params) {
-        self->cparams = params;
-        Py_INCREF(params);
-    }
-
-    memset(&self->fparams, 0, sizeof(self->fparams));
-
-    if (writeChecksum && PyObject_IsTrue(writeChecksum)) {
-        self->fparams.checksumFlag = 1;
-    }
-    if (writeContentSize && PyObject_IsTrue(writeContentSize)) {
-        self->fparams.contentSizeFlag = 1;
-    }
-    if (writeDictID && PyObject_Not(writeDictID)) {
-        self->fparams.noDictIDFlag = 1;
+    if (ensure_cctx(self)) {
+        return -1;
     }
 
     return 0;
 }
 
 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
-    if (self->cstream) {
-        ZSTD_freeCStream(self->cstream);
-        self->cstream = NULL;
-    }
-
-    Py_XDECREF(self->cparams);
-    Py_XDECREF(self->dict);
-
-    if (self->cdict) {
-        ZSTD_freeCDict(self->cdict);
-        self->cdict = NULL;
-    }
-
     if (self->cctx) {
         ZSTD_freeCCtx(self->cctx);
         self->cctx = NULL;
     }
 
-    if (self->mtcctx) {
-        ZSTDMT_freeCCtx(self->mtcctx);
-        self->mtcctx = NULL;
+    if (self->params) {
+        ZSTD_freeCCtxParams(self->params);
+        self->params = NULL;
     }
 
+    Py_XDECREF(self->dict);
     PyObject_Del(self);
 }
 
+PyDoc_STRVAR(ZstdCompressor_memory_size__doc__,
+"memory_size()\n"
+"\n"
+"Obtain the memory usage of this compressor, in bytes.\n"
+);
+
+static PyObject* ZstdCompressor_memory_size(ZstdCompressor* self) {
+    if (self->cctx) {
+        return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->cctx));
+    }
+    else {
+        PyErr_SetString(ZstdError, "no compressor context found; this should never happen");
+        return NULL;
+    }
+}
+
+PyDoc_STRVAR(ZstdCompressor_frame_progression__doc__,
+"frame_progression()\n"
+"\n"
+"Return information on how much work the compressor has done.\n"
+"\n"
+"Returns a 3-tuple of (ingested, consumed, produced).\n"
+);
+
+static PyObject* ZstdCompressor_frame_progression(ZstdCompressor* self) {
+    return frame_progression(self->cctx);
+}
+
 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__,
 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n"
 "compress data between streams\n"
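
frame_progression(), added above, is a thin wrapper around ZSTD_getFrameProgression(). For reference, a small C-side sketch of what the three counters mean (assumes the same zstd 1.3.x API; not part of the patch):

/* Hedged sketch: report how far a compression context has progressed. The
 * three fields are the ones frame_progression() packs into its Python tuple. */
#define ZSTD_STATIC_LINKING_ONLY
#include <stdio.h>
#include <zstd.h>

static void print_progress(ZSTD_CCtx* cctx) {
    ZSTD_frameProgression p = ZSTD_getFrameProgression(cctx);

    printf("ingested: %llu bytes accepted from the caller\n",
        (unsigned long long)p.ingested);
    printf("consumed: %llu of those bytes actually compressed so far\n",
        (unsigned long long)p.consumed);
    printf("produced: %llu compressed bytes generated\n",
        (unsigned long long)p.produced);
}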
@@ -304,7 +315,7 @@ static PyObject* ZstdCompressor_copy_str
 
     PyObject* source;
     PyObject* dest;
-    Py_ssize_t sourceSize = 0;
+    unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
     size_t inSize = ZSTD_CStreamInSize();
     size_t outSize = ZSTD_CStreamOutSize();
     ZSTD_inBuffer input;
@@ -313,14 +324,14 @@ static PyObject* ZstdCompressor_copy_str
     Py_ssize_t totalWrite = 0;
     char* readBuffer;
     Py_ssize_t readSize;
-    PyObject* readResult;
+    PyObject* readResult = NULL;
     PyObject* res = NULL;
     size_t zresult;
     PyObject* writeResult;
     PyObject* totalReadPy;
     PyObject* totalWritePy;
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk:copy_stream", kwlist,
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|Kkk:copy_stream", kwlist,
         &source, &dest, &sourceSize, &inSize, &outSize)) {
         return NULL;
     }
@@ -335,22 +346,18 @@ static PyObject* ZstdCompressor_copy_str
         return NULL;
     }
 
-    /* Prevent free on uninitialized memory in finally. */
-    output.dst = NULL;
-
-    if (self->mtcctx) {
-        if (init_mtcstream(self, sourceSize)) {
-            res = NULL;
-            goto finally;
-        }
-    }
-    else {
-        if (0 != init_cstream(self, sourceSize)) {
-            res = NULL;
-            goto finally;
-        }
+    if (ensure_cctx(self)) {
+        return NULL;
     }
 
+    zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error setting source size: %s",
+            ZSTD_getErrorName(zresult));
+        return NULL;
+    }
+
+    /* Prevent free on uninitialized memory in finally. */
     output.dst = PyMem_Malloc(outSize);
     if (!output.dst) {
         PyErr_NoMemory();
@@ -360,6 +367,10 @@ static PyObject* ZstdCompressor_copy_str
     output.size = outSize;
     output.pos = 0;
 
+    input.src = NULL;
+    input.size = 0;
+    input.pos = 0;
+
     while (1) {
         /* Try to read from source stream. */
         readResult = PyObject_CallMethod(source, "read", "n", inSize);
@@ -384,12 +395,7 @@ static PyObject* ZstdCompressor_copy_str
 
         while (input.pos < input.size) {
             Py_BEGIN_ALLOW_THREADS
-            if (self->mtcctx) {
-                zresult = ZSTDMT_compressStream(self->mtcctx, &output, &input);
-            }
-            else {
-                zresult = ZSTD_compressStream(self->cstream, &output, &input);
-            }
+            zresult = ZSTD_compress_generic(self->cctx, &output, &input, ZSTD_e_continue);
             Py_END_ALLOW_THREADS
 
             if (ZSTD_isError(zresult)) {
@@ -410,16 +416,18 @@ static PyObject* ZstdCompressor_copy_str
                 output.pos = 0;
             }
         }
+
+        Py_CLEAR(readResult);
     }
 
     /* We've finished reading. Now flush the compressor stream. */
+    assert(input.pos == input.size);
+
     while (1) {
-        if (self->mtcctx) {
-            zresult = ZSTDMT_endStream(self->mtcctx, &output);
-        }
-        else {
-            zresult = ZSTD_endStream(self->cstream, &output);
-        }
+        Py_BEGIN_ALLOW_THREADS
+        zresult = ZSTD_compress_generic(self->cctx, &output, &input, ZSTD_e_end);
+        Py_END_ALLOW_THREADS
+
         if (ZSTD_isError(zresult)) {
             PyErr_Format(ZstdError, "error ending compression stream: %s",
                 ZSTD_getErrorName(zresult));
@@ -455,11 +463,81 @@ finally:
         PyMem_Free(output.dst);
     }
 
+    Py_XDECREF(readResult);
+
     return res;
 }
 
+PyDoc_STRVAR(ZstdCompressor_stream_reader__doc__,
+"stream_reader(source, [size=0])\n"
+"\n"
+"Obtain an object that behaves like an I/O stream.\n"
+"\n"
+"The source object can be any object with a ``read(size)`` method\n"
+"or an object that conforms to the buffer protocol.\n"
+);
+
+static ZstdCompressionReader* ZstdCompressor_stream_reader(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "source",
+        "size",
+        "read_size",
+        NULL
+    };
+
+    PyObject* source;
+    unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    size_t readSize = ZSTD_CStreamInSize();
+    ZstdCompressionReader* result = NULL;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist,
+        &source, &sourceSize, &readSize)) {
+        return NULL;
+    }
+
+    result = (ZstdCompressionReader*)PyObject_CallObject((PyObject*)&ZstdCompressionReaderType, NULL);
+    if (!result) {
+        return NULL;
+    }
+
+    if (PyObject_HasAttrString(source, "read")) {
+        result->reader = source;
+        Py_INCREF(source);
+        result->readSize = readSize;
+    }
+    else if (1 == PyObject_CheckBuffer(source)) {
+        if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
+            goto except;
+        }
+
+        assert(result->buffer.len >= 0);
+
+        sourceSize = result->buffer.len;
+    }
+    else {
+        PyErr_SetString(PyExc_TypeError,
+            "must pass an object with a read() method or that conforms to the buffer protocol");
+        goto except;
+    }
+
+    if (ensure_cctx(self)) {
+        goto except;
+    }
+
+    result->compressor = self;
+    Py_INCREF(self);
+    result->sourceSize = sourceSize;
+
+    return result;
+
+except:
+    Py_CLEAR(result);
+
+    return NULL;
+}
+
 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
-"compress(data, allow_empty=False)\n"
+"compress(data)\n"
 "\n"
 "Compress data in a single operation.\n"
 "\n"
@@ -473,122 +551,79 b' PyDoc_STRVAR(ZstdCompressor_compress__do' | |||||
473 | static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
551 | static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
474 | static char* kwlist[] = { |
|
552 | static char* kwlist[] = { | |
475 | "data", |
|
553 | "data", | |
476 | "allow_empty", |
|
|||
477 | NULL |
|
554 | NULL | |
478 | }; |
|
555 | }; | |
479 |
|
556 | |||
480 | const char* source; |
|
557 | Py_buffer source; | |
481 | Py_ssize_t sourceSize; |
|
|||
482 | PyObject* allowEmpty = NULL; |
|
|||
483 | size_t destSize; |
|
558 | size_t destSize; | |
484 | PyObject* output; |
|
559 | PyObject* output = NULL; | |
485 | char* dest; |
|
|||
486 | void* dictData = NULL; |
|
|||
487 | size_t dictSize = 0; |
|
|||
488 | size_t zresult; |
|
560 | size_t zresult; | |
489 | ZSTD_parameters zparams; |
|
561 | ZSTD_outBuffer outBuffer; | |
|
562 | ZSTD_inBuffer inBuffer; | |||
490 |
|
563 | |||
491 | #if PY_MAJOR_VERSION >= 3 |
|
564 | #if PY_MAJOR_VERSION >= 3 | |
492 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y |
|
565 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|O:compress", | |
493 | #else |
|
566 | #else | |
494 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s |
|
567 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|O:compress", | |
495 | #endif |
|
568 | #endif | |
496 |
kwlist, &source |
|
569 | kwlist, &source)) { | |
497 | return NULL; |
|
|||
498 | } |
|
|||
499 |
|
||||
500 | if (self->threads && self->dict) { |
|
|||
501 | PyErr_SetString(ZstdError, |
|
|||
502 | "compress() cannot be used with both dictionaries and multi-threaded compression"); |
|
|||
503 | return NULL; |
|
|||
504 | } |
|
|||
505 |
|
||||
506 | if (self->threads && self->cparams) { |
|
|||
507 | PyErr_SetString(ZstdError, |
|
|||
508 | "compress() cannot be used with both compression parameters and multi-threaded compression"); |
|
|||
509 | return NULL; |
|
|||
510 | } |
|
|||
511 |
|
||||
512 | /* Limitation in zstd C API doesn't let decompression side distinguish |
|
|||
513 | between content size of 0 and unknown content size. This can make round |
|
|||
514 | tripping via Python difficult. Until this is fixed, require a flag |
|
|||
515 | to fire the footgun. |
|
|||
516 | https://github.com/indygreg/python-zstandard/issues/11 */ |
|
|||
517 | if (0 == sourceSize && self->fparams.contentSizeFlag |
|
|||
518 | && (!allowEmpty || PyObject_Not(allowEmpty))) { |
|
|||
519 | PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes"); |
|
|||
520 | return NULL; |
|
|||
521 | } |
|
|||
522 |
|
||||
523 | destSize = ZSTD_compressBound(sourceSize); |
|
|||
524 | output = PyBytes_FromStringAndSize(NULL, destSize); |
|
|||
525 | if (!output) { |
|
|||
526 | return NULL; |
|
570 | return NULL; | |
527 | } |
|
571 | } | |
528 |
|
572 | |||
529 | dest = PyBytes_AsString(output); |
|
573 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |
530 |
|
574 | PyErr_SetString(PyExc_ValueError, | ||
531 | if (self->dict) { |
|
575 | "data buffer should be contiguous and have at most one dimension"); | |
532 | dictData = self->dict->dictData; |
|
576 | goto finally; | |
533 | dictSize = self->dict->dictSize; |
|
|||
534 | } |
|
577 | } | |
535 |
|
578 | |||
536 | memset(&zparams, 0, sizeof(zparams)); |
|
579 | if (ensure_cctx(self)) { | |
537 | if (!self->cparams) { |
|
580 | goto finally; | |
538 | zparams.cParams = ZSTD_getCParams(self->compressionLevel, sourceSize, dictSize); |
|
|||
539 | } |
|
581 | } | |
540 | else { |
|
582 | ||
541 | ztopy_compression_parameters(self->cparams, &zparams.cParams); |
|
583 | destSize = ZSTD_compressBound(source.len); | |
542 | /* Do NOT call ZSTD_adjustCParams() here because the compression params |
|
584 | output = PyBytes_FromStringAndSize(NULL, destSize); | |
543 | come from the user. */ |
|
585 | if (!output) { | |
|
586 | goto finally; | |||
544 | } |
|
587 | } | |
545 |
|
588 | |||
546 | zparams.fParams = self->fparams; |
|
589 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, source.len); | |
547 |
|
590 | if (ZSTD_isError(zresult)) { | ||
548 | /* The raw dict data has to be processed before it can be used. Since this |
|
591 | PyErr_Format(ZstdError, "error setting source size: %s", | |
549 | adds overhead - especially if multiple dictionary compression operations |
|
592 | ZSTD_getErrorName(zresult)); | |
550 | are performed on the same ZstdCompressor instance - we create a |
|
593 | Py_CLEAR(output); | |
551 | ZSTD_CDict once and reuse it for all operations. |
|
594 | goto finally; | |
|
595 | } | |||
552 |
|
596 | |||
553 | Note: the compression parameters used for the first invocation (possibly |
|
597 | inBuffer.src = source.buf; | |
554 | derived from the source size) will be reused on all subsequent invocations. |
|
598 | inBuffer.size = source.len; | |
555 | https://github.com/facebook/zstd/issues/358 contains more info. We could |
|
599 | inBuffer.pos = 0; | |
556 | potentially add an argument somewhere to control this behavior. |
|
600 | ||
557 | */ |
|
601 | outBuffer.dst = PyBytes_AsString(output); | |
558 | if (0 != populate_cdict(self, &zparams)) { |
|
602 | outBuffer.size = destSize; | |
559 | Py_DECREF(output); |
|
603 | outBuffer.pos = 0; | |
560 | return NULL; |
|
|||
561 | } |
|
|||
562 |
|
604 | |||
563 | Py_BEGIN_ALLOW_THREADS |
|
605 | Py_BEGIN_ALLOW_THREADS | |
564 | if (self->mtcctx) { |
|
606 | /* By avoiding ZSTD_compress(), we don't necessarily write out content | |
565 | zresult = ZSTDMT_compressCCtx(self->mtcctx, dest, destSize, |
|
607 | size. This means the argument to ZstdCompressor to control frame | |
566 | source, sourceSize, self->compressionLevel); |
|
608 | parameters is honored. */ | |
567 | } |
|
609 | zresult = ZSTD_compress_generic(self->cctx, &outBuffer, &inBuffer, ZSTD_e_end); | |
568 | else { |
|
|||
569 | /* By avoiding ZSTD_compress(), we don't necessarily write out content |
|
|||
570 | size. This means the argument to ZstdCompressor to control frame |
|
|||
571 | parameters is honored. */ |
|
|||
572 | if (self->cdict) { |
|
|||
573 | zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize, |
|
|||
574 | source, sourceSize, self->cdict); |
|
|||
575 | } |
|
|||
576 | else { |
|
|||
577 | zresult = ZSTD_compress_advanced(self->cctx, dest, destSize, |
|
|||
578 | source, sourceSize, dictData, dictSize, zparams); |
|
|||
579 | } |
|
|||
580 | } |
|
|||
581 | Py_END_ALLOW_THREADS |
|
610 | Py_END_ALLOW_THREADS | |
582 |
|
611 | |||
583 | if (ZSTD_isError(zresult)) { |
|
612 | if (ZSTD_isError(zresult)) { | |
584 | PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult)); |
|
613 | PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult)); | |
585 | Py_CLEAR(output); |
|
614 | Py_CLEAR(output); | |
586 | return NULL; |
|
615 | goto finally; | |
587 | } |
|
616 | } | |
588 | else { |
|
617 | else if (zresult) { | |
589 | Py_SIZE(output) = zresult; |
|
618 | PyErr_SetString(ZstdError, "unexpected partial frame flush"); | |
|
619 | Py_CLEAR(output); | |||
|
620 | goto finally; | |||
590 | } |
|
621 | } | |
591 |
|
622 | |||
|
623 | Py_SIZE(output) = outBuffer.pos; | |||
|
624 | ||||
|
625 | finally: | |||
|
626 | PyBuffer_Release(&source); | |||
592 | return output; |
|
627 | return output; | |
593 | } |
|
628 | } | |
594 |
|
629 | |||
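The rewritten compress() above stops calling ZSTD_compress_advanced()/ZSTDMT_compressCCtx() and instead drives the zstd "advanced" streaming entry point in a single pass. A minimal sketch of that pattern, separated from the Python wrapper (the helper name is illustrative, error reporting trimmed):

    /* Sketch only: compress one buffer with a single ZSTD_compress_generic() call. */
    static int compress_one_shot(ZSTD_CCtx* cctx,
        const void* src, size_t srcSize,
        void* dst, size_t dstCapacity, size_t* written) {
        ZSTD_inBuffer in = { src, srcSize, 0 };
        ZSTD_outBuffer out = { dst, dstCapacity, 0 };
        size_t zresult;

        /* Pledging the source size lets zstd record it in the frame header. */
        zresult = ZSTD_CCtx_setPledgedSrcSize(cctx, srcSize);
        if (ZSTD_isError(zresult)) {
            return -1;
        }

        /* ZSTD_e_end requests a complete frame; a return of 0 means the frame
           was fully flushed into `out`. Anything non-zero here means the
           destination was too small (dstCapacity should come from
           ZSTD_compressBound(srcSize)). */
        zresult = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
        if (ZSTD_isError(zresult) || zresult != 0) {
            return -1;
        }

        *written = out.pos; /* compressed bytes */
        return 0;
    }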
@@ -608,11 +643,23 @@ static ZstdCompressionObj* ZstdCompresso
         NULL
     };
 
-    Py_ssize_t inSize = 0;
+    unsigned long long inSize = ZSTD_CONTENTSIZE_UNKNOWN;
     size_t outSize = ZSTD_CStreamOutSize();
     ZstdCompressionObj* result = NULL;
+    size_t zresult;
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:compressobj", kwlist, &inSize)) {
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|K:compressobj", kwlist, &inSize)) {
+        return NULL;
+    }
+
+    if (ensure_cctx(self)) {
+        return NULL;
+    }
+
+    zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error setting source size: %s",
+            ZSTD_getErrorName(zresult));
         return NULL;
     }
 
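compressobj() now defaults the declared input size to ZSTD_CONTENTSIZE_UNKNOWN instead of 0, which is the semantic difference the old API could not express. A tiny illustrative helper (not part of the module) showing the distinction:

    /* Illustrative only: declare the upcoming input size for a compression
       context. Pledging 0 means "the input is empty"; pledging
       ZSTD_CONTENTSIZE_UNKNOWN leaves the frame's content size field unset. */
    static size_t pledge_size(ZSTD_CCtx* cctx, int have_size, unsigned long long size) {
        return ZSTD_CCtx_setPledgedSrcSize(cctx,
            have_size ? size : ZSTD_CONTENTSIZE_UNKNOWN);
    }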
@@ -621,19 +668,6 b' static ZstdCompressionObj* ZstdCompresso' | |||||
621 | return NULL; |
|
668 | return NULL; | |
622 | } |
|
669 | } | |
623 |
|
670 | |||
624 | if (self->mtcctx) { |
|
|||
625 | if (init_mtcstream(self, inSize)) { |
|
|||
626 | Py_DECREF(result); |
|
|||
627 | return NULL; |
|
|||
628 | } |
|
|||
629 | } |
|
|||
630 | else { |
|
|||
631 | if (0 != init_cstream(self, inSize)) { |
|
|||
632 | Py_DECREF(result); |
|
|||
633 | return NULL; |
|
|||
634 | } |
|
|||
635 | } |
|
|||
636 |
|
||||
637 | result->output.dst = PyMem_Malloc(outSize); |
|
671 | result->output.dst = PyMem_Malloc(outSize); | |
638 | if (!result->output.dst) { |
|
672 | if (!result->output.dst) { | |
639 | PyErr_NoMemory(); |
|
673 | PyErr_NoMemory(); | |
@@ -647,9 +681,9 b' static ZstdCompressionObj* ZstdCompresso' | |||||
647 | return result; |
|
681 | return result; | |
648 | } |
|
682 | } | |
649 |
|
683 | |||
650 |
PyDoc_STRVAR(ZstdCompressor_read_ |
|
684 | PyDoc_STRVAR(ZstdCompressor_read_to_iter__doc__, | |
651 |
"read_ |
|
685 | "read_to_iter(reader, [size=0, read_size=default, write_size=default])\n" | |
652 | "Read uncompress data from a reader and return an iterator\n" |
|
686 | "Read uncompressed data from a reader and return an iterator\n" | |
653 | "\n" |
|
687 | "\n" | |
654 | "Returns an iterator of compressed data produced from reading from ``reader``.\n" |
|
688 | "Returns an iterator of compressed data produced from reading from ``reader``.\n" | |
655 | "\n" |
|
689 | "\n" | |
@@ -667,7 +701,7 @@ PyDoc_STRVAR(ZstdCompressor_read_from__d
 "not consume from the reader unless the caller consumes from the iterator.\n"
 );
 
-static ZstdCompressorIterator* ZstdCompressor_read_from(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
+static ZstdCompressorIterator* ZstdCompressor_read_to_iter(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
     static char* kwlist[] = {
         "reader",
         "size",
@@ -677,12 +711,13 b' static ZstdCompressorIterator* ZstdCompr' | |||||
677 | }; |
|
711 | }; | |
678 |
|
712 | |||
679 | PyObject* reader; |
|
713 | PyObject* reader; | |
680 | Py_ssize_t sourceSize = 0; |
|
714 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
681 | size_t inSize = ZSTD_CStreamInSize(); |
|
715 | size_t inSize = ZSTD_CStreamInSize(); | |
682 | size_t outSize = ZSTD_CStreamOutSize(); |
|
716 | size_t outSize = ZSTD_CStreamOutSize(); | |
683 | ZstdCompressorIterator* result; |
|
717 | ZstdCompressorIterator* result; | |
|
718 | size_t zresult; | |||
684 |
|
719 | |||
685 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O| |
|
720 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kkk:read_to_iter", kwlist, | |
686 | &reader, &sourceSize, &inSize, &outSize)) { |
|
721 | &reader, &sourceSize, &inSize, &outSize)) { | |
687 | return NULL; |
|
722 | return NULL; | |
688 | } |
|
723 | } | |
@@ -696,18 +731,11 @@ static ZstdCompressorIterator* ZstdCompr
         Py_INCREF(result->reader);
     }
     else if (1 == PyObject_CheckBuffer(reader)) {
-        result->buffer = PyMem_Malloc(sizeof(Py_buffer));
-        if (!result->buffer) {
+        if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
             goto except;
         }
 
-        memset(result->buffer, 0, sizeof(Py_buffer));
-
-        if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
-            goto except;
-        }
-
-        sourceSize = result->buffer->len;
+        sourceSize = result->buffer.len;
     }
     else {
         PyErr_SetString(PyExc_ValueError,
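The iterator now holds its Py_buffer by value instead of heap-allocating one, so acquisition and release pair up directly on the struct. A self-contained sketch of that lifetime (hypothetical holder type; assumes Python.h and string.h are included):

    /* Sketch: acquiring and releasing a buffer embedded in a struct by value. */
    typedef struct {
        Py_buffer buffer;
    } holder_t;

    static int hold_buffer(holder_t* h, PyObject* obj) {
        /* PyBUF_CONTIG_RO yields a C-contiguous, read-only view or fails. */
        if (0 != PyObject_GetBuffer(obj, &h->buffer, PyBUF_CONTIG_RO)) {
            return -1;
        }
        return 0;
    }

    static void drop_buffer(holder_t* h) {
        if (h->buffer.buf) {
            PyBuffer_Release(&h->buffer);
            memset(&h->buffer, 0, sizeof(h->buffer));
        }
    }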
@@ -715,22 +743,20 @@ static ZstdCompressorIterator* ZstdCompr
         goto except;
     }
 
+    if (ensure_cctx(self)) {
+        return NULL;
+    }
+
+    zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error setting source size: %s",
+            ZSTD_getErrorName(zresult));
+        return NULL;
+    }
+
     result->compressor = self;
     Py_INCREF(result->compressor);
 
-    result->sourceSize = sourceSize;
-
-    if (self->mtcctx) {
-        if (init_mtcstream(self, sourceSize)) {
-            goto except;
-        }
-    }
-    else {
-        if (0 != init_cstream(self, sourceSize)) {
-            goto except;
-        }
-    }
-
     result->inSize = inSize;
     result->outSize = outSize;
 
@@ -744,16 +770,13 b' static ZstdCompressorIterator* ZstdCompr' | |||||
744 | goto finally; |
|
770 | goto finally; | |
745 |
|
771 | |||
746 | except: |
|
772 | except: | |
747 |
Py_ |
|
773 | Py_CLEAR(result); | |
748 | Py_XDECREF(result->reader); |
|
|||
749 | Py_DECREF(result); |
|
|||
750 | result = NULL; |
|
|||
751 |
|
774 | |||
752 | finally: |
|
775 | finally: | |
753 | return result; |
|
776 | return result; | |
754 | } |
|
777 | } | |
755 |
|
778 | |||
756 |
PyDoc_STRVAR(ZstdCompressor_ |
|
779 | PyDoc_STRVAR(ZstdCompressor_stream_writer___doc__, | |
757 | "Create a context manager to write compressed data to an object.\n" |
|
780 | "Create a context manager to write compressed data to an object.\n" | |
758 | "\n" |
|
781 | "\n" | |
759 | "The passed object must have a ``write()`` method.\n" |
|
782 | "The passed object must have a ``write()`` method.\n" | |
@@ -771,7 +794,7 @@ PyDoc_STRVAR(ZstdCompressor_write_to___d
 "for a compressor output stream.\n"
 );
 
-static ZstdCompressionWriter* ZstdCompressor_write_to(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
+static ZstdCompressionWriter* ZstdCompressor_stream_writer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
     static char* kwlist[] = {
         "writer",
         "size",
@@ -781,10 +804,10 b' static ZstdCompressionWriter* ZstdCompre' | |||||
781 |
|
804 | |||
782 | PyObject* writer; |
|
805 | PyObject* writer; | |
783 | ZstdCompressionWriter* result; |
|
806 | ZstdCompressionWriter* result; | |
784 | Py_ssize_t sourceSize = 0; |
|
807 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
785 | size_t outSize = ZSTD_CStreamOutSize(); |
|
808 | size_t outSize = ZSTD_CStreamOutSize(); | |
786 |
|
809 | |||
787 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O| |
|
810 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_writer", kwlist, | |
788 | &writer, &sourceSize, &outSize)) { |
|
811 | &writer, &sourceSize, &outSize)) { | |
789 | return NULL; |
|
812 | return NULL; | |
790 | } |
|
813 | } | |
@@ -794,6 +817,10 b' static ZstdCompressionWriter* ZstdCompre' | |||||
794 | return NULL; |
|
817 | return NULL; | |
795 | } |
|
818 | } | |
796 |
|
819 | |||
|
820 | if (ensure_cctx(self)) { | |||
|
821 | return NULL; | |||
|
822 | } | |||
|
823 | ||||
797 | result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL); |
|
824 | result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL); | |
798 | if (!result) { |
|
825 | if (!result) { | |
799 | return NULL; |
|
826 | return NULL; | |
@@ -807,6 +834,7 b' static ZstdCompressionWriter* ZstdCompre' | |||||
807 |
|
834 | |||
808 | result->sourceSize = sourceSize; |
|
835 | result->sourceSize = sourceSize; | |
809 | result->outSize = outSize; |
|
836 | result->outSize = outSize; | |
|
837 | result->bytesCompressed = 0; | |||
810 |
|
838 | |||
811 | return result; |
|
839 | return result; | |
812 | } |
|
840 | } | |
@@ -833,6 +861,7 b' typedef enum {' | |||||
833 | WorkerError_none = 0, |
|
861 | WorkerError_none = 0, | |
834 | WorkerError_zstd = 1, |
|
862 | WorkerError_zstd = 1, | |
835 | WorkerError_no_memory = 2, |
|
863 | WorkerError_no_memory = 2, | |
|
864 | WorkerError_nospace = 3, | |||
836 | } WorkerError; |
|
865 | } WorkerError; | |
837 |
|
866 | |||
838 | /** |
|
867 | /** | |
@@ -841,10 +870,6 b' typedef enum {' | |||||
841 | typedef struct { |
|
870 | typedef struct { | |
842 | /* Used for compression. */ |
|
871 | /* Used for compression. */ | |
843 | ZSTD_CCtx* cctx; |
|
872 | ZSTD_CCtx* cctx; | |
844 | ZSTD_CDict* cdict; |
|
|||
845 | int cLevel; |
|
|||
846 | CompressionParametersObject* cParams; |
|
|||
847 | ZSTD_frameParameters fParams; |
|
|||
848 |
|
873 | |||
849 | /* What to compress. */ |
|
874 | /* What to compress. */ | |
850 | DataSource* sources; |
|
875 | DataSource* sources; | |
@@ -868,7 +893,6 b' static void compress_worker(WorkerState*' | |||||
868 | Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1; |
|
893 | Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1; | |
869 | Py_ssize_t currentBufferStartOffset = state->startOffset; |
|
894 | Py_ssize_t currentBufferStartOffset = state->startOffset; | |
870 | size_t zresult; |
|
895 | size_t zresult; | |
871 | ZSTD_parameters zparams; |
|
|||
872 | void* newDest; |
|
896 | void* newDest; | |
873 | size_t allocationSize; |
|
897 | size_t allocationSize; | |
874 | size_t boundSize; |
|
898 | size_t boundSize; | |
@@ -879,16 +903,10 b' static void compress_worker(WorkerState*' | |||||
879 | assert(!state->destBuffers); |
|
903 | assert(!state->destBuffers); | |
880 | assert(0 == state->destCount); |
|
904 | assert(0 == state->destCount); | |
881 |
|
905 | |||
882 | if (state->cParams) { |
|
|||
883 | ztopy_compression_parameters(state->cParams, &zparams.cParams); |
|
|||
884 | } |
|
|||
885 |
|
||||
886 | zparams.fParams = state->fParams; |
|
|||
887 |
|
||||
888 | /* |
|
906 | /* | |
889 | * The total size of the compressed data is unknown until we actually |
|
907 | * The total size of the compressed data is unknown until we actually | |
890 | * compress data. That means we can't pre-allocate the exact size we need. |
|
908 | * compress data. That means we can't pre-allocate the exact size we need. | |
891 |
* |
|
909 | * | |
892 | * There is a cost to every allocation and reallocation. So, it is in our |
|
910 | * There is a cost to every allocation and reallocation. So, it is in our | |
893 | * interest to minimize the number of allocations. |
|
911 | * interest to minimize the number of allocations. | |
894 | * |
|
912 | * | |
@@ -927,7 +945,8 b' static void compress_worker(WorkerState*' | |||||
927 |
|
945 | |||
928 | destBuffer->segmentsSize = remainingItems; |
|
946 | destBuffer->segmentsSize = remainingItems; | |
929 |
|
947 | |||
930 | allocationSize = roundpow2(state->totalSourceSize >> 4); |
|
948 | assert(state->totalSourceSize <= SIZE_MAX); | |
|
949 | allocationSize = roundpow2((size_t)state->totalSourceSize >> 4); | |||
931 |
|
950 | |||
932 | /* If the maximum size of the output is larger than that, round up. */ |
|
951 | /* If the maximum size of the output is larger than that, round up. */ | |
933 | boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize); |
|
952 | boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize); | |
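The sizing rule described in the comments above amounts to: start from one sixteenth of the total input, rounded up to a power of two, but never below the compressed bound of the item about to be written. A standalone sketch of that choice (roundpow2() is the power-of-two helper this code uses elsewhere):

    /* Illustrative only: choose the initial destination buffer size. */
    static size_t initial_allocation(size_t totalSourceSize, size_t firstSourceSize) {
        size_t allocationSize = roundpow2(totalSourceSize >> 4);
        size_t boundSize = ZSTD_compressBound(firstSourceSize);

        /* If even one worst-case frame would not fit, round up to hold it. */
        if (boundSize > allocationSize) {
            allocationSize = roundpow2(boundSize);
        }

        return allocationSize;
    }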
@@ -949,6 +968,8 b' static void compress_worker(WorkerState*' | |||||
949 | size_t sourceSize = sources[inputOffset].sourceSize; |
|
968 | size_t sourceSize = sources[inputOffset].sourceSize; | |
950 | size_t destAvailable; |
|
969 | size_t destAvailable; | |
951 | void* dest; |
|
970 | void* dest; | |
|
971 | ZSTD_outBuffer opOutBuffer; | |||
|
972 | ZSTD_inBuffer opInBuffer; | |||
952 |
|
973 | |||
953 | destAvailable = destBuffer->destSize - destOffset; |
|
974 | destAvailable = destBuffer->destSize - destOffset; | |
954 | boundSize = ZSTD_compressBound(sourceSize); |
|
975 | boundSize = ZSTD_compressBound(sourceSize); | |
@@ -1004,7 +1025,8 b' static void compress_worker(WorkerState*' | |||||
1004 | * We could dynamically update allocation size based on work done so far. |
|
1025 | * We could dynamically update allocation size based on work done so far. | |
1005 | * For now, keep is simple. |
|
1026 | * For now, keep is simple. | |
1006 | */ |
|
1027 | */ | |
1007 |
a |
|
1028 | assert(state->totalSourceSize <= SIZE_MAX); | |
|
1029 | allocationSize = roundpow2((size_t)state->totalSourceSize >> 4); | |||
1008 |
|
1030 | |||
1009 | if (boundSize > allocationSize) { |
|
1031 | if (boundSize > allocationSize) { | |
1010 | allocationSize = roundpow2(boundSize); |
|
1032 | allocationSize = roundpow2(boundSize); | |
@@ -1032,19 +1054,15 @@ static void compress_worker(WorkerState*
 
         dest = (char*)destBuffer->dest + destOffset;
 
-        if (state->cdict) {
-            zresult = ZSTD_compress_usingCDict(state->cctx, dest, destAvailable,
-                source, sourceSize, state->cdict);
-        }
-        else {
-            if (!state->cParams) {
-                zparams.cParams = ZSTD_getCParams(state->cLevel, sourceSize, 0);
-            }
+        opInBuffer.src = source;
+        opInBuffer.size = sourceSize;
+        opInBuffer.pos = 0;
 
-            zresult = ZSTD_compress_advanced(state->cctx, dest, destAvailable,
-                source, sourceSize, NULL, 0, zparams);
-        }
+        opOutBuffer.dst = dest;
+        opOutBuffer.size = destAvailable;
+        opOutBuffer.pos = 0;
 
+        zresult = ZSTD_CCtx_setPledgedSrcSize(state->cctx, sourceSize);
         if (ZSTD_isError(zresult)) {
             state->error = WorkerError_zstd;
             state->zresult = zresult;
@@ -1052,10 +1070,23 @@ static void compress_worker(WorkerState*
             break;
         }
 
+        zresult = ZSTD_compress_generic(state->cctx, &opOutBuffer, &opInBuffer, ZSTD_e_end);
+        if (ZSTD_isError(zresult)) {
+            state->error = WorkerError_zstd;
+            state->zresult = zresult;
+            state->errorOffset = inputOffset;
+            break;
+        }
+        else if (zresult) {
+            state->error = WorkerError_nospace;
+            state->errorOffset = inputOffset;
+            break;
+        }
+
         destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset;
-        destBuffer->segments[inputOffset - currentBufferStartOffset].length = zresult;
+        destBuffer->segments[inputOffset - currentBufferStartOffset].length = opOutBuffer.pos;
 
-        destOffset += zresult;
+        destOffset += opOutBuffer.pos;
         remainingItems--;
     }
 
@@ -1072,15 +1103,14 b' static void compress_worker(WorkerState*' | |||||
1072 | } |
|
1103 | } | |
1073 |
|
1104 | |||
1074 | ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor, |
|
1105 | ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor, | |
1075 |
DataSources* sources, |
|
1106 | DataSources* sources, Py_ssize_t threadCount) { | |
1076 | ZSTD_parameters zparams; |
|
|||
1077 | unsigned long long bytesPerWorker; |
|
1107 | unsigned long long bytesPerWorker; | |
1078 | POOL_ctx* pool = NULL; |
|
1108 | POOL_ctx* pool = NULL; | |
1079 | WorkerState* workerStates = NULL; |
|
1109 | WorkerState* workerStates = NULL; | |
1080 | Py_ssize_t i; |
|
1110 | Py_ssize_t i; | |
1081 | unsigned long long workerBytes = 0; |
|
1111 | unsigned long long workerBytes = 0; | |
1082 | Py_ssize_t workerStartOffset = 0; |
|
1112 | Py_ssize_t workerStartOffset = 0; | |
1083 | size_t currentThread = 0; |
|
1113 | Py_ssize_t currentThread = 0; | |
1084 | int errored = 0; |
|
1114 | int errored = 0; | |
1085 | Py_ssize_t segmentsCount = 0; |
|
1115 | Py_ssize_t segmentsCount = 0; | |
1086 | Py_ssize_t segmentIndex; |
|
1116 | Py_ssize_t segmentIndex; | |
@@ -1093,34 +1123,12 b' ZstdBufferWithSegmentsCollection* compre' | |||||
1093 | assert(threadCount >= 1); |
|
1123 | assert(threadCount >= 1); | |
1094 |
|
1124 | |||
1095 | /* More threads than inputs makes no sense. */ |
|
1125 | /* More threads than inputs makes no sense. */ | |
1096 |
threadCount = sources->sourcesSize < threadCount ? |
|
1126 | threadCount = sources->sourcesSize < threadCount ? sources->sourcesSize | |
1097 | : threadCount; |
|
1127 | : threadCount; | |
1098 |
|
1128 | |||
1099 | /* TODO lower thread count when input size is too small and threads would add |
|
1129 | /* TODO lower thread count when input size is too small and threads would add | |
1100 | overhead. */ |
|
1130 | overhead. */ | |
1101 |
|
1131 | |||
1102 | /* |
|
|||
1103 | * When dictionaries are used, parameters are derived from the size of the |
|
|||
1104 | * first element. |
|
|||
1105 | * |
|
|||
1106 | * TODO come up with a better mechanism. |
|
|||
1107 | */ |
|
|||
1108 | memset(&zparams, 0, sizeof(zparams)); |
|
|||
1109 | if (compressor->cparams) { |
|
|||
1110 | ztopy_compression_parameters(compressor->cparams, &zparams.cParams); |
|
|||
1111 | } |
|
|||
1112 | else { |
|
|||
1113 | zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, |
|
|||
1114 | sources->sources[0].sourceSize, |
|
|||
1115 | compressor->dict ? compressor->dict->dictSize : 0); |
|
|||
1116 | } |
|
|||
1117 |
|
||||
1118 | zparams.fParams = compressor->fparams; |
|
|||
1119 |
|
||||
1120 | if (0 != populate_cdict(compressor, &zparams)) { |
|
|||
1121 | return NULL; |
|
|||
1122 | } |
|
|||
1123 |
|
||||
1124 | workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState)); |
|
1132 | workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState)); | |
1125 | if (NULL == workerStates) { |
|
1133 | if (NULL == workerStates) { | |
1126 | PyErr_NoMemory(); |
|
1134 | PyErr_NoMemory(); | |
@@ -1140,16 +1148,42 @@ ZstdBufferWithSegmentsCollection* compre
     bytesPerWorker = sources->totalSourceSize / threadCount;
 
     for (i = 0; i < threadCount; i++) {
+        size_t zresult;
+
         workerStates[i].cctx = ZSTD_createCCtx();
         if (!workerStates[i].cctx) {
             PyErr_NoMemory();
             goto finally;
         }
 
-        workerStates[i].cdict = compressor->cdict;
-        workerStates[i].cLevel = compressor->compressionLevel;
-        workerStates[i].cParams = compressor->cparams;
-        workerStates[i].fParams = compressor->fparams;
+        zresult = ZSTD_CCtx_setParametersUsingCCtxParams(workerStates[i].cctx,
+            compressor->params);
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "could not set compression parameters: %s",
+                ZSTD_getErrorName(zresult));
+            goto finally;
+        }
+
+        if (compressor->dict) {
+            if (compressor->dict->cdict) {
+                zresult = ZSTD_CCtx_refCDict(workerStates[i].cctx, compressor->dict->cdict);
+            }
+            else {
+                zresult = ZSTD_CCtx_loadDictionary_advanced(
+                    workerStates[i].cctx,
+                    compressor->dict->dictData,
+                    compressor->dict->dictSize,
+                    ZSTD_dlm_byRef,
+                    compressor->dict->dictType);
+            }
+
+            if (ZSTD_isError(zresult)) {
+                PyErr_Format(ZstdError, "could not load compression dictionary: %s",
+                    ZSTD_getErrorName(zresult));
+                goto finally;
+            }
+
+        }
 
         workerStates[i].sources = sources->sources;
         workerStates[i].sourcesSize = sources->sourcesSize;
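Each worker thread now gets its own ZSTD_CCtx configured from the shared compressor state rather than a copy of raw parameters. Condensed into a standalone helper (field names follow the ones used in this diff; the helper itself is illustrative):

    /* Sketch: configure one worker's context from shared compressor state. */
    static int setup_worker_cctx(ZSTD_CCtx* cctx, ZstdCompressor* compressor) {
        size_t zresult = ZSTD_CCtx_setParametersUsingCCtxParams(cctx, compressor->params);
        if (ZSTD_isError(zresult)) {
            return -1;
        }

        if (compressor->dict) {
            /* Prefer a precomputed ZSTD_CDict; otherwise load the raw dictionary
               by reference so every worker shares the same bytes. */
            if (compressor->dict->cdict) {
                zresult = ZSTD_CCtx_refCDict(cctx, compressor->dict->cdict);
            }
            else {
                zresult = ZSTD_CCtx_loadDictionary_advanced(cctx,
                    compressor->dict->dictData, compressor->dict->dictSize,
                    ZSTD_dlm_byRef, compressor->dict->dictType);
            }
            if (ZSTD_isError(zresult)) {
                return -1;
            }
        }

        return 0;
    }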
@@ -1221,6 +1255,13 b' ZstdBufferWithSegmentsCollection* compre' | |||||
1221 | workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult)); |
|
1255 | workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult)); | |
1222 | errored = 1; |
|
1256 | errored = 1; | |
1223 | break; |
|
1257 | break; | |
|
1258 | ||||
|
1259 | case WorkerError_nospace: | |||
|
1260 | PyErr_Format(ZstdError, "error compressing item %zd: not enough space in output", | |||
|
1261 | workerStates[i].errorOffset); | |||
|
1262 | errored = 1; | |||
|
1263 | break; | |||
|
1264 | ||||
1224 | default: |
|
1265 | default: | |
1225 | ; |
|
1266 | ; | |
1226 | } |
|
1267 | } | |
@@ -1341,12 +1382,6 b' static ZstdBufferWithSegmentsCollection*' | |||||
1341 | Py_ssize_t sourceCount = 0; |
|
1382 | Py_ssize_t sourceCount = 0; | |
1342 | ZstdBufferWithSegmentsCollection* result = NULL; |
|
1383 | ZstdBufferWithSegmentsCollection* result = NULL; | |
1343 |
|
1384 | |||
1344 | if (self->mtcctx) { |
|
|||
1345 | PyErr_SetString(ZstdError, |
|
|||
1346 | "function cannot be called on ZstdCompressor configured for multi-threaded compression"); |
|
|||
1347 | return NULL; |
|
|||
1348 | } |
|
|||
1349 |
|
||||
1350 | memset(&sources, 0, sizeof(sources)); |
|
1385 | memset(&sources, 0, sizeof(sources)); | |
1351 |
|
1386 | |||
1352 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist, |
|
1387 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist, | |
@@ -1372,8 +1407,14 b' static ZstdBufferWithSegmentsCollection*' | |||||
1372 | } |
|
1407 | } | |
1373 |
|
1408 | |||
1374 | for (i = 0; i < buffer->segmentCount; i++) { |
|
1409 | for (i = 0; i < buffer->segmentCount; i++) { | |
|
1410 | if (buffer->segments[i].length > SIZE_MAX) { | |||
|
1411 | PyErr_Format(PyExc_ValueError, | |||
|
1412 | "buffer segment %zd is too large for this platform", i); | |||
|
1413 | goto finally; | |||
|
1414 | } | |||
|
1415 | ||||
1375 | sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset; |
|
1416 | sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset; | |
1376 | sources.sources[i].sourceSize = buffer->segments[i].length; |
|
1417 | sources.sources[i].sourceSize = (size_t)buffer->segments[i].length; | |
1377 | sources.totalSourceSize += buffer->segments[i].length; |
|
1418 | sources.totalSourceSize += buffer->segments[i].length; | |
1378 | } |
|
1419 | } | |
1379 |
|
1420 | |||
@@ -1397,8 +1438,15 b' static ZstdBufferWithSegmentsCollection*' | |||||
1397 | buffer = collection->buffers[i]; |
|
1438 | buffer = collection->buffers[i]; | |
1398 |
|
1439 | |||
1399 | for (j = 0; j < buffer->segmentCount; j++) { |
|
1440 | for (j = 0; j < buffer->segmentCount; j++) { | |
|
1441 | if (buffer->segments[j].length > SIZE_MAX) { | |||
|
1442 | PyErr_Format(PyExc_ValueError, | |||
|
1443 | "buffer segment %zd in buffer %zd is too large for this platform", | |||
|
1444 | j, i); | |||
|
1445 | goto finally; | |||
|
1446 | } | |||
|
1447 | ||||
1400 | sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset; |
|
1448 | sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset; | |
1401 | sources.sources[offset].sourceSize = buffer->segments[j].length; |
|
1449 | sources.sources[offset].sourceSize = (size_t)buffer->segments[j].length; | |
1402 | sources.totalSourceSize += buffer->segments[j].length; |
|
1450 | sources.totalSourceSize += buffer->segments[j].length; | |
1403 |
|
1451 | |||
1404 | offset++; |
|
1452 | offset++; | |
@@ -1416,11 +1464,6 b' static ZstdBufferWithSegmentsCollection*' | |||||
1416 | goto finally; |
|
1464 | goto finally; | |
1417 | } |
|
1465 | } | |
1418 |
|
1466 | |||
1419 | /* |
|
|||
1420 | * It isn't clear whether the address referred to by Py_buffer.buf |
|
|||
1421 | * is still valid after PyBuffer_Release. We we hold a reference to all |
|
|||
1422 | * Py_buffer instances for the duration of the operation. |
|
|||
1423 | */ |
|
|||
1424 | dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer)); |
|
1467 | dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer)); | |
1425 | if (NULL == dataBuffers) { |
|
1468 | if (NULL == dataBuffers) { | |
1426 | PyErr_NoMemory(); |
|
1469 | PyErr_NoMemory(); | |
@@ -1459,6 +1502,11 b' static ZstdBufferWithSegmentsCollection*' | |||||
1459 | goto finally; |
|
1502 | goto finally; | |
1460 | } |
|
1503 | } | |
1461 |
|
1504 | |||
|
1505 | if (sources.totalSourceSize > SIZE_MAX) { | |||
|
1506 | PyErr_SetString(PyExc_ValueError, "sources are too large for this platform"); | |||
|
1507 | goto finally; | |||
|
1508 | } | |||
|
1509 | ||||
1462 | result = compress_from_datasources(self, &sources, threads); |
|
1510 | result = compress_from_datasources(self, &sources, threads); | |
1463 |
|
1511 | |||
1464 | finally: |
|
1512 | finally: | |
@@ -1482,12 +1530,24 @@ static PyMethodDef ZstdCompressor_method
         METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
     { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
         METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
-    { "read_from", (PyCFunction)ZstdCompressor_read_from,
-        METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_from__doc__ },
-    { "write_to", (PyCFunction)ZstdCompressor_write_to,
-        METH_VARARGS | METH_KEYWORDS, ZstdCompressor_write_to___doc__ },
+    { "stream_reader", (PyCFunction)ZstdCompressor_stream_reader,
+        METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_reader__doc__ },
+    { "stream_writer", (PyCFunction)ZstdCompressor_stream_writer,
+        METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
+    { "read_to_iter", (PyCFunction)ZstdCompressor_read_to_iter,
+        METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
+    /* TODO Remove deprecated API */
+    { "read_from", (PyCFunction)ZstdCompressor_read_to_iter,
+        METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
+    /* TODO remove deprecated API */
+    { "write_to", (PyCFunction)ZstdCompressor_stream_writer,
+        METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
     { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer,
         METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ },
+    { "memory_size", (PyCFunction)ZstdCompressor_memory_size,
+        METH_NOARGS, ZstdCompressor_memory_size__doc__ },
+    { "frame_progression", (PyCFunction)ZstdCompressor_frame_progression,
+        METH_NOARGS, ZstdCompressor_frame_progression__doc__ },
     { NULL, NULL }
 };
 
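Note how the old read_from and write_to names survive as deprecated aliases: two PyMethodDef entries simply point at the same C implementation. In isolation (method table name is illustrative), the pattern is:

    /* Sketch: exposing one C function under a new name and a deprecated alias. */
    static PyMethodDef example_methods[] = {
        { "read_to_iter", (PyCFunction)ZstdCompressor_read_to_iter,
            METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
        /* Deprecated alias kept for backwards compatibility. */
        { "read_from", (PyCFunction)ZstdCompressor_read_to_iter,
            METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
        { NULL, NULL }
    };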
@@ -21,10 +21,9 @@ static void ZstdCompressorIterator_deall
     Py_XDECREF(self->compressor);
     Py_XDECREF(self->reader);
 
-    if (self->buffer) {
-        PyBuffer_Release(self->buffer);
-        PyMem_FREE(self->buffer);
-        self->buffer = NULL;
+    if (self->buffer.buf) {
+        PyBuffer_Release(&self->buffer);
+        memset(&self->buffer, 0, sizeof(self->buffer));
     }
 
     if (self->output.dst) {
@@ -58,14 +57,8 b' feedcompressor:' | |||||
58 | /* If we have data left in the input, consume it. */ |
|
57 | /* If we have data left in the input, consume it. */ | |
59 | if (self->input.pos < self->input.size) { |
|
58 | if (self->input.pos < self->input.size) { | |
60 | Py_BEGIN_ALLOW_THREADS |
|
59 | Py_BEGIN_ALLOW_THREADS | |
61 | if (self->compressor->mtcctx) { |
|
60 | zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output, | |
62 | zresult = ZSTDMT_compressStream(self->compressor->mtcctx, |
|
61 | &self->input, ZSTD_e_continue); | |
63 | &self->output, &self->input); |
|
|||
64 | } |
|
|||
65 | else { |
|
|||
66 | zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, |
|
|||
67 | &self->input); |
|
|||
68 | } |
|
|||
69 | Py_END_ALLOW_THREADS |
|
62 | Py_END_ALLOW_THREADS | |
70 |
|
63 | |||
71 | /* Release the Python object holding the input buffer. */ |
|
64 | /* Release the Python object holding the input buffer. */ | |
@@ -107,14 +100,14 b' feedcompressor:' | |||||
107 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); |
|
100 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); | |
108 | } |
|
101 | } | |
109 | else { |
|
102 | else { | |
110 |
assert(self->buffer |
|
103 | assert(self->buffer.buf); | |
111 |
|
104 | |||
112 | /* Only support contiguous C arrays. */ |
|
105 | /* Only support contiguous C arrays. */ | |
113 |
assert(self->buffer |
|
106 | assert(self->buffer.strides == NULL && self->buffer.suboffsets == NULL); | |
114 |
assert(self->buffer |
|
107 | assert(self->buffer.itemsize == 1); | |
115 |
|
108 | |||
116 |
readBuffer = (char*)self->buffer |
|
109 | readBuffer = (char*)self->buffer.buf + self->bufferOffset; | |
117 |
bufferRemaining = self->buffer |
|
110 | bufferRemaining = self->buffer.len - self->bufferOffset; | |
118 | readSize = min(bufferRemaining, (Py_ssize_t)self->inSize); |
|
111 | readSize = min(bufferRemaining, (Py_ssize_t)self->inSize); | |
119 | self->bufferOffset += readSize; |
|
112 | self->bufferOffset += readSize; | |
120 | } |
|
113 | } | |
@@ -130,12 +123,12 @@ feedcompressor:
 
     /* EOF */
     if (0 == readSize) {
-        if (self->compressor->mtcctx) {
-            zresult = ZSTDMT_endStream(self->compressor->mtcctx, &self->output);
-        }
-        else {
-            zresult = ZSTD_endStream(self->compressor->cstream, &self->output);
-        }
+        self->input.src = NULL;
+        self->input.size = 0;
+        self->input.pos = 0;
+
+        zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
+            &self->input, ZSTD_e_end);
         if (ZSTD_isError(zresult)) {
             PyErr_Format(ZstdError, "error ending compression stream: %s",
                 ZSTD_getErrorName(zresult));
@@ -159,13 +152,8 b' feedcompressor:' | |||||
159 | self->input.pos = 0; |
|
152 | self->input.pos = 0; | |
160 |
|
153 | |||
161 | Py_BEGIN_ALLOW_THREADS |
|
154 | Py_BEGIN_ALLOW_THREADS | |
162 | if (self->compressor->mtcctx) { |
|
155 | zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output, | |
163 | zresult = ZSTDMT_compressStream(self->compressor->mtcctx, &self->output, |
|
156 | &self->input, ZSTD_e_continue); | |
164 | &self->input); |
|
|||
165 | } |
|
|||
166 | else { |
|
|||
167 | zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, &self->input); |
|
|||
168 | } |
|
|||
169 | Py_END_ALLOW_THREADS |
|
157 | Py_END_ALLOW_THREADS | |
170 |
|
158 | |||
171 | /* The input buffer currently points to memory managed by Python |
|
159 | /* The input buffer currently points to memory managed by Python |
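The iterator's feed loop above is the streaming counterpart of the one-shot call shown earlier: feed chunks with ZSTD_e_continue, then finish with ZSTD_e_end once the reader is exhausted. A compact sketch of that loop (read_more() is a hypothetical input source; error reporting trimmed):

    /* Sketch: streaming compression with the generic API. */
    static int stream_compress(ZSTD_CCtx* cctx, FILE* out) {
        char inbuf[16384];
        char outbuf[16384];
        size_t n;

        while ((n = read_more(inbuf, sizeof(inbuf))) > 0) {   /* hypothetical */
            ZSTD_inBuffer in = { inbuf, n, 0 };
            while (in.pos < in.size) {
                ZSTD_outBuffer o = { outbuf, sizeof(outbuf), 0 };
                size_t zresult = ZSTD_compress_generic(cctx, &o, &in, ZSTD_e_continue);
                if (ZSTD_isError(zresult)) return -1;
                fwrite(outbuf, 1, o.pos, out);
            }
        }

        /* End of input: flush remaining data and close the frame. */
        for (;;) {
            ZSTD_inBuffer in = { NULL, 0, 0 };
            ZSTD_outBuffer o = { outbuf, sizeof(outbuf), 0 };
            size_t zresult = ZSTD_compress_generic(cctx, &o, &in, ZSTD_e_end);
            if (ZSTD_isError(zresult)) return -1;
            fwrite(outbuf, 1, o.pos, out);
            if (0 == zresult) break;   /* frame fully flushed */
        }
        return 0;
    }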
@@ -52,6 +52,11 b' void constants_module_init(PyObject* mod' | |||||
52 | PyErr_Format(PyExc_ValueError, "could not create frame header object"); |
|
52 | PyErr_Format(PyExc_ValueError, "could not create frame header object"); | |
53 | } |
|
53 | } | |
54 |
|
54 | |||
|
55 | PyModule_AddObject(mod, "CONTENTSIZE_UNKNOWN", | |||
|
56 | PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_UNKNOWN)); | |||
|
57 | PyModule_AddObject(mod, "CONTENTSIZE_ERROR", | |||
|
58 | PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_ERROR)); | |||
|
59 | ||||
55 | PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel()); |
|
60 | PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel()); | |
56 | PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE", |
|
61 | PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE", | |
57 | (long)ZSTD_CStreamInSize()); |
|
62 | (long)ZSTD_CStreamInSize()); | |
@@ -75,7 +80,9 b' void constants_module_init(PyObject* mod' | |||||
75 | PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN); |
|
80 | PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN); | |
76 | PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX); |
|
81 | PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX); | |
77 | PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN); |
|
82 | PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN); | |
78 |
PyModule_AddIntConstant(mod, " |
|
83 | PyModule_AddIntConstant(mod, "LDM_MINMATCH_MIN", ZSTD_LDM_MINMATCH_MIN); | |
|
84 | PyModule_AddIntConstant(mod, "LDM_MINMATCH_MAX", ZSTD_LDM_MINMATCH_MAX); | |||
|
85 | PyModule_AddIntConstant(mod, "LDM_BUCKETSIZELOG_MAX", ZSTD_LDM_BUCKETSIZELOG_MAX); | |||
79 |
|
86 | |||
80 | PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast); |
|
87 | PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast); | |
81 | PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast); |
|
88 | PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast); | |
@@ -84,4 +91,12 b' void constants_module_init(PyObject* mod' | |||||
84 | PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2); |
|
91 | PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2); | |
85 | PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2); |
|
92 | PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2); | |
86 | PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt); |
|
93 | PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt); | |
|
94 | PyModule_AddIntConstant(mod, "STRATEGY_BTULTRA", ZSTD_btultra); | |||
|
95 | ||||
|
96 | PyModule_AddIntConstant(mod, "DICT_TYPE_AUTO", ZSTD_dct_auto); | |||
|
97 | PyModule_AddIntConstant(mod, "DICT_TYPE_RAWCONTENT", ZSTD_dct_rawContent); | |||
|
98 | PyModule_AddIntConstant(mod, "DICT_TYPE_FULLDICT", ZSTD_dct_fullDict); | |||
|
99 | ||||
|
100 | PyModule_AddIntConstant(mod, "FORMAT_ZSTD1", ZSTD_f_zstd1); | |||
|
101 | PyModule_AddIntConstant(mod, "FORMAT_ZSTD1_MAGICLESS", ZSTD_f_zstd1_magicless); | |||
87 | } |
|
102 | } |
@@ -27,7 +27,7 b' static PyObject* ZstdDecompressionWriter' | |||||
27 | return NULL; |
|
27 | return NULL; | |
28 | } |
|
28 | } | |
29 |
|
29 | |||
30 |
if ( |
|
30 | if (ensure_dctx(self->decompressor, 1)) { | |
31 | return NULL; |
|
31 | return NULL; | |
32 | } |
|
32 | } | |
33 |
|
33 | |||
@@ -44,18 +44,17 b' static PyObject* ZstdDecompressionWriter' | |||||
44 | } |
|
44 | } | |
45 |
|
45 | |||
46 | static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) { |
|
46 | static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) { | |
47 | if (!self->decompressor->dstream) { |
|
47 | return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->decompressor->dctx)); | |
48 | PyErr_SetString(ZstdError, "cannot determine size of inactive decompressor; " |
|
|||
49 | "call when context manager is active"); |
|
|||
50 | return NULL; |
|
|||
51 | } |
|
|||
52 |
|
||||
53 | return PyLong_FromSize_t(ZSTD_sizeof_DStream(self->decompressor->dstream)); |
|
|||
54 | } |
|
48 | } | |
55 |
|
49 | |||
56 | static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args) { |
|
50 | static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args, PyObject* kwargs) { | |
57 | const char* source; |
|
51 | static char* kwlist[] = { | |
58 | Py_ssize_t sourceSize; |
|
52 | "data", | |
|
53 | NULL | |||
|
54 | }; | |||
|
55 | ||||
|
56 | PyObject* result = NULL; | |||
|
57 | Py_buffer source; | |||
59 | size_t zresult = 0; |
|
58 | size_t zresult = 0; | |
60 | ZSTD_inBuffer input; |
|
59 | ZSTD_inBuffer input; | |
61 | ZSTD_outBuffer output; |
|
60 | ZSTD_outBuffer output; | |
@@ -63,41 +62,47 b' static PyObject* ZstdDecompressionWriter' | |||||
63 | Py_ssize_t totalWrite = 0; |
|
62 | Py_ssize_t totalWrite = 0; | |
64 |
|
63 | |||
65 | #if PY_MAJOR_VERSION >= 3 |
|
64 | #if PY_MAJOR_VERSION >= 3 | |
66 |
if (!PyArg_ParseTuple(args, "y |
|
65 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write", | |
67 | #else |
|
66 | #else | |
68 |
if (!PyArg_ParseTuple(args, "s |
|
67 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write", | |
69 | #endif |
|
68 | #endif | |
|
69 | kwlist, &source)) { | |||
70 | return NULL; |
|
70 | return NULL; | |
71 | } |
|
71 | } | |
72 |
|
72 | |||
|
73 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |||
|
74 | PyErr_SetString(PyExc_ValueError, | |||
|
75 | "data buffer should be contiguous and have at most one dimension"); | |||
|
76 | goto finally; | |||
|
77 | } | |||
|
78 | ||||
73 | if (!self->entered) { |
|
79 | if (!self->entered) { | |
74 | PyErr_SetString(ZstdError, "write must be called from an active context manager"); |
|
80 | PyErr_SetString(ZstdError, "write must be called from an active context manager"); | |
75 | return NULL; |
|
81 | goto finally; | |
76 | } |
|
82 | } | |
77 |
|
83 | |||
78 | assert(self->decompressor->dstream); |
|
|||
79 |
|
||||
80 | output.dst = PyMem_Malloc(self->outSize); |
|
84 | output.dst = PyMem_Malloc(self->outSize); | |
81 | if (!output.dst) { |
|
85 | if (!output.dst) { | |
82 |
|
|
86 | PyErr_NoMemory(); | |
|
87 | goto finally; | |||
83 | } |
|
88 | } | |
84 | output.size = self->outSize; |
|
89 | output.size = self->outSize; | |
85 | output.pos = 0; |
|
90 | output.pos = 0; | |
86 |
|
91 | |||
87 | input.src = source; |
|
92 | input.src = source.buf; | |
88 |
input.size = source |
|
93 | input.size = source.len; | |
89 | input.pos = 0; |
|
94 | input.pos = 0; | |
90 |
|
95 | |||
91 |
while ((ssize_t)input.pos < source |
|
96 | while ((ssize_t)input.pos < source.len) { | |
92 | Py_BEGIN_ALLOW_THREADS |
|
97 | Py_BEGIN_ALLOW_THREADS | |
93 |
zresult = ZSTD_decompress |
|
98 | zresult = ZSTD_decompress_generic(self->decompressor->dctx, &output, &input); | |
94 | Py_END_ALLOW_THREADS |
|
99 | Py_END_ALLOW_THREADS | |
95 |
|
100 | |||
96 | if (ZSTD_isError(zresult)) { |
|
101 | if (ZSTD_isError(zresult)) { | |
97 | PyMem_Free(output.dst); |
|
102 | PyMem_Free(output.dst); | |
98 | PyErr_Format(ZstdError, "zstd decompress error: %s", |
|
103 | PyErr_Format(ZstdError, "zstd decompress error: %s", | |
99 | ZSTD_getErrorName(zresult)); |
|
104 | ZSTD_getErrorName(zresult)); | |
100 | return NULL; |
|
105 | goto finally; | |
101 | } |
|
106 | } | |
102 |
|
107 | |||
103 | if (output.pos) { |
|
108 | if (output.pos) { | |
@@ -115,7 +120,11 b' static PyObject* ZstdDecompressionWriter' | |||||
115 |
|
120 | |||
116 | PyMem_Free(output.dst); |
|
121 | PyMem_Free(output.dst); | |
117 |
|
122 | |||
118 |
re |
|
123 | result = PyLong_FromSsize_t(totalWrite); | |
|
124 | ||||
|
125 | finally: | |||
|
126 | PyBuffer_Release(&source); | |||
|
127 | return result; | |||
119 | } |
|
128 | } | |
120 |
|
129 | |||
121 | static PyMethodDef ZstdDecompressionWriter_methods[] = { |
|
130 | static PyMethodDef ZstdDecompressionWriter_methods[] = { | |
@@ -125,7 +134,7 b' static PyMethodDef ZstdDecompressionWrit' | |||||
125 | PyDoc_STR("Exit a decompression context.") }, |
|
134 | PyDoc_STR("Exit a decompression context.") }, | |
126 | { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS, |
|
135 | { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS, | |
127 | PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") }, |
|
136 | PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") }, | |
128 | { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS, |
|
137 | { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS | METH_KEYWORDS, | |
129 | PyDoc_STR("Compress data") }, |
|
138 | PyDoc_STR("Compress data") }, | |
130 | { NULL, NULL } |
|
139 | { NULL, NULL } | |
131 | }; |
|
140 | }; |
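The write() conversion above is the general recipe this series applies throughout: switch argument parsing from "y#"/"s#" (pointer plus length) to "y*"/"s*", which fills a Py_buffer that must be released on every exit path. In isolation (function name is illustrative):

    /* Sketch: accepting a bytes-like argument via a Py_buffer. */
    static PyObject* example_write(PyObject* self, PyObject* args, PyObject* kwargs) {
        static char* kwlist[] = { "data", NULL };
        PyObject* result = NULL;
        Py_buffer source;

    #if PY_MAJOR_VERSION >= 3
        if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write", kwlist, &source)) {
    #else
        if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write", kwlist, &source)) {
    #endif
            return NULL;   /* buffer was never filled; nothing to release */
        }

        if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
            PyErr_SetString(PyExc_ValueError,
                "data buffer should be contiguous and have at most one dimension");
            goto finally;
        }

        /* ... use source.buf / source.len ... */
        result = PyLong_FromSsize_t(source.len);

    finally:
        PyBuffer_Release(&source);
        return result;
    }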
@@ -20,56 +20,61 b' static void DecompressionObj_dealloc(Zst' | |||||
20 | PyObject_Del(self); |
|
20 | PyObject_Del(self); | |
21 | } |
|
21 | } | |
22 |
|
22 | |||
23 | static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args) { |
|
23 | static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args, PyObject* kwargs) { | |
24 | const char* source; |
|
24 | static char* kwlist[] = { | |
25 | Py_ssize_t sourceSize; |
|
25 | "data", | |
|
26 | NULL | |||
|
27 | }; | |||
|
28 | ||||
|
29 | Py_buffer source; | |||
26 | size_t zresult; |
|
30 | size_t zresult; | |
27 | ZSTD_inBuffer input; |
|
31 | ZSTD_inBuffer input; | |
28 | ZSTD_outBuffer output; |
|
32 | ZSTD_outBuffer output; | |
29 | size_t outSize = ZSTD_DStreamOutSize(); |
|
|||
30 | PyObject* result = NULL; |
|
33 | PyObject* result = NULL; | |
31 | Py_ssize_t resultSize = 0; |
|
34 | Py_ssize_t resultSize = 0; | |
32 |
|
35 | |||
33 | /* Constructor should ensure stream is populated. */ |
|
|||
34 | assert(self->decompressor->dstream); |
|
|||
35 |
|
||||
36 | if (self->finished) { |
|
36 | if (self->finished) { | |
37 | PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times"); |
|
37 | PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times"); | |
38 | return NULL; |
|
38 | return NULL; | |
39 | } |
|
39 | } | |
40 |
|
40 | |||
41 | #if PY_MAJOR_VERSION >= 3 |
|
41 | #if PY_MAJOR_VERSION >= 3 | |
42 |
if (!PyArg_ParseTuple(args, "y |
|
42 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:decompress", | |
43 | #else |
|
43 | #else | |
44 |
if (!PyArg_ParseTuple(args, "s |
|
44 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:decompress", | |
45 | #endif |
|
45 | #endif | |
46 |
|
|
46 | kwlist, &source)) { | |
47 | return NULL; |
|
47 | return NULL; | |
48 | } |
|
48 | } | |
49 |
|
49 | |||
50 | input.src = source; |
|
50 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |
51 | input.size = sourceSize; |
|
51 | PyErr_SetString(PyExc_ValueError, | |
|
52 | "data buffer should be contiguous and have at most one dimension"); | |||
|
53 | goto finally; | |||
|
54 | } | |||
|
55 | ||||
|
56 | input.src = source.buf; | |||
|
57 | input.size = source.len; | |||
52 | input.pos = 0; |
|
58 | input.pos = 0; | |
53 |
|
59 | |||
54 | output.dst = PyMem_Malloc(outSize); |
|
60 | output.dst = PyMem_Malloc(self->outSize); | |
55 | if (!output.dst) { |
|
61 | if (!output.dst) { | |
56 | PyErr_NoMemory(); |
|
62 | PyErr_NoMemory(); | |
57 | return NULL; |
|
63 | goto except; | |
58 | } |
|
64 | } | |
59 | output.size = outSize; |
|
65 | output.size = self->outSize; | |
60 | output.pos = 0; |
|
66 | output.pos = 0; | |
61 |
|
67 | |||
62 | /* Read input until exhausted. */ |
|
68 | /* Read input until exhausted. */ | |
63 | while (input.pos < input.size) { |
|
69 | while (input.pos < input.size) { | |
64 | Py_BEGIN_ALLOW_THREADS |
|
70 | Py_BEGIN_ALLOW_THREADS | |
65 |
zresult = ZSTD_decompress |
|
71 | zresult = ZSTD_decompress_generic(self->decompressor->dctx, &output, &input); | |
66 | Py_END_ALLOW_THREADS |
|
72 | Py_END_ALLOW_THREADS | |
67 |
|
73 | |||
68 | if (ZSTD_isError(zresult)) { |
|
74 | if (ZSTD_isError(zresult)) { | |
69 | PyErr_Format(ZstdError, "zstd decompressor error: %s", |
|
75 | PyErr_Format(ZstdError, "zstd decompressor error: %s", | |
70 | ZSTD_getErrorName(zresult)); |
|
76 | ZSTD_getErrorName(zresult)); | |
71 | result = NULL; |
|
77 | goto except; | |
72 | goto finally; |
|
|||
73 | } |
|
78 | } | |
74 |
|
79 | |||
75 | if (0 == zresult) { |
|
80 | if (0 == zresult) { | |
@@ -79,7 +84,8 b' static PyObject* DecompressionObj_decomp' | |||||
79 | if (output.pos) { |
|
84 | if (output.pos) { | |
80 | if (result) { |
|
85 | if (result) { | |
81 | resultSize = PyBytes_GET_SIZE(result); |
|
86 | resultSize = PyBytes_GET_SIZE(result); | |
82 |
if (-1 == |
|
87 | if (-1 == safe_pybytes_resize(&result, resultSize + output.pos)) { | |
|
88 | Py_XDECREF(result); | |||
83 | goto except; |
|
89 | goto except; | |
84 | } |
|
90 | } | |
85 |
|
91 | |||
@@ -108,13 +114,14 b' except:' | |||||
108 |
|
114 | |||
109 | finally: |
|
115 | finally: | |
110 | PyMem_Free(output.dst); |
|
116 | PyMem_Free(output.dst); | |
|
117 | PyBuffer_Release(&source); | |||
111 |
|
118 | |||
112 | return result; |
|
119 | return result; | |
113 | } |
|
120 | } | |
114 |
|
121 | |||
115 | static PyMethodDef DecompressionObj_methods[] = { |
|
122 | static PyMethodDef DecompressionObj_methods[] = { | |
116 | { "decompress", (PyCFunction)DecompressionObj_decompress, |
|
123 | { "decompress", (PyCFunction)DecompressionObj_decompress, | |
117 | METH_VARARGS, PyDoc_STR("decompress data") }, |
|
124 | METH_VARARGS | METH_KEYWORDS, PyDoc_STR("decompress data") }, | |
118 | { NULL, NULL } |
|
125 | { NULL, NULL } | |
119 | }; |
|
126 | }; | |
120 |
|
127 |
@@ -12,54 +12,40 @@
 extern PyObject* ZstdError;
 
 /**
-* Ensure the ZSTD_DStream on a ZstdDecompressor is initialized and reset.
-*
-* This should be called before starting a decompression operation with a
-* ZSTD_DStream on a ZstdDecompressor.
-*/
-int init_dstream(ZstdDecompressor* decompressor) {
-    void* dictData = NULL;
-    size_t dictSize = 0;
+* Ensure the ZSTD_DCtx on a decompressor is initiated and ready for a new operation.
+*/
+int ensure_dctx(ZstdDecompressor* decompressor, int loadDict) {
     size_t zresult;
 
-    /* Simple case of dstream already exists. Just reset it. */
-    if (decompressor->dstream) {
-        zresult = ZSTD_resetDStream(decompressor->dstream);
+    ZSTD_DCtx_reset(decompressor->dctx);
+
+    if (decompressor->maxWindowSize) {
+        zresult = ZSTD_DCtx_setMaxWindowSize(decompressor->dctx, decompressor->maxWindowSize);
         if (ZSTD_isError(zresult)) {
-            PyErr_Format(ZstdError, "could not reset DStream: %s",
+            PyErr_Format(ZstdError, "unable to set max window size: %s",
                 ZSTD_getErrorName(zresult));
-            return -1;
+            return 1;
         }
-
-        return 0;
     }
 
-    decompressor->dstream = ZSTD_createDStream();
-    if (!decompressor->dstream) {
-        PyErr_SetString(ZstdError, "could not create DStream");
-        return -1;
-    }
-
-    if (decompressor->dict) {
-        dictData = decompressor->dict->dictData;
-        dictSize = decompressor->dict->dictSize;
+    zresult = ZSTD_DCtx_setFormat(decompressor->dctx, decompressor->format);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "unable to set decoding format: %s",
+            ZSTD_getErrorName(zresult));
+        return 1;
     }
 
-    if (dictData) {
-        zresult = ZSTD_initDStream_usingDict(decompressor->dstream, dictData, dictSize);
-    }
-    else {
-        zresult = ZSTD_initDStream(decompressor->dstream);
-    }
-
-    if (ZSTD_isError(zresult)) {
-        /* Don't leave a reference to an invalid object. */
-        ZSTD_freeDStream(decompressor->dstream);
-        decompressor->dstream = NULL;
-
-        PyErr_Format(ZstdError, "could not initialize DStream: %s",
-            ZSTD_getErrorName(zresult));
-        return -1;
+    if (loadDict && decompressor->dict) {
+        if (ensure_ddict(decompressor->dict)) {
+            return 1;
+        }
+
+        zresult = ZSTD_DCtx_refDDict(decompressor->dctx, decompressor->dict->ddict);
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
+                ZSTD_getErrorName(zresult));
+            return 1;
+        }
     }
 
     return 0;
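ensure_dctx() above is also where the new max_window_size knob is applied. Limiting the window guards against frames that declare a very large window and would otherwise force a correspondingly large allocation during decompression. A minimal sketch of configuring a context that way (helper name is illustrative):

    /* Sketch: cap the decompression window before processing untrusted frames. */
    static ZSTD_DCtx* make_limited_dctx(size_t maxWindowSize) {
        ZSTD_DCtx* dctx = ZSTD_createDCtx();
        if (NULL == dctx) {
            return NULL;
        }

        if (ZSTD_isError(ZSTD_DCtx_setMaxWindowSize(dctx, maxWindowSize))) {
            ZSTD_freeDCtx(dctx);
            return NULL;
        }

        return dctx;
    }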
@@ -76,36 +62,46 b' PyDoc_STRVAR(Decompressor__doc__,' | |||||
76 | static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { |
|
62 | static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |
77 | static char* kwlist[] = { |
|
63 | static char* kwlist[] = { | |
78 | "dict_data", |
|
64 | "dict_data", | |
|
65 | "max_window_size", | |||
|
66 | "format", | |||
79 | NULL |
|
67 | NULL | |
80 | }; |
|
68 | }; | |
81 |
|
69 | |||
82 | ZstdCompressionDict* dict = NULL; |
|
70 | ZstdCompressionDict* dict = NULL; | |
|
71 | size_t maxWindowSize = 0; | |||
|
72 | ZSTD_format_e format = ZSTD_f_zstd1; | |||
83 |
|
73 | |||
84 | self->dctx = NULL; |
|
74 | self->dctx = NULL; | |
85 | self->dict = NULL; |
|
75 | self->dict = NULL; | |
86 | self->ddict = NULL; |
|
|||
87 |
|
76 | |||
88 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist, |
|
77 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!II:ZstdDecompressor", kwlist, | |
89 | &ZstdCompressionDictType, &dict)) { |
|
78 | &ZstdCompressionDictType, &dict, &maxWindowSize, &format)) { | |
90 | return -1; |
|
79 | return -1; | |
91 | } |
|
80 | } | |
92 |
|
81 | |||
93 | /* TODO lazily initialize the reference ZSTD_DCtx on first use since |
|
|||
94 | not instances of ZstdDecompressor will use a ZSTD_DCtx. */ |
|
|||
95 | self->dctx = ZSTD_createDCtx(); |
|
82 | self->dctx = ZSTD_createDCtx(); | |
96 | if (!self->dctx) { |
|
83 | if (!self->dctx) { | |
97 | PyErr_NoMemory(); |
|
84 | PyErr_NoMemory(); | |
98 | goto except; |
|
85 | goto except; | |
99 | } |
|
86 | } | |
100 |
|
87 | |||
|
88 | self->maxWindowSize = maxWindowSize; | |||
|
89 | self->format = format; | |||
|
90 | ||||
101 | if (dict) { |
|
91 | if (dict) { | |
102 | self->dict = dict; |
|
92 | self->dict = dict; | |
103 | Py_INCREF(dict); |
|
93 | Py_INCREF(dict); | |
104 | } |
|
94 | } | |
105 |
|
95 | |||
|
96 | if (ensure_dctx(self, 1)) { | |||
|
97 | goto except; | |||
|
98 | } | |||
|
99 | ||||
106 | return 0; |
|
100 | return 0; | |
107 |
|
101 | |||
108 | except: |
|
102 | except: | |
|
103 | Py_CLEAR(self->dict); | |||
|
104 | ||||
109 | if (self->dctx) { |
|
105 | if (self->dctx) { | |
110 | ZSTD_freeDCtx(self->dctx); |
|
106 | ZSTD_freeDCtx(self->dctx); | |
111 | self->dctx = NULL; |
|
107 | self->dctx = NULL; | |
@@ -117,16 +113,6 b' except:' | |||||
117 | static void Decompressor_dealloc(ZstdDecompressor* self) { |
|
113 | static void Decompressor_dealloc(ZstdDecompressor* self) { | |
118 | Py_CLEAR(self->dict); |
|
114 | Py_CLEAR(self->dict); | |
119 |
|
115 | |||
120 | if (self->ddict) { |
|
|||
121 | ZSTD_freeDDict(self->ddict); |
|
|||
122 | self->ddict = NULL; |
|
|||
123 | } |
|
|||
124 |
|
||||
125 | if (self->dstream) { |
|
|||
126 | ZSTD_freeDStream(self->dstream); |
|
|||
127 | self->dstream = NULL; |
|
|||
128 | } |
|
|||
129 |
|
||||
130 | if (self->dctx) { |
|
116 | if (self->dctx) { | |
131 | ZSTD_freeDCtx(self->dctx); |
|
117 | ZSTD_freeDCtx(self->dctx); | |
132 | self->dctx = NULL; |
|
118 | self->dctx = NULL; | |
@@ -135,6 +121,20 b' static void Decompressor_dealloc(ZstdDec' | |||||
135 | PyObject_Del(self); |
|
121 | PyObject_Del(self); | |
136 | } |
|
122 | } | |
137 |
|
123 | |||
|
124 | PyDoc_STRVAR(Decompressor_memory_size__doc__, | |||
|
125 | "memory_size() -- Size of decompression context, in bytes\n" | |||
|
126 | ); | |||
|
127 | ||||
|
128 | static PyObject* Decompressor_memory_size(ZstdDecompressor* self) { | |||
|
129 | if (self->dctx) { | |||
|
130 | return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->dctx)); | |||
|
131 | } | |||
|
132 | else { | |||
|
133 | PyErr_SetString(ZstdError, "no decompressor context found; this should never happen"); | |||
|
134 | return NULL; | |||
|
135 | } | |||
|
136 | } | |||
|
137 | ||||
138 | PyDoc_STRVAR(Decompressor_copy_stream__doc__, |
|
138 | PyDoc_STRVAR(Decompressor_copy_stream__doc__, | |
139 | "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n" |
|
139 | "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n" | |
140 | "\n" |
|
140 | "\n" | |
@@ -166,7 +166,7 b' static PyObject* Decompressor_copy_strea' | |||||
166 | Py_ssize_t totalWrite = 0; |
|
166 | Py_ssize_t totalWrite = 0; | |
167 | char* readBuffer; |
|
167 | char* readBuffer; | |
168 | Py_ssize_t readSize; |
|
168 | Py_ssize_t readSize; | |
169 | PyObject* readResult; |
|
169 | PyObject* readResult = NULL; | |
170 | PyObject* res = NULL; |
|
170 | PyObject* res = NULL; | |
171 | size_t zresult = 0; |
|
171 | size_t zresult = 0; | |
172 | PyObject* writeResult; |
|
172 | PyObject* writeResult; | |
@@ -191,7 +191,7 b' static PyObject* Decompressor_copy_strea' | |||||
191 | /* Prevent free on uninitialized memory in finally. */ |
|
191 | /* Prevent free on uninitialized memory in finally. */ | |
192 | output.dst = NULL; |
|
192 | output.dst = NULL; | |
193 |
|
193 | |||
194 | if ( |
|
194 | if (ensure_dctx(self, 1)) { | |
195 | res = NULL; |
|
195 | res = NULL; | |
196 | goto finally; |
|
196 | goto finally; | |
197 | } |
|
197 | } | |
@@ -229,7 +229,7 b' static PyObject* Decompressor_copy_strea' | |||||
229 |
|
229 | |||
230 | while (input.pos < input.size) { |
|
230 | while (input.pos < input.size) { | |
231 | Py_BEGIN_ALLOW_THREADS |
|
231 | Py_BEGIN_ALLOW_THREADS | |
232 | zresult = ZSTD_decompress |
|
232 | zresult = ZSTD_decompress_generic(self->dctx, &output, &input); | |
233 | Py_END_ALLOW_THREADS |
|
233 | Py_END_ALLOW_THREADS | |
234 |
|
234 | |||
235 | if (ZSTD_isError(zresult)) { |
|
235 | if (ZSTD_isError(zresult)) { | |
@@ -252,6 +252,8 b' static PyObject* Decompressor_copy_strea' | |||||
252 | output.pos = 0; |
|
252 | output.pos = 0; | |
253 | } |
|
253 | } | |
254 | } |
|
254 | } | |
|
255 | ||||
|
256 | Py_CLEAR(readResult); | |||
255 | } |
|
257 | } | |
256 |
|
258 | |||
257 | /* Source stream is exhausted. Finish up. */ |
|
259 | /* Source stream is exhausted. Finish up. */ | |
@@ -267,6 +269,8 b' finally:' | |||||
267 | PyMem_Free(output.dst); |
|
269 | PyMem_Free(output.dst); | |
268 | } |
|
270 | } | |
269 |
|
271 | |||
|
272 | Py_XDECREF(readResult); | |||
|
273 | ||||
270 | return res; |
|
274 | return res; | |
271 | } |
|
275 | } | |
272 |
|
276 | |||
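copy_stream() above now initializes the context through ensure_dctx(), decompresses with ZSTD_decompress_generic(), and releases each read result as soon as it is consumed. A usage sketch for the corresponding Python API, with in-memory streams standing in for file objects:

    import io
    import zstandard as zstd

    compressed = io.BytesIO(zstd.ZstdCompressor().compress(b"data " * 1000))
    plain = io.BytesIO()

    dctx = zstd.ZstdDecompressor()
    # Returns a (bytes_read, bytes_written) tuple.
    read_count, write_count = dctx.copy_stream(compressed, plain)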
@@ -300,98 +304,114 b' PyObject* Decompressor_decompress(ZstdDe' | |||||
300 | NULL |
|
304 | NULL | |
301 | }; |
|
305 | }; | |
302 |
|
306 | |||
303 | const char* source; |
|
307 | Py_buffer source; | |
304 | Py_ssize_t sourceSize; |
|
|||
305 | Py_ssize_t maxOutputSize = 0; |
|
308 | Py_ssize_t maxOutputSize = 0; | |
306 | unsigned long long decompressedSize; |
|
309 | unsigned long long decompressedSize; | |
307 | size_t destCapacity; |
|
310 | size_t destCapacity; | |
308 | PyObject* result = NULL; |
|
311 | PyObject* result = NULL; | |
309 | void* dictData = NULL; |
|
|||
310 | size_t dictSize = 0; |
|
|||
311 | size_t zresult; |
|
312 | size_t zresult; | |
|
313 | ZSTD_outBuffer outBuffer; | |||
|
314 | ZSTD_inBuffer inBuffer; | |||
312 |
|
315 | |||
313 | #if PY_MAJOR_VERSION >= 3 |
|
316 | #if PY_MAJOR_VERSION >= 3 | |
314 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y |
|
317 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|n:decompress", | |
315 | #else |
|
318 | #else | |
316 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s |
|
319 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|n:decompress", | |
317 | #endif |
|
320 | #endif | |
318 | kwlist, &source, & |
|
321 | kwlist, &source, &maxOutputSize)) { | |
319 | return NULL; |
|
322 | return NULL; | |
320 | } |
|
323 | } | |
321 |
|
324 | |||
322 | if (self->dict) { |
|
325 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |
323 | dictData = self->dict->dictData; |
|
326 | PyErr_SetString(PyExc_ValueError, | |
324 | dictSize = self->dict->dictSize; |
|
327 | "data buffer should be contiguous and have at most one dimension"); | |
|
328 | goto finally; | |||
325 | } |
|
329 | } | |
326 |
|
330 | |||
327 | if (dictData && !self->ddict) { |
|
331 | if (ensure_dctx(self, 1)) { | |
328 | Py_BEGIN_ALLOW_THREADS |
|
332 | goto finally; | |
329 | self->ddict = ZSTD_createDDict_byReference(dictData, dictSize); |
|
|||
330 | Py_END_ALLOW_THREADS |
|
|||
331 |
|
||||
332 | if (!self->ddict) { |
|
|||
333 | PyErr_SetString(ZstdError, "could not create decompression dict"); |
|
|||
334 | return NULL; |
|
|||
335 | } |
|
|||
336 | } |
|
333 | } | |
337 |
|
334 | |||
338 |
decompressedSize = ZSTD_get |
|
335 | decompressedSize = ZSTD_getFrameContentSize(source.buf, source.len); | |
339 | /* 0 returned if content size not in the zstd frame header */ |
|
336 | ||
340 | if ( |
|
337 | if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) { | |
|
338 | PyErr_SetString(ZstdError, "error determining content size from frame header"); | |||
|
339 | goto finally; | |||
|
340 | } | |||
|
341 | /* Special case of empty frame. */ | |||
|
342 | else if (0 == decompressedSize) { | |||
|
343 | result = PyBytes_FromStringAndSize("", 0); | |||
|
344 | goto finally; | |||
|
345 | } | |||
|
346 | /* Missing content size in frame header. */ | |||
|
347 | if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) { | |||
341 | if (0 == maxOutputSize) { |
|
348 | if (0 == maxOutputSize) { | |
342 | PyErr_SetString(ZstdError, " |
|
349 | PyErr_SetString(ZstdError, "could not determine content size in frame header"); | |
343 | "in frame header"); |
|
350 | goto finally; | |
344 | return NULL; |
|
|||
345 | } |
|
351 | } | |
346 | else { |
|
352 | ||
347 |
|
|
353 | result = PyBytes_FromStringAndSize(NULL, maxOutputSize); | |
348 |
|
|
354 | destCapacity = maxOutputSize; | |
|
355 | decompressedSize = 0; | |||
|
356 | } | |||
|
357 | /* Size is recorded in frame header. */ | |||
|
358 | else { | |||
|
359 | assert(SIZE_MAX >= PY_SSIZE_T_MAX); | |||
|
360 | if (decompressedSize > PY_SSIZE_T_MAX) { | |||
|
361 | PyErr_SetString(ZstdError, "frame is too large to decompress on this platform"); | |||
|
362 | goto finally; | |||
349 | } |
|
363 | } | |
350 | } |
|
364 | ||
351 | else { |
|
365 | result = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)decompressedSize); | |
352 | result = PyBytes_FromStringAndSize(NULL, decompressedSize); |
|
366 | destCapacity = (size_t)decompressedSize; | |
353 | destCapacity = decompressedSize; |
|
|||
354 | } |
|
367 | } | |
355 |
|
368 | |||
356 | if (!result) { |
|
369 | if (!result) { | |
357 | return NULL; |
|
370 | goto finally; | |
358 | } |
|
371 | } | |
359 |
|
372 | |||
|
373 | outBuffer.dst = PyBytes_AsString(result); | |||
|
374 | outBuffer.size = destCapacity; | |||
|
375 | outBuffer.pos = 0; | |||
|
376 | ||||
|
377 | inBuffer.src = source.buf; | |||
|
378 | inBuffer.size = source.len; | |||
|
379 | inBuffer.pos = 0; | |||
|
380 | ||||
360 | Py_BEGIN_ALLOW_THREADS |
|
381 | Py_BEGIN_ALLOW_THREADS | |
361 | if (self->ddict) { |
|
382 | zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer); | |
362 | zresult = ZSTD_decompress_usingDDict(self->dctx, |
|
|||
363 | PyBytes_AsString(result), destCapacity, |
|
|||
364 | source, sourceSize, self->ddict); |
|
|||
365 | } |
|
|||
366 | else { |
|
|||
367 | zresult = ZSTD_decompressDCtx(self->dctx, |
|
|||
368 | PyBytes_AsString(result), destCapacity, source, sourceSize); |
|
|||
369 | } |
|
|||
370 | Py_END_ALLOW_THREADS |
|
383 | Py_END_ALLOW_THREADS | |
371 |
|
384 | |||
372 | if (ZSTD_isError(zresult)) { |
|
385 | if (ZSTD_isError(zresult)) { | |
373 | PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult)); |
|
386 | PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult)); | |
374 | Py_ |
|
387 | Py_CLEAR(result); | |
375 | return NULL; |
|
388 | goto finally; | |
376 | } |
|
389 | } | |
377 | else if (decompressedSize && zresult != decompressedSize) { |
|
390 | else if (zresult) { | |
|
391 | PyErr_Format(ZstdError, "decompression error: did not decompress full frame"); | |||
|
392 | Py_CLEAR(result); | |||
|
393 | goto finally; | |||
|
394 | } | |||
|
395 | else if (decompressedSize && outBuffer.pos != decompressedSize) { | |||
378 | PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu", |
|
396 | PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu", | |
379 | zresult, decompressedSize); |
|
397 | zresult, decompressedSize); | |
380 | Py_ |
|
398 | Py_CLEAR(result); | |
381 | return NULL; |
|
399 | goto finally; | |
382 | } |
|
400 | } | |
383 | else if ( |
|
401 | else if (outBuffer.pos < destCapacity) { | |
384 | if ( |
|
402 | if (safe_pybytes_resize(&result, outBuffer.pos)) { | |
385 | Py_ |
|
403 | Py_CLEAR(result); | |
386 | return NULL; |
|
404 | goto finally; | |
387 | } |
|
405 | } | |
388 | } |
|
406 | } | |
389 |
|
407 | |||
|
408 | finally: | |||
|
409 | PyBuffer_Release(&source); | |||
390 | return result; |
|
410 | return result; | |
391 | } |
|
411 | } | |
392 |
|
412 | |||
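The reworked decompress() dispatches on ZSTD_getFrameContentSize(): a header error raises, an empty frame returns b'' immediately, an unknown content size requires max_output_size, and a recorded size drives a single exact-sized allocation checked against outBuffer.pos. A sketch of how that behaves from Python; whether a frame records its content size depends on how it was produced, so write_content_size is passed explicitly here:

    import zstandard as zstd

    data = b"x" * 10000
    with_size = zstd.ZstdCompressor(write_content_size=True).compress(data)
    without_size = zstd.ZstdCompressor(write_content_size=False).compress(data)

    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress(with_size) == data
    # Frames lacking an embedded content size need an explicit output bound.
    assert dctx.decompress(without_size, max_output_size=len(data)) == data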
393 | PyDoc_STRVAR(Decompressor_decompressobj__doc__, |
|
413 | PyDoc_STRVAR(Decompressor_decompressobj__doc__, | |
394 | "decompressobj()\n" |
|
414 | "decompressobj([write_size=default])\n" | |
395 | "\n" |
|
415 | "\n" | |
396 | "Incrementally feed data into a decompressor.\n" |
|
416 | "Incrementally feed data into a decompressor.\n" | |
397 | "\n" |
|
417 | "\n" | |
@@ -400,25 +420,43 b' PyDoc_STRVAR(Decompressor_decompressobj_' | |||||
400 | "callers can swap in the zstd decompressor while using the same API.\n" |
|
420 | "callers can swap in the zstd decompressor while using the same API.\n" | |
401 | ); |
|
421 | ); | |
402 |
|
422 | |||
403 | static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self) { |
|
423 | static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |
404 | ZstdDecompressionObj* result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL); |
|
424 | static char* kwlist[] = { | |
|
425 | "write_size", | |||
|
426 | NULL | |||
|
427 | }; | |||
|
428 | ||||
|
429 | ZstdDecompressionObj* result = NULL; | |||
|
430 | size_t outSize = ZSTD_DStreamOutSize(); | |||
|
431 | ||||
|
432 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|k:decompressobj", kwlist, &outSize)) { | |||
|
433 | return NULL; | |||
|
434 | } | |||
|
435 | ||||
|
436 | if (!outSize) { | |||
|
437 | PyErr_SetString(PyExc_ValueError, "write_size must be positive"); | |||
|
438 | return NULL; | |||
|
439 | } | |||
|
440 | ||||
|
441 | result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL); | |||
405 | if (!result) { |
|
442 | if (!result) { | |
406 | return NULL; |
|
443 | return NULL; | |
407 | } |
|
444 | } | |
408 |
|
445 | |||
409 | if ( |
|
446 | if (ensure_dctx(self, 1)) { | |
410 | Py_DECREF(result); |
|
447 | Py_DECREF(result); | |
411 | return NULL; |
|
448 | return NULL; | |
412 | } |
|
449 | } | |
413 |
|
450 | |||
414 | result->decompressor = self; |
|
451 | result->decompressor = self; | |
415 | Py_INCREF(result->decompressor); |
|
452 | Py_INCREF(result->decompressor); | |
|
453 | result->outSize = outSize; | |||
416 |
|
454 | |||
417 | return result; |
|
455 | return result; | |
418 | } |
|
456 | } | |
419 |
|
457 | |||
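decompressobj() now accepts a write_size keyword controlling the output buffer used per call, defaulting to ZSTD_DStreamOutSize(). Sketch of the incremental API with a frame fed in two pieces:

    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b"data " * 1000)
    half = len(frame) // 2

    dctx = zstd.ZstdDecompressor()
    dobj = dctx.decompressobj(write_size=64 * 1024)
    out = dobj.decompress(frame[:half]) + dobj.decompress(frame[half:])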
420 | PyDoc_STRVAR(Decompressor_read_ |
|
458 | PyDoc_STRVAR(Decompressor_read_to_iter__doc__, | |
421 | "read_ |
|
459 | "read_to_iter(reader[, read_size=default, write_size=default, skip_bytes=0])\n" | |
422 | "Read compressed data and return an iterator\n" |
|
460 | "Read compressed data and return an iterator\n" | |
423 | "\n" |
|
461 | "\n" | |
424 | "Returns an iterator of decompressed data chunks produced from reading from\n" |
|
462 | "Returns an iterator of decompressed data chunks produced from reading from\n" | |
@@ -437,7 +475,7 b' PyDoc_STRVAR(Decompressor_read_from__doc' | |||||
437 | "the source.\n" |
|
475 | "the source.\n" | |
438 | ); |
|
476 | ); | |
439 |
|
477 | |||
440 | static ZstdDecompressorIterator* Decompressor_read_ |
|
478 | static ZstdDecompressorIterator* Decompressor_read_to_iter(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |
441 | static char* kwlist[] = { |
|
479 | static char* kwlist[] = { | |
442 | "reader", |
|
480 | "reader", | |
443 | "read_size", |
|
481 | "read_size", | |
@@ -452,7 +490,7 b' static ZstdDecompressorIterator* Decompr' | |||||
452 | ZstdDecompressorIterator* result; |
|
490 | ZstdDecompressorIterator* result; | |
453 | size_t skipBytes = 0; |
|
491 | size_t skipBytes = 0; | |
454 |
|
492 | |||
455 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_ |
|
493 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_to_iter", kwlist, | |
456 | &reader, &inSize, &outSize, &skipBytes)) { |
|
494 | &reader, &inSize, &outSize, &skipBytes)) { | |
457 | return NULL; |
|
495 | return NULL; | |
458 | } |
|
496 | } | |
@@ -474,14 +512,7 b' static ZstdDecompressorIterator* Decompr' | |||||
474 | } |
|
512 | } | |
475 | else if (1 == PyObject_CheckBuffer(reader)) { |
|
513 | else if (1 == PyObject_CheckBuffer(reader)) { | |
476 | /* Object claims it is a buffer. Try to get a handle to it. */ |
|
514 | /* Object claims it is a buffer. Try to get a handle to it. */ | |
477 | result->buffer = PyMem_Malloc(sizeof(Py_buffer)); |
|
515 | if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) { | |
478 | if (!result->buffer) { |
|
|||
479 | goto except; |
|
|||
480 | } |
|
|||
481 |
|
||||
482 | memset(result->buffer, 0, sizeof(Py_buffer)); |
|
|||
483 |
|
||||
484 | if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) { |
|
|||
485 | goto except; |
|
516 | goto except; | |
486 | } |
|
517 | } | |
487 | } |
|
518 | } | |
@@ -498,7 +529,7 b' static ZstdDecompressorIterator* Decompr' | |||||
498 | result->outSize = outSize; |
|
529 | result->outSize = outSize; | |
499 | result->skipBytes = skipBytes; |
|
530 | result->skipBytes = skipBytes; | |
500 |
|
531 | |||
501 | if ( |
|
532 | if (ensure_dctx(self, 1)) { | |
502 | goto except; |
|
533 | goto except; | |
503 | } |
|
534 | } | |
504 |
|
535 | |||
@@ -511,13 +542,6 b' static ZstdDecompressorIterator* Decompr' | |||||
511 | goto finally; |
|
542 | goto finally; | |
512 |
|
543 | |||
513 | except: |
|
544 | except: | |
514 | Py_CLEAR(result->reader); |
|
|||
515 |
|
||||
516 | if (result->buffer) { |
|
|||
517 | PyBuffer_Release(result->buffer); |
|
|||
518 | Py_CLEAR(result->buffer); |
|
|||
519 | } |
|
|||
520 |
|
||||
521 | Py_CLEAR(result); |
|
545 | Py_CLEAR(result); | |
522 |
|
546 | |||
523 | finally: |
|
547 | finally: | |
@@ -525,7 +549,62 b' finally:' | |||||
525 | return result; |
|
549 | return result; | |
526 | } |
|
550 | } | |
527 |
|
551 | |||
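read_from() is renamed to read_to_iter(); the old name stays registered as a deprecated alias in the method table further down. Iterator-style usage sketch:

    import io
    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b"data " * 10000)

    dctx = zstd.ZstdDecompressor()
    for chunk in dctx.read_to_iter(io.BytesIO(frame), read_size=8192):
        pass  # each chunk is a bytes object of decompressed output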
528 | PyDoc_STRVAR(Decompressor_ |
|
552 | PyDoc_STRVAR(Decompressor_stream_reader__doc__, | |
|
553 | "stream_reader(source, [read_size=default])\n" | |||
|
554 | "\n" | |||
|
555 | "Obtain an object that behaves like an I/O stream that can be used for\n" | |||
|
556 | "reading decompressed output from an object.\n" | |||
|
557 | "\n" | |||
|
558 | "The source object can be any object with a ``read(size)`` method or that\n" | |||
|
559 | "conforms to the buffer protocol.\n" | |||
|
560 | ); | |||
|
561 | ||||
|
562 | static ZstdDecompressionReader* Decompressor_stream_reader(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |||
|
563 | static char* kwlist[] = { | |||
|
564 | "source", | |||
|
565 | "read_size", | |||
|
566 | NULL | |||
|
567 | }; | |||
|
568 | ||||
|
569 | PyObject* source; | |||
|
570 | size_t readSize = ZSTD_DStreamInSize(); | |||
|
571 | ZstdDecompressionReader* result; | |||
|
572 | ||||
|
573 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:stream_reader", kwlist, | |||
|
574 | &source, &readSize)) { | |||
|
575 | return NULL; | |||
|
576 | } | |||
|
577 | ||||
|
578 | result = (ZstdDecompressionReader*)PyObject_CallObject((PyObject*)&ZstdDecompressionReaderType, NULL); | |||
|
579 | if (NULL == result) { | |||
|
580 | return NULL; | |||
|
581 | } | |||
|
582 | ||||
|
583 | if (PyObject_HasAttrString(source, "read")) { | |||
|
584 | result->reader = source; | |||
|
585 | Py_INCREF(source); | |||
|
586 | result->readSize = readSize; | |||
|
587 | } | |||
|
588 | else if (1 == PyObject_CheckBuffer(source)) { | |||
|
589 | if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) { | |||
|
590 | Py_CLEAR(result); | |||
|
591 | return NULL; | |||
|
592 | } | |||
|
593 | } | |||
|
594 | else { | |||
|
595 | PyErr_SetString(PyExc_TypeError, | |||
|
596 | "must pass an object with a read() method or that conforms to the buffer protocol"); | |||
|
597 | Py_CLEAR(result); | |||
|
598 | return NULL; | |||
|
599 | } | |||
|
600 | ||||
|
601 | result->decompressor = self; | |||
|
602 | Py_INCREF(self); | |||
|
603 | ||||
|
604 | return result; | |||
|
605 | } | |||
|
606 | ||||
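stream_reader() is a new API: the source may provide a read(size) method or the buffer protocol, and the returned object is consumed like a file. Usage sketch:

    import io
    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b"data " * 10000)

    dctx = zstd.ZstdDecompressor()
    with dctx.stream_reader(io.BytesIO(frame), read_size=16384) as reader:
        while True:
            chunk = reader.read(8192)
            if not chunk:
                break
            # process the decompressed chunk here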
|
607 | PyDoc_STRVAR(Decompressor_stream_writer__doc__, | |||
529 | "Create a context manager to write decompressed data to an object.\n" |
|
608 | "Create a context manager to write decompressed data to an object.\n" | |
530 | "\n" |
|
609 | "\n" | |
531 | "The passed object must have a ``write()`` method.\n" |
|
610 | "The passed object must have a ``write()`` method.\n" | |
@@ -538,7 +617,7 b' PyDoc_STRVAR(Decompressor_write_to__doc_' | |||||
538 | "streaming decompressor.\n" |
|
617 | "streaming decompressor.\n" | |
539 | ); |
|
618 | ); | |
540 |
|
619 | |||
541 | static ZstdDecompressionWriter* Decompressor_ |
|
620 | static ZstdDecompressionWriter* Decompressor_stream_writer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |
542 | static char* kwlist[] = { |
|
621 | static char* kwlist[] = { | |
543 | "writer", |
|
622 | "writer", | |
544 | "write_size", |
|
623 | "write_size", | |
@@ -549,7 +628,7 b' static ZstdDecompressionWriter* Decompre' | |||||
549 | size_t outSize = ZSTD_DStreamOutSize(); |
|
628 | size_t outSize = ZSTD_DStreamOutSize(); | |
550 | ZstdDecompressionWriter* result; |
|
629 | ZstdDecompressionWriter* result; | |
551 |
|
630 | |||
552 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k: |
|
631 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:stream_writer", kwlist, | |
553 | &writer, &outSize)) { |
|
632 | &writer, &outSize)) { | |
554 | return NULL; |
|
633 | return NULL; | |
555 | } |
|
634 | } | |
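write_to() is likewise renamed to stream_writer(), keeping the write_size keyword. Sketch of decompressing by writing compressed bytes into the wrapping writer:

    import io
    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b"data " * 10000)
    out = io.BytesIO()

    dctx = zstd.ZstdDecompressor()
    with dctx.stream_writer(out) as writer:
        writer.write(frame)
    # out now holds the decompressed bytes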
@@ -579,7 +658,7 b' PyDoc_STRVAR(Decompressor_decompress_con' | |||||
579 | "Decompress a series of chunks using the content dictionary chaining technique\n" |
|
658 | "Decompress a series of chunks using the content dictionary chaining technique\n" | |
580 | ); |
|
659 | ); | |
581 |
|
660 | |||
582 | static PyObject* Decompressor_decompress_content_dict_chain( |
|
661 | static PyObject* Decompressor_decompress_content_dict_chain(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |
583 | static char* kwlist[] = { |
|
662 | static char* kwlist[] = { | |
584 | "frames", |
|
663 | "frames", | |
585 | NULL |
|
664 | NULL | |
@@ -592,9 +671,8 b' static PyObject* Decompressor_decompress' | |||||
592 | PyObject* chunk; |
|
671 | PyObject* chunk; | |
593 | char* chunkData; |
|
672 | char* chunkData; | |
594 | Py_ssize_t chunkSize; |
|
673 | Py_ssize_t chunkSize; | |
595 | ZSTD_DCtx* dctx = NULL; |
|
|||
596 | size_t zresult; |
|
674 | size_t zresult; | |
597 | ZSTD_frame |
|
675 | ZSTD_frameHeader frameHeader; | |
598 | void* buffer1 = NULL; |
|
676 | void* buffer1 = NULL; | |
599 | size_t buffer1Size = 0; |
|
677 | size_t buffer1Size = 0; | |
600 | size_t buffer1ContentSize = 0; |
|
678 | size_t buffer1ContentSize = 0; | |
@@ -603,6 +681,8 b' static PyObject* Decompressor_decompress' | |||||
603 | size_t buffer2ContentSize = 0; |
|
681 | size_t buffer2ContentSize = 0; | |
604 | void* destBuffer = NULL; |
|
682 | void* destBuffer = NULL; | |
605 | PyObject* result = NULL; |
|
683 | PyObject* result = NULL; | |
|
684 | ZSTD_outBuffer outBuffer; | |||
|
685 | ZSTD_inBuffer inBuffer; | |||
606 |
|
686 | |||
607 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain", |
|
687 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain", | |
608 | kwlist, &PyList_Type, &chunks)) { |
|
688 | kwlist, &PyList_Type, &chunks)) { | |
@@ -624,7 +704,7 b' static PyObject* Decompressor_decompress' | |||||
624 |
|
704 | |||
625 | /* We require that all chunks be zstd frames and that they have content size set. */ |
|
705 | /* We require that all chunks be zstd frames and that they have content size set. */ | |
626 | PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); |
|
706 | PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); | |
627 | zresult = ZSTD_getFrame |
|
707 | zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize); | |
628 | if (ZSTD_isError(zresult)) { |
|
708 | if (ZSTD_isError(zresult)) { | |
629 | PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame"); |
|
709 | PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame"); | |
630 | return NULL; |
|
710 | return NULL; | |
@@ -634,32 +714,56 b' static PyObject* Decompressor_decompress' | |||||
634 | return NULL; |
|
714 | return NULL; | |
635 | } |
|
715 | } | |
636 |
|
716 | |||
637 | if ( |
|
717 | if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) { | |
638 | PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame"); |
|
718 | PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame"); | |
639 | return NULL; |
|
719 | return NULL; | |
640 | } |
|
720 | } | |
641 |
|
721 | |||
642 | dctx = ZSTD_createDCtx(); |
|
722 | assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize); | |
643 | if (!dctx) { |
|
723 | ||
644 | PyErr_NoMemory(); |
|
724 | /* We check against PY_SSIZE_T_MAX here because we ultimately cast the | |
|
725 | * result to a Python object and its length can be no greater than | |||
|
726 | * Py_ssize_t. In theory, we could have an intermediate frame that is | |||
|
727 | * larger. But a) why would this API be used for frames that large b) | |||
|
728 | * it isn't worth the complexity to support. */ | |||
|
729 | assert(SIZE_MAX >= PY_SSIZE_T_MAX); | |||
|
730 | if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) { | |||
|
731 | PyErr_SetString(PyExc_ValueError, | |||
|
732 | "chunk 0 is too large to decompress on this platform"); | |||
|
733 | return NULL; | |||
|
734 | } | |||
|
735 | ||||
|
736 | if (ensure_dctx(self, 0)) { | |||
645 | goto finally; |
|
737 | goto finally; | |
646 | } |
|
738 | } | |
647 |
|
739 | |||
648 | buffer1Size = |
|
740 | buffer1Size = (size_t)frameHeader.frameContentSize; | |
649 | buffer1 = PyMem_Malloc(buffer1Size); |
|
741 | buffer1 = PyMem_Malloc(buffer1Size); | |
650 | if (!buffer1) { |
|
742 | if (!buffer1) { | |
651 | goto finally; |
|
743 | goto finally; | |
652 | } |
|
744 | } | |
653 |
|
745 | |||
|
746 | outBuffer.dst = buffer1; | |||
|
747 | outBuffer.size = buffer1Size; | |||
|
748 | outBuffer.pos = 0; | |||
|
749 | ||||
|
750 | inBuffer.src = chunkData; | |||
|
751 | inBuffer.size = chunkSize; | |||
|
752 | inBuffer.pos = 0; | |||
|
753 | ||||
654 | Py_BEGIN_ALLOW_THREADS |
|
754 | Py_BEGIN_ALLOW_THREADS | |
655 | zresult = ZSTD_decompress |
|
755 | zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer); | |
656 | Py_END_ALLOW_THREADS |
|
756 | Py_END_ALLOW_THREADS | |
657 | if (ZSTD_isError(zresult)) { |
|
757 | if (ZSTD_isError(zresult)) { | |
658 | PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult)); |
|
758 | PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult)); | |
659 | goto finally; |
|
759 | goto finally; | |
660 | } |
|
760 | } | |
|
761 | else if (zresult) { | |||
|
762 | PyErr_Format(ZstdError, "chunk 0 did not decompress full frame"); | |||
|
763 | goto finally; | |||
|
764 | } | |||
661 |
|
765 | |||
662 | buffer1ContentSize = |
|
766 | buffer1ContentSize = outBuffer.pos; | |
663 |
|
767 | |||
664 | /* Special case of a simple chain. */ |
|
768 | /* Special case of a simple chain. */ | |
665 | if (1 == chunksLen) { |
|
769 | if (1 == chunksLen) { | |
@@ -668,7 +772,7 b' static PyObject* Decompressor_decompress' | |||||
668 | } |
|
772 | } | |
669 |
|
773 | |||
670 | /* This should ideally look at next chunk. But this is slightly simpler. */ |
|
774 | /* This should ideally look at next chunk. But this is slightly simpler. */ | |
671 | buffer2Size = |
|
775 | buffer2Size = (size_t)frameHeader.frameContentSize; | |
672 | buffer2 = PyMem_Malloc(buffer2Size); |
|
776 | buffer2 = PyMem_Malloc(buffer2Size); | |
673 | if (!buffer2) { |
|
777 | if (!buffer2) { | |
674 | goto finally; |
|
778 | goto finally; | |
@@ -688,7 +792,7 b' static PyObject* Decompressor_decompress' | |||||
688 | } |
|
792 | } | |
689 |
|
793 | |||
690 | PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); |
|
794 | PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); | |
691 | zresult = ZSTD_getFrame |
|
795 | zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize); | |
692 | if (ZSTD_isError(zresult)) { |
|
796 | if (ZSTD_isError(zresult)) { | |
693 | PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex); |
|
797 | PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex); | |
694 | goto finally; |
|
798 | goto finally; | |
@@ -698,18 +802,30 b' static PyObject* Decompressor_decompress' | |||||
698 | goto finally; |
|
802 | goto finally; | |
699 | } |
|
803 | } | |
700 |
|
804 | |||
701 | if ( |
|
805 | if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) { | |
702 | PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex); |
|
806 | PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex); | |
703 | goto finally; |
|
807 | goto finally; | |
704 | } |
|
808 | } | |
705 |
|
809 | |||
|
810 | assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize); | |||
|
811 | ||||
|
812 | if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) { | |||
|
813 | PyErr_Format(PyExc_ValueError, | |||
|
814 | "chunk %zd is too large to decompress on this platform", chunkIndex); | |||
|
815 | goto finally; | |||
|
816 | } | |||
|
817 | ||||
|
818 | inBuffer.src = chunkData; | |||
|
819 | inBuffer.size = chunkSize; | |||
|
820 | inBuffer.pos = 0; | |||
|
821 | ||||
706 | parity = chunkIndex % 2; |
|
822 | parity = chunkIndex % 2; | |
707 |
|
823 | |||
708 | /* This could definitely be abstracted to reduce code duplication. */ |
|
824 | /* This could definitely be abstracted to reduce code duplication. */ | |
709 | if (parity) { |
|
825 | if (parity) { | |
710 | /* Resize destination buffer to hold larger content. */ |
|
826 | /* Resize destination buffer to hold larger content. */ | |
711 | if (buffer2Size < frame |
|
827 | if (buffer2Size < frameHeader.frameContentSize) { | |
712 | buffer2Size = |
|
828 | buffer2Size = (size_t)frameHeader.frameContentSize; | |
713 | destBuffer = PyMem_Realloc(buffer2, buffer2Size); |
|
829 | destBuffer = PyMem_Realloc(buffer2, buffer2Size); | |
714 | if (!destBuffer) { |
|
830 | if (!destBuffer) { | |
715 | goto finally; |
|
831 | goto finally; | |
@@ -718,19 +834,38 b' static PyObject* Decompressor_decompress' | |||||
718 | } |
|
834 | } | |
719 |
|
835 | |||
720 | Py_BEGIN_ALLOW_THREADS |
|
836 | Py_BEGIN_ALLOW_THREADS | |
721 | zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size, |
|
837 | zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx, | |
722 |
|
|
838 | buffer1, buffer1ContentSize, ZSTD_dct_rawContent); | |
|
839 | Py_END_ALLOW_THREADS | |||
|
840 | if (ZSTD_isError(zresult)) { | |||
|
841 | PyErr_Format(ZstdError, | |||
|
842 | "failed to load prefix dictionary at chunk %zd", chunkIndex); | |||
|
843 | goto finally; | |||
|
844 | } | |||
|
845 | ||||
|
846 | outBuffer.dst = buffer2; | |||
|
847 | outBuffer.size = buffer2Size; | |||
|
848 | outBuffer.pos = 0; | |||
|
849 | ||||
|
850 | Py_BEGIN_ALLOW_THREADS | |||
|
851 | zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer); | |||
723 | Py_END_ALLOW_THREADS |
|
852 | Py_END_ALLOW_THREADS | |
724 | if (ZSTD_isError(zresult)) { |
|
853 | if (ZSTD_isError(zresult)) { | |
725 | PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", |
|
854 | PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", | |
726 | chunkIndex, ZSTD_getErrorName(zresult)); |
|
855 | chunkIndex, ZSTD_getErrorName(zresult)); | |
727 | goto finally; |
|
856 | goto finally; | |
728 | } |
|
857 | } | |
729 | buffer2ContentSize = zresult; |
|
858 | else if (zresult) { | |
|
859 | PyErr_Format(ZstdError, "chunk %zd did not decompress full frame", | |||
|
860 | chunkIndex); | |||
|
861 | goto finally; | |||
|
862 | } | |||
|
863 | ||||
|
864 | buffer2ContentSize = outBuffer.pos; | |||
730 | } |
|
865 | } | |
731 | else { |
|
866 | else { | |
732 | if (buffer1Size < frame |
|
867 | if (buffer1Size < frameHeader.frameContentSize) { | |
733 | buffer1Size = |
|
868 | buffer1Size = (size_t)frameHeader.frameContentSize; | |
734 | destBuffer = PyMem_Realloc(buffer1, buffer1Size); |
|
869 | destBuffer = PyMem_Realloc(buffer1, buffer1Size); | |
735 | if (!destBuffer) { |
|
870 | if (!destBuffer) { | |
736 | goto finally; |
|
871 | goto finally; | |
@@ -739,15 +874,34 b' static PyObject* Decompressor_decompress' | |||||
739 | } |
|
874 | } | |
740 |
|
875 | |||
741 | Py_BEGIN_ALLOW_THREADS |
|
876 | Py_BEGIN_ALLOW_THREADS | |
742 | zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size, |
|
877 | zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx, | |
743 |
|
|
878 | buffer2, buffer2ContentSize, ZSTD_dct_rawContent); | |
|
879 | Py_END_ALLOW_THREADS | |||
|
880 | if (ZSTD_isError(zresult)) { | |||
|
881 | PyErr_Format(ZstdError, | |||
|
882 | "failed to load prefix dictionary at chunk %zd", chunkIndex); | |||
|
883 | goto finally; | |||
|
884 | } | |||
|
885 | ||||
|
886 | outBuffer.dst = buffer1; | |||
|
887 | outBuffer.size = buffer1Size; | |||
|
888 | outBuffer.pos = 0; | |||
|
889 | ||||
|
890 | Py_BEGIN_ALLOW_THREADS | |||
|
891 | zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer); | |||
744 | Py_END_ALLOW_THREADS |
|
892 | Py_END_ALLOW_THREADS | |
745 | if (ZSTD_isError(zresult)) { |
|
893 | if (ZSTD_isError(zresult)) { | |
746 | PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", |
|
894 | PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", | |
747 | chunkIndex, ZSTD_getErrorName(zresult)); |
|
895 | chunkIndex, ZSTD_getErrorName(zresult)); | |
748 | goto finally; |
|
896 | goto finally; | |
749 | } |
|
897 | } | |
750 | buffer1ContentSize = zresult; |
|
898 | else if (zresult) { | |
|
899 | PyErr_Format(ZstdError, "chunk %zd did not decompress full frame", | |||
|
900 | chunkIndex); | |||
|
901 | goto finally; | |||
|
902 | } | |||
|
903 | ||||
|
904 | buffer1ContentSize = outBuffer.pos; | |||
751 | } |
|
905 | } | |
752 | } |
|
906 | } | |
753 |
|
907 | |||
@@ -762,17 +916,13 b' finally:' | |||||
762 | PyMem_Free(buffer1); |
|
916 | PyMem_Free(buffer1); | |
763 | } |
|
917 | } | |
764 |
|
918 | |||
765 | if (dctx) { |
|
|||
766 | ZSTD_freeDCtx(dctx); |
|
|||
767 | } |
|
|||
768 |
|
||||
769 | return result; |
|
919 | return result; | |
770 | } |
|
920 | } | |
771 |
|
921 | |||
772 | typedef struct { |
|
922 | typedef struct { | |
773 | void* sourceData; |
|
923 | void* sourceData; | |
774 | size_t sourceSize; |
|
924 | size_t sourceSize; | |
775 |
|
|
925 | size_t destSize; | |
776 | } FramePointer; |
|
926 | } FramePointer; | |
777 |
|
927 | |||
778 | typedef struct { |
|
928 | typedef struct { | |
@@ -806,7 +956,6 b' typedef struct {' | |||||
806 |
|
956 | |||
807 | /* Compression state and settings. */ |
|
957 | /* Compression state and settings. */ | |
808 | ZSTD_DCtx* dctx; |
|
958 | ZSTD_DCtx* dctx; | |
809 | ZSTD_DDict* ddict; |
|
|||
810 | int requireOutputSizes; |
|
959 | int requireOutputSizes; | |
811 |
|
960 | |||
812 | /* Output storage. */ |
|
961 | /* Output storage. */ | |
@@ -838,6 +987,14 b' static void decompress_worker(WorkerStat' | |||||
838 | assert(0 == state->destCount); |
|
987 | assert(0 == state->destCount); | |
839 | assert(state->endOffset - state->startOffset >= 0); |
|
988 | assert(state->endOffset - state->startOffset >= 0); | |
840 |
|
989 | |||
|
990 | /* We could get here due to the way work is allocated. Ideally we wouldn't | |||
|
991 | get here. But that would require a bit of a refactor in the caller. */ | |||
|
992 | if (state->totalSourceSize > SIZE_MAX) { | |||
|
993 | state->error = WorkerError_memory; | |||
|
994 | state->errorOffset = 0; | |||
|
995 | return; | |||
|
996 | } | |||
|
997 | ||||
841 | /* |
|
998 | /* | |
842 | * We need to allocate a buffer to hold decompressed data. How we do this |
|
999 | * We need to allocate a buffer to hold decompressed data. How we do this | |
843 | * depends on what we know about the output. The following scenarios are |
|
1000 | * depends on what we know about the output. The following scenarios are | |
@@ -853,14 +1010,34 b' static void decompress_worker(WorkerStat' | |||||
853 | /* Resolve ouput segments. */ |
|
1010 | /* Resolve ouput segments. */ | |
854 | for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) { |
|
1011 | for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) { | |
855 | FramePointer* fp = &framePointers[frameIndex]; |
|
1012 | FramePointer* fp = &framePointers[frameIndex]; | |
|
1013 | unsigned long long decompressedSize; | |||
856 |
|
1014 | |||
857 | if (0 == fp->destSize) { |
|
1015 | if (0 == fp->destSize) { | |
858 |
|
|
1016 | decompressedSize = ZSTD_getFrameContentSize(fp->sourceData, fp->sourceSize); | |
859 | if (0 == fp->destSize && state->requireOutputSizes) { |
|
1017 | ||
|
1018 | if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) { | |||
860 | state->error = WorkerError_unknownSize; |
|
1019 | state->error = WorkerError_unknownSize; | |
861 | state->errorOffset = frameIndex; |
|
1020 | state->errorOffset = frameIndex; | |
862 | return; |
|
1021 | return; | |
863 | } |
|
1022 | } | |
|
1023 | else if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) { | |||
|
1024 | if (state->requireOutputSizes) { | |||
|
1025 | state->error = WorkerError_unknownSize; | |||
|
1026 | state->errorOffset = frameIndex; | |||
|
1027 | return; | |||
|
1028 | } | |||
|
1029 | ||||
|
1030 | /* This will fail the assert for .destSize > 0 below. */ | |||
|
1031 | decompressedSize = 0; | |||
|
1032 | } | |||
|
1033 | ||||
|
1034 | if (decompressedSize > SIZE_MAX) { | |||
|
1035 | state->error = WorkerError_memory; | |||
|
1036 | state->errorOffset = frameIndex; | |||
|
1037 | return; | |||
|
1038 | } | |||
|
1039 | ||||
|
1040 | fp->destSize = (size_t)decompressedSize; | |||
864 | } |
|
1041 | } | |
865 |
|
1042 | |||
866 | totalOutputSize += fp->destSize; |
|
1043 | totalOutputSize += fp->destSize; | |
@@ -878,7 +1055,7 b' static void decompress_worker(WorkerStat' | |||||
878 |
|
1055 | |||
879 | assert(framePointers[state->startOffset].destSize > 0); /* For now. */ |
|
1056 | assert(framePointers[state->startOffset].destSize > 0); /* For now. */ | |
880 |
|
1057 | |||
881 | allocationSize = roundpow2(state->totalSourceSize); |
|
1058 | allocationSize = roundpow2((size_t)state->totalSourceSize); | |
882 |
|
1059 | |||
883 | if (framePointers[state->startOffset].destSize > allocationSize) { |
|
1060 | if (framePointers[state->startOffset].destSize > allocationSize) { | |
884 | allocationSize = roundpow2(framePointers[state->startOffset].destSize); |
|
1061 | allocationSize = roundpow2(framePointers[state->startOffset].destSize); | |
@@ -902,6 +1079,8 b' static void decompress_worker(WorkerStat' | |||||
902 | destBuffer->segmentsSize = remainingItems; |
|
1079 | destBuffer->segmentsSize = remainingItems; | |
903 |
|
1080 | |||
904 | for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) { |
|
1081 | for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) { | |
|
1082 | ZSTD_outBuffer outBuffer; | |||
|
1083 | ZSTD_inBuffer inBuffer; | |||
905 | const void* source = framePointers[frameIndex].sourceData; |
|
1084 | const void* source = framePointers[frameIndex].sourceData; | |
906 | const size_t sourceSize = framePointers[frameIndex].sourceSize; |
|
1085 | const size_t sourceSize = framePointers[frameIndex].sourceSize; | |
907 | void* dest; |
|
1086 | void* dest; | |
@@ -956,7 +1135,7 b' static void decompress_worker(WorkerStat' | |||||
956 | /* Don't take any chances will non-NULL pointers. */ |
|
1135 | /* Don't take any chances will non-NULL pointers. */ | |
957 | memset(destBuffer, 0, sizeof(DestBuffer)); |
|
1136 | memset(destBuffer, 0, sizeof(DestBuffer)); | |
958 |
|
1137 | |||
959 | allocationSize = roundpow2(state->totalSourceSize); |
|
1138 | allocationSize = roundpow2((size_t)state->totalSourceSize); | |
960 |
|
1139 | |||
961 | if (decompressedSize > allocationSize) { |
|
1140 | if (decompressedSize > allocationSize) { | |
962 | allocationSize = roundpow2(decompressedSize); |
|
1141 | allocationSize = roundpow2(decompressedSize); | |
@@ -985,31 +1164,31 b' static void decompress_worker(WorkerStat' | |||||
985 |
|
1164 | |||
986 | dest = (char*)destBuffer->dest + destOffset; |
|
1165 | dest = (char*)destBuffer->dest + destOffset; | |
987 |
|
1166 | |||
988 | if (state->ddict) { |
|
1167 | outBuffer.dst = dest; | |
989 | zresult = ZSTD_decompress_usingDDict(state->dctx, dest, decompressedSize, |
|
1168 | outBuffer.size = decompressedSize; | |
990 | source, sourceSize, state->ddict); |
|
1169 | outBuffer.pos = 0; | |
991 | } |
|
|||
992 | else { |
|
|||
993 | zresult = ZSTD_decompressDCtx(state->dctx, dest, decompressedSize, |
|
|||
994 | source, sourceSize); |
|
|||
995 | } |
|
|||
996 |
|
1170 | |||
|
1171 | inBuffer.src = source; | |||
|
1172 | inBuffer.size = sourceSize; | |||
|
1173 | inBuffer.pos = 0; | |||
|
1174 | ||||
|
1175 | zresult = ZSTD_decompress_generic(state->dctx, &outBuffer, &inBuffer); | |||
997 | if (ZSTD_isError(zresult)) { |
|
1176 | if (ZSTD_isError(zresult)) { | |
998 | state->error = WorkerError_zstd; |
|
1177 | state->error = WorkerError_zstd; | |
999 | state->zresult = zresult; |
|
1178 | state->zresult = zresult; | |
1000 | state->errorOffset = frameIndex; |
|
1179 | state->errorOffset = frameIndex; | |
1001 | return; |
|
1180 | return; | |
1002 | } |
|
1181 | } | |
1003 | else if (zresult != decompressedSize) { |
|
1182 | else if (zresult || outBuffer.pos != decompressedSize) { | |
1004 | state->error = WorkerError_sizeMismatch; |
|
1183 | state->error = WorkerError_sizeMismatch; | |
1005 | state->zresult = |
|
1184 | state->zresult = outBuffer.pos; | |
1006 | state->errorOffset = frameIndex; |
|
1185 | state->errorOffset = frameIndex; | |
1007 | return; |
|
1186 | return; | |
1008 | } |
|
1187 | } | |
1009 |
|
1188 | |||
1010 | destBuffer->segments[localOffset].offset = destOffset; |
|
1189 | destBuffer->segments[localOffset].offset = destOffset; | |
1011 | destBuffer->segments[localOffset].length = |
|
1190 | destBuffer->segments[localOffset].length = outBuffer.pos; | |
1012 | destOffset += |
|
1191 | destOffset += outBuffer.pos; | |
1013 | localOffset++; |
|
1192 | localOffset++; | |
1014 | remainingItems--; |
|
1193 | remainingItems--; | |
1015 | } |
|
1194 | } | |
@@ -1027,9 +1206,7 b' static void decompress_worker(WorkerStat' | |||||
1027 | } |
|
1206 | } | |
1028 |
|
1207 | |||
1029 | ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames, |
|
1208 | ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames, | |
1030 |
|
|
1209 | Py_ssize_t threadCount) { | |
1031 | void* dictData = NULL; |
|
|||
1032 | size_t dictSize = 0; |
|
|||
1033 | Py_ssize_t i = 0; |
|
1210 | Py_ssize_t i = 0; | |
1034 | int errored = 0; |
|
1211 | int errored = 0; | |
1035 | Py_ssize_t segmentsCount; |
|
1212 | Py_ssize_t segmentsCount; | |
@@ -1039,7 +1216,7 b' ZstdBufferWithSegmentsCollection* decomp' | |||||
1039 | ZstdBufferWithSegmentsCollection* result = NULL; |
|
1216 | ZstdBufferWithSegmentsCollection* result = NULL; | |
1040 | FramePointer* framePointers = frames->frames; |
|
1217 | FramePointer* framePointers = frames->frames; | |
1041 | unsigned long long workerBytes = 0; |
|
1218 | unsigned long long workerBytes = 0; | |
1042 |
|
|
1219 | Py_ssize_t currentThread = 0; | |
1043 | Py_ssize_t workerStartOffset = 0; |
|
1220 | Py_ssize_t workerStartOffset = 0; | |
1044 | POOL_ctx* pool = NULL; |
|
1221 | POOL_ctx* pool = NULL; | |
1045 | WorkerState* workerStates = NULL; |
|
1222 | WorkerState* workerStates = NULL; | |
@@ -1049,24 +1226,14 b' ZstdBufferWithSegmentsCollection* decomp' | |||||
1049 | assert(threadCount >= 1); |
|
1226 | assert(threadCount >= 1); | |
1050 |
|
1227 | |||
1051 | /* More threads than inputs makes no sense under any conditions. */ |
|
1228 | /* More threads than inputs makes no sense under any conditions. */ | |
1052 | threadCount = frames->framesSize < threadCount ? |
|
1229 | threadCount = frames->framesSize < threadCount ? frames->framesSize | |
1053 | : threadCount; |
|
1230 | : threadCount; | |
1054 |
|
1231 | |||
1055 | /* TODO lower thread count if input size is too small and threads would just |
|
1232 | /* TODO lower thread count if input size is too small and threads would just | |
1056 | add overhead. */ |
|
1233 | add overhead. */ | |
1057 |
|
1234 | |||
1058 | if (decompressor->dict) { |
|
1235 | if (decompressor->dict) { | |
1059 | dictData = decompressor->dict->dictData; |
|
1236 | if (ensure_ddict(decompressor->dict)) { | |
1060 | dictSize = decompressor->dict->dictSize; |
|
|||
1061 | } |
|
|||
1062 |
|
||||
1063 | if (dictData && !decompressor->ddict) { |
|
|||
1064 | Py_BEGIN_ALLOW_THREADS |
|
|||
1065 | decompressor->ddict = ZSTD_createDDict_byReference(dictData, dictSize); |
|
|||
1066 | Py_END_ALLOW_THREADS |
|
|||
1067 |
|
||||
1068 | if (!decompressor->ddict) { |
|
|||
1069 | PyErr_SetString(ZstdError, "could not create decompression dict"); |
|
|||
1070 | return NULL; |
|
1237 | return NULL; | |
1071 | } |
|
1238 | } | |
1072 | } |
|
1239 | } | |
@@ -1091,7 +1258,14 b' ZstdBufferWithSegmentsCollection* decomp' | |||||
1091 |
|
1258 | |||
1092 | bytesPerWorker = frames->compressedSize / threadCount; |
|
1259 | bytesPerWorker = frames->compressedSize / threadCount; | |
1093 |
|
1260 | |||
|
1261 | if (bytesPerWorker > SIZE_MAX) { | |||
|
1262 | PyErr_SetString(ZstdError, "too much data per worker for this platform"); | |||
|
1263 | goto finally; | |||
|
1264 | } | |||
|
1265 | ||||
1094 | for (i = 0; i < threadCount; i++) { |
|
1266 | for (i = 0; i < threadCount; i++) { | |
|
1267 | size_t zresult; | |||
|
1268 | ||||
1095 | workerStates[i].dctx = ZSTD_createDCtx(); |
|
1269 | workerStates[i].dctx = ZSTD_createDCtx(); | |
1096 | if (NULL == workerStates[i].dctx) { |
|
1270 | if (NULL == workerStates[i].dctx) { | |
1097 | PyErr_NoMemory(); |
|
1271 | PyErr_NoMemory(); | |
@@ -1100,7 +1274,15 b' ZstdBufferWithSegmentsCollection* decomp' | |||||
1100 |
|
1274 | |||
1101 | ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx); |
|
1275 | ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx); | |
1102 |
|
1276 | |||
1103 |
|
|
1277 | if (decompressor->dict) { | |
|
1278 | zresult = ZSTD_DCtx_refDDict(workerStates[i].dctx, decompressor->dict->ddict); | |||
|
1279 | if (zresult) { | |||
|
1280 | PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s", | |||
|
1281 | ZSTD_getErrorName(zresult)); | |||
|
1282 | goto finally; | |||
|
1283 | } | |||
|
1284 | } | |||
|
1285 | ||||
1104 | workerStates[i].framePointers = framePointers; |
|
1286 | workerStates[i].framePointers = framePointers; | |
1105 | workerStates[i].requireOutputSizes = 1; |
|
1287 | workerStates[i].requireOutputSizes = 1; | |
1106 | } |
|
1288 | } | |
@@ -1178,7 +1360,7 b' ZstdBufferWithSegmentsCollection* decomp' | |||||
1178 | break; |
|
1360 | break; | |
1179 |
|
1361 | |||
1180 | case WorkerError_sizeMismatch: |
|
1362 | case WorkerError_sizeMismatch: | |
1181 | PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected % |
|
1363 | PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %zu", | |
1182 | workerStates[i].errorOffset, workerStates[i].zresult, |
|
1364 | workerStates[i].errorOffset, workerStates[i].zresult, | |
1183 | framePointers[workerStates[i].errorOffset].destSize); |
|
1365 | framePointers[workerStates[i].errorOffset].destSize); | |
1184 | errored = 1; |
|
1366 | errored = 1; | |
@@ -1388,9 +1570,21 b' static ZstdBufferWithSegmentsCollection*' | |||||
1388 | decompressedSize = frameSizesP[i]; |
|
1570 | decompressedSize = frameSizesP[i]; | |
1389 | } |
|
1571 | } | |
1390 |
|
1572 | |||
|
1573 | if (sourceSize > SIZE_MAX) { | |||
|
1574 | PyErr_Format(PyExc_ValueError, | |||
|
1575 | "item %zd is too large for this platform", i); | |||
|
1576 | goto finally; | |||
|
1577 | } | |||
|
1578 | ||||
|
1579 | if (decompressedSize > SIZE_MAX) { | |||
|
1580 | PyErr_Format(PyExc_ValueError, | |||
|
1581 | "decompressed size of item %zd is too large for this platform", i); | |||
|
1582 | goto finally; | |||
|
1583 | } | |||
|
1584 | ||||
1391 | framePointers[i].sourceData = sourceData; |
|
1585 | framePointers[i].sourceData = sourceData; | |
1392 | framePointers[i].sourceSize = sourceSize; |
|
1586 | framePointers[i].sourceSize = (size_t)sourceSize; | |
1393 | framePointers[i].destSize = decompressedSize; |
|
1587 | framePointers[i].destSize = (size_t)decompressedSize; | |
1394 | } |
|
1588 | } | |
1395 | } |
|
1589 | } | |
1396 | else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) { |
|
1590 | else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) { | |
@@ -1419,17 +1613,33 b' static ZstdBufferWithSegmentsCollection*' | |||||
1419 | buffer = collection->buffers[i]; |
|
1613 | buffer = collection->buffers[i]; | |
1420 |
|
1614 | |||
1421 | for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) { |
|
1615 | for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) { | |
|
1616 | unsigned long long decompressedSize = frameSizesP ? frameSizesP[offset] : 0; | |||
|
1617 | ||||
1422 | if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) { |
|
1618 | if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) { | |
1423 | PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", |
|
1619 | PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", | |
1424 | offset); |
|
1620 | offset); | |
1425 | goto finally; |
|
1621 | goto finally; | |
1426 | } |
|
1622 | } | |
1427 |
|
1623 | |||
|
1624 | if (buffer->segments[segmentIndex].length > SIZE_MAX) { | |||
|
1625 | PyErr_Format(PyExc_ValueError, | |||
|
1626 | "item %zd in buffer %zd is too large for this platform", | |||
|
1627 | segmentIndex, i); | |||
|
1628 | goto finally; | |||
|
1629 | } | |||
|
1630 | ||||
|
1631 | if (decompressedSize > SIZE_MAX) { | |||
|
1632 | PyErr_Format(PyExc_ValueError, | |||
|
1633 | "decompressed size of item %zd in buffer %zd is too large for this platform", | |||
|
1634 | segmentIndex, i); | |||
|
1635 | goto finally; | |||
|
1636 | } | |||
|
1637 | ||||
1428 | totalInputSize += buffer->segments[segmentIndex].length; |
|
1638 | totalInputSize += buffer->segments[segmentIndex].length; | |
1429 |
|
1639 | |||
1430 | framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset; |
|
1640 | framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset; | |
1431 | framePointers[offset].sourceSize = buffer->segments[segmentIndex].length; |
|
1641 | framePointers[offset].sourceSize = (size_t)buffer->segments[segmentIndex].length; | |
1432 | framePointers[offset].destSize = |
|
1642 | framePointers[offset].destSize = (size_t)decompressedSize; | |
1433 |
|
1643 | |||
1434 | offset++; |
|
1644 | offset++; | |
1435 | } |
|
1645 | } | |
@@ -1450,11 +1660,6 b' static ZstdBufferWithSegmentsCollection*' | |||||
1450 | goto finally; |
|
1660 | goto finally; | |
1451 | } |
|
1661 | } | |
1452 |
|
1662 | |||
1453 | /* |
|
|||
1454 | * It is not clear whether Py_buffer.buf is still valid after |
|
|||
1455 | * PyBuffer_Release. So, we hold a reference to all Py_buffer instances |
|
|||
1456 | * for the duration of the operation. |
|
|||
1457 | */ |
|
|||
1458 | frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer)); |
|
1663 | frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer)); | |
1459 | if (NULL == frameBuffers) { |
|
1664 | if (NULL == frameBuffers) { | |
1460 | PyErr_NoMemory(); |
|
1665 | PyErr_NoMemory(); | |
@@ -1465,6 +1670,8 b' static ZstdBufferWithSegmentsCollection*' | |||||
1465 |
|
1670 | |||
1466 | /* Do a pass to assemble info about our input buffers and output sizes. */ |
|
1671 | /* Do a pass to assemble info about our input buffers and output sizes. */ | |
1467 | for (i = 0; i < frameCount; i++) { |
|
1672 | for (i = 0; i < frameCount; i++) { | |
|
1673 | unsigned long long decompressedSize = frameSizesP ? frameSizesP[i] : 0; | |||
|
1674 | ||||
1468 | if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i), |
|
1675 | if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i), | |
1469 | &frameBuffers[i], PyBUF_CONTIG_RO)) { |
|
1676 | &frameBuffers[i], PyBUF_CONTIG_RO)) { | |
1470 | PyErr_Clear(); |
|
1677 | PyErr_Clear(); | |
@@ -1472,11 +1679,17 b' static ZstdBufferWithSegmentsCollection*' | |||||
1472 | goto finally; |
|
1679 | goto finally; | |
1473 | } |
|
1680 | } | |
1474 |
|
1681 | |||
|
1682 | if (decompressedSize > SIZE_MAX) { | |||
|
1683 | PyErr_Format(PyExc_ValueError, | |||
|
1684 | "decompressed size of item %zd is too large for this platform", i); | |||
|
1685 | goto finally; | |||
|
1686 | } | |||
|
1687 | ||||
1475 | totalInputSize += frameBuffers[i].len; |
|
1688 | totalInputSize += frameBuffers[i].len; | |
1476 |
|
1689 | |||
1477 | framePointers[i].sourceData = frameBuffers[i].buf; |
|
1690 | framePointers[i].sourceData = frameBuffers[i].buf; | |
1478 | framePointers[i].sourceSize = frameBuffers[i].len; |
|
1691 | framePointers[i].sourceSize = frameBuffers[i].len; | |
1479 | framePointers[i].destSize = |
|
1692 | framePointers[i].destSize = (size_t)decompressedSize; | |
1480 | } |
|
1693 | } | |
1481 | } |
|
1694 | } | |
1482 | else { |
|
1695 | else { | |
@@ -1514,16 +1727,26 b' static PyMethodDef Decompressor_methods[' | |||||
1514 | Decompressor_copy_stream__doc__ }, |
|
1727 | Decompressor_copy_stream__doc__ }, | |
1515 | { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS, |
|
1728 | { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS, | |
1516 | Decompressor_decompress__doc__ }, |
|
1729 | Decompressor_decompress__doc__ }, | |
1517 | { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_ |
|
1730 | { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_VARARGS | METH_KEYWORDS, | |
1518 | Decompressor_decompressobj__doc__ }, |
|
1731 | Decompressor_decompressobj__doc__ }, | |
1519 | { "read_ |
|
1732 | { "read_to_iter", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS, | |
1520 | Decompressor_read_ |
|
1733 | Decompressor_read_to_iter__doc__ }, | |
1521 | { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS, |
|
1734 | /* TODO Remove deprecated API */ | |
1522 | Decompressor_write_to__doc__ }, |
|
1735 | { "read_from", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS, | |
|
1736 | Decompressor_read_to_iter__doc__ }, | |||
|
1737 | { "stream_reader", (PyCFunction)Decompressor_stream_reader, | |||
|
1738 | METH_VARARGS | METH_KEYWORDS, Decompressor_stream_reader__doc__ }, | |||
|
1739 | { "stream_writer", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS, | |||
|
1740 | Decompressor_stream_writer__doc__ }, | |||
|
1741 | /* TODO remove deprecated API */ | |||
|
1742 | { "write_to", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS, | |||
|
1743 | Decompressor_stream_writer__doc__ }, | |||
1523 | { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain, |
|
1744 | { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain, | |
1524 | METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ }, |
|
1745 | METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ }, | |
1525 | { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer, |
|
1746 | { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer, | |
1526 | METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ }, |
|
1747 | METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ }, | |
|
1748 | { "memory_size", (PyCFunction)Decompressor_memory_size, METH_NOARGS, | |||
|
1749 | Decompressor_memory_size__doc__ }, | |||
1527 | { NULL, NULL } |
|
1750 | { NULL, NULL } | |
1528 | }; |
|
1751 | }; | |
1529 |
|
1752 |
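The method table now exposes the renamed APIs (read_to_iter, stream_reader, stream_writer), keeps read_from and write_to as deprecated aliases, and registers memory_size(). The multi_decompress_to_buffer() worker changes above serve frames whose content sizes are known; an illustrative sketch, where the threads keyword and the segment tobytes() accessor are as exposed by the C backend:

    import zstandard as zstd

    cctx = zstd.ZstdCompressor(write_content_size=True)
    frames = [cctx.compress(("message %d" % i).encode("ascii") * 64) for i in range(4)]

    dctx = zstd.ZstdDecompressor()
    result = dctx.multi_decompress_to_buffer(frames, threads=2)
    first = result[0].tobytes()  # decompressed bytes of the first frame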
@@ -20,10 +20,9 b' static void ZstdDecompressorIterator_dea' | |||||
20 | Py_XDECREF(self->decompressor); |
|
20 | Py_XDECREF(self->decompressor); | |
21 | Py_XDECREF(self->reader); |
|
21 | Py_XDECREF(self->reader); | |
22 |
|
22 | |||
23 | if (self->buffer) { |
|
23 | if (self->buffer.buf) { | |
24 | PyBuffer_Release(self->buffer); |
|
24 | PyBuffer_Release(&self->buffer); | |
25 | PyMem_FREE(self->buffer); |
|
25 | memset(&self->buffer, 0, sizeof(self->buffer)); | |
26 | self->buffer = NULL; |
|
|||
27 | } |
|
26 | } | |
28 |
|
27 | |||
29 | if (self->input.src) { |
|
28 | if (self->input.src) { | |
@@ -45,8 +44,6 b' static DecompressorIteratorResult read_d' | |||||
45 | DecompressorIteratorResult result; |
|
44 | DecompressorIteratorResult result; | |
46 | size_t oldInputPos = self->input.pos; |
|
45 | size_t oldInputPos = self->input.pos; | |
47 |
|
46 | |||
48 | assert(self->decompressor->dstream); |
|
|||
49 |
|
||||
50 | result.chunk = NULL; |
|
47 | result.chunk = NULL; | |
51 |
|
48 | |||
52 | chunk = PyBytes_FromStringAndSize(NULL, self->outSize); |
|
49 | chunk = PyBytes_FromStringAndSize(NULL, self->outSize); | |
@@ -60,7 +57,7 b' static DecompressorIteratorResult read_d' | |||||
60 | self->output.pos = 0; |
|
57 | self->output.pos = 0; | |
61 |
|
58 | |||
62 | Py_BEGIN_ALLOW_THREADS |
|
59 | Py_BEGIN_ALLOW_THREADS | |
63 | zresult = ZSTD_decompress |
|
60 | zresult = ZSTD_decompress_generic(self->decompressor->dctx, &self->output, &self->input); | |
64 | Py_END_ALLOW_THREADS |
|
61 | Py_END_ALLOW_THREADS | |
65 |
|
62 | |||
66 | /* We're done with the pointer. Nullify to prevent anyone from getting a |
|
63 | /* We're done with the pointer. Nullify to prevent anyone from getting a | |
@@ -86,7 +83,8 b' static DecompressorIteratorResult read_d' | |||||
86 | /* If it produced output data, return it. */ |
|
83 | /* If it produced output data, return it. */ | |
87 | if (self->output.pos) { |
|
84 | if (self->output.pos) { | |
88 | if (self->output.pos < self->outSize) { |
|
85 | if (self->output.pos < self->outSize) { | |
89 | if ( |
|
86 | if (safe_pybytes_resize(&chunk, self->output.pos)) { | |
|
87 | Py_XDECREF(chunk); | |||
90 | result.errored = 1; |
|
88 | result.errored = 1; | |
91 | return result; |
|
89 | return result; | |
92 | } |
|
90 | } | |
@@ -137,15 +135,15 b' read_from_source:' | |||||
137 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); |
|
135 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); | |
138 | } |
|
136 | } | |
139 | else { |
|
137 | else { | |
140 | assert(self->buffer |
|
138 | assert(self->buffer.buf); | |
141 |
|
139 | |||
142 | /* Only support contiguous C arrays for now */ |
|
140 | /* Only support contiguous C arrays for now */ | |
143 | assert(self->buffer |
|
141 | assert(self->buffer.strides == NULL && self->buffer.suboffsets == NULL); | |
144 | assert(self->buffer |
|
142 | assert(self->buffer.itemsize == 1); | |
145 |
|
143 | |||
146 | /* TODO avoid memcpy() below */ |
|
144 | /* TODO avoid memcpy() below */ | |
147 | readBuffer = (char *)self->buffer |
|
145 | readBuffer = (char *)self->buffer.buf + self->bufferOffset; | |
148 | bufferRemaining = self->buffer |
|
146 | bufferRemaining = self->buffer.len - self->bufferOffset; | |
149 | readSize = min(bufferRemaining, (Py_ssize_t)self->inSize); |
|
147 | readSize = min(bufferRemaining, (Py_ssize_t)self->inSize); | |
150 | self->bufferOffset += readSize; |
|
148 | self->bufferOffset += readSize; | |
151 | } |
|
149 | } |
@@ -13,50 +13,56 b' extern PyObject* ZstdError;' | |||||
13 | PyDoc_STRVAR(FrameParameters__doc__, |
|
13 | PyDoc_STRVAR(FrameParameters__doc__, | |
14 | "FrameParameters: information about a zstd frame"); |
|
14 | "FrameParameters: information about a zstd frame"); | |
15 |
|
15 | |||
16 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args) { |
|
16 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs) { | |
17 | const char* source; |
|
17 | static char* kwlist[] = { | |
18 | Py_ssize_t sourceSize; |
|
18 | "data", | |
19 | ZSTD_frameParams params; |
|
19 | NULL | |
|
20 | }; | |||
|
21 | ||||
|
22 | Py_buffer source; | |||
|
23 | ZSTD_frameHeader header; | |||
20 | FrameParametersObject* result = NULL; |
|
24 | FrameParametersObject* result = NULL; | |
21 | size_t zresult; |
|
25 | size_t zresult; | |
22 |
|
26 | |||
23 | #if PY_MAJOR_VERSION >= 3 |
|
27 | #if PY_MAJOR_VERSION >= 3 | |
24 | if (!PyArg_ParseTuple(args, "y |
|
28 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:get_frame_parameters", | |
25 | #else |
|
29 | #else | |
26 | if (!PyArg_ParseTuple(args, "s |
|
30 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:get_frame_parameters", | |
27 | #endif |
|
31 | #endif | |
28 |
|
|
32 | kwlist, &source)) { | |
29 | return NULL; |
|
33 | return NULL; | |
30 | } |
|
34 | } | |
31 |
|
35 | |||
32 | /* Needed for Python 2 to reject unicode */ |
|
36 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |
33 | if (!PyBytes_Check(PyTuple_GET_ITEM(args, 0))) { |
|
37 | PyErr_SetString(PyExc_ValueError, | |
34 | PyErr_SetString(PyExc_TypeError, "argument must be bytes"); |
|
38 | "data buffer should be contiguous and have at most one dimension"); | |
35 | return NULL; |
|
39 | goto finally; | |
36 | } |
|
40 | } | |
37 |
|
41 | |||
38 | zresult = ZSTD_getFrame |
|
42 | zresult = ZSTD_getFrameHeader(&header, source.buf, source.len); | |
39 |
|
43 | |||
40 | if (ZSTD_isError(zresult)) { |
|
44 | if (ZSTD_isError(zresult)) { | |
41 | PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult)); |
|
45 | PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult)); | |
42 | return NULL; |
|
46 | goto finally; | |
43 | } |
|
47 | } | |
44 |
|
48 | |||
45 | if (zresult) { |
|
49 | if (zresult) { | |
46 | PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult); |
|
50 | PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult); | |
47 | return NULL; |
|
51 | goto finally; | |
48 | } |
|
52 | } | |
49 |
|
53 | |||
50 | result = PyObject_New(FrameParametersObject, &FrameParametersType); |
|
54 | result = PyObject_New(FrameParametersObject, &FrameParametersType); | |
51 | if (!result) { |
|
55 | if (!result) { | |
52 | return NULL; |
|
56 | goto finally; | |
53 | } |
|
57 | } | |
54 |
|
58 | |||
55 | result->frameContentSize = |
|
59 | result->frameContentSize = header.frameContentSize; | |
56 | result->windowSize = |
|
60 | result->windowSize = header.windowSize; | |
57 | result->dictID = |
|
61 | result->dictID = header.dictID; | |
58 | result->checksumFlag = |
|
62 | result->checksumFlag = header.checksumFlag ? 1 : 0; | |
59 |
|
63 | |||
|
64 | finally: | |||
|
65 | PyBuffer_Release(&source); | |||
60 | return result; |
|
66 | return result; | |
61 | } |
|
67 | } | |
62 |
|
68 | |||
@@ -68,7 +74,7 b' static PyMemberDef FrameParameters_membe' | |||||
68 | { "content_size", T_ULONGLONG, |
|
74 | { "content_size", T_ULONGLONG, | |
69 | offsetof(FrameParametersObject, frameContentSize), READONLY, |
|
75 | offsetof(FrameParametersObject, frameContentSize), READONLY, | |
70 | "frame content size" }, |
|
76 | "frame content size" }, | |
71 | { "window_size", T_U |
|
77 | { "window_size", T_ULONGLONG, | |
72 | offsetof(FrameParametersObject, windowSize), READONLY, |
|
78 | offsetof(FrameParametersObject, windowSize), READONLY, | |
73 | "window size" }, |
|
79 | "window size" }, | |
74 | { "dict_id", T_UINT, |
|
80 | { "dict_id", T_UINT, |
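
With get_frame_parameters() now accepting any contiguous buffer via the data keyword and reading the header with ZSTD_getFrameHeader(), frame inspection from Python looks like this (a short sketch; the attribute values match what the updated tests assert later in this change):

    import zstandard as zstd

    frame = zstd.ZstdCompressor(level=1).compress(b'foobar' * 256)

    params = zstd.get_frame_parameters(frame)
    print(params.content_size)   # 1536 here; zstd.CONTENTSIZE_UNKNOWN if not written
    print(params.window_size)    # now a 64-bit value, per the T_ULONGLONG member above
    print(params.dict_id)        # 0 when no dictionary was used
    print(params.has_checksum)   # False unless the compressor wrote a checksum
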
@@ -12,12 +12,10 b'' | |||||
12 |
|
12 | |||
13 | #define ZSTD_STATIC_LINKING_ONLY |
|
13 | #define ZSTD_STATIC_LINKING_ONLY | |
14 | #define ZDICT_STATIC_LINKING_ONLY |
|
14 | #define ZDICT_STATIC_LINKING_ONLY | |
15 | #include |
|
15 | #include <zstd.h> | |
16 | #include |
|
16 | #include <zdict.h> | |
17 | #include "zdict.h" |
|
|||
18 | #include "zstdmt_compress.h" |
|
|||
19 |
|
17 | |||
20 | #define PYTHON_ZSTANDARD_VERSION "0. |
|
18 | #define PYTHON_ZSTANDARD_VERSION "0.9.0" | |
21 |
|
19 | |||
22 | typedef enum { |
|
20 | typedef enum { | |
23 | compressorobj_flush_finish, |
|
21 | compressorobj_flush_finish, | |
@@ -25,22 +23,38 b' typedef enum {' | |||||
25 | } CompressorObj_Flush; |
|
23 | } CompressorObj_Flush; | |
26 |
|
24 | |||
27 | /* |
|
25 | /* | |
28 | Represents a CompressionParameters type. |
|
26 | Represents a ZstdCompressionParameters type. | |
29 |
|
27 | |||
30 | This type |
|
28 | This type holds all the low-level compression parameters that can be set. | |
31 | */ |
|
29 | */ | |
32 | typedef struct { |
|
30 | typedef struct { | |
33 | PyObject_HEAD |
|
31 | PyObject_HEAD | |
|
32 | ZSTD_CCtx_params* params; | |||
|
33 | unsigned format; | |||
|
34 | int compressionLevel; | |||
34 | unsigned windowLog; |
|
35 | unsigned windowLog; | |
|
36 | unsigned hashLog; | |||
35 | unsigned chainLog; |
|
37 | unsigned chainLog; | |
36 | unsigned hashLog; |
|
|||
37 | unsigned searchLog; |
|
38 | unsigned searchLog; | |
38 | unsigned |
|
39 | unsigned minMatch; | |
39 | unsigned targetLength; |
|
40 | unsigned targetLength; | |
40 | ZSTD_strategy strategy; |
|
41 | unsigned compressionStrategy; | |
41 | } CompressionParametersObject; |
|
42 | unsigned contentSizeFlag; | |
|
43 | unsigned checksumFlag; | |||
|
44 | unsigned dictIDFlag; | |||
|
45 | unsigned threads; | |||
|
46 | unsigned jobSize; | |||
|
47 | unsigned overlapSizeLog; | |||
|
48 | unsigned compressLiterals; | |||
|
49 | unsigned forceMaxWindow; | |||
|
50 | unsigned enableLongDistanceMatching; | |||
|
51 | unsigned ldmHashLog; | |||
|
52 | unsigned ldmMinMatch; | |||
|
53 | unsigned ldmBucketSizeLog; | |||
|
54 | unsigned ldmHashEveryLog; | |||
|
55 | } ZstdCompressionParametersObject; | |||
42 |
|
56 | |||
43 | extern PyTypeObject CompressionParametersType; |
|
57 | extern PyTypeObject ZstdCompressionParametersType; | |
44 |
|
58 | |||
45 | /* |
|
59 | /* | |
46 | Represents a FrameParameters type. |
|
60 | Represents a FrameParameters type. | |
@@ -50,7 +64,7 b' extern PyTypeObject CompressionParameter' | |||||
50 | typedef struct { |
|
64 | typedef struct { | |
51 | PyObject_HEAD |
|
65 | PyObject_HEAD | |
52 | unsigned long long frameContentSize; |
|
66 | unsigned long long frameContentSize; | |
53 | unsigned windowSize; |
|
67 | unsigned long long windowSize; | |
54 | unsigned dictID; |
|
68 | unsigned dictID; | |
55 | char checksumFlag; |
|
69 | char checksumFlag; | |
56 | } FrameParametersObject; |
|
70 | } FrameParametersObject; | |
@@ -69,10 +83,14 b' typedef struct {' | |||||
69 | void* dictData; |
|
83 | void* dictData; | |
70 | /* Size of dictionary data. */ |
|
84 | /* Size of dictionary data. */ | |
71 | size_t dictSize; |
|
85 | size_t dictSize; | |
|
86 | ZSTD_dictContentType_e dictType; | |||
72 | /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */ |
|
87 | /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */ | |
73 | unsigned k; |
|
88 | unsigned k; | |
74 | /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */ |
|
89 | /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */ | |
75 | unsigned d; |
|
90 | unsigned d; | |
|
91 | /* Digested dictionary, suitable for reuse. */ | |||
|
92 | ZSTD_CDict* cdict; | |||
|
93 | ZSTD_DDict* ddict; | |||
76 | } ZstdCompressionDict; |
|
94 | } ZstdCompressionDict; | |
77 |
|
95 | |||
78 | extern PyTypeObject ZstdCompressionDictType; |
|
96 | extern PyTypeObject ZstdCompressionDictType; | |
@@ -83,29 +101,15 b' extern PyTypeObject ZstdCompressionDictT' | |||||
83 | typedef struct { |
|
101 | typedef struct { | |
84 | PyObject_HEAD |
|
102 | PyObject_HEAD | |
85 |
|
103 | |||
86 | /* Configured compression level. Should be always set. */ |
|
|||
87 | int compressionLevel; |
|
|||
88 | /* Number of threads to use for operations. */ |
|
104 | /* Number of threads to use for operations. */ | |
89 | unsigned int threads; |
|
105 | unsigned int threads; | |
90 | /* Pointer to compression dictionary to use. NULL if not using dictionary |
|
106 | /* Pointer to compression dictionary to use. NULL if not using dictionary | |
91 | compression. */ |
|
107 | compression. */ | |
92 | ZstdCompressionDict* dict; |
|
108 | ZstdCompressionDict* dict; | |
93 | /* Compression context to use. Populated during object construction. |
|
109 | /* Compression context to use. Populated during object construction. */ | |
94 | if using multi-threaded compression. */ |
|
|||
95 | ZSTD_CCtx* cctx; |
|
110 | ZSTD_CCtx* cctx; | |
96 | /* Multi-threaded compression context to use. Populated during object |
|
111 | /* Compression parameters in use. */ | |
97 | construction. NULL if not using multi-threaded compression. */ |
|
112 | ZSTD_CCtx_params* params; | |
98 | ZSTDMT_CCtx* mtcctx; |
|
|||
99 | /* Digest compression dictionary. NULL initially. Populated on first use. */ |
|
|||
100 | ZSTD_CDict* cdict; |
|
|||
101 | /* Low-level compression parameter control. NULL unless passed to |
|
|||
102 | constructor. Takes precedence over `compressionLevel` if defined. */ |
|
|||
103 | CompressionParametersObject* cparams; |
|
|||
104 | /* Controls zstd frame options. */ |
|
|||
105 | ZSTD_frameParameters fparams; |
|
|||
106 | /* Holds state for streaming compression. Shared across all invocation. |
|
|||
107 | Populated on first use. */ |
|
|||
108 | ZSTD_CStream* cstream; |
|
|||
109 | } ZstdCompressor; |
|
113 | } ZstdCompressor; | |
110 |
|
114 | |||
111 | extern PyTypeObject ZstdCompressorType; |
|
115 | extern PyTypeObject ZstdCompressorType; | |
@@ -125,9 +129,10 b' typedef struct {' | |||||
125 |
|
129 | |||
126 | ZstdCompressor* compressor; |
|
130 | ZstdCompressor* compressor; | |
127 | PyObject* writer; |
|
131 | PyObject* writer; | |
128 | Py_ssize_t sourceSize; |
|
132 | unsigned long long sourceSize; | |
129 | size_t outSize; |
|
133 | size_t outSize; | |
130 | int entered; |
|
134 | int entered; | |
|
135 | unsigned long long bytesCompressed; | |||
131 | } ZstdCompressionWriter; |
|
136 | } ZstdCompressionWriter; | |
132 |
|
137 | |||
133 | extern PyTypeObject ZstdCompressionWriterType; |
|
138 | extern PyTypeObject ZstdCompressionWriterType; | |
@@ -137,9 +142,8 b' typedef struct {' | |||||
137 |
|
142 | |||
138 | ZstdCompressor* compressor; |
|
143 | ZstdCompressor* compressor; | |
139 | PyObject* reader; |
|
144 | PyObject* reader; | |
140 | Py_buffer |
|
145 | Py_buffer buffer; | |
141 | Py_ssize_t bufferOffset; |
|
146 | Py_ssize_t bufferOffset; | |
142 | Py_ssize_t sourceSize; |
|
|||
143 | size_t inSize; |
|
147 | size_t inSize; | |
144 | size_t outSize; |
|
148 | size_t outSize; | |
145 |
|
149 | |||
@@ -155,11 +159,32 b' extern PyTypeObject ZstdCompressorIterat' | |||||
155 | typedef struct { |
|
159 | typedef struct { | |
156 | PyObject_HEAD |
|
160 | PyObject_HEAD | |
157 |
|
161 | |||
|
162 | ZstdCompressor* compressor; | |||
|
163 | PyObject* reader; | |||
|
164 | Py_buffer buffer; | |||
|
165 | unsigned long long sourceSize; | |||
|
166 | size_t readSize; | |||
|
167 | ||||
|
168 | int entered; | |||
|
169 | int closed; | |||
|
170 | unsigned long long bytesCompressed; | |||
|
171 | ||||
|
172 | ZSTD_inBuffer input; | |||
|
173 | ZSTD_outBuffer output; | |||
|
174 | int finishedInput; | |||
|
175 | int finishedOutput; | |||
|
176 | PyObject* readResult; | |||
|
177 | } ZstdCompressionReader; | |||
|
178 | ||||
|
179 | extern PyTypeObject ZstdCompressionReaderType; | |||
|
180 | ||||
|
181 | typedef struct { | |||
|
182 | PyObject_HEAD | |||
|
183 | ||||
158 | ZSTD_DCtx* dctx; |
|
184 | ZSTD_DCtx* dctx; | |
159 |
|
||||
160 | ZstdCompressionDict* dict; |
|
185 | ZstdCompressionDict* dict; | |
161 | ZSTD_DDict* ddict; |
|
186 | size_t maxWindowSize; | |
162 | ZSTD_DStream* dstream; |
|
187 | ZSTD_format_e format; | |
163 | } ZstdDecompressor; |
|
188 | } ZstdDecompressor; | |
164 |
|
189 | |||
165 | extern PyTypeObject ZstdDecompressorType; |
|
190 | extern PyTypeObject ZstdDecompressorType; | |
@@ -168,6 +193,7 b' typedef struct {' | |||||
168 | PyObject_HEAD |
|
193 | PyObject_HEAD | |
169 |
|
194 | |||
170 | ZstdDecompressor* decompressor; |
|
195 | ZstdDecompressor* decompressor; | |
|
196 | size_t outSize; | |||
171 | int finished; |
|
197 | int finished; | |
172 | } ZstdDecompressionObj; |
|
198 | } ZstdDecompressionObj; | |
173 |
|
199 | |||
@@ -176,6 +202,40 b' extern PyTypeObject ZstdDecompressionObj' | |||||
176 | typedef struct { |
|
202 | typedef struct { | |
177 | PyObject_HEAD |
|
203 | PyObject_HEAD | |
178 |
|
204 | |||
|
205 | /* Parent decompressor to which this object is associated. */ | |||
|
206 | ZstdDecompressor* decompressor; | |||
|
207 | /* Object to read() from (if reading from a stream). */ | |||
|
208 | PyObject* reader; | |||
|
209 | /* Size for read() operations on reader. */ | |||
|
210 | size_t readSize; | |||
|
211 | /* Buffer to read from (if reading from a buffer). */ | |||
|
212 | Py_buffer buffer; | |||
|
213 | ||||
|
214 | /* Whether the context manager is active. */ | |||
|
215 | int entered; | |||
|
216 | /* Whether we've closed the stream. */ | |||
|
217 | int closed; | |||
|
218 | ||||
|
219 | /* Number of bytes decompressed and returned to user. */ | |||
|
220 | unsigned long long bytesDecompressed; | |||
|
221 | ||||
|
222 | /* Tracks data going into decompressor. */ | |||
|
223 | ZSTD_inBuffer input; | |||
|
224 | ||||
|
225 | /* Holds output from read() operation on reader. */ | |||
|
226 | PyObject* readResult; | |||
|
227 | ||||
|
228 | /* Whether all input has been sent to the decompressor. */ | |||
|
229 | int finishedInput; | |||
|
230 | /* Whether all output has been flushed from the decompressor. */ | |||
|
231 | int finishedOutput; | |||
|
232 | } ZstdDecompressionReader; | |||
|
233 | ||||
|
234 | extern PyTypeObject ZstdDecompressionReaderType; | |||
|
235 | ||||
|
236 | typedef struct { | |||
|
237 | PyObject_HEAD | |||
|
238 | ||||
179 | ZstdDecompressor* decompressor; |
|
239 | ZstdDecompressor* decompressor; | |
180 | PyObject* writer; |
|
240 | PyObject* writer; | |
181 | size_t outSize; |
|
241 | size_t outSize; | |
@@ -189,7 +249,7 b' typedef struct {' | |||||
189 |
|
249 | |||
190 | ZstdDecompressor* decompressor; |
|
250 | ZstdDecompressor* decompressor; | |
191 | PyObject* reader; |
|
251 | PyObject* reader; | |
192 | Py_buffer |
|
252 | Py_buffer buffer; | |
193 | Py_ssize_t bufferOffset; |
|
253 | Py_ssize_t bufferOffset; | |
194 | size_t inSize; |
|
254 | size_t inSize; | |
195 | size_t outSize; |
|
255 | size_t outSize; | |
@@ -209,6 +269,9 b' typedef struct {' | |||||
209 | } DecompressorIteratorResult; |
|
269 | } DecompressorIteratorResult; | |
210 |
|
270 | |||
211 | typedef struct { |
|
271 | typedef struct { | |
|
272 | /* The public API is that these are 64-bit unsigned integers. So these can't | |||
|
273 | * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may | |||
|
274 | * be nonsensical for this platform. */ | |||
212 | unsigned long long offset; |
|
275 | unsigned long long offset; | |
213 | unsigned long long length; |
|
276 | unsigned long long length; | |
214 | } BufferSegment; |
|
277 | } BufferSegment; | |
@@ -270,16 +333,14 b' typedef struct {' | |||||
270 |
|
333 | |||
271 | extern PyTypeObject ZstdBufferWithSegmentsCollectionType; |
|
334 | extern PyTypeObject ZstdBufferWithSegmentsCollectionType; | |
272 |
|
335 | |||
273 | void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams); |
|
336 | int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value); | |
274 | CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args); |
|
337 | int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj); | |
275 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args); |
|
338 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs); | |
276 | PyObject* estimate_compression_context_size(PyObject* self, PyObject* args); |
|
339 | int ensure_ddict(ZstdCompressionDict* dict); | |
277 | int |
|
340 | int ensure_dctx(ZstdDecompressor* decompressor, int loadDict); | |
278 | int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize); |
|
|||
279 | int init_dstream(ZstdDecompressor* decompressor); |
|
|||
280 | ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs); |
|
341 | ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs); | |
281 | ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs); |
|
|||
282 | ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize); |
|
342 | ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize); | |
283 | Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*); |
|
343 | Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*); | |
284 | int cpu_count(void); |
|
344 | int cpu_count(void); | |
285 | size_t roundpow2(size_t); |
|
345 | size_t roundpow2(size_t); | |
|
346 | int safe_pybytes_resize(PyObject** obj, Py_ssize_t size); |
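
The header now centres compression configuration on ZSTD_CCtx_params: ZstdCompressionParametersObject carries every advanced knob (window/hash/chain logs, LDM, threads, job size) and ZstdCompressor simply holds a cctx plus a params object. A sketch of the resulting Python-level API, using only calls the updated test suite exercises:

    import zstandard as zstd

    # Derive explicit low-level parameters from a level, overriding a few
    # fields, then hand them to a compressor via compression_params=.
    params = zstd.ZstdCompressionParameters.from_level(3,
                                                       source_size=1048576,
                                                       threads=2)
    print(params.window_log)

    cctx = zstd.ZstdCompressor(compression_params=params)
    compressed = cctx.compress(b'data to compress' * 4096)
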
@@ -27,6 +27,11 b" SOURCES = ['zstd/%s' % p for p in (" | |||||
27 | 'compress/fse_compress.c', |
|
27 | 'compress/fse_compress.c', | |
28 | 'compress/huf_compress.c', |
|
28 | 'compress/huf_compress.c', | |
29 | 'compress/zstd_compress.c', |
|
29 | 'compress/zstd_compress.c', | |
|
30 | 'compress/zstd_double_fast.c', | |||
|
31 | 'compress/zstd_fast.c', | |||
|
32 | 'compress/zstd_lazy.c', | |||
|
33 | 'compress/zstd_ldm.c', | |||
|
34 | 'compress/zstd_opt.c', | |||
30 | 'compress/zstdmt_compress.c', |
|
35 | 'compress/zstdmt_compress.c', | |
31 | 'decompress/huf_decompress.c', |
|
36 | 'decompress/huf_decompress.c', | |
32 | 'decompress/zstd_decompress.c', |
|
37 | 'decompress/zstd_decompress.c', | |
@@ -38,7 +43,6 b" SOURCES = ['zstd/%s' % p for p in (" | |||||
38 | # Headers whose preprocessed output will be fed into cdef(). |
|
43 | # Headers whose preprocessed output will be fed into cdef(). | |
39 | HEADERS = [os.path.join(HERE, 'zstd', *p) for p in ( |
|
44 | HEADERS = [os.path.join(HERE, 'zstd', *p) for p in ( | |
40 | ('zstd.h',), |
|
45 | ('zstd.h',), | |
41 | ('compress', 'zstdmt_compress.h'), |
|
|||
42 | ('dictBuilder', 'zdict.h'), |
|
46 | ('dictBuilder', 'zdict.h'), | |
43 | )] |
|
47 | )] | |
44 |
|
48 | |||
@@ -80,7 +84,9 b' else:' | |||||
80 | def preprocess(path): |
|
84 | def preprocess(path): | |
81 | with open(path, 'rb') as fh: |
|
85 | with open(path, 'rb') as fh: | |
82 | lines = [] |
|
86 | lines = [] | |
83 |
|
|
87 | it = iter(fh) | |
|
88 | ||||
|
89 | for l in it: | |||
84 | # zstd.h includes <stddef.h>, which is also included by cffi's |
|
90 | # zstd.h includes <stddef.h>, which is also included by cffi's | |
85 | # boilerplate. This can lead to duplicate declarations. So we strip |
|
91 | # boilerplate. This can lead to duplicate declarations. So we strip | |
86 | # this include from the preprocessor invocation. |
|
92 | # this include from the preprocessor invocation. | |
@@ -137,18 +143,21 b' def normalize_output(output):' | |||||
137 |
|
143 | |||
138 |
|
144 | |||
139 | ffi = cffi.FFI() |
|
145 | ffi = cffi.FFI() | |
|
146 | # zstd.h uses a possible undefined MIN(). Define it until | |||
|
147 | # https://github.com/facebook/zstd/issues/976 is fixed. | |||
140 | # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning |
|
148 | # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning | |
141 | # when cffi uses the function. Since we statically link against zstd, even |
|
149 | # when cffi uses the function. Since we statically link against zstd, even | |
142 | # if we use the deprecated functions it shouldn't be a huge problem. |
|
150 | # if we use the deprecated functions it shouldn't be a huge problem. | |
143 | ffi.set_source('_zstd_cffi', ''' |
|
151 | ffi.set_source('_zstd_cffi', ''' | |
144 | #include "mem.h" |
|
152 | #define MIN(a,b) ((a)<(b) ? (a) : (b)) | |
145 | #define ZSTD_STATIC_LINKING_ONLY |
|
153 | #define ZSTD_STATIC_LINKING_ONLY | |
146 | #include |
|
154 | #include <zstd.h> | |
147 | #define ZDICT_STATIC_LINKING_ONLY |
|
155 | #define ZDICT_STATIC_LINKING_ONLY | |
148 | #define ZDICT_DISABLE_DEPRECATE_WARNINGS |
|
156 | #define ZDICT_DISABLE_DEPRECATE_WARNINGS | |
149 | #include |
|
157 | #include <zdict.h> | |
150 | #include "zstdmt_compress.h" |
|
158 | ''', sources=SOURCES, | |
151 |
|
|
159 | include_dirs=INCLUDE_DIRS, | |
|
160 | extra_compile_args=['-DZSTD_MULTITHREAD']) | |||
152 |
|
161 | |||
153 | DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ') |
|
162 | DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ') | |
154 |
|
163 |
@@ -5,6 +5,7 b'' | |||||
5 | # This software may be modified and distributed under the terms |
|
5 | # This software may be modified and distributed under the terms | |
6 | # of the BSD license. See the LICENSE file for details. |
|
6 | # of the BSD license. See the LICENSE file for details. | |
7 |
|
7 | |||
|
8 | import os | |||
8 | import sys |
|
9 | import sys | |
9 | from setuptools import setup |
|
10 | from setuptools import setup | |
10 |
|
11 | |||
@@ -16,14 +17,32 b' except ImportError:' | |||||
16 | import setup_zstd |
|
17 | import setup_zstd | |
17 |
|
18 | |||
18 | SUPPORT_LEGACY = False |
|
19 | SUPPORT_LEGACY = False | |
|
20 | SYSTEM_ZSTD = False | |||
|
21 | WARNINGS_AS_ERRORS = False | |||
19 |
|
22 | |||
20 | if "--legacy" in sys.argv: |
|
23 | if os.environ.get('ZSTD_WARNINGS_AS_ERRORS', ''): | |
|
24 | WARNINGS_AS_ERRORS = True | |||
|
25 | ||||
|
26 | if '--legacy' in sys.argv: | |||
21 | SUPPORT_LEGACY = True |
|
27 | SUPPORT_LEGACY = True | |
22 | sys.argv.remove( |
|
28 | sys.argv.remove('--legacy') | |
|
29 | ||||
|
30 | if '--system-zstd' in sys.argv: | |||
|
31 | SYSTEM_ZSTD = True | |||
|
32 | sys.argv.remove('--system-zstd') | |||
|
33 | ||||
|
34 | if '--warnings-as-errors' in sys.argv: | |||
|
35 | WARNINGS_AS_ERRORS = True | |||
|
36 | sys.argv.remove('--warnings-as-errors') | |
23 |
|
37 | |||
24 | # Code for obtaining the Extension instance is in its own module to |
|
38 | # Code for obtaining the Extension instance is in its own module to | |
25 | # facilitate reuse in other projects. |
|
39 | # facilitate reuse in other projects. | |
26 | extensions = [setup_zstd.get_c_extension(SUPPORT_LEGACY, 'zstd')] |
|
40 | extensions = [ | |
|
41 | setup_zstd.get_c_extension(name='zstd', | |||
|
42 | support_legacy=SUPPORT_LEGACY, | |||
|
43 | system_zstd=SYSTEM_ZSTD, | |||
|
44 | warnings_as_errors=WARNINGS_AS_ERRORS), | |||
|
45 | ] | |||
27 |
|
46 | |||
28 | install_requires = [] |
|
47 | install_requires = [] | |
29 |
|
48 | |||
@@ -31,8 +50,11 b' if cffi:' | |||||
31 | import make_cffi |
|
50 | import make_cffi | |
32 | extensions.append(make_cffi.ffi.distutils_extension()) |
|
51 | extensions.append(make_cffi.ffi.distutils_extension()) | |
33 |
|
52 | |||
34 | # Need change in 1. |
|
53 | # Need change in 1.10 for ffi.from_buffer() to handle all buffer types | |
35 | install_requires.append('cffi>=1.8') |
|
54 | # (like memoryview). | |
|
55 | # Need feature in 1.11 for ffi.gc() to declare size of objects so we avoid | |||
|
56 | # garbage collection pitfalls. | |||
|
57 | install_requires.append('cffi>=1.11') | |||
36 |
|
58 | |||
37 | version = None |
|
59 | version = None | |
38 |
|
60 | |||
@@ -62,14 +84,13 b' setup(' | |||||
62 | 'Intended Audience :: Developers', |
|
84 | 'Intended Audience :: Developers', | |
63 | 'License :: OSI Approved :: BSD License', |
|
85 | 'License :: OSI Approved :: BSD License', | |
64 | 'Programming Language :: C', |
|
86 | 'Programming Language :: C', | |
65 | 'Programming Language :: Python :: 2.6', |
|
|||
66 | 'Programming Language :: Python :: 2.7', |
|
87 | 'Programming Language :: Python :: 2.7', | |
67 | 'Programming Language :: Python :: 3.3', |
|
|||
68 | 'Programming Language :: Python :: 3.4', |
|
88 | 'Programming Language :: Python :: 3.4', | |
69 | 'Programming Language :: Python :: 3.5', |
|
89 | 'Programming Language :: Python :: 3.5', | |
70 | 'Programming Language :: Python :: 3.6', |
|
90 | 'Programming Language :: Python :: 3.6', | |
71 | ], |
|
91 | ], | |
72 | keywords='zstandard zstd compression', |
|
92 | keywords='zstandard zstd compression', | |
|
93 | packages=['zstandard'], | |||
73 | ext_modules=extensions, |
|
94 | ext_modules=extensions, | |
74 | test_suite='tests', |
|
95 | test_suite='tests', | |
75 | install_requires=install_requires, |
|
96 | install_requires=install_requires, |
@@ -4,7 +4,10 b'' | |||||
4 | # This software may be modified and distributed under the terms |
|
4 | # This software may be modified and distributed under the terms | |
5 | # of the BSD license. See the LICENSE file for details. |
|
5 | # of the BSD license. See the LICENSE file for details. | |
6 |
|
6 | |||
|
7 | import distutils.ccompiler | |||
7 | import os |
|
8 | import os | |
|
9 | import sys | |||
|
10 | ||||
8 | from distutils.extension import Extension |
|
11 | from distutils.extension import Extension | |
9 |
|
12 | |||
10 |
|
13 | |||
@@ -19,6 +22,11 b" zstd_sources = ['zstd/%s' % p for p in (" | |||||
19 | 'compress/fse_compress.c', |
|
22 | 'compress/fse_compress.c', | |
20 | 'compress/huf_compress.c', |
|
23 | 'compress/huf_compress.c', | |
21 | 'compress/zstd_compress.c', |
|
24 | 'compress/zstd_compress.c', | |
|
25 | 'compress/zstd_double_fast.c', | |||
|
26 | 'compress/zstd_fast.c', | |||
|
27 | 'compress/zstd_lazy.c', | |||
|
28 | 'compress/zstd_ldm.c', | |||
|
29 | 'compress/zstd_opt.c', | |||
22 | 'compress/zstdmt_compress.c', |
|
30 | 'compress/zstdmt_compress.c', | |
23 | 'decompress/huf_decompress.c', |
|
31 | 'decompress/huf_decompress.c', | |
24 | 'decompress/zstd_decompress.c', |
|
32 | 'decompress/zstd_decompress.c', | |
@@ -41,7 +49,6 b" zstd_sources_legacy = ['zstd/%s' % p for" | |||||
41 | )] |
|
49 | )] | |
42 |
|
50 | |||
43 | zstd_includes = [ |
|
51 | zstd_includes = [ | |
44 | 'c-ext', |
|
|||
45 | 'zstd', |
|
52 | 'zstd', | |
46 | 'zstd/common', |
|
53 | 'zstd/common', | |
47 | 'zstd/compress', |
|
54 | 'zstd/compress', | |
@@ -54,7 +61,14 b' zstd_includes_legacy = [' | |||||
54 | 'zstd/legacy', |
|
61 | 'zstd/legacy', | |
55 | ] |
|
62 | ] | |
56 |
|
63 | |||
|
64 | ext_includes = [ | |||
|
65 | 'c-ext', | |||
|
66 | 'zstd/common', | |||
|
67 | ] | |||
|
68 | ||||
57 | ext_sources = [ |
|
69 | ext_sources = [ | |
|
70 | 'zstd/common/pool.c', | |||
|
71 | 'zstd/common/threading.c', | |||
58 | 'zstd.c', |
|
72 | 'zstd.c', | |
59 | 'c-ext/bufferutil.c', |
|
73 | 'c-ext/bufferutil.c', | |
60 | 'c-ext/compressiondict.c', |
|
74 | 'c-ext/compressiondict.c', | |
@@ -62,11 +76,13 b' ext_sources = [' | |||||
62 | 'c-ext/compressor.c', |
|
76 | 'c-ext/compressor.c', | |
63 | 'c-ext/compressoriterator.c', |
|
77 | 'c-ext/compressoriterator.c', | |
64 | 'c-ext/compressionparams.c', |
|
78 | 'c-ext/compressionparams.c', | |
|
79 | 'c-ext/compressionreader.c', | |||
65 | 'c-ext/compressionwriter.c', |
|
80 | 'c-ext/compressionwriter.c', | |
66 | 'c-ext/constants.c', |
|
81 | 'c-ext/constants.c', | |
67 | 'c-ext/decompressobj.c', |
|
82 | 'c-ext/decompressobj.c', | |
68 | 'c-ext/decompressor.c', |
|
83 | 'c-ext/decompressor.c', | |
69 | 'c-ext/decompressoriterator.c', |
|
84 | 'c-ext/decompressoriterator.c', | |
|
85 | 'c-ext/decompressionreader.c', | |||
70 | 'c-ext/decompressionwriter.c', |
|
86 | 'c-ext/decompressionwriter.c', | |
71 | 'c-ext/frameparams.c', |
|
87 | 'c-ext/frameparams.c', | |
72 | ] |
|
88 | ] | |
@@ -76,27 +92,67 b' zstd_depends = [' | |||||
76 | ] |
|
92 | ] | |
77 |
|
93 | |||
78 |
|
94 | |||
79 | def get_c_extension(support_legacy=False, name='zstd' |
|
95 | def get_c_extension(support_legacy=False, system_zstd=False, name='zstd', | |
|
96 | warnings_as_errors=False): | |||
80 | """Obtain a distutils.extension.Extension for the C extension.""" |
|
97 | """Obtain a distutils.extension.Extension for the C extension.""" | |
81 | root = os.path.abspath(os.path.dirname(__file__)) |
|
98 | root = os.path.abspath(os.path.dirname(__file__)) | |
82 |
|
99 | |||
83 | sources = [os.path.join(root, p) for p in |
|
100 | sources = set([os.path.join(root, p) for p in ext_sources]) | |
84 | if support_legacy: |
|
101 | if not system_zstd: | |
85 | sources. |
|
102 | sources.update([os.path.join(root, p) for p in zstd_sources]) | |
|
103 | if support_legacy: | |||
|
104 | sources.update([os.path.join(root, p) for p in zstd_sources_legacy]) | |||
|
105 | sources = list(sources) | |||
86 |
|
106 | |||
87 | include_dirs = [os.path.join(root, d) for d in |
|
107 | include_dirs = set([os.path.join(root, d) for d in ext_includes]) | |
88 | if support_legacy: |
|
108 | if not system_zstd: | |
89 | include_dirs. |
|
109 | include_dirs.update([os.path.join(root, d) for d in zstd_includes]) | |
|
110 | if support_legacy: | |||
|
111 | include_dirs.update([os.path.join(root, d) for d in zstd_includes_legacy]) | |||
|
112 | include_dirs = list(include_dirs) | |||
90 |
|
113 | |||
91 | depends = [os.path.join(root, p) for p in zstd_depends] |
|
114 | depends = [os.path.join(root, p) for p in zstd_depends] | |
92 |
|
115 | |||
|
116 | compiler = distutils.ccompiler.new_compiler() | |||
|
117 | ||||
|
118 | # Needed for MSVC. | |||
|
119 | if hasattr(compiler, 'initialize'): | |||
|
120 | compiler.initialize() | |||
|
121 | ||||
|
122 | if compiler.compiler_type == 'unix': | |||
|
123 | compiler_type = 'unix' | |||
|
124 | elif compiler.compiler_type == 'msvc': | |||
|
125 | compiler_type = 'msvc' | |||
|
126 | else: | |||
|
127 | raise Exception('unhandled compiler type: %s' % | |||
|
128 | compiler.compiler_type) | |||
|
129 | ||||
93 | extra_args = ['-DZSTD_MULTITHREAD'] |
|
130 | extra_args = ['-DZSTD_MULTITHREAD'] | |
94 |
|
131 | |||
95 | if support_legacy: |
|
132 | if not system_zstd: | |
|
133 | extra_args.append('-DZSTDLIB_VISIBILITY=') | |||
|
134 | extra_args.append('-DZDICTLIB_VISIBILITY=') | |||
|
135 | extra_args.append('-DZSTDERRORLIB_VISIBILITY=') | |||
|
136 | ||||
|
137 | if compiler_type == 'unix': | |||
|
138 | extra_args.append('-fvisibility=hidden') | |||
|
139 | ||||
|
140 | if not system_zstd and support_legacy: | |||
96 | extra_args.append('-DZSTD_LEGACY_SUPPORT=1') |
|
141 | extra_args.append('-DZSTD_LEGACY_SUPPORT=1') | |
97 |
|
142 | |||
|
143 | if warnings_as_errors: | |||
|
144 | if compiler_type == 'unix': | |||
|
145 | extra_args.append('-Werror') | |||
|
146 | elif compiler_type == 'msvc': | |||
|
147 | extra_args.append('/WX') | |||
|
148 | else: | |||
|
149 | assert False | |||
|
150 | ||||
|
151 | libraries = ['zstd'] if system_zstd else [] | |||
|
152 | ||||
98 | # TODO compile with optimizations. |
|
153 | # TODO compile with optimizations. | |
99 | return Extension(name, sources, |
|
154 | return Extension(name, sources, | |
100 | include_dirs=include_dirs, |
|
155 | include_dirs=include_dirs, | |
101 | depends=depends, |
|
156 | depends=depends, | |
102 | extra_compile_args=extra_args |
|
157 | extra_compile_args=extra_args, | |
|
158 | libraries=libraries) |
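
setup.py above already drives this helper; since the module exists to facilitate reuse in other projects, a downstream setup.py that wants to link against a system libzstd might call it roughly like this (illustrative only; example-project is a placeholder name):

    from setuptools import setup

    import setup_zstd

    # system_zstd=True skips the bundled sources and links against the
    # installed zstd library instead; warnings_as_errors maps to -Werror
    # or /WX depending on the compiler detected above.
    ext = setup_zstd.get_c_extension(name='zstd',
                                     support_legacy=False,
                                     system_zstd=True,
                                     warnings_as_errors=False)

    setup(name='example-project', ext_modules=[ext])
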
@@ -1,16 +1,48 b'' | |||||
|
1 | import imp | |||
1 | import inspect |
|
2 | import inspect | |
2 | import io |
|
3 | import io | |
3 | import os |
|
4 | import os | |
4 | import types |
|
5 | import types | |
5 |
|
6 | |||
|
7 | try: | |||
|
8 | import hypothesis | |||
|
9 | except ImportError: | |||
|
10 | hypothesis = None | |||
|
11 | ||||
6 |
|
12 | |||
7 | def make_cffi(cls): |
|
13 | def make_cffi(cls): | |
8 | """Decorator to add CFFI versions of each test method.""" |
|
14 | """Decorator to add CFFI versions of each test method.""" | |
9 |
|
15 | |||
|
16 | # The module containing this class definition should | |||
|
17 | # `import zstandard as zstd`. Otherwise things may blow up. | |||
|
18 | mod = inspect.getmodule(cls) | |||
|
19 | if not hasattr(mod, 'zstd'): | |||
|
20 | raise Exception('test module does not contain "zstd" symbol') | |||
|
21 | ||||
|
22 | if not hasattr(mod.zstd, 'backend'): | |||
|
23 | raise Exception('zstd symbol does not have "backend" attribute; did ' | |||
|
24 | 'you `import zstandard as zstd`?') | |||
|
25 | ||||
|
26 | # If `import zstandard` already chose the cffi backend, there is nothing | |||
|
27 | # for us to do: we only add the cffi variation if the default backend | |||
|
28 | # is the C extension. | |||
|
29 | if mod.zstd.backend == 'cffi': | |||
|
30 | return cls | |||
|
31 | ||||
|
32 | old_env = dict(os.environ) | |||
|
33 | os.environ['PYTHON_ZSTANDARD_IMPORT_POLICY'] = 'cffi' | |||
10 | try: |
|
34 | try: | |
11 | import zstd_cffi |
|
35 | try: | |
12 | except ImportError: |
|
36 | mod_info = imp.find_module('zstandard') | |
13 | return cls |
|
37 | mod = imp.load_module('zstandard_cffi', *mod_info) | |
|
38 | except ImportError: | |||
|
39 | return cls | |||
|
40 | finally: | |||
|
41 | os.environ.clear() | |||
|
42 | os.environ.update(old_env) | |||
|
43 | ||||
|
44 | if mod.backend != 'cffi': | |||
|
45 | raise Exception('got the zstandard %s backend instead of cffi' % mod.backend) | |||
14 |
|
46 | |||
15 | # If CFFI version is available, dynamically construct test methods |
|
47 | # If CFFI version is available, dynamically construct test methods | |
16 | # that use it. |
|
48 | # that use it. | |
@@ -29,13 +61,13 b' def make_cffi(cls):' | |||||
29 | # the function object and install it in a new attribute. |
|
61 | # the function object and install it in a new attribute. | |
30 | if isinstance(fn, types.FunctionType): |
|
62 | if isinstance(fn, types.FunctionType): | |
31 | globs = dict(fn.__globals__) |
|
63 | globs = dict(fn.__globals__) | |
32 | globs['zstd'] = |
|
64 | globs['zstd'] = mod | |
33 | new_fn = types.FunctionType(fn.__code__, globs, name, |
|
65 | new_fn = types.FunctionType(fn.__code__, globs, name, | |
34 | fn.__defaults__, fn.__closure__) |
|
66 | fn.__defaults__, fn.__closure__) | |
35 | new_method = new_fn |
|
67 | new_method = new_fn | |
36 | else: |
|
68 | else: | |
37 | globs = dict(fn.__func__.func_globals) |
|
69 | globs = dict(fn.__func__.func_globals) | |
38 | globs['zstd'] = |
|
70 | globs['zstd'] = mod | |
39 | new_fn = types.FunctionType(fn.__func__.func_code, globs, name, |
|
71 | new_fn = types.FunctionType(fn.__func__.func_code, globs, name, | |
40 | fn.__func__.func_defaults, |
|
72 | fn.__func__.func_defaults, | |
41 | fn.__func__.func_closure) |
|
73 | fn.__func__.func_closure) | |
@@ -86,3 +118,34 b' def random_input_data():' | |||||
86 | pass |
|
118 | pass | |
87 |
|
119 | |||
88 | return _source_files |
|
120 | return _source_files | |
|
121 | ||||
|
122 | ||||
|
123 | def generate_samples(): | |||
|
124 | inputs = [ | |||
|
125 | b'foo', | |||
|
126 | b'bar', | |||
|
127 | b'abcdef', | |||
|
128 | b'sometext', | |||
|
129 | b'baz', | |||
|
130 | ] | |||
|
131 | ||||
|
132 | samples = [] | |||
|
133 | ||||
|
134 | for i in range(128): | |||
|
135 | samples.append(inputs[i % 5]) | |||
|
136 | samples.append(inputs[i % 5] * (i + 3)) | |||
|
137 | samples.append(inputs[-(i % 5)] * (i + 2)) | |||
|
138 | ||||
|
139 | return samples | |||
|
140 | ||||
|
141 | ||||
|
142 | if hypothesis: | |||
|
143 | default_settings = hypothesis.settings() | |||
|
144 | hypothesis.settings.register_profile('default', default_settings) | |||
|
145 | ||||
|
146 | ci_settings = hypothesis.settings(max_examples=2500, | |||
|
147 | max_iterations=2500) | |||
|
148 | hypothesis.settings.register_profile('ci', ci_settings) | |||
|
149 | ||||
|
150 | hypothesis.settings.load_profile( | |||
|
151 | os.environ.get('HYPOTHESIS_PROFILE', 'default')) |
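
A test module that wants the cffi variants generated by this decorator therefore needs the `import zstandard as zstd` binding the code above checks for; roughly (TestExample is illustrative, the round trip mirrors the existing compressor tests):

    import unittest

    import zstandard as zstd

    from .common import make_cffi


    @make_cffi
    class TestExample(unittest.TestCase):
        def test_round_trip(self):
            cctx = zstd.ZstdCompressor(level=1)
            dctx = zstd.ZstdDecompressor()
            data = b'foo bar foobar' * 64
            self.assertEqual(dctx.decompress(cctx.compress(data)), data)

Setting HYPOTHESIS_PROFILE=ci in the environment selects the larger example budget registered above.
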
@@ -1,11 +1,7 b'' | |||||
1 | import struct |
|
1 | import struct | |
|
2 | import unittest | |||
2 |
|
3 | |||
3 | try: |
|
4 | import zstandard as zstd | |
4 | import unittest2 as unittest |
|
|||
5 | except ImportError: |
|
|||
6 | import unittest |
|
|||
7 |
|
||||
8 | import zstd |
|
|||
9 |
|
5 | |||
10 | ss = struct.Struct('=QQ') |
|
6 | ss = struct.Struct('=QQ') | |
11 |
|
7 |
@@ -2,13 +2,10 b' import hashlib' | |||||
2 | import io |
|
2 | import io | |
3 | import struct |
|
3 | import struct | |
4 | import sys |
|
4 | import sys | |
|
5 | import tarfile | |||
|
6 | import unittest | |||
5 |
|
7 | |||
6 | try: |
|
8 | import zstandard as zstd | |
7 | import unittest2 as unittest |
|
|||
8 | except ImportError: |
|
|||
9 | import unittest |
|
|||
10 |
|
||||
11 | import zstd |
|
|||
12 |
|
9 | |||
13 | from .common import ( |
|
10 | from .common import ( | |
14 | make_cffi, |
|
11 | make_cffi, | |
@@ -23,7 +20,8 b' else:' | |||||
23 |
|
20 | |||
24 |
|
21 | |||
25 | def multithreaded_chunk_size(level, source_size=0): |
|
22 | def multithreaded_chunk_size(level, source_size=0): | |
26 | params = zstd. |
|
23 | params = zstd.ZstdCompressionParameters.from_level(level, | |
|
24 | source_size=source_size) | |||
27 |
|
25 | |||
28 | return 1 << (params.window_log + 2) |
|
26 | return 1 << (params.window_log + 2) | |
29 |
|
27 | |||
@@ -32,67 +30,82 b' def multithreaded_chunk_size(level, sour' | |||||
32 | class TestCompressor(unittest.TestCase): |
|
30 | class TestCompressor(unittest.TestCase): | |
33 | def test_level_bounds(self): |
|
31 | def test_level_bounds(self): | |
34 | with self.assertRaises(ValueError): |
|
32 | with self.assertRaises(ValueError): | |
35 | zstd.ZstdCompressor(level= |
|
33 | zstd.ZstdCompressor(level=23) | |
36 |
|
34 | |||
37 | with self.assertRaises(ValueError): |
|
35 | def test_memory_size(self): | |
38 |
|
|
36 | cctx = zstd.ZstdCompressor(level=1) | |
|
37 | self.assertGreater(cctx.memory_size(), 100) | |||
39 |
|
38 | |||
40 |
|
39 | |||
41 | @make_cffi |
|
40 | @make_cffi | |
42 | class TestCompressor_compress(unittest.TestCase): |
|
41 | class TestCompressor_compress(unittest.TestCase): | |
43 | def test_multithreaded_unsupported(self): |
|
|||
44 | samples = [] |
|
|||
45 | for i in range(128): |
|
|||
46 | samples.append(b'foo' * 64) |
|
|||
47 | samples.append(b'bar' * 64) |
|
|||
48 |
|
||||
49 | d = zstd.train_dictionary(8192, samples) |
|
|||
50 |
|
||||
51 | cctx = zstd.ZstdCompressor(dict_data=d, threads=2) |
|
|||
52 |
|
||||
53 | with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both dictionaries and multi-threaded compression'): |
|
|||
54 | cctx.compress(b'foo') |
|
|||
55 |
|
||||
56 | params = zstd.get_compression_parameters(3) |
|
|||
57 | cctx = zstd.ZstdCompressor(compression_params=params, threads=2) |
|
|||
58 | with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both compression parameters and multi-threaded compression'): |
|
|||
59 | cctx.compress(b'foo') |
|
|||
60 |
|
||||
61 | def test_compress_empty(self): |
|
42 | def test_compress_empty(self): | |
62 | cctx = zstd.ZstdCompressor(level=1) |
|
43 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
63 | result = cctx.compress(b'') |
|
44 | result = cctx.compress(b'') | |
64 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
45 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
65 | params = zstd.get_frame_parameters(result) |
|
46 | params = zstd.get_frame_parameters(result) | |
66 | self.assertEqual(params.content_size, |
|
47 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
67 | self.assertEqual(params.window_size, 524288) |
|
48 | self.assertEqual(params.window_size, 524288) | |
68 | self.assertEqual(params.dict_id, 0) |
|
49 | self.assertEqual(params.dict_id, 0) | |
69 | self.assertFalse(params.has_checksum, 0) |
|
50 | self.assertFalse(params.has_checksum, 0) | |
70 |
|
51 | |||
71 | # TODO should be temporary until https://github.com/facebook/zstd/issues/506 |
|
52 | cctx = zstd.ZstdCompressor() | |
72 | # is fixed. |
|
53 | result = cctx.compress(b'') | |
73 | cctx = zstd.ZstdCompressor(write_content_size=True) |
|
54 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00') | |
74 | with self.assertRaises(ValueError): |
|
55 | params = zstd.get_frame_parameters(result) | |
75 | cctx.compress(b'') |
|
56 | self.assertEqual(params.content_size, 0) | |
|
57 | ||||
|
58 | def test_input_types(self): | |||
|
59 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |||
|
60 | expected = b'\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f' | |||
76 |
|
61 | |||
77 | cctx.compress(b'', allow_empty=True) |
|
62 | mutable_array = bytearray(3) | |
|
63 | mutable_array[:] = b'foo' | |||
|
64 | ||||
|
65 | sources = [ | |||
|
66 | memoryview(b'foo'), | |||
|
67 | bytearray(b'foo'), | |||
|
68 | mutable_array, | |||
|
69 | ] | |||
|
70 | ||||
|
71 | for source in sources: | |||
|
72 | self.assertEqual(cctx.compress(source), expected) | |||
78 |
|
73 | |||
79 | def test_compress_large(self): |
|
74 | def test_compress_large(self): | |
80 | chunks = [] |
|
75 | chunks = [] | |
81 | for i in range(255): |
|
76 | for i in range(255): | |
82 | chunks.append(struct.Struct('>B').pack(i) * 16384) |
|
77 | chunks.append(struct.Struct('>B').pack(i) * 16384) | |
83 |
|
78 | |||
84 | cctx = zstd.ZstdCompressor(level=3) |
|
79 | cctx = zstd.ZstdCompressor(level=3, write_content_size=False) | |
85 | result = cctx.compress(b''.join(chunks)) |
|
80 | result = cctx.compress(b''.join(chunks)) | |
86 | self.assertEqual(len(result), 999) |
|
81 | self.assertEqual(len(result), 999) | |
87 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') |
|
82 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') | |
88 |
|
83 | |||
89 | # This matches the test for read_ |
|
84 | # This matches the test for read_to_iter() below. | |
90 | cctx = zstd.ZstdCompressor(level=1) |
|
85 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
91 | result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o') |
|
86 | result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o') | |
92 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00' |
|
87 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00' | |
93 | b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0' |
|
88 | b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0' | |
94 | b'\x02\x09\x00\x00\x6f') |
|
89 | b'\x02\x09\x00\x00\x6f') | |
95 |
|
90 | |||
|
91 | def test_negative_level(self): | |||
|
92 | cctx = zstd.ZstdCompressor(level=-4) | |||
|
93 | result = cctx.compress(b'foo' * 256) | |||
|
94 | ||||
|
95 | def test_no_magic(self): | |||
|
96 | params = zstd.ZstdCompressionParameters.from_level( | |||
|
97 | 1, format=zstd.FORMAT_ZSTD1) | |||
|
98 | cctx = zstd.ZstdCompressor(compression_params=params) | |||
|
99 | magic = cctx.compress(b'foobar') | |||
|
100 | ||||
|
101 | params = zstd.ZstdCompressionParameters.from_level( | |||
|
102 | 1, format=zstd.FORMAT_ZSTD1_MAGICLESS) | |||
|
103 | cctx = zstd.ZstdCompressor(compression_params=params) | |||
|
104 | no_magic = cctx.compress(b'foobar') | |||
|
105 | ||||
|
106 | self.assertEqual(magic[0:4], b'\x28\xb5\x2f\xfd') | |||
|
107 | self.assertEqual(magic[4:], no_magic) | |||
|
108 | ||||
96 | def test_write_checksum(self): |
|
109 | def test_write_checksum(self): | |
97 | cctx = zstd.ZstdCompressor(level=1) |
|
110 | cctx = zstd.ZstdCompressor(level=1) | |
98 | no_checksum = cctx.compress(b'foobar') |
|
111 | no_checksum = cctx.compress(b'foobar') | |
@@ -109,15 +122,15 b' class TestCompressor_compress(unittest.T' | |||||
109 |
|
122 | |||
110 | def test_write_content_size(self): |
|
123 | def test_write_content_size(self): | |
111 | cctx = zstd.ZstdCompressor(level=1) |
|
124 | cctx = zstd.ZstdCompressor(level=1) | |
|
125 | with_size = cctx.compress(b'foobar' * 256) | |||
|
126 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |||
112 | no_size = cctx.compress(b'foobar' * 256) |
|
127 | no_size = cctx.compress(b'foobar' * 256) | |
113 | cctx = zstd.ZstdCompressor(level=1, write_content_size=True) |
|
|||
114 | with_size = cctx.compress(b'foobar' * 256) |
|
|||
115 |
|
128 | |||
116 | self.assertEqual(len(with_size), len(no_size) + 1) |
|
129 | self.assertEqual(len(with_size), len(no_size) + 1) | |
117 |
|
130 | |||
118 | no_params = zstd.get_frame_parameters(no_size) |
|
131 | no_params = zstd.get_frame_parameters(no_size) | |
119 | with_params = zstd.get_frame_parameters(with_size) |
|
132 | with_params = zstd.get_frame_parameters(with_size) | |
120 | self.assertEqual(no_params.content_size, |
|
133 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
121 | self.assertEqual(with_params.content_size, 1536) |
|
134 | self.assertEqual(with_params.content_size, 1536) | |
122 |
|
135 | |||
123 | def test_no_dict_id(self): |
|
136 | def test_no_dict_id(self): | |
@@ -140,7 +153,7 b' class TestCompressor_compress(unittest.T' | |||||
140 | no_params = zstd.get_frame_parameters(no_dict_id) |
|
153 | no_params = zstd.get_frame_parameters(no_dict_id) | |
141 | with_params = zstd.get_frame_parameters(with_dict_id) |
|
154 | with_params = zstd.get_frame_parameters(with_dict_id) | |
142 | self.assertEqual(no_params.dict_id, 0) |
|
155 | self.assertEqual(no_params.dict_id, 0) | |
143 | self.assertEqual(with_params.dict_id, 1 |
|
156 | self.assertEqual(with_params.dict_id, 1387616518) | |
144 |
|
157 | |||
145 | def test_compress_dict_multiple(self): |
|
158 | def test_compress_dict_multiple(self): | |
146 | samples = [] |
|
159 | samples = [] | |
@@ -156,6 +169,21 b' class TestCompressor_compress(unittest.T' | |||||
156 | for i in range(32): |
|
169 | for i in range(32): | |
157 | cctx.compress(b'foo bar foobar foo bar foobar') |
|
170 | cctx.compress(b'foo bar foobar foo bar foobar') | |
158 |
|
171 | |||
|
172 | def test_dict_precompute(self): | |||
|
173 | samples = [] | |||
|
174 | for i in range(128): | |||
|
175 | samples.append(b'foo' * 64) | |||
|
176 | samples.append(b'bar' * 64) | |||
|
177 | samples.append(b'foobar' * 64) | |||
|
178 | ||||
|
179 | d = zstd.train_dictionary(8192, samples) | |||
|
180 | d.precompute_compress(level=1) | |||
|
181 | ||||
|
182 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |||
|
183 | ||||
|
184 | for i in range(32): | |||
|
185 | cctx.compress(b'foo bar foobar foo bar foobar') | |||
|
186 | ||||
159 | def test_multithreaded(self): |
|
187 | def test_multithreaded(self): | |
160 | chunk_size = multithreaded_chunk_size(1) |
|
188 | chunk_size = multithreaded_chunk_size(1) | |
161 | source = b''.join([b'x' * chunk_size, b'y' * chunk_size]) |
|
189 | source = b''.join([b'x' * chunk_size, b'y' * chunk_size]) | |
@@ -171,16 +199,65 b' class TestCompressor_compress(unittest.T' | |||||
171 | dctx = zstd.ZstdDecompressor() |
|
199 | dctx = zstd.ZstdDecompressor() | |
172 | self.assertEqual(dctx.decompress(compressed), source) |
|
200 | self.assertEqual(dctx.decompress(compressed), source) | |
173 |
|
201 | |||
|
202 | def test_multithreaded_dict(self): | |||
|
203 | samples = [] | |||
|
204 | for i in range(128): | |||
|
205 | samples.append(b'foo' * 64) | |||
|
206 | samples.append(b'bar' * 64) | |||
|
207 | samples.append(b'foobar' * 64) | |||
|
208 | ||||
|
209 | d = zstd.train_dictionary(1024, samples) | |||
|
210 | ||||
|
211 | cctx = zstd.ZstdCompressor(dict_data=d, threads=2) | |||
|
212 | ||||
|
213 | result = cctx.compress(b'foo') | |||
|
214 | params = zstd.get_frame_parameters(result) | |
|
215 | self.assertEqual(params.content_size, 3) | |
|
216 | self.assertEqual(params.dict_id, d.dict_id()) | |||
|
217 | ||||
|
218 | self.assertEqual(result, | |||
|
219 | b'\x28\xb5\x2f\xfd\x23\x06\x59\xb5\x52\x03\x19\x00\x00' | |||
|
220 | b'\x66\x6f\x6f') | |||
|
221 | ||||
|
222 | def test_multithreaded_compression_params(self): | |||
|
223 | params = zstd.ZstdCompressionParameters.from_level(0, threads=2) | |||
|
224 | cctx = zstd.ZstdCompressor(compression_params=params) | |||
|
225 | ||||
|
226 | result = cctx.compress(b'foo') | |||
|
227 | params = zstd.get_frame_parameters(result) | |
|
228 | self.assertEqual(params.content_size, 3) | |
|
229 | ||||
|
230 | self.assertEqual(result, | |||
|
231 | b'\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f') | |||
|
232 | ||||
174 |
|
233 | |||
175 | @make_cffi |
|
234 | @make_cffi | |
176 | class TestCompressor_compressobj(unittest.TestCase): |
|
235 | class TestCompressor_compressobj(unittest.TestCase): | |
177 | def test_compressobj_empty(self): |
|
236 | def test_compressobj_empty(self): | |
178 | cctx = zstd.ZstdCompressor(level=1) |
|
237 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
179 | cobj = cctx.compressobj() |
|
238 | cobj = cctx.compressobj() | |
180 | self.assertEqual(cobj.compress(b''), b'') |
|
239 | self.assertEqual(cobj.compress(b''), b'') | |
181 | self.assertEqual(cobj.flush(), |
|
240 | self.assertEqual(cobj.flush(), | |
182 | b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
241 | b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
183 |
|
242 | |||
|
243 | def test_input_types(self): | |||
|
244 | expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f' | |||
|
245 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |||
|
246 | ||||
|
247 | mutable_array = bytearray(3) | |||
|
248 | mutable_array[:] = b'foo' | |||
|
249 | ||||
|
250 | sources = [ | |||
|
251 | memoryview(b'foo'), | |||
|
252 | bytearray(b'foo'), | |||
|
253 | mutable_array, | |||
|
254 | ] | |||
|
255 | ||||
|
256 | for source in sources: | |||
|
257 | cobj = cctx.compressobj() | |||
|
258 | self.assertEqual(cobj.compress(source), b'') | |||
|
259 | self.assertEqual(cobj.flush(), expected) | |||
|
260 | ||||
184 | def test_compressobj_large(self): |
|
261 | def test_compressobj_large(self): | |
185 | chunks = [] |
|
262 | chunks = [] | |
186 | for i in range(255): |
|
263 | for i in range(255): | |
@@ -194,7 +271,7 b' class TestCompressor_compressobj(unittes' | |||||
194 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') |
|
271 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') | |
195 |
|
272 | |||
196 | params = zstd.get_frame_parameters(result) |
|
273 | params = zstd.get_frame_parameters(result) | |
197 | self.assertEqual(params.content_size, |
|
274 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
198 | self.assertEqual(params.window_size, 1048576) |
|
275 | self.assertEqual(params.window_size, 1048576) | |
199 | self.assertEqual(params.dict_id, 0) |
|
276 | self.assertEqual(params.dict_id, 0) | |
200 | self.assertFalse(params.has_checksum) |
|
277 | self.assertFalse(params.has_checksum) | |
@@ -209,8 +286,8 b' class TestCompressor_compressobj(unittes' | |||||
209 |
|
286 | |||
210 | no_params = zstd.get_frame_parameters(no_checksum) |
|
287 | no_params = zstd.get_frame_parameters(no_checksum) | |
211 | with_params = zstd.get_frame_parameters(with_checksum) |
|
288 | with_params = zstd.get_frame_parameters(with_checksum) | |
212 | self.assertEqual(no_params.content_size, |
|
289 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
213 | self.assertEqual(with_params.content_size, |
|
290 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
214 | self.assertEqual(no_params.dict_id, 0) |
|
291 | self.assertEqual(no_params.dict_id, 0) | |
215 | self.assertEqual(with_params.dict_id, 0) |
|
292 | self.assertEqual(with_params.dict_id, 0) | |
216 | self.assertFalse(no_params.has_checksum) |
|
293 | self.assertFalse(no_params.has_checksum) | |
@@ -221,14 +298,14 b' class TestCompressor_compressobj(unittes' | |||||
221 | def test_write_content_size(self): |
|
298 | def test_write_content_size(self): | |
222 | cctx = zstd.ZstdCompressor(level=1) |
|
299 | cctx = zstd.ZstdCompressor(level=1) | |
223 | cobj = cctx.compressobj(size=len(b'foobar' * 256)) |
|
300 | cobj = cctx.compressobj(size=len(b'foobar' * 256)) | |
|
301 | with_size = cobj.compress(b'foobar' * 256) + cobj.flush() | |||
|
302 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |||
|
303 | cobj = cctx.compressobj(size=len(b'foobar' * 256)) | |||
224 | no_size = cobj.compress(b'foobar' * 256) + cobj.flush() |
|
304 | no_size = cobj.compress(b'foobar' * 256) + cobj.flush() | |
225 | cctx = zstd.ZstdCompressor(level=1, write_content_size=True) |
|
|||
226 | cobj = cctx.compressobj(size=len(b'foobar' * 256)) |
|
|||
227 | with_size = cobj.compress(b'foobar' * 256) + cobj.flush() |
|
|||
228 |
|
305 | |||
229 | no_params = zstd.get_frame_parameters(no_size) |
|
306 | no_params = zstd.get_frame_parameters(no_size) | |
230 | with_params = zstd.get_frame_parameters(with_size) |
|
307 | with_params = zstd.get_frame_parameters(with_size) | |
231 | self.assertEqual(no_params.content_size, |
|
308 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
232 | self.assertEqual(with_params.content_size, 1536) |
|
309 | self.assertEqual(with_params.content_size, 1536) | |
233 | self.assertEqual(no_params.dict_id, 0) |
|
310 | self.assertEqual(no_params.dict_id, 0) | |
234 | self.assertEqual(with_params.dict_id, 0) |
|
311 | self.assertEqual(with_params.dict_id, 0) | |
@@ -300,6 +377,34 b' class TestCompressor_compressobj(unittes' | |||||
300 |
|
377 | |||
301 | self.assertEqual(len(compressed), 295) |
|
378 | self.assertEqual(len(compressed), 295) | |
302 |
|
379 | |||
|
380 | def test_frame_progression(self): | |||
|
381 | cctx = zstd.ZstdCompressor() | |||
|
382 | ||||
|
383 | self.assertEqual(cctx.frame_progression(), (0, 0, 0)) | |||
|
384 | ||||
|
385 | cobj = cctx.compressobj() | |||
|
386 | ||||
|
387 | cobj.compress(b'foobar') | |||
|
388 | self.assertEqual(cctx.frame_progression(), (6, 0, 0)) | |||
|
389 | ||||
|
390 | cobj.flush() | |||
|
391 | self.assertEqual(cctx.frame_progression(), (6, 6, 15)) | |||
|
392 | ||||
|
393 | def test_bad_size(self): | |||
|
394 | cctx = zstd.ZstdCompressor() | |||
|
395 | ||||
|
396 | cobj = cctx.compressobj(size=2) | |||
|
397 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): | |||
|
398 | cobj.compress(b'foo') | |||
|
399 | ||||
|
400 | # Try another operation on this instance. | |||
|
401 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): | |||
|
402 | cobj.compress(b'aa') | |||
|
403 | ||||
|
404 | # Try another operation on the compressor. | |||
|
405 | cctx.compressobj(size=4) | |||
|
406 | cctx.compress(b'foobar') | |||
|
407 | ||||
303 |
|
408 | |||
304 | @make_cffi |
|
409 | @make_cffi | |
305 | class TestCompressor_copy_stream(unittest.TestCase): |
|
410 | class TestCompressor_copy_stream(unittest.TestCase): | |
@@ -323,7 +428,7 b' class TestCompressor_copy_stream(unittes' | |||||
323 | source = io.BytesIO() |
|
428 | source = io.BytesIO() | |
324 | dest = io.BytesIO() |
|
429 | dest = io.BytesIO() | |
325 |
|
430 | |||
326 | cctx = zstd.ZstdCompressor(level=1) |
|
431 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
327 | r, w = cctx.copy_stream(source, dest) |
|
432 | r, w = cctx.copy_stream(source, dest) | |
328 | self.assertEqual(int(r), 0) |
|
433 | self.assertEqual(int(r), 0) | |
329 | self.assertEqual(w, 9) |
|
434 | self.assertEqual(w, 9) | |
@@ -345,7 +450,7 b' class TestCompressor_copy_stream(unittes' | |||||
345 | self.assertEqual(w, 999) |
|
450 | self.assertEqual(w, 999) | |
346 |
|
451 | |||
347 | params = zstd.get_frame_parameters(dest.getvalue()) |
|
452 | params = zstd.get_frame_parameters(dest.getvalue()) | |
348 | self.assertEqual(params.content_size, |
|
453 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
349 | self.assertEqual(params.window_size, 1048576) |
|
454 | self.assertEqual(params.window_size, 1048576) | |
350 | self.assertEqual(params.dict_id, 0) |
|
455 | self.assertEqual(params.dict_id, 0) | |
351 | self.assertFalse(params.has_checksum) |
|
456 | self.assertFalse(params.has_checksum) | |
@@ -367,8 +472,8 b' class TestCompressor_copy_stream(unittes' | |||||
367 |
|
472 | |||
368 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) |
|
473 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) | |
369 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) |
|
474 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) | |
370 | self.assertEqual(no_params.content_size, |
|
475 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
371 | self.assertEqual(with_params.content_size, |
|
476 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
372 | self.assertEqual(no_params.dict_id, 0) |
|
477 | self.assertEqual(no_params.dict_id, 0) | |
373 | self.assertEqual(with_params.dict_id, 0) |
|
478 | self.assertEqual(with_params.dict_id, 0) | |
374 | self.assertFalse(no_params.has_checksum) |
|
479 | self.assertFalse(no_params.has_checksum) | |
@@ -378,12 +483,12 b' class TestCompressor_copy_stream(unittes' | |||||
378 | source = io.BytesIO(b'foobar' * 256) |
|
483 | source = io.BytesIO(b'foobar' * 256) | |
379 | no_size = io.BytesIO() |
|
484 | no_size = io.BytesIO() | |
380 |
|
485 | |||
381 | cctx = zstd.ZstdCompressor(level=1) |
|
486 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
382 | cctx.copy_stream(source, no_size) |
|
487 | cctx.copy_stream(source, no_size) | |
383 |
|
488 | |||
384 | source.seek(0) |
|
489 | source.seek(0) | |
385 | with_size = io.BytesIO() |
|
490 | with_size = io.BytesIO() | |
386 | cctx = zstd.ZstdCompressor(level=1 |
|
491 | cctx = zstd.ZstdCompressor(level=1) | |
387 | cctx.copy_stream(source, with_size) |
|
492 | cctx.copy_stream(source, with_size) | |
388 |
|
493 | |||
389 | # Source content size is unknown, so no content size written. |
|
494 | # Source content size is unknown, so no content size written. | |
@@ -400,7 +505,7 b' class TestCompressor_copy_stream(unittes' | |||||
400 |
|
505 | |||
401 | no_params = zstd.get_frame_parameters(no_size.getvalue()) |
|
506 | no_params = zstd.get_frame_parameters(no_size.getvalue()) | |
402 | with_params = zstd.get_frame_parameters(with_size.getvalue()) |
|
507 | with_params = zstd.get_frame_parameters(with_size.getvalue()) | |
403 | self.assertEqual(no_params.content_size, |
|
508 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
404 | self.assertEqual(with_params.content_size, 1536) |
|
509 | self.assertEqual(with_params.content_size, 1536) | |
405 | self.assertEqual(no_params.dict_id, 0) |
|
510 | self.assertEqual(no_params.dict_id, 0) | |
406 | self.assertEqual(with_params.dict_id, 0) |
|
511 | self.assertEqual(with_params.dict_id, 0) | |
@@ -426,19 +531,18 b' class TestCompressor_copy_stream(unittes' | |||||
426 | source.seek(0) |
|
531 | source.seek(0) | |
427 |
|
532 | |||
428 | dest = io.BytesIO() |
|
533 | dest = io.BytesIO() | |
429 | cctx = zstd.ZstdCompressor(threads=2) |
|
534 | cctx = zstd.ZstdCompressor(threads=2, write_content_size=False) | |
430 | r, w = cctx.copy_stream(source, dest) |
|
535 | r, w = cctx.copy_stream(source, dest) | |
431 | self.assertEqual(r, 3145728) |
|
536 | self.assertEqual(r, 3145728) | |
432 | self.assertEqual(w, 295) |
|
537 | self.assertEqual(w, 295) | |
433 |
|
538 | |||
434 | params = zstd.get_frame_parameters(dest.getvalue()) |
|
539 | params = zstd.get_frame_parameters(dest.getvalue()) | |
435 | self.assertEqual(params.content_size, |
|
540 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
436 | self.assertEqual(params.dict_id, 0) |
|
541 | self.assertEqual(params.dict_id, 0) | |
437 | self.assertFalse(params.has_checksum) |
|
542 | self.assertFalse(params.has_checksum) | |
438 |
|
543 | |||
439 | # Writing content size and checksum works. |
|
544 | # Writing content size and checksum works. | |
440 | cctx = zstd.ZstdCompressor(threads=2, write_c |
|
545 | cctx = zstd.ZstdCompressor(threads=2, write_checksum=True) | |
441 | write_checksum=True) |
|
|||
442 | dest = io.BytesIO() |
|
546 | dest = io.BytesIO() | |
443 | source.seek(0) |
|
547 | source.seek(0) | |
444 | cctx.copy_stream(source, dest, size=len(source.getvalue())) |
|
548 | cctx.copy_stream(source, dest, size=len(source.getvalue())) | |
@@ -448,31 +552,227 b' class TestCompressor_copy_stream(unittes' | |||||
448 | self.assertEqual(params.dict_id, 0) |
|
552 | self.assertEqual(params.dict_id, 0) | |
449 | self.assertTrue(params.has_checksum) |
|
553 | self.assertTrue(params.has_checksum) | |
450 |
|
554 | |||
|
555 | def test_bad_size(self): | |||
|
556 | source = io.BytesIO() | |||
|
557 | source.write(b'a' * 32768) | |||
|
558 | source.write(b'b' * 32768) | |||
|
559 | source.seek(0) | |||
451 |
|
560 | |||
452 | def compress(data, level): |
|
561 | dest = io.BytesIO() | |
453 | buffer = io.BytesIO() |
|
562 | ||
454 | cctx = zstd.ZstdCompressor( |
|
563 | cctx = zstd.ZstdCompressor() | |
455 | with cctx.write_to(buffer) as compressor: |
|
564 | ||
456 | compressor.write(data) |
|
565 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): | |
457 | return buffer.getvalue() |
|
566 | cctx.copy_stream(source, dest, size=42) | |
|
567 | ||||
|
568 | # Try another operation on this compressor. | |||
|
569 | source.seek(0) | |||
|
570 | dest = io.BytesIO() | |||
|
571 | cctx.copy_stream(source, dest) | |||
458 |
|
572 | |||
459 |
|
573 | |||
460 | @make_cffi |
|
574 | @make_cffi | |
461 | class TestCompressor_ |
|
575 | class TestCompressor_stream_reader(unittest.TestCase): | |
|
576 | def test_context_manager(self): | |||
|
577 | cctx = zstd.ZstdCompressor() | |||
|
578 | ||||
|
579 | reader = cctx.stream_reader(b'foo' * 60) | |||
|
580 | with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): | |||
|
581 | reader.read(10) | |||
|
582 | ||||
|
583 | with cctx.stream_reader(b'foo') as reader: | |||
|
584 | with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'): | |||
|
585 | with reader as reader2: | |||
|
586 | pass | |||
|
587 | ||||
|
588 | def test_not_implemented(self): | |||
|
589 | cctx = zstd.ZstdCompressor() | |||
|
590 | ||||
|
591 | with cctx.stream_reader(b'foo' * 60) as reader: | |||
|
592 | with self.assertRaises(io.UnsupportedOperation): | |||
|
593 | reader.readline() | |||
|
594 | ||||
|
595 | with self.assertRaises(io.UnsupportedOperation): | |||
|
596 | reader.readlines() | |||
|
597 | ||||
|
598 | # This could probably be implemented someday. | |||
|
599 | with self.assertRaises(NotImplementedError): | |||
|
600 | reader.readall() | |||
|
601 | ||||
|
602 | with self.assertRaises(io.UnsupportedOperation): | |||
|
603 | iter(reader) | |||
|
604 | ||||
|
605 | with self.assertRaises(io.UnsupportedOperation): | |||
|
606 | next(reader) | |||
|
607 | ||||
|
608 | with self.assertRaises(OSError): | |||
|
609 | reader.writelines([]) | |||
|
610 | ||||
|
611 | with self.assertRaises(OSError): | |||
|
612 | reader.write(b'foo') | |||
|
613 | ||||
|
614 | def test_constant_methods(self): | |||
|
615 | cctx = zstd.ZstdCompressor() | |||
|
616 | ||||
|
617 | with cctx.stream_reader(b'boo') as reader: | |||
|
618 | self.assertTrue(reader.readable()) | |||
|
619 | self.assertFalse(reader.writable()) | |||
|
620 | self.assertFalse(reader.seekable()) | |||
|
621 | self.assertFalse(reader.isatty()) | |||
|
622 | self.assertIsNone(reader.flush()) | |||
|
623 | ||||
|
624 | def test_read_closed(self): | |||
|
625 | cctx = zstd.ZstdCompressor() | |||
|
626 | ||||
|
627 | with cctx.stream_reader(b'foo' * 60) as reader: | |||
|
628 | reader.close() | |||
|
629 | with self.assertRaisesRegexp(ValueError, 'stream is closed'): | |||
|
630 | reader.read(10) | |||
|
631 | ||||
|
632 | def test_read_bad_size(self): | |||
|
633 | cctx = zstd.ZstdCompressor() | |||
|
634 | ||||
|
635 | with cctx.stream_reader(b'foo') as reader: | |||
|
636 | with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'): | |||
|
637 | reader.read(-1) | |||
|
638 | ||||
|
639 | with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'): | |||
|
640 | reader.read(0) | |||
|
641 | ||||
|
642 | def test_read_buffer(self): | |||
|
643 | cctx = zstd.ZstdCompressor() | |||
|
644 | ||||
|
645 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) | |||
|
646 | frame = cctx.compress(source) | |||
|
647 | ||||
|
648 | with cctx.stream_reader(source) as reader: | |||
|
649 | self.assertEqual(reader.tell(), 0) | |||
|
650 | ||||
|
651 | # We should get entire frame in one read. | |||
|
652 | result = reader.read(8192) | |||
|
653 | self.assertEqual(result, frame) | |||
|
654 | self.assertEqual(reader.tell(), len(result)) | |||
|
655 | self.assertEqual(reader.read(), b'') | |||
|
656 | self.assertEqual(reader.tell(), len(result)) | |||
|
657 | ||||
|
658 | def test_read_buffer_small_chunks(self): | |||
|
659 | cctx = zstd.ZstdCompressor() | |||
|
660 | ||||
|
661 | source = b'foo' * 60 | |||
|
662 | chunks = [] | |||
|
663 | ||||
|
664 | with cctx.stream_reader(source) as reader: | |||
|
665 | self.assertEqual(reader.tell(), 0) | |||
|
666 | ||||
|
667 | while True: | |||
|
668 | chunk = reader.read(1) | |||
|
669 | if not chunk: | |||
|
670 | break | |||
|
671 | ||||
|
672 | chunks.append(chunk) | |||
|
673 | self.assertEqual(reader.tell(), sum(map(len, chunks))) | |||
|
674 | ||||
|
675 | self.assertEqual(b''.join(chunks), cctx.compress(source)) | |||
|
676 | ||||
|
677 | def test_read_stream(self): | |||
|
678 | cctx = zstd.ZstdCompressor() | |||
|
679 | ||||
|
680 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) | |||
|
681 | frame = cctx.compress(source) | |||
|
682 | ||||
|
683 | with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: | |||
|
684 | self.assertEqual(reader.tell(), 0) | |||
|
685 | ||||
|
686 | chunk = reader.read(8192) | |||
|
687 | self.assertEqual(chunk, frame) | |||
|
688 | self.assertEqual(reader.tell(), len(chunk)) | |||
|
689 | self.assertEqual(reader.read(), b'') | |||
|
690 | self.assertEqual(reader.tell(), len(chunk)) | |||
|
691 | ||||
|
692 | def test_read_stream_small_chunks(self): | |||
|
693 | cctx = zstd.ZstdCompressor() | |||
|
694 | ||||
|
695 | source = b'foo' * 60 | |||
|
696 | chunks = [] | |||
|
697 | ||||
|
698 | with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: | |||
|
699 | self.assertEqual(reader.tell(), 0) | |||
|
700 | ||||
|
701 | while True: | |||
|
702 | chunk = reader.read(1) | |||
|
703 | if not chunk: | |||
|
704 | break | |||
|
705 | ||||
|
706 | chunks.append(chunk) | |||
|
707 | self.assertEqual(reader.tell(), sum(map(len, chunks))) | |||
|
708 | ||||
|
709 | self.assertEqual(b''.join(chunks), cctx.compress(source)) | |||
|
710 | ||||
|
711 | def test_read_after_exit(self): | |||
|
712 | cctx = zstd.ZstdCompressor() | |||
|
713 | ||||
|
714 | with cctx.stream_reader(b'foo' * 60) as reader: | |||
|
715 | while reader.read(8192): | |||
|
716 | pass | |||
|
717 | ||||
|
718 | with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): | |||
|
719 | reader.read(10) | |||
|
720 | ||||
|
721 | def test_bad_size(self): | |||
|
722 | cctx = zstd.ZstdCompressor() | |||
|
723 | ||||
|
724 | source = io.BytesIO(b'foobar') | |||
|
725 | ||||
|
726 | with cctx.stream_reader(source, size=2) as reader: | |||
|
727 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): | |||
|
728 | reader.read(10) | |||
|
729 | ||||
|
730 | # Try another compression operation. | |||
|
731 | with cctx.stream_reader(source, size=42): | |||
|
732 | pass | |||
|
733 | ||||
|
734 | ||||
|
735 | @make_cffi | |||
|
736 | class TestCompressor_stream_writer(unittest.TestCase): | |||
462 | def test_empty(self): |
|
737 | def test_empty(self): | |
463 | result = compress(b'', 1) |
|
738 | buffer = io.BytesIO() | |
|
739 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |||
|
740 | with cctx.stream_writer(buffer) as compressor: | |||
|
741 | compressor.write(b'') | |||
|
742 | ||||
|
743 | result = buffer.getvalue() | |||
464 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
744 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
465 |
|
745 | |||
466 | params = zstd.get_frame_parameters(result) |
|
746 | params = zstd.get_frame_parameters(result) | |
467 |
self.assertEqual(params.content_size, |
|
747 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
468 | self.assertEqual(params.window_size, 524288) |
|
748 | self.assertEqual(params.window_size, 524288) | |
469 | self.assertEqual(params.dict_id, 0) |
|
749 | self.assertEqual(params.dict_id, 0) | |
470 | self.assertFalse(params.has_checksum) |
|
750 | self.assertFalse(params.has_checksum) | |
471 |
|
751 | |||
|
752 | def test_input_types(self): | |||
|
753 | expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f' | |||
|
754 | cctx = zstd.ZstdCompressor(level=1) | |||
|
755 | ||||
|
756 | mutable_array = bytearray(3) | |||
|
757 | mutable_array[:] = b'foo' | |||
|
758 | ||||
|
759 | sources = [ | |||
|
760 | memoryview(b'foo'), | |||
|
761 | bytearray(b'foo'), | |||
|
762 | mutable_array, | |||
|
763 | ] | |||
|
764 | ||||
|
765 | for source in sources: | |||
|
766 | buffer = io.BytesIO() | |||
|
767 | with cctx.stream_writer(buffer) as compressor: | |||
|
768 | compressor.write(source) | |||
|
769 | ||||
|
770 | self.assertEqual(buffer.getvalue(), expected) | |||
|
771 | ||||
472 | def test_multiple_compress(self): |
|
772 | def test_multiple_compress(self): | |
473 | buffer = io.BytesIO() |
|
773 | buffer = io.BytesIO() | |
474 | cctx = zstd.ZstdCompressor(level=5) |
|
774 | cctx = zstd.ZstdCompressor(level=5) | |
475 | with cctx. |
|
775 | with cctx.stream_writer(buffer) as compressor: | |
476 | self.assertEqual(compressor.write(b'foo'), 0) |
|
776 | self.assertEqual(compressor.write(b'foo'), 0) | |
477 | self.assertEqual(compressor.write(b'bar'), 0) |
|
777 | self.assertEqual(compressor.write(b'bar'), 0) | |
478 | self.assertEqual(compressor.write(b'x' * 8192), 0) |
|
778 | self.assertEqual(compressor.write(b'x' * 8192), 0) | |
@@ -491,35 +791,40 b' class TestCompressor_write_to(unittest.T' | |||||
491 |
|
791 | |||
492 | d = zstd.train_dictionary(8192, samples) |
|
792 | d = zstd.train_dictionary(8192, samples) | |
493 |
|
793 | |||
|
794 | h = hashlib.sha1(d.as_bytes()).hexdigest() | |||
|
795 | self.assertEqual(h, '3040faa0ddc37d50e71a4dd28052cb8db5d9d027') | |||
|
796 | ||||
494 | buffer = io.BytesIO() |
|
797 | buffer = io.BytesIO() | |
495 | cctx = zstd.ZstdCompressor(level=9, dict_data=d) |
|
798 | cctx = zstd.ZstdCompressor(level=9, dict_data=d) | |
496 | with cctx. |
|
799 | with cctx.stream_writer(buffer) as compressor: | |
497 | self.assertEqual(compressor.write(b'foo'), 0) |
|
800 | self.assertEqual(compressor.write(b'foo'), 0) | |
498 | self.assertEqual(compressor.write(b'bar'), 0) |
|
801 | self.assertEqual(compressor.write(b'bar'), 0) | |
499 | self.assertEqual(compressor.write(b'foo' * 16384), |
|
802 | self.assertEqual(compressor.write(b'foo' * 16384), 0) | |
500 |
|
803 | |||
501 | compressed = buffer.getvalue() |
|
804 | compressed = buffer.getvalue() | |
502 |
|
805 | |||
503 | params = zstd.get_frame_parameters(compressed) |
|
806 | params = zstd.get_frame_parameters(compressed) | |
504 | self.assertEqual(params.content_size, |
|
807 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
505 | self.assertEqual(params.window_size, |
|
808 | self.assertEqual(params.window_size, 2097152) | |
506 | self.assertEqual(params.dict_id, d.dict_id()) |
|
809 | self.assertEqual(params.dict_id, d.dict_id()) | |
507 | self.assertFalse(params.has_checksum) |
|
810 | self.assertFalse(params.has_checksum) | |
508 |
|
811 | self.assertEqual(compressed, | ||
509 | self.assertEqual(compressed[0:32], |
|
812 | b'\x28\xb5\x2f\xfd\x03\x58\x06\x59\xb5\x52\x5d\x00' | |
510 | b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00' |
|
813 | b'\x00\x00\x02\xfc\x3d\x3f\xd9\xb0\x51\x03\x45\x89') | |
511 | b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54' |
|
|||
512 | b'\x00\x00\x18\x6f\x6f\x66\x01\x00') |
|
|||
513 |
|
||||
514 | h = hashlib.sha1(compressed).hexdigest() |
|
|||
515 | self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92') |
|
|||
516 |
|
814 | |||
517 | def test_compression_params(self): |
|
815 | def test_compression_params(self): | |
518 | params = zstd.CompressionParameters( |
|
816 | params = zstd.ZstdCompressionParameters( | |
|
817 | window_log=20, | |||
|
818 | chain_log=6, | |||
|
819 | hash_log=12, | |||
|
820 | min_match=5, | |||
|
821 | search_log=4, | |||
|
822 | target_length=10, | |||
|
823 | compression_strategy=zstd.STRATEGY_FAST) | |||
519 |
|
824 | |||
520 | buffer = io.BytesIO() |
|
825 | buffer = io.BytesIO() | |
521 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
826 | cctx = zstd.ZstdCompressor(compression_params=params) | |
522 | with cctx. |
|
827 | with cctx.stream_writer(buffer) as compressor: | |
523 | self.assertEqual(compressor.write(b'foo'), 0) |
|
828 | self.assertEqual(compressor.write(b'foo'), 0) | |
524 | self.assertEqual(compressor.write(b'bar'), 0) |
|
829 | self.assertEqual(compressor.write(b'bar'), 0) | |
525 | self.assertEqual(compressor.write(b'foobar' * 16384), 0) |
|
830 | self.assertEqual(compressor.write(b'foobar' * 16384), 0) | |
@@ -527,29 +832,29 b' class TestCompressor_write_to(unittest.T' | |||||
527 | compressed = buffer.getvalue() |
|
832 | compressed = buffer.getvalue() | |
528 |
|
833 | |||
529 | params = zstd.get_frame_parameters(compressed) |
|
834 | params = zstd.get_frame_parameters(compressed) | |
530 | self.assertEqual(params.content_size, |
|
835 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
531 | self.assertEqual(params.window_size, 1048576) |
|
836 | self.assertEqual(params.window_size, 1048576) | |
532 | self.assertEqual(params.dict_id, 0) |
|
837 | self.assertEqual(params.dict_id, 0) | |
533 | self.assertFalse(params.has_checksum) |
|
838 | self.assertFalse(params.has_checksum) | |
534 |
|
839 | |||
535 | h = hashlib.sha1(compressed).hexdigest() |
|
840 | h = hashlib.sha1(compressed).hexdigest() | |
536 | self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99') |
|
841 | self.assertEqual(h, '2a8111d72eb5004cdcecbdac37da9f26720d30ef') | |
537 |
|
842 | |||
538 | def test_write_checksum(self): |
|
843 | def test_write_checksum(self): | |
539 | no_checksum = io.BytesIO() |
|
844 | no_checksum = io.BytesIO() | |
540 | cctx = zstd.ZstdCompressor(level=1) |
|
845 | cctx = zstd.ZstdCompressor(level=1) | |
541 | with cctx. |
|
846 | with cctx.stream_writer(no_checksum) as compressor: | |
542 | self.assertEqual(compressor.write(b'foobar'), 0) |
|
847 | self.assertEqual(compressor.write(b'foobar'), 0) | |
543 |
|
848 | |||
544 | with_checksum = io.BytesIO() |
|
849 | with_checksum = io.BytesIO() | |
545 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
850 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) | |
546 | with cctx. |
|
851 | with cctx.stream_writer(with_checksum) as compressor: | |
547 | self.assertEqual(compressor.write(b'foobar'), 0) |
|
852 | self.assertEqual(compressor.write(b'foobar'), 0) | |
548 |
|
853 | |||
549 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) |
|
854 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) | |
550 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) |
|
855 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) | |
551 | self.assertEqual(no_params.content_size, |
|
856 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
552 | self.assertEqual(with_params.content_size, |
|
857 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
553 | self.assertEqual(no_params.dict_id, 0) |
|
858 | self.assertEqual(no_params.dict_id, 0) | |
554 | self.assertEqual(with_params.dict_id, 0) |
|
859 | self.assertEqual(with_params.dict_id, 0) | |
555 | self.assertFalse(no_params.has_checksum) |
|
860 | self.assertFalse(no_params.has_checksum) | |
@@ -560,13 +865,13 b' class TestCompressor_write_to(unittest.T' | |||||
560 |
|
865 | |||
561 | def test_write_content_size(self): |
|
866 | def test_write_content_size(self): | |
562 | no_size = io.BytesIO() |
|
867 | no_size = io.BytesIO() | |
563 | cctx = zstd.ZstdCompressor(level=1) |
|
868 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
564 | with cctx. |
|
869 | with cctx.stream_writer(no_size) as compressor: | |
565 | self.assertEqual(compressor.write(b'foobar' * 256), 0) |
|
870 | self.assertEqual(compressor.write(b'foobar' * 256), 0) | |
566 |
|
871 | |||
567 | with_size = io.BytesIO() |
|
872 | with_size = io.BytesIO() | |
568 | cctx = zstd.ZstdCompressor(level=1 |
|
873 | cctx = zstd.ZstdCompressor(level=1) | |
569 | with cctx. |
|
874 | with cctx.stream_writer(with_size) as compressor: | |
570 | self.assertEqual(compressor.write(b'foobar' * 256), 0) |
|
875 | self.assertEqual(compressor.write(b'foobar' * 256), 0) | |
571 |
|
876 | |||
572 | # Source size is not known in streaming mode, so header not |
|
877 | # Source size is not known in streaming mode, so header not | |
@@ -576,12 +881,12 b' class TestCompressor_write_to(unittest.T' | |||||
576 |
|
881 | |||
577 | # Declaring size will write the header. |
|
882 | # Declaring size will write the header. | |
578 | with_size = io.BytesIO() |
|
883 | with_size = io.BytesIO() | |
579 | with cctx. |
|
884 | with cctx.stream_writer(with_size, size=len(b'foobar' * 256)) as compressor: | |
580 | self.assertEqual(compressor.write(b'foobar' * 256), 0) |
|
885 | self.assertEqual(compressor.write(b'foobar' * 256), 0) | |
581 |
|
886 | |||
582 | no_params = zstd.get_frame_parameters(no_size.getvalue()) |
|
887 | no_params = zstd.get_frame_parameters(no_size.getvalue()) | |
583 | with_params = zstd.get_frame_parameters(with_size.getvalue()) |
|
888 | with_params = zstd.get_frame_parameters(with_size.getvalue()) | |
584 | self.assertEqual(no_params.content_size, |
|
889 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
585 | self.assertEqual(with_params.content_size, 1536) |
|
890 | self.assertEqual(with_params.content_size, 1536) | |
586 | self.assertEqual(no_params.dict_id, 0) |
|
891 | self.assertEqual(no_params.dict_id, 0) | |
587 | self.assertEqual(with_params.dict_id, 0) |
|
892 | self.assertEqual(with_params.dict_id, 0) | |
@@ -602,18 +907,22 b' class TestCompressor_write_to(unittest.T' | |||||
602 |
|
907 | |||
603 | with_dict_id = io.BytesIO() |
|
908 | with_dict_id = io.BytesIO() | |
604 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
909 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |
605 | with cctx. |
|
910 | with cctx.stream_writer(with_dict_id) as compressor: | |
606 | self.assertEqual(compressor.write(b'foobarfoobar'), 0) |
|
911 | self.assertEqual(compressor.write(b'foobarfoobar'), 0) | |
607 |
|
912 | |||
|
913 | self.assertEqual(with_dict_id.getvalue()[4:5], b'\x03') | |||
|
914 | ||||
608 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) |
|
915 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) | |
609 | no_dict_id = io.BytesIO() |
|
916 | no_dict_id = io.BytesIO() | |
610 | with cctx. |
|
917 | with cctx.stream_writer(no_dict_id) as compressor: | |
611 | self.assertEqual(compressor.write(b'foobarfoobar'), 0) |
|
918 | self.assertEqual(compressor.write(b'foobarfoobar'), 0) | |
612 |
|
919 | |||
|
920 | self.assertEqual(no_dict_id.getvalue()[4:5], b'\x00') | |||
|
921 | ||||
613 | no_params = zstd.get_frame_parameters(no_dict_id.getvalue()) |
|
922 | no_params = zstd.get_frame_parameters(no_dict_id.getvalue()) | |
614 | with_params = zstd.get_frame_parameters(with_dict_id.getvalue()) |
|
923 | with_params = zstd.get_frame_parameters(with_dict_id.getvalue()) | |
615 | self.assertEqual(no_params.content_size, |
|
924 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
616 | self.assertEqual(with_params.content_size, |
|
925 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
617 | self.assertEqual(no_params.dict_id, 0) |
|
926 | self.assertEqual(no_params.dict_id, 0) | |
618 | self.assertEqual(with_params.dict_id, d.dict_id()) |
|
927 | self.assertEqual(with_params.dict_id, d.dict_id()) | |
619 | self.assertFalse(no_params.has_checksum) |
|
928 | self.assertFalse(no_params.has_checksum) | |
@@ -625,7 +934,8 b' class TestCompressor_write_to(unittest.T' | |||||
625 | def test_memory_size(self): |
|
934 | def test_memory_size(self): | |
626 | cctx = zstd.ZstdCompressor(level=3) |
|
935 | cctx = zstd.ZstdCompressor(level=3) | |
627 | buffer = io.BytesIO() |
|
936 | buffer = io.BytesIO() | |
628 | with cctx. |
|
937 | with cctx.stream_writer(buffer) as compressor: | |
|
938 | compressor.write(b'foo') | |||
629 | size = compressor.memory_size() |
|
939 | size = compressor.memory_size() | |
630 |
|
940 | |||
631 | self.assertGreater(size, 100000) |
|
941 | self.assertGreater(size, 100000) | |
@@ -633,7 +943,7 b' class TestCompressor_write_to(unittest.T' | |||||
633 | def test_write_size(self): |
|
943 | def test_write_size(self): | |
634 | cctx = zstd.ZstdCompressor(level=3) |
|
944 | cctx = zstd.ZstdCompressor(level=3) | |
635 | dest = OpCountingBytesIO() |
|
945 | dest = OpCountingBytesIO() | |
636 | with cctx. |
|
946 | with cctx.stream_writer(dest, write_size=1) as compressor: | |
637 | self.assertEqual(compressor.write(b'foo'), 0) |
|
947 | self.assertEqual(compressor.write(b'foo'), 0) | |
638 | self.assertEqual(compressor.write(b'bar'), 0) |
|
948 | self.assertEqual(compressor.write(b'bar'), 0) | |
639 | self.assertEqual(compressor.write(b'foobar'), 0) |
|
949 | self.assertEqual(compressor.write(b'foobar'), 0) | |
@@ -643,7 +953,7 b' class TestCompressor_write_to(unittest.T' | |||||
643 | def test_flush_repeated(self): |
|
953 | def test_flush_repeated(self): | |
644 | cctx = zstd.ZstdCompressor(level=3) |
|
954 | cctx = zstd.ZstdCompressor(level=3) | |
645 | dest = OpCountingBytesIO() |
|
955 | dest = OpCountingBytesIO() | |
646 | with cctx. |
|
956 | with cctx.stream_writer(dest) as compressor: | |
647 | self.assertEqual(compressor.write(b'foo'), 0) |
|
957 | self.assertEqual(compressor.write(b'foo'), 0) | |
648 | self.assertEqual(dest._write_count, 0) |
|
958 | self.assertEqual(dest._write_count, 0) | |
649 | self.assertEqual(compressor.flush(), 12) |
|
959 | self.assertEqual(compressor.flush(), 12) | |
@@ -659,7 +969,7 b' class TestCompressor_write_to(unittest.T' | |||||
659 | def test_flush_empty_block(self): |
|
969 | def test_flush_empty_block(self): | |
660 | cctx = zstd.ZstdCompressor(level=3, write_checksum=True) |
|
970 | cctx = zstd.ZstdCompressor(level=3, write_checksum=True) | |
661 | dest = OpCountingBytesIO() |
|
971 | dest = OpCountingBytesIO() | |
662 | with cctx. |
|
972 | with cctx.stream_writer(dest) as compressor: | |
663 | self.assertEqual(compressor.write(b'foobar' * 8192), 0) |
|
973 | self.assertEqual(compressor.write(b'foobar' * 8192), 0) | |
664 | count = dest._write_count |
|
974 | count = dest._write_count | |
665 | offset = dest.tell() |
|
975 | offset = dest.tell() | |
@@ -680,50 +990,89 b' class TestCompressor_write_to(unittest.T' | |||||
680 | def test_multithreaded(self): |
|
990 | def test_multithreaded(self): | |
681 | dest = io.BytesIO() |
|
991 | dest = io.BytesIO() | |
682 | cctx = zstd.ZstdCompressor(threads=2) |
|
992 | cctx = zstd.ZstdCompressor(threads=2) | |
683 | with cctx. |
|
993 | with cctx.stream_writer(dest) as compressor: | |
684 | compressor.write(b'a' * 1048576) |
|
994 | compressor.write(b'a' * 1048576) | |
685 | compressor.write(b'b' * 1048576) |
|
995 | compressor.write(b'b' * 1048576) | |
686 | compressor.write(b'c' * 1048576) |
|
996 | compressor.write(b'c' * 1048576) | |
687 |
|
997 | |||
688 | self.assertEqual(len(dest.getvalue()), 295) |
|
998 | self.assertEqual(len(dest.getvalue()), 295) | |
689 |
|
999 | |||
|
1000 | def test_tell(self): | |||
|
1001 | dest = io.BytesIO() | |||
|
1002 | cctx = zstd.ZstdCompressor() | |||
|
1003 | with cctx.stream_writer(dest) as compressor: | |||
|
1004 | self.assertEqual(compressor.tell(), 0) | |||
|
1005 | ||||
|
1006 | for i in range(256): | |||
|
1007 | compressor.write(b'foo' * (i + 1)) | |||
|
1008 | self.assertEqual(compressor.tell(), dest.tell()) | |||
|
1009 | ||||
|
1010 | def test_bad_size(self): | |||
|
1011 | cctx = zstd.ZstdCompressor() | |||
|
1012 | ||||
|
1013 | dest = io.BytesIO() | |||
|
1014 | ||||
|
1015 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): | |||
|
1016 | with cctx.stream_writer(dest, size=2) as compressor: | |||
|
1017 | compressor.write(b'foo') | |||
|
1018 | ||||
|
1019 | # Test another operation. | |||
|
1020 | with cctx.stream_writer(dest, size=42): | |||
|
1021 | pass | |||
|
1022 | ||||
|
1023 | def test_tarfile_compat(self): | |||
|
1024 | raise unittest.SkipTest('not yet fully working') | |||
|
1025 | ||||
|
1026 | dest = io.BytesIO() | |||
|
1027 | cctx = zstd.ZstdCompressor() | |||
|
1028 | with cctx.stream_writer(dest) as compressor: | |||
|
1029 | with tarfile.open('tf', mode='w', fileobj=compressor) as tf: | |||
|
1030 | tf.add(__file__, 'test_compressor.py') | |||
|
1031 | ||||
|
1032 | dest.seek(0) | |||
|
1033 | ||||
|
1034 | dctx = zstd.ZstdDecompressor() | |||
|
1035 | with dctx.stream_reader(dest) as reader: | |||
|
1036 | with tarfile.open(mode='r:', fileobj=reader) as tf: | |||
|
1037 | for member in tf: | |||
|
1038 | self.assertEqual(member.name, 'test_compressor.py') | |||
690 |
|
1039 | |||
691 | @make_cffi |
|
1040 | @make_cffi | |
692 | class TestCompressor_read_ |
|
1041 | class TestCompressor_read_to_iter(unittest.TestCase): | |
693 | def test_type_validation(self): |
|
1042 | def test_type_validation(self): | |
694 | cctx = zstd.ZstdCompressor() |
|
1043 | cctx = zstd.ZstdCompressor() | |
695 |
|
1044 | |||
696 | # Object with read() works. |
|
1045 | # Object with read() works. | |
697 | for chunk in cctx.read_ |
|
1046 | for chunk in cctx.read_to_iter(io.BytesIO()): | |
698 | pass |
|
1047 | pass | |
699 |
|
1048 | |||
700 | # Buffer protocol works. |
|
1049 | # Buffer protocol works. | |
701 | for chunk in cctx.read_ |
|
1050 | for chunk in cctx.read_to_iter(b'foobar'): | |
702 | pass |
|
1051 | pass | |
703 |
|
1052 | |||
704 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): |
|
1053 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): | |
705 | for chunk in cctx.read_ |
|
1054 | for chunk in cctx.read_to_iter(True): | |
706 | pass |
|
1055 | pass | |
707 |
|
1056 | |||
708 | def test_read_empty(self): |
|
1057 | def test_read_empty(self): | |
709 | cctx = zstd.ZstdCompressor(level=1) |
|
1058 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
710 |
|
1059 | |||
711 | source = io.BytesIO() |
|
1060 | source = io.BytesIO() | |
712 | it = cctx.read_ |
|
1061 | it = cctx.read_to_iter(source) | |
713 | chunks = list(it) |
|
1062 | chunks = list(it) | |
714 | self.assertEqual(len(chunks), 1) |
|
1063 | self.assertEqual(len(chunks), 1) | |
715 | compressed = b''.join(chunks) |
|
1064 | compressed = b''.join(chunks) | |
716 | self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
1065 | self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
717 |
|
1066 | |||
718 | # And again with the buffer protocol. |
|
1067 | # And again with the buffer protocol. | |
719 | it = cctx.read_ |
|
1068 | it = cctx.read_to_iter(b'') | |
720 | chunks = list(it) |
|
1069 | chunks = list(it) | |
721 | self.assertEqual(len(chunks), 1) |
|
1070 | self.assertEqual(len(chunks), 1) | |
722 | compressed2 = b''.join(chunks) |
|
1071 | compressed2 = b''.join(chunks) | |
723 | self.assertEqual(compressed2, compressed) |
|
1072 | self.assertEqual(compressed2, compressed) | |
724 |
|
1073 | |||
725 | def test_read_large(self): |
|
1074 | def test_read_large(self): | |
726 | cctx = zstd.ZstdCompressor(level=1) |
|
1075 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
727 |
|
1076 | |||
728 | source = io.BytesIO() |
|
1077 | source = io.BytesIO() | |
729 | source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) |
|
1078 | source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) | |
@@ -732,7 +1081,7 b' class TestCompressor_read_from(unittest.' | |||||
732 |
|
1081 | |||
733 | # Creating an iterator should not perform any compression until |
|
1082 | # Creating an iterator should not perform any compression until | |
734 | # first read. |
|
1083 | # first read. | |
735 | it = cctx.read_ |
|
1084 | it = cctx.read_to_iter(source, size=len(source.getvalue())) | |
736 | self.assertEqual(source.tell(), 0) |
|
1085 | self.assertEqual(source.tell(), 0) | |
737 |
|
1086 | |||
738 | # We should have exactly 2 output chunks. |
|
1087 | # We should have exactly 2 output chunks. | |
@@ -758,21 +1107,28 b' class TestCompressor_read_from(unittest.' | |||||
758 | self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) |
|
1107 | self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) | |
759 |
|
1108 | |||
760 | params = zstd.get_frame_parameters(b''.join(chunks)) |
|
1109 | params = zstd.get_frame_parameters(b''.join(chunks)) | |
761 | self.assertEqual(params.content_size, |
|
1110 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
762 | self.assertEqual(params.window_size, 262144) |
|
1111 | self.assertEqual(params.window_size, 262144) | |
763 | self.assertEqual(params.dict_id, 0) |
|
1112 | self.assertEqual(params.dict_id, 0) | |
764 | self.assertFalse(params.has_checksum) |
|
1113 | self.assertFalse(params.has_checksum) | |
765 |
|
1114 | |||
766 | # Now check the buffer protocol. |
|
1115 | # Now check the buffer protocol. | |
767 | it = cctx.read_ |
|
1116 | it = cctx.read_to_iter(source.getvalue()) | |
768 | chunks = list(it) |
|
1117 | chunks = list(it) | |
769 | self.assertEqual(len(chunks), 2) |
|
1118 | self.assertEqual(len(chunks), 2) | |
|
1119 | ||||
|
1120 | params = zstd.get_frame_parameters(b''.join(chunks)) | |||
|
1121 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |||
|
1122 | #self.assertEqual(params.window_size, 262144) | |||
|
1123 | self.assertEqual(params.dict_id, 0) | |||
|
1124 | self.assertFalse(params.has_checksum) | |||
|
1125 | ||||
770 | self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) |
|
1126 | self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) | |
771 |
|
1127 | |||
772 | def test_read_write_size(self): |
|
1128 | def test_read_write_size(self): | |
773 | source = OpCountingBytesIO(b'foobarfoobar') |
|
1129 | source = OpCountingBytesIO(b'foobarfoobar') | |
774 | cctx = zstd.ZstdCompressor(level=3) |
|
1130 | cctx = zstd.ZstdCompressor(level=3) | |
775 | for chunk in cctx.read_ |
|
1131 | for chunk in cctx.read_to_iter(source, read_size=1, write_size=1): | |
776 | self.assertEqual(len(chunk), 1) |
|
1132 | self.assertEqual(len(chunk), 1) | |
777 |
|
1133 | |||
778 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) |
|
1134 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) | |
@@ -786,17 +1142,22 b' class TestCompressor_read_from(unittest.' | |||||
786 |
|
1142 | |||
787 | cctx = zstd.ZstdCompressor(threads=2) |
|
1143 | cctx = zstd.ZstdCompressor(threads=2) | |
788 |
|
1144 | |||
789 | compressed = b''.join(cctx.read_ |
|
1145 | compressed = b''.join(cctx.read_to_iter(source)) | |
790 | self.assertEqual(len(compressed), 295) |
|
1146 | self.assertEqual(len(compressed), 295) | |
791 |
|
1147 | |||
|
1148 | def test_bad_size(self): | |||
|
1149 | cctx = zstd.ZstdCompressor() | |||
|
1150 | ||||
|
1151 | source = io.BytesIO(b'a' * 42) | |||
|
1152 | ||||
|
1153 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): | |||
|
1154 | b''.join(cctx.read_to_iter(source, size=2)) | |||
|
1155 | ||||
|
1156 | # Test another operation on errored compressor. | |||
|
1157 | b''.join(cctx.read_to_iter(source)) | |||
|
1158 | ||||
792 |
|
1159 | |||
793 | class TestCompressor_multi_compress_to_buffer(unittest.TestCase): |
|
1160 | class TestCompressor_multi_compress_to_buffer(unittest.TestCase): | |
794 | def test_multithreaded_unsupported(self): |
|
|||
795 | cctx = zstd.ZstdCompressor(threads=2) |
|
|||
796 |
|
||||
797 | with self.assertRaisesRegexp(zstd.ZstdError, 'function cannot be called on ZstdCompressor configured for multi-threaded compression'): |
|
|||
798 | cctx.multi_compress_to_buffer([b'foo']) |
|
|||
799 |
|
||||
800 | def test_invalid_inputs(self): |
|
1161 | def test_invalid_inputs(self): | |
801 | cctx = zstd.ZstdCompressor() |
|
1162 | cctx = zstd.ZstdCompressor() | |
802 |
|
1163 | |||
@@ -819,7 +1180,7 b' class TestCompressor_multi_compress_to_b' | |||||
819 | cctx.multi_compress_to_buffer([b'', b'', b'']) |
|
1180 | cctx.multi_compress_to_buffer([b'', b'', b'']) | |
820 |
|
1181 | |||
821 | def test_list_input(self): |
|
1182 | def test_list_input(self): | |
822 | cctx = zstd.ZstdCompressor( |
|
1183 | cctx = zstd.ZstdCompressor(write_checksum=True) | |
823 |
|
1184 | |||
824 | original = [b'foo' * 12, b'bar' * 6] |
|
1185 | original = [b'foo' * 12, b'bar' * 6] | |
825 | frames = [cctx.compress(c) for c in original] |
|
1186 | frames = [cctx.compress(c) for c in original] | |
@@ -834,7 +1195,7 b' class TestCompressor_multi_compress_to_b' | |||||
834 | self.assertEqual(b[1].tobytes(), frames[1]) |
|
1195 | self.assertEqual(b[1].tobytes(), frames[1]) | |
835 |
|
1196 | |||
836 | def test_buffer_with_segments_input(self): |
|
1197 | def test_buffer_with_segments_input(self): | |
837 | cctx = zstd.ZstdCompressor( |
|
1198 | cctx = zstd.ZstdCompressor(write_checksum=True) | |
838 |
|
1199 | |||
839 | original = [b'foo' * 4, b'bar' * 6] |
|
1200 | original = [b'foo' * 4, b'bar' * 6] | |
840 | frames = [cctx.compress(c) for c in original] |
|
1201 | frames = [cctx.compress(c) for c in original] | |
@@ -852,7 +1213,7 b' class TestCompressor_multi_compress_to_b' | |||||
852 | self.assertEqual(result[1].tobytes(), frames[1]) |
|
1213 | self.assertEqual(result[1].tobytes(), frames[1]) | |
853 |
|
1214 | |||
854 | def test_buffer_with_segments_collection_input(self): |
|
1215 | def test_buffer_with_segments_collection_input(self): | |
855 | cctx = zstd.ZstdCompressor( |
|
1216 | cctx = zstd.ZstdCompressor(write_checksum=True) | |
856 |
|
1217 | |||
857 | original = [ |
|
1218 | original = [ | |
858 | b'foo1', |
|
1219 | b'foo1', | |
@@ -886,10 +1247,10 b' class TestCompressor_multi_compress_to_b' | |||||
886 | def test_multiple_threads(self): |
|
1247 | def test_multiple_threads(self): | |
887 | # threads argument will cause multi-threaded ZSTD APIs to be used, which will |
|
1248 | # threads argument will cause multi-threaded ZSTD APIs to be used, which will | |
888 | # make output different. |
|
1249 | # make output different. | |
889 | refcctx = zstd.ZstdCompressor( |
|
1250 | refcctx = zstd.ZstdCompressor(write_checksum=True) | |
890 | reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)] |
|
1251 | reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)] | |
891 |
|
1252 | |||
892 | cctx = zstd.ZstdCompressor( |
|
1253 | cctx = zstd.ZstdCompressor(write_checksum=True) | |
893 |
|
1254 | |||
894 | frames = [] |
|
1255 | frames = [] | |
895 | frames.extend(b'x' * 64 for i in range(256)) |
|
1256 | frames.extend(b'x' * 64 for i in range(256)) |
@@ -1,10 +1,6 b'' | |||||
1 | import io |
|
1 | import io | |
2 | import os |
|
2 | import os | |
3 |
|
3 | import unittest | ||
4 | try: |
|
|||
5 | import unittest2 as unittest |
|
|||
6 | except ImportError: |
|
|||
7 | import unittest |
|
|||
8 |
|
4 | |||
9 | try: |
|
5 | try: | |
10 | import hypothesis |
|
6 | import hypothesis | |
@@ -12,7 +8,7 b' try:' | |||||
12 | except ImportError: |
|
8 | except ImportError: | |
13 | raise unittest.SkipTest('hypothesis not available') |
|
9 | raise unittest.SkipTest('hypothesis not available') | |
14 |
|
10 | |||
15 | import zstd |
|
11 | import zstandard as zstd | |
16 |
|
12 | |||
17 | from . common import ( |
|
13 | from . common import ( | |
18 | make_cffi, |
|
14 | make_cffi, | |
@@ -22,7 +18,57 b' from . common import (' | |||||
22 |
|
18 | |||
23 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
19 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |
24 | @make_cffi |
|
20 | @make_cffi | |
25 | class TestCompressor_ |
|
21 | class TestCompressor_stream_reader_fuzzing(unittest.TestCase): | |
|
22 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
23 | level=strategies.integers(min_value=1, max_value=5), | |||
|
24 | source_read_size=strategies.integers(1, 16384), | |||
|
25 | read_sizes=strategies.data()) | |||
|
26 | def test_stream_source_read_variance(self, original, level, source_read_size, | |||
|
27 | read_sizes): | |||
|
28 | refctx = zstd.ZstdCompressor(level=level) | |||
|
29 | ref_frame = refctx.compress(original) | |||
|
30 | ||||
|
31 | cctx = zstd.ZstdCompressor(level=level) | |||
|
32 | with cctx.stream_reader(io.BytesIO(original), size=len(original), | |||
|
33 | read_size=source_read_size) as reader: | |||
|
34 | chunks = [] | |||
|
35 | while True: | |||
|
36 | read_size = read_sizes.draw(strategies.integers(1, 16384)) | |||
|
37 | chunk = reader.read(read_size) | |||
|
38 | ||||
|
39 | if not chunk: | |||
|
40 | break | |||
|
41 | chunks.append(chunk) | |||
|
42 | ||||
|
43 | self.assertEqual(b''.join(chunks), ref_frame) | |||
|
44 | ||||
|
45 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
46 | level=strategies.integers(min_value=1, max_value=5), | |||
|
47 | source_read_size=strategies.integers(1, 16384), | |||
|
48 | read_sizes=strategies.data()) | |||
|
49 | def test_buffer_source_read_variance(self, original, level, source_read_size, | |||
|
50 | read_sizes): | |||
|
51 | ||||
|
52 | refctx = zstd.ZstdCompressor(level=level) | |||
|
53 | ref_frame = refctx.compress(original) | |||
|
54 | ||||
|
55 | cctx = zstd.ZstdCompressor(level=level) | |||
|
56 | with cctx.stream_reader(original, size=len(original), | |||
|
57 | read_size=source_read_size) as reader: | |||
|
58 | chunks = [] | |||
|
59 | while True: | |||
|
60 | read_size = read_sizes.draw(strategies.integers(1, 16384)) | |||
|
61 | chunk = reader.read(read_size) | |||
|
62 | if not chunk: | |||
|
63 | break | |||
|
64 | chunks.append(chunk) | |||
|
65 | ||||
|
66 | self.assertEqual(b''.join(chunks), ref_frame) | |||
|
67 | ||||
|
68 | ||||
|
69 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
|
70 | @make_cffi | |||
|
71 | class TestCompressor_stream_writer_fuzzing(unittest.TestCase): | |||
26 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
72 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
27 | level=strategies.integers(min_value=1, max_value=5), |
|
73 | level=strategies.integers(min_value=1, max_value=5), | |
28 | write_size=strategies.integers(min_value=1, max_value=1048576)) |
|
74 | write_size=strategies.integers(min_value=1, max_value=1048576)) | |
@@ -32,7 +78,7 b' class TestCompressor_write_to_fuzzing(un' | |||||
32 |
|
78 | |||
33 | cctx = zstd.ZstdCompressor(level=level) |
|
79 | cctx = zstd.ZstdCompressor(level=level) | |
34 | b = io.BytesIO() |
|
80 | b = io.BytesIO() | |
35 | with cctx. |
|
81 | with cctx.stream_writer(b, size=len(original), write_size=write_size) as compressor: | |
36 | compressor.write(original) |
|
82 | compressor.write(original) | |
37 |
|
83 | |||
38 | self.assertEqual(b.getvalue(), ref_frame) |
|
84 | self.assertEqual(b.getvalue(), ref_frame) | |
@@ -62,13 +108,12 b' class TestCompressor_copy_stream_fuzzing' | |||||
62 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
108 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |
63 | @make_cffi |
|
109 | @make_cffi | |
64 | class TestCompressor_compressobj_fuzzing(unittest.TestCase): |
|
110 | class TestCompressor_compressobj_fuzzing(unittest.TestCase): | |
|
111 | @hypothesis.settings( | |||
|
112 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |||
65 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
113 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
66 | level=strategies.integers(min_value=1, max_value=5), |
|
114 | level=strategies.integers(min_value=1, max_value=5), | |
67 | chunk_sizes=strategies. |
|
115 | chunk_sizes=strategies.data()) | |
68 | strategies.integers(min_value=1, max_value=4096))) |
|
|||
69 | def test_random_input_sizes(self, original, level, chunk_sizes): |
|
116 | def test_random_input_sizes(self, original, level, chunk_sizes): | |
70 | chunk_sizes = iter(chunk_sizes) |
|
|||
71 |
|
||||
72 | refctx = zstd.ZstdCompressor(level=level) |
|
117 | refctx = zstd.ZstdCompressor(level=level) | |
73 | ref_frame = refctx.compress(original) |
|
118 | ref_frame = refctx.compress(original) | |
74 |
|
119 | |||
@@ -78,7 +123,7 b' class TestCompressor_compressobj_fuzzing' | |||||
78 | chunks = [] |
|
123 | chunks = [] | |
79 | i = 0 |
|
124 | i = 0 | |
80 | while True: |
|
125 | while True: | |
81 | chunk_size = |
|
126 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) | |
82 | source = original[i:i + chunk_size] |
|
127 | source = original[i:i + chunk_size] | |
83 | if not source: |
|
128 | if not source: | |
84 | break |
|
129 | break | |
@@ -93,7 +138,7 b' class TestCompressor_compressobj_fuzzing' | |||||
93 |
|
138 | |||
94 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
139 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |
95 | @make_cffi |
|
140 | @make_cffi | |
96 |
class TestCompressor_read_ |
|
141 | class TestCompressor_read_to_iter_fuzzing(unittest.TestCase): | |
97 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
142 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
98 | level=strategies.integers(min_value=1, max_value=5), |
|
143 | level=strategies.integers(min_value=1, max_value=5), | |
99 | read_size=strategies.integers(min_value=1, max_value=4096), |
|
144 | read_size=strategies.integers(min_value=1, max_value=4096), | |
@@ -105,8 +150,9 b' class TestCompressor_read_from_fuzzing(u' | |||||
105 | source = io.BytesIO(original) |
|
150 | source = io.BytesIO(original) | |
106 |
|
151 | |||
107 | cctx = zstd.ZstdCompressor(level=level) |
|
152 | cctx = zstd.ZstdCompressor(level=level) | |
108 | chunks = list(cctx.read_ |
|
153 | chunks = list(cctx.read_to_iter(source, size=len(original), | |
109 |
|
|
154 | read_size=read_size, | |
|
155 | write_size=write_size)) | |||
110 |
|
156 | |||
111 | self.assertEqual(b''.join(chunks), ref_frame) |
|
157 | self.assertEqual(b''.join(chunks), ref_frame) | |
112 |
|
158 | |||
@@ -125,7 +171,6 b' class TestCompressor_multi_compress_to_b' | |||||
125 | kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0]) |
|
171 | kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0]) | |
126 |
|
172 | |||
127 | cctx = zstd.ZstdCompressor(level=1, |
|
173 | cctx = zstd.ZstdCompressor(level=1, | |
128 | write_content_size=True, |
|
|||
129 | write_checksum=True, |
|
174 | write_checksum=True, | |
130 | **kwargs) |
|
175 | **kwargs) | |
131 |
|
176 |
@@ -1,9 +1,7 b'' | |||||
1 | try: |
|
1 | import sys | |
2 |
|
|
2 | import unittest | |
3 | except ImportError: |
|
|||
4 | import unittest |
|
|||
5 |
|
3 | |||
6 | import zstd |
|
4 | import zstandard as zstd | |
7 |
|
5 | |||
8 | from . common import ( |
|
6 | from . common import ( | |
9 | make_cffi, |
|
7 | make_cffi, | |
@@ -12,52 +10,104 b' from . common import (' | |||||
12 |
|
10 | |||
13 | @make_cffi |
|
11 | @make_cffi | |
14 | class TestCompressionParameters(unittest.TestCase): |
|
12 | class TestCompressionParameters(unittest.TestCase): | |
15 | def test_ |
|
13 | def test_bounds(self): | |
16 | with self.assertRaises(TypeError): |
|
14 | zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MIN, | |
17 | zstd.CompressionParameters() |
|
15 | chain_log=zstd.CHAINLOG_MIN, | |
18 |
|
16 | hash_log=zstd.HASHLOG_MIN, | ||
19 | with self.assertRaises(TypeError): |
|
17 | search_log=zstd.SEARCHLOG_MIN, | |
20 | zstd.CompressionParameters(0, 1) |
|
18 | min_match=zstd.SEARCHLENGTH_MIN + 1, | |
|
19 | target_length=zstd.TARGETLENGTH_MIN, | |||
|
20 | compression_strategy=zstd.STRATEGY_FAST) | |||
21 |
|
21 | |||
22 | def test_bounds(self): |
|
22 | zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MAX, | |
23 | zstd.CompressionParameters(zstd.WINDOWLOG_MIN, |
|
23 | chain_log=zstd.CHAINLOG_MAX, | |
24 |
zstd. |
|
24 | hash_log=zstd.HASHLOG_MAX, | |
25 |
zstd. |
|
25 | search_log=zstd.SEARCHLOG_MAX, | |
26 |
zstd.SEARCHL |
|
26 | min_match=zstd.SEARCHLENGTH_MAX - 1, | |
27 |
zstd.S |
|
27 | compression_strategy=zstd.STRATEGY_BTULTRA) | |
28 | zstd.TARGETLENGTH_MIN, |
|
|||
29 | zstd.STRATEGY_FAST) |
|
|||
30 |
|
28 | |||
31 | zstd.CompressionParameters(zstd.WINDOWLOG_MAX, |
|
29 | def test_from_level(self): | |
32 | zstd.CHAINLOG_MAX, |
|
30 | p = zstd.ZstdCompressionParameters.from_level(1) | |
33 | zstd.HASHLOG_MAX, |
|
|||
34 | zstd.SEARCHLOG_MAX, |
|
|||
35 | zstd.SEARCHLENGTH_MAX - 1, |
|
|||
36 | zstd.TARGETLENGTH_MAX, |
|
|||
37 | zstd.STRATEGY_BTOPT) |
|
|||
38 |
|
||||
39 | def test_get_compression_parameters(self): |
|
|||
40 | p = zstd.get_compression_parameters(1) |
|
|||
41 | self.assertIsInstance(p, zstd.CompressionParameters) |
|
31 | self.assertIsInstance(p, zstd.CompressionParameters) | |
42 |
|
32 | |||
43 | self.assertEqual(p.window_log, 19) |
|
33 | self.assertEqual(p.window_log, 19) | |
44 |
|
34 | |||
|
35 | p = zstd.ZstdCompressionParameters.from_level(-4) | |||
|
36 | self.assertEqual(p.window_log, 19) | |||
|
37 | self.assertEqual(p.compress_literals, 0) | |||
|
38 | ||||
45 | def test_members(self): |
|
39 | def test_members(self): | |
46 | p = zstd.CompressionParameters(10, |
|
40 | p = zstd.ZstdCompressionParameters(window_log=10, | |
|
41 | chain_log=6, | |||
|
42 | hash_log=7, | |||
|
43 | search_log=4, | |||
|
44 | min_match=5, | |||
|
45 | target_length=8, | |||
|
46 | compression_strategy=1) | |||
47 | self.assertEqual(p.window_log, 10) |
|
47 | self.assertEqual(p.window_log, 10) | |
48 | self.assertEqual(p.chain_log, 6) |
|
48 | self.assertEqual(p.chain_log, 6) | |
49 | self.assertEqual(p.hash_log, 7) |
|
49 | self.assertEqual(p.hash_log, 7) | |
50 | self.assertEqual(p.search_log, 4) |
|
50 | self.assertEqual(p.search_log, 4) | |
51 | self.assertEqual(p. |
|
51 | self.assertEqual(p.min_match, 5) | |
52 | self.assertEqual(p.target_length, 8) |
|
52 | self.assertEqual(p.target_length, 8) | |
53 | self.assertEqual(p.strategy, 1) |
|
53 | self.assertEqual(p.compression_strategy, 1) | |
|
54 | ||||
|
55 | p = zstd.ZstdCompressionParameters(compression_level=2) | |||
|
56 | self.assertEqual(p.compression_level, 2) | |||
|
57 | ||||
|
58 | p = zstd.ZstdCompressionParameters(threads=4) | |||
|
59 | self.assertEqual(p.threads, 4) | |||
|
60 | ||||
|
61 | p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576, | |||
|
62 | overlap_size_log=6) | |||
|
63 | self.assertEqual(p.threads, 2) | |||
|
64 | self.assertEqual(p.job_size, 1048576) | |||
|
65 | self.assertEqual(p.overlap_size_log, 6) | |||
|
66 | ||||
|
67 | p = zstd.ZstdCompressionParameters(compression_level=2) | |||
|
68 | self.assertEqual(p.compress_literals, 1) | |||
|
69 | ||||
|
70 | p = zstd.ZstdCompressionParameters(compress_literals=False) | |||
|
71 | self.assertEqual(p.compress_literals, 0) | |||
|
72 | ||||
|
73 | p = zstd.ZstdCompressionParameters(compression_level=-1) | |||
|
74 | self.assertEqual(p.compression_level, -1) | |||
|
75 | self.assertEqual(p.compress_literals, 0) | |||
|
76 | ||||
|
77 | p = zstd.ZstdCompressionParameters(compression_level=-2, compress_literals=True) | |||
|
78 | self.assertEqual(p.compression_level, -2) | |||
|
79 | self.assertEqual(p.compress_literals, 1) | |||
|
80 | ||||
|
81 | p = zstd.ZstdCompressionParameters(force_max_window=True) | |||
|
82 | self.assertEqual(p.force_max_window, 1) | |||
|
83 | ||||
|
84 | p = zstd.ZstdCompressionParameters(enable_ldm=True) | |||
|
85 | self.assertEqual(p.enable_ldm, 1) | |||
|
86 | ||||
|
87 | p = zstd.ZstdCompressionParameters(ldm_hash_log=7) | |||
|
88 | self.assertEqual(p.ldm_hash_log, 7) | |||
|
89 | ||||
|
90 | p = zstd.ZstdCompressionParameters(ldm_min_match=6) | |||
|
91 | self.assertEqual(p.ldm_min_match, 6) | |||
|
92 | ||||
|
93 | p = zstd.ZstdCompressionParameters(ldm_bucket_size_log=7) | |||
|
94 | self.assertEqual(p.ldm_bucket_size_log, 7) | |||
|
95 | ||||
|
96 | p = zstd.ZstdCompressionParameters(ldm_hash_every_log=8) | |||
|
97 | self.assertEqual(p.ldm_hash_every_log, 8) | |||
54 |
|
98 | |||
55 | def test_estimated_compression_context_size(self): |
|
99 | def test_estimated_compression_context_size(self): | |
56 | p = zstd.CompressionParameters(20, |
|
100 | p = zstd.ZstdCompressionParameters(window_log=20, | |
|
101 | chain_log=16, | |||
|
102 | hash_log=17, | |||
|
103 | search_log=1, | |||
|
104 | min_match=5, | |||
|
105 | target_length=16, | |||
|
106 | compression_strategy=zstd.STRATEGY_DFAST) | |||
57 |
|
107 | |||
58 | # 32-bit has slightly different values from 64-bit. |
|
108 | # 32-bit has slightly different values from 64-bit. | |
59 | self.assertAlmostEqual(p.estimated_compression_context_size(), 12 |
|
109 | self.assertAlmostEqual(p.estimated_compression_context_size(), 1294072, | |
60 | delta= |
|
110 | delta=250) | |
61 |
|
111 | |||
62 |
|
112 | |||
63 | @make_cffi |
|
113 | @make_cffi | |
@@ -66,8 +116,18 b' class TestFrameParameters(unittest.TestC' | |||||
66 | with self.assertRaises(TypeError): |
|
116 | with self.assertRaises(TypeError): | |
67 | zstd.get_frame_parameters(None) |
|
117 | zstd.get_frame_parameters(None) | |
68 |
|
118 | |||
69 | with self.assertRaises(TypeError): |
|
119 | # Python 3 doesn't appear to convert unicode to Py_buffer. | |
70 | zstd.get_frame_parameters(u'foobarbaz') |
|
120 | if sys.version_info[0] >= 3: | |
|
121 | with self.assertRaises(TypeError): | |||
|
122 | zstd.get_frame_parameters(u'foobarbaz') | |||
|
123 | else: | |||
|
124 | # CPython will convert unicode to Py_buffer. But CFFI won't. | |||
|
125 | if zstd.backend == 'cffi': | |||
|
126 | with self.assertRaises(TypeError): | |||
|
127 | zstd.get_frame_parameters(u'foobarbaz') | |||
|
128 | else: | |||
|
129 | with self.assertRaises(zstd.ZstdError): | |||
|
130 | zstd.get_frame_parameters(u'foobarbaz') | |||
71 |
|
131 | |||
72 | def test_invalid_input_sizes(self): |
|
132 | def test_invalid_input_sizes(self): | |
73 | with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'): |
|
133 | with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'): | |
@@ -82,21 +142,21 b' class TestFrameParameters(unittest.TestC' | |||||
82 |
|
142 | |||
83 | def test_attributes(self): |
|
143 | def test_attributes(self): | |
84 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00') |
|
144 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00') | |
85 | self.assertEqual(params.content_size, |
|
145 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
86 | self.assertEqual(params.window_size, 1024) |
|
146 | self.assertEqual(params.window_size, 1024) | |
87 | self.assertEqual(params.dict_id, 0) |
|
147 | self.assertEqual(params.dict_id, 0) | |
88 | self.assertFalse(params.has_checksum) |
|
148 | self.assertFalse(params.has_checksum) | |
89 |
|
149 | |||
90 | # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte. |
|
150 | # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte. | |
91 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff') |
|
151 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff') | |
92 | self.assertEqual(params.content_size, |
|
152 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
93 | self.assertEqual(params.window_size, 1024) |
|
153 | self.assertEqual(params.window_size, 1024) | |
94 | self.assertEqual(params.dict_id, 255) |
|
154 | self.assertEqual(params.dict_id, 255) | |
95 | self.assertFalse(params.has_checksum) |
|
155 | self.assertFalse(params.has_checksum) | |
96 |
|
156 | |||
97 | # Lowest 3rd bit indicates if checksum is present. |
|
157 | # Lowest 3rd bit indicates if checksum is present. | |
98 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00') |
|
158 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00') | |
99 | self.assertEqual(params.content_size, |
|
159 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
100 | self.assertEqual(params.window_size, 1024) |
|
160 | self.assertEqual(params.window_size, 1024) | |
101 | self.assertEqual(params.dict_id, 0) |
|
161 | self.assertEqual(params.dict_id, 0) | |
102 | self.assertTrue(params.has_checksum) |
|
162 | self.assertTrue(params.has_checksum) | |
@@ -110,7 +170,7 b' class TestFrameParameters(unittest.TestC' | |||||
110 |
|
170 | |||
111 | # Window descriptor is 2nd byte after frame header. |
|
171 | # Window descriptor is 2nd byte after frame header. | |
112 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40') |
|
172 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40') | |
113 | self.assertEqual(params.content_size, |
|
173 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
114 | self.assertEqual(params.window_size, 262144) |
|
174 | self.assertEqual(params.window_size, 262144) | |
115 | self.assertEqual(params.dict_id, 0) |
|
175 | self.assertEqual(params.dict_id, 0) | |
116 | self.assertFalse(params.has_checksum) |
|
176 | self.assertFalse(params.has_checksum) | |
@@ -121,3 +181,22 b' class TestFrameParameters(unittest.TestC' | |||||
121 | self.assertEqual(params.window_size, 262144) |
|
181 | self.assertEqual(params.window_size, 262144) | |
122 | self.assertEqual(params.dict_id, 15) |
|
182 | self.assertEqual(params.dict_id, 15) | |
123 | self.assertTrue(params.has_checksum) |
|
183 | self.assertTrue(params.has_checksum) | |
|
184 | ||||
|
185 | def test_input_types(self): | |||
|
186 | v = zstd.FRAME_HEADER + b'\x00\x00' | |||
|
187 | ||||
|
188 | mutable_array = bytearray(len(v)) | |||
|
189 | mutable_array[:] = v | |||
|
190 | ||||
|
191 | sources = [ | |||
|
192 | memoryview(v), | |||
|
193 | bytearray(v), | |||
|
194 | mutable_array, | |||
|
195 | ] | |||
|
196 | ||||
|
197 | for source in sources: | |||
|
198 | params = zstd.get_frame_parameters(source) | |||
|
199 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |||
|
200 | self.assertEqual(params.window_size, 1024) | |||
|
201 | self.assertEqual(params.dict_id, 0) | |||
|
202 | self.assertFalse(params.has_checksum) |
@@ -1,10 +1,7 b'' | |||||
1 | import io |
|
1 | import io | |
2 | import os |
|
2 | import os | |
3 |
|
3 | import sys | ||
4 | try: |
|
4 | import unittest | |
5 | import unittest2 as unittest |
|
|||
6 | except ImportError: |
|
|||
7 | import unittest |
|
|||
8 |
|
5 | |||
9 | try: |
|
6 | try: | |
10 | import hypothesis |
|
7 | import hypothesis | |
@@ -12,7 +9,7 b' try:' | |||||
12 | except ImportError: |
|
9 | except ImportError: | |
13 | raise unittest.SkipTest('hypothesis not available') |
|
10 | raise unittest.SkipTest('hypothesis not available') | |
14 |
|
11 | |||
15 | import zstd |
|
12 | import zstandard as zstd | |
16 |
|
13 | |||
17 | from .common import ( |
|
14 | from .common import ( | |
18 | make_cffi, |
|
15 | make_cffi, | |
@@ -28,16 +25,17 b' s_hashlog = strategies.integers(min_valu' | |||||
28 | s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN, |
|
25 | s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN, | |
29 | max_value=zstd.SEARCHLOG_MAX) |
|
26 | max_value=zstd.SEARCHLOG_MAX) | |
30 | s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN, |
|
27 | s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN, | |
31 |
|
|
28 | max_value=zstd.SEARCHLENGTH_MAX) | |
32 | s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN, |
|
29 | s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN, | |
33 |
|
|
30 | max_value=2**32) | |
34 | s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST, |
|
31 | s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST, | |
35 | zstd.STRATEGY_DFAST, |
|
32 | zstd.STRATEGY_DFAST, | |
36 | zstd.STRATEGY_GREEDY, |
|
33 | zstd.STRATEGY_GREEDY, | |
37 | zstd.STRATEGY_LAZY, |
|
34 | zstd.STRATEGY_LAZY, | |
38 | zstd.STRATEGY_LAZY2, |
|
35 | zstd.STRATEGY_LAZY2, | |
39 | zstd.STRATEGY_BTLAZY2, |
|
36 | zstd.STRATEGY_BTLAZY2, | |
40 | zstd.STRATEGY_BTOPT |
|
37 | zstd.STRATEGY_BTOPT, | |
|
38 | zstd.STRATEGY_BTULTRA)) | |||
41 |
|
39 | |||
42 |
|
40 | |||
43 | @make_cffi |
|
41 | @make_cffi | |
@@ -47,24 +45,17 b' class TestCompressionParametersHypothesi' | |||||
47 | s_searchlength, s_targetlength, s_strategy) |
|
45 | s_searchlength, s_targetlength, s_strategy) | |
48 | def test_valid_init(self, windowlog, chainlog, hashlog, searchlog, |
|
46 | def test_valid_init(self, windowlog, chainlog, hashlog, searchlog, | |
49 | searchlength, targetlength, strategy): |
|
47 | searchlength, targetlength, strategy): | |
50 | # ZSTD_checkCParams moves the goal posts on us from what's advertised |
|
48 | zstd.ZstdCompressionParameters(window_log=windowlog, | |
51 | # in the constants. So move along with them. |
|
49 | chain_log=chainlog, | |
52 | if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY): |
|
50 | hash_log=hashlog, | |
53 | searchlength += 1 |
|
51 | search_log=searchlog, | |
54 | elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST: |
|
52 | min_match=searchlength, | |
55 | searchlength -= 1 |
|
53 | target_length=targetlength, | |
56 |
|
54 | compression_strategy=strategy) | ||
57 | p = zstd.CompressionParameters(windowlog, chainlog, hashlog, |
|
|||
58 | searchlog, searchlength, |
|
|||
59 | targetlength, strategy) |
|
|||
60 |
|
||||
61 | cctx = zstd.ZstdCompressor(compression_params=p) |
|
|||
62 | with cctx.write_to(io.BytesIO()): |
|
|||
63 | pass |
|
|||
64 |
|
55 | |||
65 | @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, |
|
56 | @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, | |
66 | s_searchlength, s_targetlength, s_strategy) |
|
57 | s_searchlength, s_targetlength, s_strategy) | |
67 | def test_estimate_compression_context_size(self, windowlog, chainlog, |
|
58 | def test_estimated_compression_context_size(self, windowlog, chainlog, | |
68 | hashlog, searchlog, |
|
59 | hashlog, searchlog, | |
69 | searchlength, targetlength, |
|
60 | searchlength, targetlength, | |
70 | strategy): |
|
61 | strategy): | |
@@ -73,7 +64,12 b' class TestCompressionParametersHypothesi' | |||||
73 | elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST: |
|
64 | elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST: | |
74 | searchlength -= 1 |
|
65 | searchlength -= 1 | |
75 |
|
66 | |||
76 | p = zstd.CompressionParameters(windowlog, |
|
67 | p = zstd.ZstdCompressionParameters(window_log=windowlog, | |
77 | searchlog, searchlength, |
|
68 | chain_log=chainlog, | |
78 | targetlength, strategy) |
|
69 | hash_log=hashlog, | |
79 | size = zstd.estimate_compression_context_size(p) |
|
70 | search_log=searchlog, | |
|
71 | min_match=searchlength, | |||
|
72 | target_length=targetlength, | |||
|
73 | compression_strategy=strategy) | |||
|
74 | size = p.estimated_compression_context_size() | |||
|
75 |
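Note: the hunk above moves this test from the positional zstd.CompressionParameters constructor to keyword arguments on zstd.ZstdCompressionParameters, and the context-size estimate becomes a method on the parameters object rather than a module-level function. A minimal sketch of the new calling convention exercised by these tests (the numeric values here are illustrative, not taken from the diff):

import zstandard as zstd

params = zstd.ZstdCompressionParameters(window_log=10,
                                        chain_log=6,
                                        hash_log=6,
                                        search_log=1,
                                        min_match=5,
                                        target_length=16,
                                        compression_strategy=zstd.STRATEGY_DFAST)

# Previously zstd.estimate_compression_context_size(p); now a method.
size = params.estimated_compression_context_size()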
@@ -1,16 +1,14 b'' | |||||
1 | import io |
|
1 | import io | |
|
2 | import os | |||
2 | import random |
|
3 | import random | |
3 | import struct |
|
4 | import struct | |
4 | import sys |
|
5 | import sys | |
|
6 | import unittest | |||
5 |
|
7 | |||
6 | try: |
|
8 | import zstandard as zstd | |
7 | import unittest2 as unittest |
|
|||
8 | except ImportError: |
|
|||
9 | import unittest |
|
|||
10 |
|
||||
11 | import zstd |
|
|||
12 |
|
9 | |||
13 | from .common import ( |
|
10 | from .common import ( | |
|
11 | generate_samples, | |||
14 | make_cffi, |
|
12 | make_cffi, | |
15 | OpCountingBytesIO, |
|
13 | OpCountingBytesIO, | |
16 | ) |
|
14 | ) | |
@@ -23,35 +21,124 b' else:' | |||||
23 |
|
21 | |||
24 |
|
22 | |||
25 | @make_cffi |
|
23 | @make_cffi | |
|
24 | class TestFrameHeaderSize(unittest.TestCase): | |||
|
25 | def test_empty(self): | |||
|
26 | with self.assertRaisesRegexp( | |||
|
27 | zstd.ZstdError, 'could not determine frame header size: Src size ' | |||
|
28 | 'is incorrect'): | |||
|
29 | zstd.frame_header_size(b'') | |||
|
30 | ||||
|
31 | def test_too_small(self): | |||
|
32 | with self.assertRaisesRegexp( | |||
|
33 | zstd.ZstdError, 'could not determine frame header size: Src size ' | |||
|
34 | 'is incorrect'): | |||
|
35 | zstd.frame_header_size(b'foob') | |||
|
36 | ||||
|
37 | def test_basic(self): | |||
|
38 | # It doesn't matter that it isn't a valid frame. | |||
|
39 | self.assertEqual(zstd.frame_header_size(b'long enough but no magic'), 6) | |||
|
40 | ||||
|
41 | ||||
|
42 | @make_cffi | |||
|
43 | class TestFrameContentSize(unittest.TestCase): | |||
|
44 | def test_empty(self): | |||
|
45 | with self.assertRaisesRegexp(zstd.ZstdError, | |||
|
46 | 'error when determining content size'): | |||
|
47 | zstd.frame_content_size(b'') | |||
|
48 | ||||
|
49 | def test_too_small(self): | |||
|
50 | with self.assertRaisesRegexp(zstd.ZstdError, | |||
|
51 | 'error when determining content size'): | |||
|
52 | zstd.frame_content_size(b'foob') | |||
|
53 | ||||
|
54 | def test_bad_frame(self): | |||
|
55 | with self.assertRaisesRegexp(zstd.ZstdError, | |||
|
56 | 'error when determining content size'): | |||
|
57 | zstd.frame_content_size(b'invalid frame header') | |||
|
58 | ||||
|
59 | def test_unknown(self): | |||
|
60 | cctx = zstd.ZstdCompressor(write_content_size=False) | |||
|
61 | frame = cctx.compress(b'foobar') | |||
|
62 | ||||
|
63 | self.assertEqual(zstd.frame_content_size(frame), -1) | |||
|
64 | ||||
|
65 | def test_empty(self): | |||
|
66 | cctx = zstd.ZstdCompressor() | |||
|
67 | frame = cctx.compress(b'') | |||
|
68 | ||||
|
69 | self.assertEqual(zstd.frame_content_size(frame), 0) | |||
|
70 | ||||
|
71 | def test_basic(self): | |||
|
72 | cctx = zstd.ZstdCompressor() | |||
|
73 | frame = cctx.compress(b'foobar') | |||
|
74 | ||||
|
75 | self.assertEqual(zstd.frame_content_size(frame), 6) | |||
|
76 | ||||
|
77 | ||||
|
78 | @make_cffi | |||
|
79 | class TestDecompressor(unittest.TestCase): | |||
|
80 | def test_memory_size(self): | |||
|
81 | dctx = zstd.ZstdDecompressor() | |||
|
82 | ||||
|
83 | self.assertGreater(dctx.memory_size(), 100) | |||
|
84 | ||||
|
85 | ||||
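Note: TestFrameHeaderSize and TestFrameContentSize above cover the module-level frame inspection helpers added in this version. A small usage sketch derived only from what the tests assert (the b'foobar' payload is illustrative):

import zstandard as zstd

frame = zstd.ZstdCompressor().compress(b'foobar')

# Size in bytes of the frame header at the start of the compressed data.
header_size = zstd.frame_header_size(frame)

# Decompressed size recorded in the frame header; the tests expect -1
# when the frame was produced with write_content_size=False.
assert zstd.frame_content_size(frame) == 6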
|
86 | @make_cffi | |||
26 | class TestDecompressor_decompress(unittest.TestCase): |
|
87 | class TestDecompressor_decompress(unittest.TestCase): | |
27 | def test_empty_input(self): |
|
88 | def test_empty_input(self): | |
28 | dctx = zstd.ZstdDecompressor() |
|
89 | dctx = zstd.ZstdDecompressor() | |
29 |
|
90 | |||
30 | with self.assertRaisesRegexp(zstd.ZstdError, ' |
|
91 | with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'): | |
31 | dctx.decompress(b'') |
|
92 | dctx.decompress(b'') | |
32 |
|
93 | |||
33 | def test_invalid_input(self): |
|
94 | def test_invalid_input(self): | |
34 | dctx = zstd.ZstdDecompressor() |
|
95 | dctx = zstd.ZstdDecompressor() | |
35 |
|
96 | |||
36 | with self.assertRaisesRegexp(zstd.ZstdError, ' |
|
97 | with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'): | |
37 | dctx.decompress(b'foobar') |
|
98 | dctx.decompress(b'foobar') | |
38 |
|
99 | |||
|
100 | def test_input_types(self): | |||
|
101 | cctx = zstd.ZstdCompressor(level=1) | |||
|
102 | compressed = cctx.compress(b'foo') | |||
|
103 | ||||
|
104 | mutable_array = bytearray(len(compressed)) | |||
|
105 | mutable_array[:] = compressed | |||
|
106 | ||||
|
107 | sources = [ | |||
|
108 | memoryview(compressed), | |||
|
109 | bytearray(compressed), | |||
|
110 | mutable_array, | |||
|
111 | ] | |||
|
112 | ||||
|
113 | dctx = zstd.ZstdDecompressor() | |||
|
114 | for source in sources: | |||
|
115 | self.assertEqual(dctx.decompress(source), b'foo') | |||
|
116 | ||||
39 | def test_no_content_size_in_frame(self): |
|
117 | def test_no_content_size_in_frame(self): | |
40 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
118 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
41 | compressed = cctx.compress(b'foobar') |
|
119 | compressed = cctx.compress(b'foobar') | |
42 |
|
120 | |||
43 | dctx = zstd.ZstdDecompressor() |
|
121 | dctx = zstd.ZstdDecompressor() | |
44 | with self.assertRaisesRegexp(zstd.ZstdError, ' |
|
122 | with self.assertRaisesRegexp(zstd.ZstdError, 'could not determine content size in frame header'): | |
45 | dctx.decompress(compressed) |
|
123 | dctx.decompress(compressed) | |
46 |
|
124 | |||
47 | def test_content_size_present(self): |
|
125 | def test_content_size_present(self): | |
48 | cctx = zstd.ZstdCompressor( |
|
126 | cctx = zstd.ZstdCompressor() | |
49 | compressed = cctx.compress(b'foobar') |
|
127 | compressed = cctx.compress(b'foobar') | |
50 |
|
128 | |||
51 | dctx = zstd.ZstdDecompressor() |
|
129 | dctx = zstd.ZstdDecompressor() | |
52 | decompressed = dctx.decompress(compressed) |
|
130 | decompressed = dctx.decompress(compressed) | |
53 | self.assertEqual(decompressed, b'foobar') |
|
131 | self.assertEqual(decompressed, b'foobar') | |
54 |
|
132 | |||
|
133 | def test_empty_roundtrip(self): | |||
|
134 | cctx = zstd.ZstdCompressor() | |||
|
135 | compressed = cctx.compress(b'') | |||
|
136 | ||||
|
137 | dctx = zstd.ZstdDecompressor() | |||
|
138 | decompressed = dctx.decompress(compressed) | |||
|
139 | ||||
|
140 | self.assertEqual(decompressed, b'') | |||
|
141 | ||||
55 | def test_max_output_size(self): |
|
142 | def test_max_output_size(self): | |
56 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
143 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
57 | source = b'foobar' * 256 |
|
144 | source = b'foobar' * 256 | |
@@ -63,7 +150,8 b' class TestDecompressor_decompress(unitte' | |||||
63 | self.assertEqual(decompressed, source) |
|
150 | self.assertEqual(decompressed, source) | |
64 |
|
151 | |||
65 | # Input size - 1 fails |
|
152 | # Input size - 1 fails | |
66 | with self.assertRaisesRegexp(zstd.ZstdError, |
|
153 | with self.assertRaisesRegexp(zstd.ZstdError, | |
|
154 | 'decompression error: did not decompress full frame'): | |||
67 | dctx.decompress(compressed, max_output_size=len(source) - 1) |
|
155 | dctx.decompress(compressed, max_output_size=len(source) - 1) | |
68 |
|
156 | |||
69 | # Input size + 1 works |
|
157 | # Input size + 1 works | |
@@ -94,7 +182,7 b' class TestDecompressor_decompress(unitte' | |||||
94 | d = zstd.train_dictionary(8192, samples) |
|
182 | d = zstd.train_dictionary(8192, samples) | |
95 |
|
183 | |||
96 | orig = b'foobar' * 16384 |
|
184 | orig = b'foobar' * 16384 | |
97 | cctx = zstd.ZstdCompressor(level=1, dict_data=d |
|
185 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |
98 | compressed = cctx.compress(orig) |
|
186 | compressed = cctx.compress(orig) | |
99 |
|
187 | |||
100 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
188 | dctx = zstd.ZstdDecompressor(dict_data=d) | |
@@ -113,7 +201,7 b' class TestDecompressor_decompress(unitte' | |||||
113 |
|
201 | |||
114 | sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192) |
|
202 | sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192) | |
115 | compressed = [] |
|
203 | compressed = [] | |
116 | cctx = zstd.ZstdCompressor(level=1, dict_data=d |
|
204 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |
117 | for source in sources: |
|
205 | for source in sources: | |
118 | compressed.append(cctx.compress(source)) |
|
206 | compressed.append(cctx.compress(source)) | |
119 |
|
207 | |||
@@ -122,6 +210,21 b' class TestDecompressor_decompress(unitte' | |||||
122 | decompressed = dctx.decompress(compressed[i]) |
|
210 | decompressed = dctx.decompress(compressed[i]) | |
123 | self.assertEqual(decompressed, sources[i]) |
|
211 | self.assertEqual(decompressed, sources[i]) | |
124 |
|
212 | |||
|
213 | def test_max_window_size(self): | |||
|
214 | with open(__file__, 'rb') as fh: | |||
|
215 | source = fh.read() | |||
|
216 | ||||
|
217 | # If we write a content size, the decompressor engages single pass | |||
|
218 | # mode and the window size doesn't come into play. | |||
|
219 | cctx = zstd.ZstdCompressor(write_content_size=False) | |||
|
220 | frame = cctx.compress(source) | |||
|
221 | ||||
|
222 | dctx = zstd.ZstdDecompressor(max_window_size=1) | |||
|
223 | ||||
|
224 | with self.assertRaisesRegexp( | |||
|
225 | zstd.ZstdError, 'decompression error: Frame requires too much memory'): | |||
|
226 | dctx.decompress(frame, max_output_size=len(source)) | |||
|
227 | ||||
125 |
|
228 | |||
126 | @make_cffi |
|
229 | @make_cffi | |
127 | class TestDecompressor_copy_stream(unittest.TestCase): |
|
230 | class TestDecompressor_copy_stream(unittest.TestCase): | |
@@ -186,6 +289,211 b' class TestDecompressor_copy_stream(unitt' | |||||
186 |
|
289 | |||
187 |
|
290 | |||
188 | @make_cffi |
|
291 | @make_cffi | |
|
292 | class TestDecompressor_stream_reader(unittest.TestCase): | |||
|
293 | def test_context_manager(self): | |||
|
294 | dctx = zstd.ZstdDecompressor() | |||
|
295 | ||||
|
296 | reader = dctx.stream_reader(b'foo') | |||
|
297 | with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): | |||
|
298 | reader.read(1) | |||
|
299 | ||||
|
300 | with dctx.stream_reader(b'foo') as reader: | |||
|
301 | with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'): | |||
|
302 | with reader as reader2: | |||
|
303 | pass | |||
|
304 | ||||
|
305 | def test_not_implemented(self): | |||
|
306 | dctx = zstd.ZstdDecompressor() | |||
|
307 | ||||
|
308 | with dctx.stream_reader(b'foo') as reader: | |||
|
309 | with self.assertRaises(NotImplementedError): | |||
|
310 | reader.readline() | |||
|
311 | ||||
|
312 | with self.assertRaises(NotImplementedError): | |||
|
313 | reader.readlines() | |||
|
314 | ||||
|
315 | with self.assertRaises(NotImplementedError): | |||
|
316 | reader.readall() | |||
|
317 | ||||
|
318 | with self.assertRaises(NotImplementedError): | |||
|
319 | iter(reader) | |||
|
320 | ||||
|
321 | with self.assertRaises(NotImplementedError): | |||
|
322 | next(reader) | |||
|
323 | ||||
|
324 | with self.assertRaises(io.UnsupportedOperation): | |||
|
325 | reader.write(b'foo') | |||
|
326 | ||||
|
327 | with self.assertRaises(io.UnsupportedOperation): | |||
|
328 | reader.writelines([]) | |||
|
329 | ||||
|
330 | def test_constant_methods(self): | |||
|
331 | dctx = zstd.ZstdDecompressor() | |||
|
332 | ||||
|
333 | with dctx.stream_reader(b'foo') as reader: | |||
|
334 | self.assertTrue(reader.readable()) | |||
|
335 | self.assertFalse(reader.writable()) | |||
|
336 | self.assertTrue(reader.seekable()) | |||
|
337 | self.assertFalse(reader.isatty()) | |||
|
338 | self.assertIsNone(reader.flush()) | |||
|
339 | ||||
|
340 | def test_read_closed(self): | |||
|
341 | dctx = zstd.ZstdDecompressor() | |||
|
342 | ||||
|
343 | with dctx.stream_reader(b'foo') as reader: | |||
|
344 | reader.close() | |||
|
345 | with self.assertRaisesRegexp(ValueError, 'stream is closed'): | |||
|
346 | reader.read(1) | |||
|
347 | ||||
|
348 | def test_bad_read_size(self): | |||
|
349 | dctx = zstd.ZstdDecompressor() | |||
|
350 | ||||
|
351 | with dctx.stream_reader(b'foo') as reader: | |||
|
352 | with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'): | |||
|
353 | reader.read(-1) | |||
|
354 | ||||
|
355 | with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'): | |||
|
356 | reader.read(0) | |||
|
357 | ||||
|
358 | def test_read_buffer(self): | |||
|
359 | cctx = zstd.ZstdCompressor() | |||
|
360 | ||||
|
361 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) | |||
|
362 | frame = cctx.compress(source) | |||
|
363 | ||||
|
364 | dctx = zstd.ZstdDecompressor() | |||
|
365 | ||||
|
366 | with dctx.stream_reader(frame) as reader: | |||
|
367 | self.assertEqual(reader.tell(), 0) | |||
|
368 | ||||
|
369 | # We should get entire frame in one read. | |||
|
370 | result = reader.read(8192) | |||
|
371 | self.assertEqual(result, source) | |||
|
372 | self.assertEqual(reader.tell(), len(source)) | |||
|
373 | ||||
|
374 | # Read after EOF should return empty bytes. | |||
|
375 | self.assertEqual(reader.read(), b'') | |||
|
376 | self.assertEqual(reader.tell(), len(result)) | |||
|
377 | ||||
|
378 | self.assertTrue(reader.closed()) | |||
|
379 | ||||
|
380 | def test_read_buffer_small_chunks(self): | |||
|
381 | cctx = zstd.ZstdCompressor() | |||
|
382 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) | |||
|
383 | frame = cctx.compress(source) | |||
|
384 | ||||
|
385 | dctx = zstd.ZstdDecompressor() | |||
|
386 | chunks = [] | |||
|
387 | ||||
|
388 | with dctx.stream_reader(frame, read_size=1) as reader: | |||
|
389 | while True: | |||
|
390 | chunk = reader.read(1) | |||
|
391 | if not chunk: | |||
|
392 | break | |||
|
393 | ||||
|
394 | chunks.append(chunk) | |||
|
395 | self.assertEqual(reader.tell(), sum(map(len, chunks))) | |||
|
396 | ||||
|
397 | self.assertEqual(b''.join(chunks), source) | |||
|
398 | ||||
|
399 | def test_read_stream(self): | |||
|
400 | cctx = zstd.ZstdCompressor() | |||
|
401 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) | |||
|
402 | frame = cctx.compress(source) | |||
|
403 | ||||
|
404 | dctx = zstd.ZstdDecompressor() | |||
|
405 | with dctx.stream_reader(io.BytesIO(frame)) as reader: | |||
|
406 | self.assertEqual(reader.tell(), 0) | |||
|
407 | ||||
|
408 | chunk = reader.read(8192) | |||
|
409 | self.assertEqual(chunk, source) | |||
|
410 | self.assertEqual(reader.tell(), len(source)) | |||
|
411 | self.assertEqual(reader.read(), b'') | |||
|
412 | self.assertEqual(reader.tell(), len(source)) | |||
|
413 | ||||
|
414 | def test_read_stream_small_chunks(self): | |||
|
415 | cctx = zstd.ZstdCompressor() | |||
|
416 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) | |||
|
417 | frame = cctx.compress(source) | |||
|
418 | ||||
|
419 | dctx = zstd.ZstdDecompressor() | |||
|
420 | chunks = [] | |||
|
421 | ||||
|
422 | with dctx.stream_reader(io.BytesIO(frame), read_size=1) as reader: | |||
|
423 | while True: | |||
|
424 | chunk = reader.read(1) | |||
|
425 | if not chunk: | |||
|
426 | break | |||
|
427 | ||||
|
428 | chunks.append(chunk) | |||
|
429 | self.assertEqual(reader.tell(), sum(map(len, chunks))) | |||
|
430 | ||||
|
431 | self.assertEqual(b''.join(chunks), source) | |||
|
432 | ||||
|
433 | def test_read_after_exit(self): | |||
|
434 | cctx = zstd.ZstdCompressor() | |||
|
435 | frame = cctx.compress(b'foo' * 60) | |||
|
436 | ||||
|
437 | dctx = zstd.ZstdDecompressor() | |||
|
438 | ||||
|
439 | with dctx.stream_reader(frame) as reader: | |||
|
440 | while reader.read(16): | |||
|
441 | pass | |||
|
442 | ||||
|
443 | with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): | |||
|
444 | reader.read(10) | |||
|
445 | ||||
|
446 | def test_illegal_seeks(self): | |||
|
447 | cctx = zstd.ZstdCompressor() | |||
|
448 | frame = cctx.compress(b'foo' * 60) | |||
|
449 | ||||
|
450 | dctx = zstd.ZstdDecompressor() | |||
|
451 | ||||
|
452 | with dctx.stream_reader(frame) as reader: | |||
|
453 | with self.assertRaisesRegexp(ValueError, | |||
|
454 | 'cannot seek to negative position'): | |||
|
455 | reader.seek(-1, os.SEEK_SET) | |||
|
456 | ||||
|
457 | reader.read(1) | |||
|
458 | ||||
|
459 | with self.assertRaisesRegexp( | |||
|
460 | ValueError, 'cannot seek zstd decompression stream backwards'): | |||
|
461 | reader.seek(0, os.SEEK_SET) | |||
|
462 | ||||
|
463 | with self.assertRaisesRegexp( | |||
|
464 | ValueError, 'cannot seek zstd decompression stream backwards'): | |||
|
465 | reader.seek(-1, os.SEEK_CUR) | |||
|
466 | ||||
|
467 | with self.assertRaisesRegexp( | |||
|
468 | ValueError, | |||
|
469 | 'zstd decompression streams cannot be seeked with SEEK_END'): | |||
|
470 | reader.seek(0, os.SEEK_END) | |||
|
471 | ||||
|
472 | reader.close() | |||
|
473 | ||||
|
474 | with self.assertRaisesRegexp(ValueError, 'stream is closed'): | |||
|
475 | reader.seek(4, os.SEEK_SET) | |||
|
476 | ||||
|
477 | with self.assertRaisesRegexp( | |||
|
478 | zstd.ZstdError, 'seek\(\) must be called from an active context'): | |||
|
479 | reader.seek(0) | |||
|
480 | ||||
|
481 | def test_seek(self): | |||
|
482 | source = b'foobar' * 60 | |||
|
483 | cctx = zstd.ZstdCompressor() | |||
|
484 | frame = cctx.compress(source) | |||
|
485 | ||||
|
486 | dctx = zstd.ZstdDecompressor() | |||
|
487 | ||||
|
488 | with dctx.stream_reader(frame) as reader: | |||
|
489 | reader.seek(3) | |||
|
490 | self.assertEqual(reader.read(3), b'bar') | |||
|
491 | ||||
|
492 | reader.seek(4, os.SEEK_CUR) | |||
|
493 | self.assertEqual(reader.read(2), b'ar') | |||
|
494 | ||||
|
495 | ||||
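Note: the TestDecompressor_stream_reader tests above describe the read-only, forward-only file object returned by ZstdDecompressor.stream_reader(). A condensed usage sketch mirroring test_read_buffer and test_seek (the sample data is illustrative):

import zstandard as zstd

frame = zstd.ZstdCompressor().compress(b'foobar' * 60)

dctx = zstd.ZstdDecompressor()
with dctx.stream_reader(frame) as reader:   # also accepts a file-like source
    reader.seek(3)                          # only forward seeks are allowed
    assert reader.read(3) == b'bar'
    rest = reader.read(8192)                # b'' once the frame is exhausted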
|
496 | @make_cffi | |||
189 | class TestDecompressor_decompressobj(unittest.TestCase): |
|
497 | class TestDecompressor_decompressobj(unittest.TestCase): | |
190 | def test_simple(self): |
|
498 | def test_simple(self): | |
191 | data = zstd.ZstdCompressor(level=1).compress(b'foobar') |
|
499 | data = zstd.ZstdCompressor(level=1).compress(b'foobar') | |
@@ -194,6 +502,24 b' class TestDecompressor_decompressobj(uni' | |||||
194 | dobj = dctx.decompressobj() |
|
502 | dobj = dctx.decompressobj() | |
195 | self.assertEqual(dobj.decompress(data), b'foobar') |
|
503 | self.assertEqual(dobj.decompress(data), b'foobar') | |
196 |
|
504 | |||
|
505 | def test_input_types(self): | |||
|
506 | compressed = zstd.ZstdCompressor(level=1).compress(b'foo') | |||
|
507 | ||||
|
508 | dctx = zstd.ZstdDecompressor() | |||
|
509 | ||||
|
510 | mutable_array = bytearray(len(compressed)) | |||
|
511 | mutable_array[:] = compressed | |||
|
512 | ||||
|
513 | sources = [ | |||
|
514 | memoryview(compressed), | |||
|
515 | bytearray(compressed), | |||
|
516 | mutable_array, | |||
|
517 | ] | |||
|
518 | ||||
|
519 | for source in sources: | |||
|
520 | dobj = dctx.decompressobj() | |||
|
521 | self.assertEqual(dobj.decompress(source), b'foo') | |||
|
522 | ||||
197 | def test_reuse(self): |
|
523 | def test_reuse(self): | |
198 | data = zstd.ZstdCompressor(level=1).compress(b'foobar') |
|
524 | data = zstd.ZstdCompressor(level=1).compress(b'foobar') | |
199 |
|
525 | |||
@@ -204,22 +530,58 b' class TestDecompressor_decompressobj(uni' | |||||
204 | with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'): |
|
530 | with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'): | |
205 | dobj.decompress(data) |
|
531 | dobj.decompress(data) | |
206 |
|
532 | |||
|
533 | def test_bad_write_size(self): | |||
|
534 | dctx = zstd.ZstdDecompressor() | |||
|
535 | ||||
|
536 | with self.assertRaisesRegexp(ValueError, 'write_size must be positive'): | |||
|
537 | dctx.decompressobj(write_size=0) | |||
|
538 | ||||
|
539 | def test_write_size(self): | |||
|
540 | source = b'foo' * 64 + b'bar' * 128 | |||
|
541 | data = zstd.ZstdCompressor(level=1).compress(source) | |||
|
542 | ||||
|
543 | dctx = zstd.ZstdDecompressor() | |||
|
544 | ||||
|
545 | for i in range(128): | |||
|
546 | dobj = dctx.decompressobj(write_size=i + 1) | |||
|
547 | self.assertEqual(dobj.decompress(data), source) | |||
207 |
|
548 | |||
208 | def decompress_via_writer(data): |
|
549 | def decompress_via_writer(data): | |
209 | buffer = io.BytesIO() |
|
550 | buffer = io.BytesIO() | |
210 | dctx = zstd.ZstdDecompressor() |
|
551 | dctx = zstd.ZstdDecompressor() | |
211 | with dctx. |
|
552 | with dctx.stream_writer(buffer) as decompressor: | |
212 | decompressor.write(data) |
|
553 | decompressor.write(data) | |
213 | return buffer.getvalue() |
|
554 | return buffer.getvalue() | |
214 |
|
555 | |||
215 |
|
556 | |||
216 | @make_cffi |
|
557 | @make_cffi | |
217 | class TestDecompressor_ |
|
558 | class TestDecompressor_stream_writer(unittest.TestCase): | |
218 | def test_empty_roundtrip(self): |
|
559 | def test_empty_roundtrip(self): | |
219 | cctx = zstd.ZstdCompressor() |
|
560 | cctx = zstd.ZstdCompressor() | |
220 | empty = cctx.compress(b'') |
|
561 | empty = cctx.compress(b'') | |
221 | self.assertEqual(decompress_via_writer(empty), b'') |
|
562 | self.assertEqual(decompress_via_writer(empty), b'') | |
222 |
|
563 | |||
|
564 | def test_input_types(self): | |||
|
565 | cctx = zstd.ZstdCompressor(level=1) | |||
|
566 | compressed = cctx.compress(b'foo') | |||
|
567 | ||||
|
568 | mutable_array = bytearray(len(compressed)) | |||
|
569 | mutable_array[:] = compressed | |||
|
570 | ||||
|
571 | sources = [ | |||
|
572 | memoryview(compressed), | |||
|
573 | bytearray(compressed), | |||
|
574 | mutable_array, | |||
|
575 | ] | |||
|
576 | ||||
|
577 | dctx = zstd.ZstdDecompressor() | |||
|
578 | for source in sources: | |||
|
579 | buffer = io.BytesIO() | |||
|
580 | with dctx.stream_writer(buffer) as decompressor: | |||
|
581 | decompressor.write(source) | |||
|
582 | ||||
|
583 | self.assertEqual(buffer.getvalue(), b'foo') | |||
|
584 | ||||
223 | def test_large_roundtrip(self): |
|
585 | def test_large_roundtrip(self): | |
224 | chunks = [] |
|
586 | chunks = [] | |
225 | for i in range(255): |
|
587 | for i in range(255): | |
@@ -242,7 +604,7 b' class TestDecompressor_write_to(unittest' | |||||
242 |
|
604 | |||
243 | buffer = io.BytesIO() |
|
605 | buffer = io.BytesIO() | |
244 | dctx = zstd.ZstdDecompressor() |
|
606 | dctx = zstd.ZstdDecompressor() | |
245 | with dctx. |
|
607 | with dctx.stream_writer(buffer) as decompressor: | |
246 | pos = 0 |
|
608 | pos = 0 | |
247 | while pos < len(compressed): |
|
609 | while pos < len(compressed): | |
248 | pos2 = pos + 8192 |
|
610 | pos2 = pos + 8192 | |
@@ -262,14 +624,14 b' class TestDecompressor_write_to(unittest' | |||||
262 | orig = b'foobar' * 16384 |
|
624 | orig = b'foobar' * 16384 | |
263 | buffer = io.BytesIO() |
|
625 | buffer = io.BytesIO() | |
264 | cctx = zstd.ZstdCompressor(dict_data=d) |
|
626 | cctx = zstd.ZstdCompressor(dict_data=d) | |
265 | with cctx. |
|
627 | with cctx.stream_writer(buffer) as compressor: | |
266 | self.assertEqual(compressor.write(orig), |
|
628 | self.assertEqual(compressor.write(orig), 0) | |
267 |
|
629 | |||
268 | compressed = buffer.getvalue() |
|
630 | compressed = buffer.getvalue() | |
269 | buffer = io.BytesIO() |
|
631 | buffer = io.BytesIO() | |
270 |
|
632 | |||
271 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
633 | dctx = zstd.ZstdDecompressor(dict_data=d) | |
272 | with dctx. |
|
634 | with dctx.stream_writer(buffer) as decompressor: | |
273 | self.assertEqual(decompressor.write(compressed), len(orig)) |
|
635 | self.assertEqual(decompressor.write(compressed), len(orig)) | |
274 |
|
636 | |||
275 | self.assertEqual(buffer.getvalue(), orig) |
|
637 | self.assertEqual(buffer.getvalue(), orig) | |
@@ -277,7 +639,7 b' class TestDecompressor_write_to(unittest' | |||||
277 | def test_memory_size(self): |
|
639 | def test_memory_size(self): | |
278 | dctx = zstd.ZstdDecompressor() |
|
640 | dctx = zstd.ZstdDecompressor() | |
279 | buffer = io.BytesIO() |
|
641 | buffer = io.BytesIO() | |
280 | with dctx. |
|
642 | with dctx.stream_writer(buffer) as decompressor: | |
281 | size = decompressor.memory_size() |
|
643 | size = decompressor.memory_size() | |
282 |
|
644 | |||
283 | self.assertGreater(size, 100000) |
|
645 | self.assertGreater(size, 100000) | |
@@ -286,7 +648,7 b' class TestDecompressor_write_to(unittest' | |||||
286 | source = zstd.ZstdCompressor().compress(b'foobarfoobar') |
|
648 | source = zstd.ZstdCompressor().compress(b'foobarfoobar') | |
287 | dest = OpCountingBytesIO() |
|
649 | dest = OpCountingBytesIO() | |
288 | dctx = zstd.ZstdDecompressor() |
|
650 | dctx = zstd.ZstdDecompressor() | |
289 | with dctx. |
|
651 | with dctx.stream_writer(dest, write_size=1) as decompressor: | |
290 | s = struct.Struct('>B') |
|
652 | s = struct.Struct('>B') | |
291 | for c in source: |
|
653 | for c in source: | |
292 | if not isinstance(c, str): |
|
654 | if not isinstance(c, str): | |
@@ -298,29 +660,29 b' class TestDecompressor_write_to(unittest' | |||||
298 |
|
660 | |||
299 |
|
661 | |||
300 | @make_cffi |
|
662 | @make_cffi | |
301 | class TestDecompressor_read_ |
|
663 | class TestDecompressor_read_to_iter(unittest.TestCase): | |
302 | def test_type_validation(self): |
|
664 | def test_type_validation(self): | |
303 | dctx = zstd.ZstdDecompressor() |
|
665 | dctx = zstd.ZstdDecompressor() | |
304 |
|
666 | |||
305 | # Object with read() works. |
|
667 | # Object with read() works. | |
306 | dctx.read_ |
|
668 | dctx.read_to_iter(io.BytesIO()) | |
307 |
|
669 | |||
308 | # Buffer protocol works. |
|
670 | # Buffer protocol works. | |
309 | dctx.read_ |
|
671 | dctx.read_to_iter(b'foobar') | |
310 |
|
672 | |||
311 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): |
|
673 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): | |
312 | b''.join(dctx.read_ |
|
674 | b''.join(dctx.read_to_iter(True)) | |
313 |
|
675 | |||
314 | def test_empty_input(self): |
|
676 | def test_empty_input(self): | |
315 | dctx = zstd.ZstdDecompressor() |
|
677 | dctx = zstd.ZstdDecompressor() | |
316 |
|
678 | |||
317 | source = io.BytesIO() |
|
679 | source = io.BytesIO() | |
318 | it = dctx.read_ |
|
680 | it = dctx.read_to_iter(source) | |
319 | # TODO this is arguably wrong. Should get an error about missing frame foo. |
|
681 | # TODO this is arguably wrong. Should get an error about missing frame foo. | |
320 | with self.assertRaises(StopIteration): |
|
682 | with self.assertRaises(StopIteration): | |
321 | next(it) |
|
683 | next(it) | |
322 |
|
684 | |||
323 | it = dctx.read_ |
|
685 | it = dctx.read_to_iter(b'') | |
324 | with self.assertRaises(StopIteration): |
|
686 | with self.assertRaises(StopIteration): | |
325 | next(it) |
|
687 | next(it) | |
326 |
|
688 | |||
@@ -328,11 +690,11 b' class TestDecompressor_read_from(unittes' | |||||
328 | dctx = zstd.ZstdDecompressor() |
|
690 | dctx = zstd.ZstdDecompressor() | |
329 |
|
691 | |||
330 | source = io.BytesIO(b'foobar') |
|
692 | source = io.BytesIO(b'foobar') | |
331 | it = dctx.read_ |
|
693 | it = dctx.read_to_iter(source) | |
332 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): |
|
694 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): | |
333 | next(it) |
|
695 | next(it) | |
334 |
|
696 | |||
335 | it = dctx.read_ |
|
697 | it = dctx.read_to_iter(b'foobar') | |
336 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): |
|
698 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): | |
337 | next(it) |
|
699 | next(it) | |
338 |
|
700 | |||
@@ -344,7 +706,7 b' class TestDecompressor_read_from(unittes' | |||||
344 | source.seek(0) |
|
706 | source.seek(0) | |
345 |
|
707 | |||
346 | dctx = zstd.ZstdDecompressor() |
|
708 | dctx = zstd.ZstdDecompressor() | |
347 | it = dctx.read_ |
|
709 | it = dctx.read_to_iter(source) | |
348 |
|
710 | |||
349 | # No chunks should be emitted since there is no data. |
|
711 | # No chunks should be emitted since there is no data. | |
350 | with self.assertRaises(StopIteration): |
|
712 | with self.assertRaises(StopIteration): | |
@@ -358,17 +720,17 b' class TestDecompressor_read_from(unittes' | |||||
358 | dctx = zstd.ZstdDecompressor() |
|
720 | dctx = zstd.ZstdDecompressor() | |
359 |
|
721 | |||
360 | with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'): |
|
722 | with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'): | |
361 | b''.join(dctx.read_ |
|
723 | b''.join(dctx.read_to_iter(b'', skip_bytes=1, read_size=1)) | |
362 |
|
724 | |||
363 | with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'): |
|
725 | with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'): | |
364 | b''.join(dctx.read_ |
|
726 | b''.join(dctx.read_to_iter(b'foobar', skip_bytes=10)) | |
365 |
|
727 | |||
366 | def test_skip_bytes(self): |
|
728 | def test_skip_bytes(self): | |
367 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
729 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
368 | compressed = cctx.compress(b'foobar') |
|
730 | compressed = cctx.compress(b'foobar') | |
369 |
|
731 | |||
370 | dctx = zstd.ZstdDecompressor() |
|
732 | dctx = zstd.ZstdDecompressor() | |
371 | output = b''.join(dctx.read_ |
|
733 | output = b''.join(dctx.read_to_iter(b'hdr' + compressed, skip_bytes=3)) | |
372 | self.assertEqual(output, b'foobar') |
|
734 | self.assertEqual(output, b'foobar') | |
373 |
|
735 | |||
374 | def test_large_output(self): |
|
736 | def test_large_output(self): | |
@@ -382,7 +744,7 b' class TestDecompressor_read_from(unittes' | |||||
382 | compressed.seek(0) |
|
744 | compressed.seek(0) | |
383 |
|
745 | |||
384 | dctx = zstd.ZstdDecompressor() |
|
746 | dctx = zstd.ZstdDecompressor() | |
385 | it = dctx.read_ |
|
747 | it = dctx.read_to_iter(compressed) | |
386 |
|
748 | |||
387 | chunks = [] |
|
749 | chunks = [] | |
388 | chunks.append(next(it)) |
|
750 | chunks.append(next(it)) | |
@@ -395,7 +757,7 b' class TestDecompressor_read_from(unittes' | |||||
395 | self.assertEqual(decompressed, source.getvalue()) |
|
757 | self.assertEqual(decompressed, source.getvalue()) | |
396 |
|
758 | |||
397 | # And again with buffer protocol. |
|
759 | # And again with buffer protocol. | |
398 | it = dctx.read_ |
|
760 | it = dctx.read_to_iter(compressed.getvalue()) | |
399 | chunks = [] |
|
761 | chunks = [] | |
400 | chunks.append(next(it)) |
|
762 | chunks.append(next(it)) | |
401 | chunks.append(next(it)) |
|
763 | chunks.append(next(it)) | |
@@ -406,12 +768,13 b' class TestDecompressor_read_from(unittes' | |||||
406 | decompressed = b''.join(chunks) |
|
768 | decompressed = b''.join(chunks) | |
407 | self.assertEqual(decompressed, source.getvalue()) |
|
769 | self.assertEqual(decompressed, source.getvalue()) | |
408 |
|
770 | |||
|
771 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
409 | def test_large_input(self): |
|
772 | def test_large_input(self): | |
410 | bytes = list(struct.Struct('>B').pack(i) for i in range(256)) |
|
773 | bytes = list(struct.Struct('>B').pack(i) for i in range(256)) | |
411 | compressed = io.BytesIO() |
|
774 | compressed = io.BytesIO() | |
412 | input_size = 0 |
|
775 | input_size = 0 | |
413 | cctx = zstd.ZstdCompressor(level=1) |
|
776 | cctx = zstd.ZstdCompressor(level=1) | |
414 | with cctx. |
|
777 | with cctx.stream_writer(compressed) as compressor: | |
415 | while True: |
|
778 | while True: | |
416 | compressor.write(random.choice(bytes)) |
|
779 | compressor.write(random.choice(bytes)) | |
417 | input_size += 1 |
|
780 | input_size += 1 | |
@@ -426,7 +789,7 b' class TestDecompressor_read_from(unittes' | |||||
426 | zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE) |
|
789 | zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE) | |
427 |
|
790 | |||
428 | dctx = zstd.ZstdDecompressor() |
|
791 | dctx = zstd.ZstdDecompressor() | |
429 | it = dctx.read_ |
|
792 | it = dctx.read_to_iter(compressed) | |
430 |
|
793 | |||
431 | chunks = [] |
|
794 | chunks = [] | |
432 | chunks.append(next(it)) |
|
795 | chunks.append(next(it)) | |
@@ -440,7 +803,7 b' class TestDecompressor_read_from(unittes' | |||||
440 | self.assertEqual(len(decompressed), input_size) |
|
803 | self.assertEqual(len(decompressed), input_size) | |
441 |
|
804 | |||
442 | # And again with buffer protocol. |
|
805 | # And again with buffer protocol. | |
443 | it = dctx.read_ |
|
806 | it = dctx.read_to_iter(compressed.getvalue()) | |
444 |
|
807 | |||
445 | chunks = [] |
|
808 | chunks = [] | |
446 | chunks.append(next(it)) |
|
809 | chunks.append(next(it)) | |
@@ -460,7 +823,7 b' class TestDecompressor_read_from(unittes' | |||||
460 | source = io.BytesIO() |
|
823 | source = io.BytesIO() | |
461 |
|
824 | |||
462 | compressed = io.BytesIO() |
|
825 | compressed = io.BytesIO() | |
463 | with cctx. |
|
826 | with cctx.stream_writer(compressed) as compressor: | |
464 | for i in range(256): |
|
827 | for i in range(256): | |
465 | chunk = b'\0' * 1024 |
|
828 | chunk = b'\0' * 1024 | |
466 | compressor.write(chunk) |
|
829 | compressor.write(chunk) | |
@@ -473,17 +836,34 b' class TestDecompressor_read_from(unittes' | |||||
473 | self.assertEqual(simple, source.getvalue()) |
|
836 | self.assertEqual(simple, source.getvalue()) | |
474 |
|
837 | |||
475 | compressed.seek(0) |
|
838 | compressed.seek(0) | |
476 | streamed = b''.join(dctx.read_ |
|
839 | streamed = b''.join(dctx.read_to_iter(compressed)) | |
477 | self.assertEqual(streamed, source.getvalue()) |
|
840 | self.assertEqual(streamed, source.getvalue()) | |
478 |
|
841 | |||
479 | def test_read_write_size(self): |
|
842 | def test_read_write_size(self): | |
480 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar')) |
|
843 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar')) | |
481 | dctx = zstd.ZstdDecompressor() |
|
844 | dctx = zstd.ZstdDecompressor() | |
482 | for chunk in dctx.read_ |
|
845 | for chunk in dctx.read_to_iter(source, read_size=1, write_size=1): | |
483 | self.assertEqual(len(chunk), 1) |
|
846 | self.assertEqual(len(chunk), 1) | |
484 |
|
847 | |||
485 | self.assertEqual(source._read_count, len(source.getvalue())) |
|
848 | self.assertEqual(source._read_count, len(source.getvalue())) | |
486 |
|
849 | |||
|
850 | def test_magic_less(self): | |||
|
851 | params = zstd.CompressionParameters.from_level( | |||
|
852 | 1, format=zstd.FORMAT_ZSTD1_MAGICLESS) | |||
|
853 | cctx = zstd.ZstdCompressor(compression_params=params) | |||
|
854 | frame = cctx.compress(b'foobar') | |||
|
855 | ||||
|
856 | self.assertNotEqual(frame[0:4], b'\x28\xb5\x2f\xfd') | |||
|
857 | ||||
|
858 | dctx = zstd.ZstdDecompressor() | |||
|
859 | with self.assertRaisesRegexp( | |||
|
860 | zstd.ZstdError, 'error determining content size from frame header'): | |||
|
861 | dctx.decompress(frame) | |||
|
862 | ||||
|
863 | dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS) | |||
|
864 | res = b''.join(dctx.read_to_iter(frame)) | |||
|
865 | self.assertEqual(res, b'foobar') | |||
|
866 | ||||
487 |
|
867 | |||
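Note: test_magic_less above exercises frames emitted without the 4-byte zstd magic number. A brief sketch of the round trip the test performs, using the same constants that appear in the diff:

import zstandard as zstd

params = zstd.CompressionParameters.from_level(1, format=zstd.FORMAT_ZSTD1_MAGICLESS)
cctx = zstd.ZstdCompressor(compression_params=params)
frame = cctx.compress(b'foobar')            # no \x28\xb5\x2f\xfd prefix

# A decompressor configured for the magicless format can stream it back.
dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS)
assert b''.join(dctx.read_to_iter(frame)) == b'foobar'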
488 | @make_cffi |
|
868 | @make_cffi | |
489 | class TestDecompressor_content_dict_chain(unittest.TestCase): |
|
869 | class TestDecompressor_content_dict_chain(unittest.TestCase): | |
@@ -511,19 +891,20 b' class TestDecompressor_content_dict_chai' | |||||
511 | with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'): |
|
891 | with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'): | |
512 | dctx.decompress_content_dict_chain([b'foo' * 8]) |
|
892 | dctx.decompress_content_dict_chain([b'foo' * 8]) | |
513 |
|
893 | |||
514 | no_size = zstd.ZstdCompressor().compress(b'foo' * 64) |
|
894 | no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64) | |
515 |
|
895 | |||
516 | with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'): |
|
896 | with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'): | |
517 | dctx.decompress_content_dict_chain([no_size]) |
|
897 | dctx.decompress_content_dict_chain([no_size]) | |
518 |
|
898 | |||
519 | # Corrupt first frame. |
|
899 | # Corrupt first frame. | |
520 | frame = zstd.ZstdCompressor( |
|
900 | frame = zstd.ZstdCompressor().compress(b'foo' * 64) | |
521 | frame = frame[0:12] + frame[15:] |
|
901 | frame = frame[0:12] + frame[15:] | |
522 | with self.assertRaisesRegexp(zstd.ZstdError, |
|
902 | with self.assertRaisesRegexp(zstd.ZstdError, | |
|
903 | 'chunk 0 did not decompress full frame'): | |||
523 | dctx.decompress_content_dict_chain([frame]) |
|
904 | dctx.decompress_content_dict_chain([frame]) | |
524 |
|
905 | |||
525 | def test_bad_subsequent_input(self): |
|
906 | def test_bad_subsequent_input(self): | |
526 | initial = zstd.ZstdCompressor( |
|
907 | initial = zstd.ZstdCompressor().compress(b'foo' * 64) | |
527 |
|
908 | |||
528 | dctx = zstd.ZstdDecompressor() |
|
909 | dctx = zstd.ZstdDecompressor() | |
529 |
|
910 | |||
@@ -539,17 +920,17 b' class TestDecompressor_content_dict_chai' | |||||
539 | with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'): |
|
920 | with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'): | |
540 | dctx.decompress_content_dict_chain([initial, b'foo' * 8]) |
|
921 | dctx.decompress_content_dict_chain([initial, b'foo' * 8]) | |
541 |
|
922 | |||
542 | no_size = zstd.ZstdCompressor().compress(b'foo' * 64) |
|
923 | no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64) | |
543 |
|
924 | |||
544 | with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'): |
|
925 | with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'): | |
545 | dctx.decompress_content_dict_chain([initial, no_size]) |
|
926 | dctx.decompress_content_dict_chain([initial, no_size]) | |
546 |
|
927 | |||
547 | # Corrupt second frame. |
|
928 | # Corrupt second frame. | |
548 | cctx = zstd.ZstdCompressor( |
|
929 | cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b'foo' * 64)) | |
549 | frame = cctx.compress(b'bar' * 64) |
|
930 | frame = cctx.compress(b'bar' * 64) | |
550 | frame = frame[0:12] + frame[15:] |
|
931 | frame = frame[0:12] + frame[15:] | |
551 |
|
932 | |||
552 | with self.assertRaisesRegexp(zstd.ZstdError, 'c |
|
933 | with self.assertRaisesRegexp(zstd.ZstdError, 'chunk 1 did not decompress full frame'): | |
553 | dctx.decompress_content_dict_chain([initial, frame]) |
|
934 | dctx.decompress_content_dict_chain([initial, frame]) | |
554 |
|
935 | |||
555 | def test_simple(self): |
|
936 | def test_simple(self): | |
@@ -562,10 +943,10 b' class TestDecompressor_content_dict_chai' | |||||
562 | ] |
|
943 | ] | |
563 |
|
944 | |||
564 | chunks = [] |
|
945 | chunks = [] | |
565 | chunks.append(zstd.ZstdCompressor( |
|
946 | chunks.append(zstd.ZstdCompressor().compress(original[0])) | |
566 | for i, chunk in enumerate(original[1:]): |
|
947 | for i, chunk in enumerate(original[1:]): | |
567 | d = zstd.ZstdCompressionDict(original[i]) |
|
948 | d = zstd.ZstdCompressionDict(original[i]) | |
568 | cctx = zstd.ZstdCompressor(dict_data=d |
|
949 | cctx = zstd.ZstdCompressor(dict_data=d) | |
569 | chunks.append(cctx.compress(chunk)) |
|
950 | chunks.append(cctx.compress(chunk)) | |
570 |
|
951 | |||
571 | for i in range(1, len(original)): |
|
952 | for i in range(1, len(original)): | |
@@ -594,7 +975,7 b' class TestDecompressor_multi_decompress_' | |||||
594 | dctx.multi_decompress_to_buffer([b'foobarbaz']) |
|
975 | dctx.multi_decompress_to_buffer([b'foobarbaz']) | |
595 |
|
976 | |||
596 | def test_list_input(self): |
|
977 | def test_list_input(self): | |
597 | cctx = zstd.ZstdCompressor( |
|
978 | cctx = zstd.ZstdCompressor() | |
598 |
|
979 | |||
599 | original = [b'foo' * 4, b'bar' * 6] |
|
980 | original = [b'foo' * 4, b'bar' * 6] | |
600 | frames = [cctx.compress(d) for d in original] |
|
981 | frames = [cctx.compress(d) for d in original] | |
@@ -614,7 +995,7 b' class TestDecompressor_multi_decompress_' | |||||
614 | self.assertEqual(len(result[1]), 18) |
|
995 | self.assertEqual(len(result[1]), 18) | |
615 |
|
996 | |||
616 | def test_list_input_frame_sizes(self): |
|
997 | def test_list_input_frame_sizes(self): | |
617 | cctx = zstd.ZstdCompressor( |
|
998 | cctx = zstd.ZstdCompressor() | |
618 |
|
999 | |||
619 | original = [b'foo' * 4, b'bar' * 6, b'baz' * 8] |
|
1000 | original = [b'foo' * 4, b'bar' * 6, b'baz' * 8] | |
620 | frames = [cctx.compress(d) for d in original] |
|
1001 | frames = [cctx.compress(d) for d in original] | |
@@ -630,7 +1011,7 b' class TestDecompressor_multi_decompress_' | |||||
630 | self.assertEqual(result[i].tobytes(), data) |
|
1011 | self.assertEqual(result[i].tobytes(), data) | |
631 |
|
1012 | |||
632 | def test_buffer_with_segments_input(self): |
|
1013 | def test_buffer_with_segments_input(self): | |
633 | cctx = zstd.ZstdCompressor( |
|
1014 | cctx = zstd.ZstdCompressor() | |
634 |
|
1015 | |||
635 | original = [b'foo' * 4, b'bar' * 6] |
|
1016 | original = [b'foo' * 4, b'bar' * 6] | |
636 | frames = [cctx.compress(d) for d in original] |
|
1017 | frames = [cctx.compress(d) for d in original] | |
@@ -669,7 +1050,7 b' class TestDecompressor_multi_decompress_' | |||||
669 | self.assertEqual(result[i].tobytes(), data) |
|
1050 | self.assertEqual(result[i].tobytes(), data) | |
670 |
|
1051 | |||
671 | def test_buffer_with_segments_collection_input(self): |
|
1052 | def test_buffer_with_segments_collection_input(self): | |
672 | cctx = zstd.ZstdCompressor( |
|
1053 | cctx = zstd.ZstdCompressor() | |
673 |
|
1054 | |||
674 | original = [ |
|
1055 | original = [ | |
675 | b'foo0' * 2, |
|
1056 | b'foo0' * 2, | |
@@ -711,8 +1092,18 b' class TestDecompressor_multi_decompress_' | |||||
711 | for i in range(5): |
|
1092 | for i in range(5): | |
712 | self.assertEqual(decompressed[i].tobytes(), original[i]) |
|
1093 | self.assertEqual(decompressed[i].tobytes(), original[i]) | |
713 |
|
1094 | |||
|
1095 | def test_dict(self): | |||
|
1096 | d = zstd.train_dictionary(16384, generate_samples(), k=64, d=16) | |||
|
1097 | ||||
|
1098 | cctx = zstd.ZstdCompressor(dict_data=d, level=1) | |||
|
1099 | frames = [cctx.compress(s) for s in generate_samples()] | |||
|
1100 | ||||
|
1101 | dctx = zstd.ZstdDecompressor(dict_data=d) | |||
|
1102 | result = dctx.multi_decompress_to_buffer(frames) | |||
|
1103 | self.assertEqual([o.tobytes() for o in result], generate_samples()) | |||
|
1104 | ||||
714 | def test_multiple_threads(self): |
|
1105 | def test_multiple_threads(self): | |
715 | cctx = zstd.ZstdCompressor( |
|
1106 | cctx = zstd.ZstdCompressor() | |
716 |
|
1107 | |||
717 | frames = [] |
|
1108 | frames = [] | |
718 | frames.extend(cctx.compress(b'x' * 64) for i in range(256)) |
|
1109 | frames.extend(cctx.compress(b'x' * 64) for i in range(256)) | |
@@ -727,15 +1118,22 b' class TestDecompressor_multi_decompress_' | |||||
727 | self.assertEqual(result[256].tobytes(), b'y' * 64) |
|
1118 | self.assertEqual(result[256].tobytes(), b'y' * 64) | |
728 |
|
1119 | |||
729 | def test_item_failure(self): |
|
1120 | def test_item_failure(self): | |
730 | cctx = zstd.ZstdCompressor( |
|
1121 | cctx = zstd.ZstdCompressor() | |
731 | frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)] |
|
1122 | frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)] | |
732 |
|
1123 | |||
733 | frames[1] = frames[1] + b'extra' |
|
1124 | frames[1] = frames[1][0:15] + b'extra' + frames[1][15:] | |
734 |
|
1125 | |||
735 | dctx = zstd.ZstdDecompressor() |
|
1126 | dctx = zstd.ZstdDecompressor() | |
736 |
|
1127 | |||
737 | with self.assertRaisesRegexp(zstd.ZstdError, |
|
1128 | with self.assertRaisesRegexp(zstd.ZstdError, | |
|
1129 | 'error decompressing item 1: (' | |||
|
1130 | 'Corrupted block|' | |||
|
1131 | 'Destination buffer is too small)'): | |||
738 | dctx.multi_decompress_to_buffer(frames) |
|
1132 | dctx.multi_decompress_to_buffer(frames) | |
739 |
|
1133 | |||
740 | with self.assertRaisesRegexp(zstd.ZstdError, |
|
1134 | with self.assertRaisesRegexp(zstd.ZstdError, | |
|
1135 | 'error decompressing item 1: (' | |||
|
1136 | 'Corrupted block|' | |||
|
1137 | 'Destination buffer is too small)'): | |||
741 | dctx.multi_decompress_to_buffer(frames, threads=2) |
|
1138 | dctx.multi_decompress_to_buffer(frames, threads=2) | |
|
1139 |
@@ -1,10 +1,6 b'' | |||||
1 | import io |
|
1 | import io | |
2 | import os |
|
2 | import os | |
3 |
|
3 | import unittest | ||
4 | try: |
|
|||
5 | import unittest2 as unittest |
|
|||
6 | except ImportError: |
|
|||
7 | import unittest |
|
|||
8 |
|
4 | |||
9 | try: |
|
5 | try: | |
10 | import hypothesis |
|
6 | import hypothesis | |
@@ -12,7 +8,7 b' try:' | |||||
12 | except ImportError: |
|
8 | except ImportError: | |
13 | raise unittest.SkipTest('hypothesis not available') |
|
9 | raise unittest.SkipTest('hypothesis not available') | |
14 |
|
10 | |||
15 | import zstd |
|
11 | import zstandard as zstd | |
16 |
|
12 | |||
17 | from . common import ( |
|
13 | from . common import ( | |
18 | make_cffi, |
|
14 | make_cffi, | |
@@ -22,15 +18,96 b' from . common import (' | |||||
22 |
|
18 | |||
23 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
19 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |
24 | @make_cffi |
|
20 | @make_cffi | |
25 | class TestDecompressor_ |
|
21 | class TestDecompressor_stream_reader_fuzzing(unittest.TestCase): | |
|
22 | @hypothesis.settings( | |||
|
23 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |||
|
24 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
25 | level=strategies.integers(min_value=1, max_value=5), | |||
|
26 | source_read_size=strategies.integers(1, 16384), | |||
|
27 | read_sizes=strategies.data()) | |||
|
28 | def test_stream_source_read_variance(self, original, level, source_read_size, | |||
|
29 | read_sizes): | |||
|
30 | cctx = zstd.ZstdCompressor(level=level) | |||
|
31 | frame = cctx.compress(original) | |||
|
32 | ||||
|
33 | dctx = zstd.ZstdDecompressor() | |||
|
34 | source = io.BytesIO(frame) | |||
|
35 | ||||
|
36 | chunks = [] | |||
|
37 | with dctx.stream_reader(source, read_size=source_read_size) as reader: | |||
|
38 | while True: | |||
|
39 | read_size = read_sizes.draw(strategies.integers(1, 16384)) | |||
|
40 | chunk = reader.read(read_size) | |||
|
41 | if not chunk: | |||
|
42 | break | |||
|
43 | ||||
|
44 | chunks.append(chunk) | |||
|
45 | ||||
|
46 | self.assertEqual(b''.join(chunks), original) | |||
|
47 | ||||
|
48 | @hypothesis.settings( | |||
|
49 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |||
|
50 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
51 | level=strategies.integers(min_value=1, max_value=5), | |||
|
52 | source_read_size=strategies.integers(1, 16384), | |||
|
53 | read_sizes=strategies.data()) | |||
|
54 | def test_buffer_source_read_variance(self, original, level, source_read_size, | |||
|
55 | read_sizes): | |||
|
56 | cctx = zstd.ZstdCompressor(level=level) | |||
|
57 | frame = cctx.compress(original) | |||
|
58 | ||||
|
59 | dctx = zstd.ZstdDecompressor() | |||
|
60 | chunks = [] | |||
|
61 | ||||
|
62 | with dctx.stream_reader(frame, read_size=source_read_size) as reader: | |||
|
63 | while True: | |||
|
64 | read_size = read_sizes.draw(strategies.integers(1, 16384)) | |||
|
65 | chunk = reader.read(read_size) | |||
|
66 | if not chunk: | |||
|
67 | break | |||
|
68 | ||||
|
69 | chunks.append(chunk) | |||
|
70 | ||||
|
71 | self.assertEqual(b''.join(chunks), original) | |||
|
72 | ||||
|
73 | @hypothesis.settings( | |||
|
74 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |||
|
75 | @hypothesis.given( | |||
|
76 | original=strategies.sampled_from(random_input_data()), | |||
|
77 | level=strategies.integers(min_value=1, max_value=5), | |||
|
78 | source_read_size=strategies.integers(1, 16384), | |||
|
79 | seek_amounts=strategies.data(), | |||
|
80 | read_sizes=strategies.data()) | |||
|
81 | def test_relative_seeks(self, original, level, source_read_size, seek_amounts, | |||
|
82 | read_sizes): | |||
|
83 | cctx = zstd.ZstdCompressor(level=level) | |||
|
84 | frame = cctx.compress(original) | |||
|
85 | ||||
|
86 | dctx = zstd.ZstdDecompressor() | |||
|
87 | ||||
|
88 | with dctx.stream_reader(frame, read_size=source_read_size) as reader: | |||
|
89 | while True: | |||
|
90 | amount = seek_amounts.draw(strategies.integers(0, 16384)) | |||
|
91 | reader.seek(amount, os.SEEK_CUR) | |||
|
92 | ||||
|
93 | offset = reader.tell() | |||
|
94 | read_amount = read_sizes.draw(strategies.integers(1, 16384)) | |||
|
95 | chunk = reader.read(read_amount) | |||
|
96 | ||||
|
97 | if not chunk: | |||
|
98 | break | |||
|
99 | ||||
|
100 | self.assertEqual(original[offset:offset + len(chunk)], chunk) | |||
|
101 | ||||
|
102 | ||||
|
103 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
|
104 | @make_cffi | |||
|
105 | class TestDecompressor_stream_writer_fuzzing(unittest.TestCase): | |||
26 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
106 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
27 | level=strategies.integers(min_value=1, max_value=5), |
|
107 | level=strategies.integers(min_value=1, max_value=5), | |
28 | write_size=strategies.integers(min_value=1, max_value=8192), |
|
108 | write_size=strategies.integers(min_value=1, max_value=8192), | |
29 | input_sizes=strategies. |
|
109 | input_sizes=strategies.data()) | |
30 | strategies.integers(min_value=1, max_value=4096))) |
|
|||
31 | def test_write_size_variance(self, original, level, write_size, input_sizes): |
|
110 | def test_write_size_variance(self, original, level, write_size, input_sizes): | |
32 | input_sizes = iter(input_sizes) |
|
|||
33 |
|
||||
34 | cctx = zstd.ZstdCompressor(level=level) |
|
111 | cctx = zstd.ZstdCompressor(level=level) | |
35 | frame = cctx.compress(original) |
|
112 | frame = cctx.compress(original) | |
36 |
|
113 | |||
@@ -38,9 +115,10 b' class TestDecompressor_write_to_fuzzing(' | |||||
38 | source = io.BytesIO(frame) |
|
115 | source = io.BytesIO(frame) | |
39 | dest = io.BytesIO() |
|
116 | dest = io.BytesIO() | |
40 |
|
117 | |||
41 | with dctx.write_to(dest, write_size=write_size) as decompressor:
118 | with dctx.stream_writer(dest, write_size=write_size) as decompressor:
42 | while True: |
|
119 | while True: | |
43 | chunk = source.read(next(input_sizes)) |
|
120 | input_size = input_sizes.draw(strategies.integers(1, 4096)) | |
|
121 | chunk = source.read(input_size) | |||
44 | if not chunk: |
|
122 | if not chunk: | |
45 | break |
|
123 | break | |
46 |
|
124 | |||
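This hunk captures two migrations at once: dctx.write_to() becomes dctx.stream_writer(), and the input-size strategy moves to strategies.data(), with sizes drawn lazily inside the test body. A self-contained sketch of that draw-inside-the-loop pattern (illustrative only; the test name and strategies are invented):

    import io

    import hypothesis
    import hypothesis.strategies as strategies
    import zstandard as zstd


    @hypothesis.given(original=strategies.binary(min_size=1, max_size=8192),
                      input_sizes=strategies.data())
    def test_stream_writer_roundtrip(original, input_sizes):
        frame = zstd.ZstdCompressor(level=1).compress(original)
        source = io.BytesIO(frame)
        dest = io.BytesIO()

        dctx = zstd.ZstdDecompressor()
        with dctx.stream_writer(dest) as decompressor:
            while True:
                # strategies.data() defers each draw until the test runs.
                read_size = input_sizes.draw(strategies.integers(1, 4096))
                chunk = source.read(read_size)
                if not chunk:
                    break
                decompressor.write(chunk)

        assert dest.getvalue() == original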
@@ -74,11 +152,8 b' class TestDecompressor_copy_stream_fuzzi' | |||||
74 | class TestDecompressor_decompressobj_fuzzing(unittest.TestCase): |
|
152 | class TestDecompressor_decompressobj_fuzzing(unittest.TestCase): | |
75 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
153 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
76 | level=strategies.integers(min_value=1, max_value=5), |
|
154 | level=strategies.integers(min_value=1, max_value=5), | |
77 | chunk_sizes=strategies.streaming(
78 | strategies.integers(min_value=1, max_value=4096)))
155 | chunk_sizes=strategies.data())
79 | def test_random_input_sizes(self, original, level, chunk_sizes): |
|
156 | def test_random_input_sizes(self, original, level, chunk_sizes): | |
80 | chunk_sizes = iter(chunk_sizes) |
|
|||
81 |
|
||||
82 | cctx = zstd.ZstdCompressor(level=level) |
|
157 | cctx = zstd.ZstdCompressor(level=level) | |
83 | frame = cctx.compress(original) |
|
158 | frame = cctx.compress(original) | |
84 |
|
159 | |||
@@ -89,7 +164,33 b' class TestDecompressor_decompressobj_fuz' | |||||
89 |
|
164 | |||
90 | chunks = [] |
|
165 | chunks = [] | |
91 | while True: |
|
166 | while True: | |
92 | chunk = source.read(next(chunk_sizes)) |
|
167 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) | |
|
168 | chunk = source.read(chunk_size) | |||
|
169 | if not chunk: | |||
|
170 | break | |||
|
171 | ||||
|
172 | chunks.append(dobj.decompress(chunk)) | |||
|
173 | ||||
|
174 | self.assertEqual(b''.join(chunks), original) | |||
|
175 | ||||
|
176 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
177 | level=strategies.integers(min_value=1, max_value=5), | |||
|
178 | write_size=strategies.integers(min_value=1, | |||
|
179 | max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |||
|
180 | chunk_sizes=strategies.data()) | |||
|
181 | def test_random_output_sizes(self, original, level, write_size, chunk_sizes): | |||
|
182 | cctx = zstd.ZstdCompressor(level=level) | |||
|
183 | frame = cctx.compress(original) | |||
|
184 | ||||
|
185 | source = io.BytesIO(frame) | |||
|
186 | ||||
|
187 | dctx = zstd.ZstdDecompressor() | |||
|
188 | dobj = dctx.decompressobj(write_size=write_size) | |||
|
189 | ||||
|
190 | chunks = [] | |||
|
191 | while True: | |||
|
192 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) | |||
|
193 | chunk = source.read(chunk_size) | |||
93 | if not chunk: |
|
194 | if not chunk: | |
94 | break |
|
195 | break | |
95 |
|
196 | |||
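test_random_output_sizes above exercises the new write_size argument to decompressobj(), which appears to control the size of the output buffer each decompress() call uses. A plain, non-randomized sketch (sizes are arbitrary, not from the patch):

    import io

    import zstandard as zstd

    original = b'sample payload ' * 4096
    frame = zstd.ZstdCompressor(level=3).compress(original)

    dctx = zstd.ZstdDecompressor()
    dobj = dctx.decompressobj(
        write_size=2 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)

    source = io.BytesIO(frame)
    chunks = []
    while True:
        chunk = source.read(4096)
        if not chunk:
            break
        chunks.append(dobj.decompress(chunk))

    assert b''.join(chunks) == original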
@@ -100,7 +201,7 b' class TestDecompressor_decompressobj_fuz' | |||||
100 |
|
201 | |||
101 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
202 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |
102 | @make_cffi |
|
203 | @make_cffi | |
103 | class TestDecompressor_read_from_fuzzing(unittest.TestCase):
204 | class TestDecompressor_read_to_iter_fuzzing(unittest.TestCase):
104 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
205 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
105 | level=strategies.integers(min_value=1, max_value=5), |
|
206 | level=strategies.integers(min_value=1, max_value=5), | |
106 | read_size=strategies.integers(min_value=1, max_value=4096), |
|
207 | read_size=strategies.integers(min_value=1, max_value=4096), | |
@@ -112,7 +213,7 b' class TestDecompressor_read_from_fuzzing' | |||||
112 | source = io.BytesIO(frame) |
|
213 | source = io.BytesIO(frame) | |
113 |
|
214 | |||
114 | dctx = zstd.ZstdDecompressor() |
|
215 | dctx = zstd.ZstdDecompressor() | |
115 | chunks = list(dctx.read_from(source, read_size=read_size, write_size=write_size))
216 | chunks = list(dctx.read_to_iter(source, read_size=read_size, write_size=write_size))
116 |
|
217 | |||
117 | self.assertEqual(b''.join(chunks), original) |
|
218 | self.assertEqual(b''.join(chunks), original) | |
118 |
|
219 |
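The read_from() to read_to_iter() change appears to be a straight rename at these call sites. For reference, a short sketch of the renamed API (data and sizes are illustrative):

    import io

    import zstandard as zstd

    original = b'chunked input ' * 2048
    frame = zstd.ZstdCompressor(level=3).compress(original)

    dctx = zstd.ZstdDecompressor()
    # Lazily yields decompressed chunks from a file-like source.
    chunks = list(dctx.read_to_iter(io.BytesIO(frame),
                                    read_size=8192,
                                    write_size=16384))

    assert b''.join(chunks) == original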
@@ -1,9 +1,6 b'' | |||||
1 | try: |
|
1 | import unittest | |
2 | import unittest2 as unittest |
|
|||
3 | except ImportError: |
|
|||
4 | import unittest |
|
|||
5 |
|
2 | |||
6 | import zstd |
|
3 | import zstandard as zstd | |
7 |
|
4 | |||
8 | from . common import ( |
|
5 | from . common import ( | |
9 | make_cffi, |
|
6 | make_cffi, | |
@@ -16,7 +13,3 b' class TestSizes(unittest.TestCase):' | |||||
16 | size = zstd.estimate_decompression_context_size() |
|
13 | size = zstd.estimate_decompression_context_size() | |
17 | self.assertGreater(size, 100000) |
|
14 | self.assertGreater(size, 100000) | |
18 |
|
15 | |||
19 | def test_compression_size(self): |
|
|||
20 | params = zstd.get_compression_parameters(3) |
|
|||
21 | size = zstd.estimate_compression_context_size(params) |
|
|||
22 | self.assertGreater(size, 100000) |
|
@@ -1,11 +1,8 b'' | |||||
1 | from __future__ import unicode_literals |
|
1 | from __future__ import unicode_literals | |
2 |
|
2 | |||
3 | try: |
|
3 | import unittest | |
4 | import unittest2 as unittest |
|
|||
5 | except ImportError: |
|
|||
6 | import unittest |
|
|||
7 |
|
4 | |||
8 | import zstd |
|
5 | import zstandard as zstd | |
9 |
|
6 | |||
10 | from . common import ( |
|
7 | from . common import ( | |
11 | make_cffi, |
|
8 | make_cffi, | |
@@ -15,7 +12,7 b' from . common import (' | |||||
15 | @make_cffi |
|
12 | @make_cffi | |
16 | class TestModuleAttributes(unittest.TestCase): |
|
13 | class TestModuleAttributes(unittest.TestCase): | |
17 | def test_version(self): |
|
14 | def test_version(self): | |
18 | self.assertEqual(zstd.ZSTD_VERSION, (1,
15 | self.assertEqual(zstd.ZSTD_VERSION, (1, 3, 4))
19 |
|
16 | |||
20 | def test_constants(self): |
|
17 | def test_constants(self): | |
21 | self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22) |
|
18 | self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22) | |
@@ -23,6 +20,8 b' class TestModuleAttributes(unittest.Test' | |||||
23 |
|
20 | |||
24 | def test_hasattr(self): |
|
21 | def test_hasattr(self): | |
25 | attrs = ( |
|
22 | attrs = ( | |
|
23 | 'CONTENTSIZE_UNKNOWN', | |||
|
24 | 'CONTENTSIZE_ERROR', | |||
26 | 'COMPRESSION_RECOMMENDED_INPUT_SIZE', |
|
25 | 'COMPRESSION_RECOMMENDED_INPUT_SIZE', | |
27 | 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE', |
|
26 | 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE', | |
28 | 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE', |
|
27 | 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE', | |
@@ -40,7 +39,9 b' class TestModuleAttributes(unittest.Test' | |||||
40 | 'SEARCHLENGTH_MIN', |
|
39 | 'SEARCHLENGTH_MIN', | |
41 | 'SEARCHLENGTH_MAX', |
|
40 | 'SEARCHLENGTH_MAX', | |
42 | 'TARGETLENGTH_MIN', |
|
41 | 'TARGETLENGTH_MIN', | |
43 | 'TARGETLENGTH_MAX',
42 | 'LDM_MINMATCH_MIN',
|
43 | 'LDM_MINMATCH_MAX', | |||
|
44 | 'LDM_BUCKETSIZELOG_MAX', | |||
44 | 'STRATEGY_FAST', |
|
45 | 'STRATEGY_FAST', | |
45 | 'STRATEGY_DFAST', |
|
46 | 'STRATEGY_DFAST', | |
46 | 'STRATEGY_GREEDY', |
|
47 | 'STRATEGY_GREEDY', | |
@@ -48,6 +49,10 b' class TestModuleAttributes(unittest.Test' | |||||
48 | 'STRATEGY_LAZY2', |
|
49 | 'STRATEGY_LAZY2', | |
49 | 'STRATEGY_BTLAZY2', |
|
50 | 'STRATEGY_BTLAZY2', | |
50 | 'STRATEGY_BTOPT', |
|
51 | 'STRATEGY_BTOPT', | |
|
52 | 'STRATEGY_BTULTRA', | |||
|
53 | 'DICT_TYPE_AUTO', | |||
|
54 | 'DICT_TYPE_RAWCONTENT', | |||
|
55 | 'DICT_TYPE_FULLDICT', | |||
51 | ) |
|
56 | ) | |
52 |
|
57 | |||
53 | for a in attrs: |
|
58 | for a in attrs: |
@@ -1,13 +1,11 b'' | |||||
|
1 | import struct | |||
1 | import sys |
|
2 | import sys | |
|
3 | import unittest | |||
2 |
|
4 | |||
3 | try: |
|
5 | import zstandard as zstd | |
4 | import unittest2 as unittest |
|
|||
5 | except ImportError: |
|
|||
6 | import unittest |
|
|||
7 |
|
||||
8 | import zstd |
|
|||
9 |
|
6 | |||
10 | from . common import ( |
|
7 | from . common import ( | |
|
8 | generate_samples, | |||
11 | make_cffi, |
|
9 | make_cffi, | |
12 | ) |
|
10 | ) | |
13 |
|
11 | |||
@@ -30,55 +28,18 b' class TestTrainDictionary(unittest.TestC' | |||||
30 | with self.assertRaises(ValueError): |
|
28 | with self.assertRaises(ValueError): | |
31 | zstd.train_dictionary(8192, [u'foo']) |
|
29 | zstd.train_dictionary(8192, [u'foo']) | |
32 |
|
30 | |||
33 | def test_basic(self):
31 | def test_no_params(self):
34 | samples = [] |
|
32 | d = zstd.train_dictionary(8192, generate_samples()) | |
35 | for i in range(128): |
|
33 | self.assertIsInstance(d.dict_id(), int_type) | |
36 | samples.append(b'foo' * 64) |
|
|||
37 | samples.append(b'bar' * 64) |
|
|||
38 | samples.append(b'foobar' * 64) |
|
|||
39 | samples.append(b'baz' * 64) |
|
|||
40 | samples.append(b'foobaz' * 64) |
|
|||
41 | samples.append(b'bazfoo' * 64) |
|
|||
42 |
|
34 | |||
43 | d = zstd.train_dictionary(8192, samples) |
|
35 | # The dictionary ID may be different across platforms. | |
44 | self.assertLessEqual(len(d), 8192) |
|
36 | expected = b'\x37\xa4\x30\xec' + struct.pack('<I', d.dict_id()) | |
45 |
|
||||
46 | dict_id = d.dict_id() |
|
|||
47 | self.assertIsInstance(dict_id, int_type) |
|
|||
48 |
|
37 | |||
49 | data = d.as_bytes() |
|
38 | data = d.as_bytes() | |
50 |
self.assertEqual(data[0: |
|
39 | self.assertEqual(data[0:8], expected) | |
51 |
|
||||
52 | def test_set_dict_id(self): |
|
|||
53 | samples = [] |
|
|||
54 | for i in range(128): |
|
|||
55 | samples.append(b'foo' * 64) |
|
|||
56 | samples.append(b'foobar' * 64) |
|
|||
57 |
|
||||
58 | d = zstd.train_dictionary(8192, samples, dict_id=42) |
|
|||
59 | self.assertEqual(d.dict_id(), 42) |
|
|||
60 |
|
||||
61 |
|
||||
62 | @make_cffi |
|
|||
63 | class TestTrainCoverDictionary(unittest.TestCase): |
|
|||
64 | def test_no_args(self): |
|
|||
65 | with self.assertRaises(TypeError): |
|
|||
66 | zstd.train_cover_dictionary() |
|
|||
67 |
|
||||
68 | def test_bad_args(self): |
|
|||
69 | with self.assertRaises(TypeError): |
|
|||
70 | zstd.train_cover_dictionary(8192, u'foo') |
|
|||
71 |
|
||||
72 | with self.assertRaises(ValueError): |
|
|||
73 | zstd.train_cover_dictionary(8192, [u'foo']) |
|
|||
74 |
|
40 | |||
75 | def test_basic(self): |
|
41 | def test_basic(self): | |
76 | samples = [] |
|
42 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16) | |
77 | for i in range(128): |
|
|||
78 | samples.append(b'foo' * 64) |
|
|||
79 | samples.append(b'foobar' * 64) |
|
|||
80 |
|
||||
81 | d = zstd.train_cover_dictionary(8192, samples, k=64, d=16) |
|
|||
82 | self.assertIsInstance(d.dict_id(), int_type) |
|
43 | self.assertIsInstance(d.dict_id(), int_type) | |
83 |
|
44 | |||
84 | data = d.as_bytes() |
|
45 | data = d.as_bytes() | |
@@ -88,23 +49,39 b' class TestTrainCoverDictionary(unittest.' | |||||
88 | self.assertEqual(d.d, 16) |
|
49 | self.assertEqual(d.d, 16) | |
89 |
|
50 | |||
90 | def test_set_dict_id(self): |
|
51 | def test_set_dict_id(self): | |
91 | samples = [] |
|
52 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16, | |
92 | for i in range(128): |
|
53 | dict_id=42) | |
93 | samples.append(b'foo' * 64) |
|
|||
94 | samples.append(b'foobar' * 64) |
|
|||
95 |
|
||||
96 | d = zstd.train_cover_dictionary(8192, samples, k=64, d=16, |
|
|||
97 | dict_id=42) |
|
|||
98 | self.assertEqual(d.dict_id(), 42) |
|
54 | self.assertEqual(d.dict_id(), 42) | |
99 |
|
55 | |||
100 | def test_optimize(self): |
|
56 | def test_optimize(self): | |
101 | samples = [] |
|
57 | d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1, | |
102 | for i in range(128): |
|
58 | d=16) | |
103 | samples.append(b'foo' * 64) |
|
59 | ||
104 | samples.append(b'foobar' * 64) |
|
60 | self.assertEqual(d.k, 50) | |
|
61 | self.assertEqual(d.d, 16) | |||
|
62 | ||||
|
63 | @make_cffi | |||
|
64 | class TestCompressionDict(unittest.TestCase): | |||
|
65 | def test_bad_mode(self): | |||
|
66 | with self.assertRaisesRegexp(ValueError, 'invalid dictionary load mode'): | |||
|
67 | zstd.ZstdCompressionDict(b'foo', dict_type=42) | |||
|
68 | ||||
|
69 | def test_bad_precompute_compress(self): | |||
|
70 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16) | |||
105 |
|
71 | |||
106 | d = zstd.train_cover_dictionary(8192, samples, optimize=True, |
|
72 | with self.assertRaisesRegexp(ValueError, 'must specify one of level or '): | |
107 | threads=-1, steps=1, d=16) |
|
73 | d.precompute_compress() | |
|
74 | ||||
|
75 | with self.assertRaisesRegexp(ValueError, 'must only specify one of level or '): | |||
|
76 | d.precompute_compress(level=3, | |||
|
77 | compression_params=zstd.CompressionParameters()) | |||
108 |
|
78 | |||
109 | self.assertEqual(d.k, 16) |
|
79 | def test_precompute_compress_rawcontent(self): | |
110 | self.assertEqual(d.d, 16) |
|
80 | d = zstd.ZstdCompressionDict(b'dictcontent' * 64, | |
|
81 | dict_type=zstd.DICT_TYPE_RAWCONTENT) | |||
|
82 | d.precompute_compress(level=1) | |||
|
83 | ||||
|
84 | d = zstd.ZstdCompressionDict(b'dictcontent' * 64, | |||
|
85 | dict_type=zstd.DICT_TYPE_FULLDICT) | |||
|
86 | with self.assertRaisesRegexp(zstd.ZstdError, 'unable to precompute dictionary'): | |||
|
87 | d.precompute_compress(level=1) |
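The new TestCompressionDict cases cover the explicit dictionary load mode (dict_type=) and precompute_compress(), which builds a dictionary's compression tables ahead of time. A hedged sketch of how the two combine (content, sizes, and level are placeholders, not taken from the patch):

    import zstandard as zstd

    data = b'dictcontent' * 16

    # Raw-content dictionaries are plain prefix bytes rather than a
    # structured zstd dictionary, so precomputation accepts arbitrary content.
    d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
                                 dict_type=zstd.DICT_TYPE_RAWCONTENT)
    d.precompute_compress(level=3)

    cctx = zstd.ZstdCompressor(dict_data=d)
    frame = cctx.compress(data)

    dctx = zstd.ZstdDecompressor(dict_data=d)
    assert dctx.decompress(frame, max_output_size=len(data)) == data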
@@ -20,12 +20,6 b'' | |||||
20 |
|
20 | |||
21 | PyObject *ZstdError; |
|
21 | PyObject *ZstdError; | |
22 |
|
22 | |||
23 | PyDoc_STRVAR(estimate_compression_context_size__doc__, |
|
|||
24 | "estimate_compression_context_size(compression_parameters)\n" |
|
|||
25 | "\n" |
|
|||
26 | "Give the amount of memory allocated for a compression context given a\n" |
|
|||
27 | "CompressionParameters instance"); |
|
|||
28 |
|
||||
29 | PyDoc_STRVAR(estimate_decompression_context_size__doc__, |
|
23 | PyDoc_STRVAR(estimate_decompression_context_size__doc__, | |
30 | "estimate_decompression_context_size()\n" |
|
24 | "estimate_decompression_context_size()\n" | |
31 | "\n" |
|
25 | "\n" | |
@@ -36,11 +30,101 b' static PyObject* estimate_decompression_' | |||||
36 | return PyLong_FromSize_t(ZSTD_estimateDCtxSize()); |
|
30 | return PyLong_FromSize_t(ZSTD_estimateDCtxSize()); | |
37 | } |
|
31 | } | |
38 |
|
32 | |||
39 |
PyDoc_STRVAR( |
|
33 | PyDoc_STRVAR(frame_content_size__doc__, | |
40 | "get_compression_parameters(compression_level[, source_size[, dict_size]])\n" |
|
34 | "frame_content_size(data)\n" | |
41 | "\n" |
|
35 | "\n" | |
42 | "Obtains a ``CompressionParameters`` instance from a compression level and\n" |
|
36 | "Obtain the decompressed size of a frame." | |
43 | "optional input size and dictionary size"); |
|
37 | ); | |
|
38 | ||||
|
39 | static PyObject* frame_content_size(PyObject* self, PyObject* args, PyObject* kwargs) { | |||
|
40 | static char* kwlist[] = { | |||
|
41 | "source", | |||
|
42 | NULL | |||
|
43 | }; | |||
|
44 | ||||
|
45 | Py_buffer source; | |||
|
46 | PyObject* result = NULL; | |||
|
47 | unsigned long long size; | |||
|
48 | ||||
|
49 | #if PY_MAJOR_VERSION >= 3 | |||
|
50 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_content_size", | |||
|
51 | #else | |||
|
52 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_content_size", | |||
|
53 | #endif | |||
|
54 | kwlist, &source)) { | |||
|
55 | return NULL; | |||
|
56 | } | |||
|
57 | ||||
|
58 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |||
|
59 | PyErr_SetString(PyExc_ValueError, | |||
|
60 | "data buffer should be contiguous and have at most one dimension"); | |||
|
61 | goto finally; | |||
|
62 | } | |||
|
63 | ||||
|
64 | size = ZSTD_getFrameContentSize(source.buf, source.len); | |||
|
65 | ||||
|
66 | if (size == ZSTD_CONTENTSIZE_ERROR) { | |||
|
67 | PyErr_SetString(ZstdError, "error when determining content size"); | |||
|
68 | } | |||
|
69 | else if (size == ZSTD_CONTENTSIZE_UNKNOWN) { | |||
|
70 | result = PyLong_FromLong(-1); | |||
|
71 | } | |||
|
72 | else { | |||
|
73 | result = PyLong_FromUnsignedLongLong(size); | |||
|
74 | } | |||
|
75 | ||||
|
76 | finally: | |||
|
77 | PyBuffer_Release(&source); | |||
|
78 | ||||
|
79 | return result; | |||
|
80 | } | |||
|
81 | ||||
|
82 | PyDoc_STRVAR(frame_header_size__doc__, | |||
|
83 | "frame_header_size(data)\n" | |||
|
84 | "\n" | |||
|
85 | "Obtain the size of a frame header.\n" | |||
|
86 | ); | |||
|
87 | ||||
|
88 | static PyObject* frame_header_size(PyObject* self, PyObject* args, PyObject* kwargs) { | |||
|
89 | static char* kwlist[] = { | |||
|
90 | "source", | |||
|
91 | NULL | |||
|
92 | }; | |||
|
93 | ||||
|
94 | Py_buffer source; | |||
|
95 | PyObject* result = NULL; | |||
|
96 | size_t zresult; | |||
|
97 | ||||
|
98 | #if PY_MAJOR_VERSION >= 3 | |||
|
99 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_header_size", | |||
|
100 | #else | |||
|
101 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_header_size", | |||
|
102 | #endif | |||
|
103 | kwlist, &source)) { | |||
|
104 | return NULL; | |||
|
105 | } | |||
|
106 | ||||
|
107 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |||
|
108 | PyErr_SetString(PyExc_ValueError, | |||
|
109 | "data buffer should be contiguous and have at most one dimension"); | |||
|
110 | goto finally; | |||
|
111 | } | |||
|
112 | ||||
|
113 | zresult = ZSTD_frameHeaderSize(source.buf, source.len); | |||
|
114 | if (ZSTD_isError(zresult)) { | |||
|
115 | PyErr_Format(ZstdError, "could not determine frame header size: %s", | |||
|
116 | ZSTD_getErrorName(zresult)); | |||
|
117 | } | |||
|
118 | else { | |||
|
119 | result = PyLong_FromSize_t(zresult); | |||
|
120 | } | |||
|
121 | ||||
|
122 | finally: | |||
|
123 | ||||
|
124 | PyBuffer_Release(&source); | |||
|
125 | ||||
|
126 | return result; | |||
|
127 | } | |||
44 |
|
128 | |||
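Both new helpers are thin wrappers: frame_content_size() maps ZSTD_CONTENTSIZE_UNKNOWN to -1 and raises ZstdError on malformed input, while frame_header_size() reports how many bytes the frame header occupies. A small sketch from the Python side (the payload is illustrative):

    import zstandard as zstd

    payload = b'x' * 1000
    frame = zstd.ZstdCompressor(level=3).compress(payload)

    # Decompressed size recorded in the frame, or -1 if it was not written.
    print(zstd.frame_content_size(frame))

    # Size in bytes of the frame header itself.
    print(zstd.frame_header_size(frame))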
45 | PyDoc_STRVAR(get_frame_parameters__doc__, |
|
129 | PyDoc_STRVAR(get_frame_parameters__doc__, | |
46 | "get_frame_parameters(data)\n" |
|
130 | "get_frame_parameters(data)\n" | |
@@ -48,43 +132,48 b' PyDoc_STRVAR(get_frame_parameters__doc__' | |||||
48 | "Obtains a ``FrameParameters`` instance by parsing data.\n"); |
|
132 | "Obtains a ``FrameParameters`` instance by parsing data.\n"); | |
49 |
|
133 | |||
50 | PyDoc_STRVAR(train_dictionary__doc__, |
|
134 | PyDoc_STRVAR(train_dictionary__doc__, | |
51 | "train_dictionary(dict_size, samples
135 | "train_dictionary(dict_size, samples, k=None, d=None, steps=None,\n"
52 | "\n" |
|
136 | " threads=None,notifications=0, dict_id=0, level=0)\n" | |
53 | "Train a dictionary from sample data.\n" |
|
|||
54 | "\n" |
|
|||
55 | "A compression dictionary of size ``dict_size`` will be created from the\n" |
|
|||
56 | "iterable of samples provided by ``samples``.\n" |
|
|||
57 | "\n" |
|
|||
58 | "The raw dictionary content will be returned\n"); |
|
|||
59 |
|
||||
60 | PyDoc_STRVAR(train_cover_dictionary__doc__, |
|
|||
61 | "train_cover_dictionary(dict_size, samples, k=None, d=None, notifications=0, dict_id=0, level=0)\n" |
|
|||
62 | "\n" |
|
137 | "\n" | |
63 | "Train a dictionary from sample data using the COVER algorithm.\n" |
|
138 | "Train a dictionary from sample data using the COVER algorithm.\n" | |
64 | "\n" |
|
139 | "\n" | |
65 | "This behaves like ``train_dictionary()`` except a different algorithm is\n" |
|
140 | "A compression dictionary of size ``dict_size`` will be created from the\n" | |
66 | "used to create the dictionary. The algorithm has 2 parameters: ``k`` and\n" |
|
141 | "iterable of ``samples``. The raw dictionary bytes will be returned.\n" | |
67 | "``d``. These control the *segment size* and *dmer size*. A reasonable range\n" |
|
142 | "\n" | |
68 | "for ``k`` is ``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n" |
|
143 | "The COVER algorithm has 2 parameters: ``k`` and ``d``. These control the\n" | |
|
144 | "*segment size* and *dmer size*. A reasonable range for ``k`` is\n" | |||
|
145 | "``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n" | |||
69 | "``d`` must be less than or equal to ``k``.\n" |
|
146 | "``d`` must be less than or equal to ``k``.\n" | |
|
147 | "\n" | |||
|
148 | "``steps`` can be specified to control the number of steps through potential\n" | |||
|
149 | "values of ``k`` and ``d`` to try. ``k`` and ``d`` will only be varied if\n" | |||
|
150 | "those arguments are not defined. i.e. if ``d`` is ``8``, then only ``k``\n" | |||
|
151 | "will be varied in this mode.\n" | |||
|
152 | "\n" | |||
|
153 | "``threads`` can specify how many threads to use to test various ``k`` and\n" | |||
|
154 | "``d`` values. ``-1`` will use as many threads as available CPUs. By default,\n" | |||
|
155 | "a single thread is used.\n" | |||
|
156 | "\n" | |||
|
157 | "When ``k`` and ``d`` are not defined, default values are used and the\n" | |||
|
158 | "algorithm will perform multiple iterations - or steps - to try to find\n" | |||
|
159 | "ideal parameters. If both ``k`` and ``d`` are specified, then those values\n" | |||
|
160 | "will be used. ``steps`` or ``threads`` triggers optimization mode to test\n" | |||
|
161 | "multiple ``k`` and ``d`` variations.\n" | |||
70 | ); |
|
162 | ); | |
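The reworked docstring folds the old COVER-specific entry point into train_dictionary() and documents two ways to drive it. A hedged Python sketch of both modes, mirroring the parameters used in the tests elsewhere in this series (the sample data is fabricated):

    import zstandard as zstd

    # Fabricated training samples; real use would feed representative inputs.
    samples = []
    for i in range(128):
        samples.append(b'foo' * 64)
        samples.append(b'foobar' * 64)
        samples.append(b'baz' * 64)

    # Explicit COVER parameters: use these k/d values as given.
    d1 = zstd.train_dictionary(8192, samples, k=64, d=16)

    # Optimization mode: fix d, let the trainer search k across one step,
    # using as many threads as there are CPUs.
    d2 = zstd.train_dictionary(8192, samples, threads=-1, steps=1, d=16)

    print(d1.dict_id(), d1.k, d1.d)
    print(d2.dict_id(), d2.k, d2.d)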
71 |
|
163 | |||
72 | static char zstd_doc[] = "Interface to zstandard"; |
|
164 | static char zstd_doc[] = "Interface to zstandard"; | |
73 |
|
165 | |||
74 | static PyMethodDef zstd_methods[] = { |
|
166 | static PyMethodDef zstd_methods[] = { | |
75 | /* TODO remove since it is a method on CompressionParameters. */ |
|
|||
76 | { "estimate_compression_context_size", (PyCFunction)estimate_compression_context_size, |
|
|||
77 | METH_VARARGS, estimate_compression_context_size__doc__ }, |
|
|||
78 | { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size, |
|
167 | { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size, | |
79 | METH_NOARGS, estimate_decompression_context_size__doc__ }, |
|
168 | METH_NOARGS, estimate_decompression_context_size__doc__ }, | |
80 | { "get_compression_parameters", (PyCFunction)get_compression_parameters, |
|
169 | { "frame_content_size", (PyCFunction)frame_content_size, | |
81 | METH_VARARGS, get_compression_parameters__doc__ }, |
|
170 | METH_VARARGS | METH_KEYWORDS, frame_content_size__doc__ }, | |
|
171 | { "frame_header_size", (PyCFunction)frame_header_size, | |||
|
172 | METH_VARARGS | METH_KEYWORDS, frame_header_size__doc__ }, | |||
82 | { "get_frame_parameters", (PyCFunction)get_frame_parameters, |
|
173 | { "get_frame_parameters", (PyCFunction)get_frame_parameters, | |
83 | METH_VARARGS, get_frame_parameters__doc__ }, |
|
174 | METH_VARARGS | METH_KEYWORDS, get_frame_parameters__doc__ }, | |
84 | { "train_dictionary", (PyCFunction)train_dictionary, |
|
175 | { "train_dictionary", (PyCFunction)train_dictionary, | |
85 | METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ }, |
|
176 | METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ }, | |
86 | { "train_cover_dictionary", (PyCFunction)train_cover_dictionary, |
|
|||
87 | METH_VARARGS | METH_KEYWORDS, train_cover_dictionary__doc__ }, |
|
|||
88 | { NULL, NULL } |
|
177 | { NULL, NULL } | |
89 | }; |
|
178 | }; | |
90 |
|
179 | |||
@@ -94,10 +183,12 b' void compressor_module_init(PyObject* mo' | |||||
94 | void compressionparams_module_init(PyObject* mod); |
|
183 | void compressionparams_module_init(PyObject* mod); | |
95 | void constants_module_init(PyObject* mod); |
|
184 | void constants_module_init(PyObject* mod); | |
96 | void compressiondict_module_init(PyObject* mod); |
|
185 | void compressiondict_module_init(PyObject* mod); | |
|
186 | void compressionreader_module_init(PyObject* mod); | |||
97 | void compressionwriter_module_init(PyObject* mod); |
|
187 | void compressionwriter_module_init(PyObject* mod); | |
98 | void compressoriterator_module_init(PyObject* mod); |
|
188 | void compressoriterator_module_init(PyObject* mod); | |
99 | void decompressor_module_init(PyObject* mod); |
|
189 | void decompressor_module_init(PyObject* mod); | |
100 | void decompressobj_module_init(PyObject* mod); |
|
190 | void decompressobj_module_init(PyObject* mod); | |
|
191 | void decompressionreader_module_init(PyObject *mod); | |||
101 | void decompressionwriter_module_init(PyObject* mod); |
|
192 | void decompressionwriter_module_init(PyObject* mod); | |
102 | void decompressoriterator_module_init(PyObject* mod); |
|
193 | void decompressoriterator_module_init(PyObject* mod); | |
103 | void frameparams_module_init(PyObject* mod); |
|
194 | void frameparams_module_init(PyObject* mod); | |
@@ -118,7 +209,7 b' void zstd_module_init(PyObject* m) {' | |||||
118 | We detect this mismatch here and refuse to load the module if this |
|
209 | We detect this mismatch here and refuse to load the module if this | |
119 | scenario is detected. |
|
210 | scenario is detected. | |
120 | */ |
|
211 | */ | |
121 | if (ZSTD_VERSION_NUMBER !=
212 | if (ZSTD_VERSION_NUMBER != 10304 || ZSTD_versionNumber() != 10304) {
122 | PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version"); |
|
213 | PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version"); | |
123 | return; |
|
214 | return; | |
124 | } |
|
215 | } | |
@@ -128,16 +219,24 b' void zstd_module_init(PyObject* m) {' | |||||
128 | compressiondict_module_init(m); |
|
219 | compressiondict_module_init(m); | |
129 | compressobj_module_init(m); |
|
220 | compressobj_module_init(m); | |
130 | compressor_module_init(m); |
|
221 | compressor_module_init(m); | |
|
222 | compressionreader_module_init(m); | |||
131 | compressionwriter_module_init(m); |
|
223 | compressionwriter_module_init(m); | |
132 | compressoriterator_module_init(m); |
|
224 | compressoriterator_module_init(m); | |
133 | constants_module_init(m); |
|
225 | constants_module_init(m); | |
134 | decompressor_module_init(m); |
|
226 | decompressor_module_init(m); | |
135 | decompressobj_module_init(m); |
|
227 | decompressobj_module_init(m); | |
|
228 | decompressionreader_module_init(m); | |||
136 | decompressionwriter_module_init(m); |
|
229 | decompressionwriter_module_init(m); | |
137 | decompressoriterator_module_init(m); |
|
230 | decompressoriterator_module_init(m); | |
138 | frameparams_module_init(m); |
|
231 | frameparams_module_init(m); | |
139 | } |
|
232 | } | |
140 |
|
233 | |||
|
234 | #if defined(__GNUC__) && (__GNUC__ >= 4) | |||
|
235 | # define PYTHON_ZSTD_VISIBILITY __attribute__ ((visibility ("default"))) | |||
|
236 | #else | |||
|
237 | # define PYTHON_ZSTD_VISIBILITY | |||
|
238 | #endif | |||
|
239 | ||||
141 | #if PY_MAJOR_VERSION >= 3 |
|
240 | #if PY_MAJOR_VERSION >= 3 | |
142 | static struct PyModuleDef zstd_module = { |
|
241 | static struct PyModuleDef zstd_module = { | |
143 | PyModuleDef_HEAD_INIT, |
|
242 | PyModuleDef_HEAD_INIT, | |
@@ -147,7 +246,7 b' static struct PyModuleDef zstd_module = ' | |||||
147 | zstd_methods |
|
246 | zstd_methods | |
148 | }; |
|
247 | }; | |
149 |
|
248 | |||
150 | PyMODINIT_FUNC PyInit_zstd(void) { |
|
249 | PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC PyInit_zstd(void) { | |
151 | PyObject *m = PyModule_Create(&zstd_module); |
|
250 | PyObject *m = PyModule_Create(&zstd_module); | |
152 | if (m) { |
|
251 | if (m) { | |
153 | zstd_module_init(m); |
|
252 | zstd_module_init(m); | |
@@ -159,7 +258,7 b' PyMODINIT_FUNC PyInit_zstd(void) {' | |||||
159 | return m; |
|
258 | return m; | |
160 | } |
|
259 | } | |
161 | #else |
|
260 | #else | |
162 | PyMODINIT_FUNC initzstd(void) { |
|
261 | PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC initzstd(void) { | |
163 | PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc); |
|
262 | PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc); | |
164 | if (m) { |
|
263 | if (m) { | |
165 | zstd_module_init(m); |
|
264 | zstd_module_init(m); | |
@@ -211,3 +310,33 b' size_t roundpow2(size_t i) {' | |||||
211 |
|
310 | |||
212 | return i; |
|
311 | return i; | |
213 | } |
|
312 | } | |
|
313 | ||||
|
314 | /* Safer version of _PyBytes_Resize(). | |||
|
315 | * | |||
|
316 | * _PyBytes_Resize() only works if the refcount is 1. In some scenarios, | |||
|
317 | * we can get an object with a refcount > 1, even if it was just created | |||
|
318 | * with PyBytes_FromStringAndSize()! That's because (at least) CPython | |||
|
319 | * pre-allocates PyBytes instances of size 1 for every possible byte value. | |||
|
320 | * | |||
|
321 | * If non-0 is returned, obj may or may not be NULL. | |||
|
322 | */ | |||
|
323 | int safe_pybytes_resize(PyObject** obj, Py_ssize_t size) { | |||
|
324 | PyObject* tmp; | |||
|
325 | ||||
|
326 | if ((*obj)->ob_refcnt == 1) { | |||
|
327 | return _PyBytes_Resize(obj, size); | |||
|
328 | } | |||
|
329 | ||||
|
330 | tmp = PyBytes_FromStringAndSize(NULL, size); | |||
|
331 | if (!tmp) { | |||
|
332 | return -1; | |||
|
333 | } | |||
|
334 | ||||
|
335 | memcpy(PyBytes_AS_STRING(tmp), PyBytes_AS_STRING(*obj), | |||
|
336 | PyBytes_GET_SIZE(*obj)); | |||
|
337 | ||||
|
338 | Py_DECREF(*obj); | |||
|
339 | *obj = tmp; | |||
|
340 | ||||
|
341 | return 0; | |||
|
342 | } No newline at end of file |
@@ -2,7 +2,7 b'' | |||||
2 | bitstream |
|
2 | bitstream | |
3 | Part of FSE library |
|
3 | Part of FSE library | |
4 | header file (to include) |
|
4 | header file (to include) | |
5 |
Copyright (C) 2013-201 |
|
5 | Copyright (C) 2013-2017, Yann Collet. | |
6 |
|
6 | |||
7 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
|
7 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |
8 |
|
8 | |||
@@ -39,7 +39,6 b'' | |||||
39 | extern "C" { |
|
39 | extern "C" { | |
40 | #endif |
|
40 | #endif | |
41 |
|
41 | |||
42 |
|
||||
43 | /* |
|
42 | /* | |
44 | * This API consists of small unitary functions, which must be inlined for best performance. |
|
43 | * This API consists of small unitary functions, which must be inlined for best performance. | |
45 | * Since link-time-optimization is not available for all compilers, |
|
44 | * Since link-time-optimization is not available for all compilers, | |
@@ -53,6 +52,18 b' extern "C" {' | |||||
53 | #include "error_private.h" /* error codes and messages */ |
|
52 | #include "error_private.h" /* error codes and messages */ | |
54 |
|
53 | |||
55 |
|
54 | |||
|
55 | /*-************************************* | |||
|
56 | * Debug | |||
|
57 | ***************************************/ | |||
|
58 | #if defined(BIT_DEBUG) && (BIT_DEBUG>=1) | |||
|
59 | # include <assert.h> | |||
|
60 | #else | |||
|
61 | # ifndef assert | |||
|
62 | # define assert(condition) ((void)0) | |||
|
63 | # endif | |||
|
64 | #endif | |||
|
65 | ||||
|
66 | ||||
56 | /*========================================= |
|
67 | /*========================================= | |
57 | * Target specific |
|
68 | * Target specific | |
58 | =========================================*/ |
|
69 | =========================================*/ | |
@@ -60,18 +71,22 b' extern "C" {' | |||||
60 | # include <immintrin.h> /* support for bextr (experimental) */ |
|
71 | # include <immintrin.h> /* support for bextr (experimental) */ | |
61 | #endif |
|
72 | #endif | |
62 |
|
73 | |||
|
74 | #define STREAM_ACCUMULATOR_MIN_32 25 | |||
|
75 | #define STREAM_ACCUMULATOR_MIN_64 57 | |||
|
76 | #define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) | |||
|
77 | ||||
63 |
|
78 | |||
64 | /*-****************************************** |
|
79 | /*-****************************************** | |
65 | * bitStream encoding API (write forward) |
|
80 | * bitStream encoding API (write forward) | |
66 | ********************************************/ |
|
81 | ********************************************/ | |
67 | /* bitStream can mix input from multiple sources. |
|
82 | /* bitStream can mix input from multiple sources. | |
68 |
|
|
83 | * A critical property of these streams is that they encode and decode in **reverse** direction. | |
69 |
|
|
84 | * So the first bit sequence you add will be the last to be read, like a LIFO stack. | |
70 | */ |
|
85 | */ | |
71 | typedef struct |
|
86 | typedef struct | |
72 | { |
|
87 | { | |
73 | size_t bitContainer; |
|
88 | size_t bitContainer; | |
74 |
|
|
89 | unsigned bitPos; | |
75 | char* startPtr; |
|
90 | char* startPtr; | |
76 | char* ptr; |
|
91 | char* ptr; | |
77 | char* endPtr; |
|
92 | char* endPtr; | |
@@ -109,6 +124,7 b' typedef struct' | |||||
109 | unsigned bitsConsumed; |
|
124 | unsigned bitsConsumed; | |
110 | const char* ptr; |
|
125 | const char* ptr; | |
111 | const char* start; |
|
126 | const char* start; | |
|
127 | const char* limitPtr; | |||
112 | } BIT_DStream_t; |
|
128 | } BIT_DStream_t; | |
113 |
|
129 | |||
114 | typedef enum { BIT_DStream_unfinished = 0, |
|
130 | typedef enum { BIT_DStream_unfinished = 0, | |
@@ -151,140 +167,178 b' MEM_STATIC size_t BIT_readBitsFast(BIT_D' | |||||
151 | /*-************************************************************** |
|
167 | /*-************************************************************** | |
152 | * Internal functions |
|
168 | * Internal functions | |
153 | ****************************************************************/ |
|
169 | ****************************************************************/ | |
154 | MEM_STATIC unsigned BIT_highbit32 (register U32 val)
170 | MEM_STATIC unsigned BIT_highbit32 (U32 val)
155 | { |
|
171 | { | |
|
172 | assert(val != 0); | |||
|
173 | { | |||
156 | # if defined(_MSC_VER) /* Visual */ |
|
174 | # if defined(_MSC_VER) /* Visual */ | |
157 | unsigned long r=0; |
|
175 | unsigned long r=0; | |
158 | _BitScanReverse ( &r, val ); |
|
176 | _BitScanReverse ( &r, val ); | |
159 | return (unsigned) r; |
|
177 | return (unsigned) r; | |
160 | # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ |
|
178 | # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ | |
161 | return 31 - __builtin_clz (val); |
|
179 | return 31 - __builtin_clz (val); | |
162 | # else /* Software version */ |
|
180 | # else /* Software version */ | |
163 | static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; |
|
181 | static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, | |
164 | U32 v = val; |
|
182 | 11, 14, 16, 18, 22, 25, 3, 30, | |
165 | v |= v >> 1; |
|
183 | 8, 12, 20, 28, 15, 17, 24, 7, | |
166 | v |= v >> 2; |
|
184 | 19, 27, 23, 6, 26, 5, 4, 31 }; | |
167 | v |= v >> 4; |
|
185 | U32 v = val; | |
168 |
v |= v >> |
|
186 | v |= v >> 1; | |
169 |
v |= v >> |
|
187 | v |= v >> 2; | |
170 | return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; |
|
188 | v |= v >> 4; | |
|
189 | v |= v >> 8; | |||
|
190 | v |= v >> 16; | |||
|
191 | return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; | |||
171 | # endif |
|
192 | # endif | |
|
193 | } | |||
172 | } |
|
194 | } | |
173 |
|
195 | |||
174 | /*===== Local Constants =====*/ |
|
196 | /*===== Local Constants =====*/ | |
175 | static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ |
|
197 | static const unsigned BIT_mask[] = { | |
176 |
|
198 | 0, 1, 3, 7, 0xF, 0x1F, | ||
|
199 | 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, | |||
|
200 | 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, | |||
|
201 | 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, | |||
|
202 | 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, | |||
|
203 | 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ | |||
|
204 | #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) | |||
177 |
|
205 | |||
178 | /*-************************************************************** |
|
206 | /*-************************************************************** | |
179 | * bitStream encoding |
|
207 | * bitStream encoding | |
180 | ****************************************************************/ |
|
208 | ****************************************************************/ | |
181 | /*! BIT_initCStream() : |
|
209 | /*! BIT_initCStream() : | |
182 |
* `dstCapacity` must be > sizeof( |
|
210 | * `dstCapacity` must be > sizeof(size_t) | |
183 | * @return : 0 if success, |
|
211 | * @return : 0 if success, | |
184 |
|
|
212 | * otherwise an error code (can be tested using ERR_isError()) */ | |
185 |
MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, |
|
213 | MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, | |
|
214 | void* startPtr, size_t dstCapacity) | |||
186 | { |
|
215 | { | |
187 | bitC->bitContainer = 0; |
|
216 | bitC->bitContainer = 0; | |
188 | bitC->bitPos = 0; |
|
217 | bitC->bitPos = 0; | |
189 | bitC->startPtr = (char*)startPtr; |
|
218 | bitC->startPtr = (char*)startPtr; | |
190 | bitC->ptr = bitC->startPtr; |
|
219 | bitC->ptr = bitC->startPtr; | |
191 |
bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC-> |
|
220 | bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); | |
192 |
if (dstCapacity <= sizeof(bitC-> |
|
221 | if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); | |
193 | return 0; |
|
222 | return 0; | |
194 | } |
|
223 | } | |
195 |
|
224 | |||
196 | /*! BIT_addBits() : |
|
225 | /*! BIT_addBits() : | |
197 |
|
|
226 | * can add up to 31 bits into `bitC`. | |
198 |
|
|
227 | * Note : does not check for register overflow ! */ | |
199 |
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, |
|
228 | MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, | |
|
229 | size_t value, unsigned nbBits) | |||
200 | { |
|
230 | { | |
|
231 | MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32); | |||
|
232 | assert(nbBits < BIT_MASK_SIZE); | |||
|
233 | assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); | |||
201 | bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; |
|
234 | bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; | |
202 | bitC->bitPos += nbBits; |
|
235 | bitC->bitPos += nbBits; | |
203 | } |
|
236 | } | |
204 |
|
237 | |||
205 | /*! BIT_addBitsFast() : |
|
238 | /*! BIT_addBitsFast() : | |
206 | * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ |
|
239 | * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ | |
207 |
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, |
|
240 | MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, | |
|
241 | size_t value, unsigned nbBits) | |||
208 | { |
|
242 | { | |
|
243 | assert((value>>nbBits) == 0); | |||
|
244 | assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); | |||
209 | bitC->bitContainer |= value << bitC->bitPos; |
|
245 | bitC->bitContainer |= value << bitC->bitPos; | |
210 | bitC->bitPos += nbBits; |
|
246 | bitC->bitPos += nbBits; | |
211 | } |
|
247 | } | |
212 |
|
248 | |||
213 | /*! BIT_flushBitsFast() : |
|
249 | /*! BIT_flushBitsFast() : | |
|
250 | * assumption : bitContainer has not overflowed | |||
214 | * unsafe version; does not check buffer overflow */ |
|
251 | * unsafe version; does not check buffer overflow */ | |
215 | MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) |
|
252 | MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) | |
216 | { |
|
253 | { | |
217 | size_t const nbBytes = bitC->bitPos >> 3; |
|
254 | size_t const nbBytes = bitC->bitPos >> 3; | |
|
255 | assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); | |||
218 | MEM_writeLEST(bitC->ptr, bitC->bitContainer); |
|
256 | MEM_writeLEST(bitC->ptr, bitC->bitContainer); | |
219 | bitC->ptr += nbBytes; |
|
257 | bitC->ptr += nbBytes; | |
|
258 | assert(bitC->ptr <= bitC->endPtr); | |||
220 | bitC->bitPos &= 7; |
|
259 | bitC->bitPos &= 7; | |
221 | bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ |
|
260 | bitC->bitContainer >>= nbBytes*8; | |
222 | } |
|
261 | } | |
223 |
|
262 | |||
224 | /*! BIT_flushBits() : |
|
263 | /*! BIT_flushBits() : | |
|
264 | * assumption : bitContainer has not overflowed | |||
225 | * safe version; check for buffer overflow, and prevents it. |
|
265 | * safe version; check for buffer overflow, and prevents it. | |
226 | * note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */ |
|
266 | * note : does not signal buffer overflow. | |
|
267 | * overflow will be revealed later on using BIT_closeCStream() */ | |||
227 | MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) |
|
268 | MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) | |
228 | { |
|
269 | { | |
229 | size_t const nbBytes = bitC->bitPos >> 3; |
|
270 | size_t const nbBytes = bitC->bitPos >> 3; | |
|
271 | assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); | |||
230 | MEM_writeLEST(bitC->ptr, bitC->bitContainer); |
|
272 | MEM_writeLEST(bitC->ptr, bitC->bitContainer); | |
231 | bitC->ptr += nbBytes; |
|
273 | bitC->ptr += nbBytes; | |
232 | if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; |
|
274 | if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; | |
233 | bitC->bitPos &= 7; |
|
275 | bitC->bitPos &= 7; | |
234 | bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ |
|
276 | bitC->bitContainer >>= nbBytes*8; | |
235 | } |
|
277 | } | |
236 |
|
278 | |||
237 | /*! BIT_closeCStream() : |
|
279 | /*! BIT_closeCStream() : | |
238 | * @return : size of CStream, in bytes, |
|
280 | * @return : size of CStream, in bytes, | |
239 |
|
|
281 | * or 0 if it could not fit into dstBuffer */ | |
240 | MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) |
|
282 | MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) | |
241 | { |
|
283 | { | |
242 | BIT_addBitsFast(bitC, 1, 1); /* endMark */ |
|
284 | BIT_addBitsFast(bitC, 1, 1); /* endMark */ | |
243 | BIT_flushBits(bitC); |
|
285 | BIT_flushBits(bitC); | |
244 |
|
286 | if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ | ||
245 | if (bitC->ptr >= bitC->endPtr) return 0; /* doesn't fit within authorized budget : cancel */ |
|
|||
246 |
|
||||
247 | return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); |
|
287 | return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); | |
248 | } |
|
288 | } | |
249 |
|
289 | |||
250 |
|
290 | |||
251 | /*-******************************************************** |
|
291 | /*-******************************************************** | |
252 | * bitStream decoding |
|
292 | * bitStream decoding | |
253 | **********************************************************/ |
|
293 | **********************************************************/ | |
254 | /*! BIT_initDStream() : |
|
294 | /*! BIT_initDStream() : | |
255 |
|
|
295 | * Initialize a BIT_DStream_t. | |
256 |
|
|
296 | * `bitD` : a pointer to an already allocated BIT_DStream_t structure. | |
257 |
|
|
297 | * `srcSize` must be the *exact* size of the bitStream, in bytes. | |
258 |
|
|
298 | * @return : size of stream (== srcSize), or an errorCode if a problem is detected | |
259 | */ |
|
299 | */ | |
260 | MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) |
|
300 | MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) | |
261 | { |
|
301 | { | |
262 | if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } |
|
302 | if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } | |
263 |
|
303 | |||
|
304 | bitD->start = (const char*)srcBuffer; | |||
|
305 | bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); | |||
|
306 | ||||
264 | if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ |
|
307 | if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ | |
265 | bitD->start = (const char*)srcBuffer; |
|
|||
266 | bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); |
|
308 | bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); | |
267 | bitD->bitContainer = MEM_readLEST(bitD->ptr); |
|
309 | bitD->bitContainer = MEM_readLEST(bitD->ptr); | |
268 | { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; |
|
310 | { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; | |
269 | bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ |
|
311 | bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ | |
270 | if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } |
|
312 | if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } | |
271 | } else { |
|
313 | } else { | |
272 | bitD->start = (const char*)srcBuffer; |
|
|||
273 | bitD->ptr = bitD->start; |
|
314 | bitD->ptr = bitD->start; | |
274 | bitD->bitContainer = *(const BYTE*)(bitD->start); |
|
315 | bitD->bitContainer = *(const BYTE*)(bitD->start); | |
275 | switch(srcSize) |
|
316 | switch(srcSize) | |
276 | { |
|
317 | { | |
277 |
|
|
318 | case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); | |
278 | case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); |
|
319 | /* fall-through */ | |
279 | case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); |
|
320 | ||
280 |
|
|
321 | case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); | |
281 | case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; |
|
322 | /* fall-through */ | |
282 | case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; |
|
323 | ||
283 | default:; |
|
324 | case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); | |
|
325 | /* fall-through */ | |||
|
326 | ||||
|
327 | case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; | |||
|
328 | /* fall-through */ | |||
|
329 | ||||
|
330 | case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; | |||
|
331 | /* fall-through */ | |||
|
332 | ||||
|
333 | case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; | |||
|
334 | /* fall-through */ | |||
|
335 | ||||
|
336 | default: break; | |||
284 | } |
|
337 | } | |
285 | { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; |
|
338 | { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; | |
286 | bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; |
|
339 | bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; | |
287 |
if (lastByte == 0) return ERROR( |
|
340 | if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ | |
|
341 | } | |||
288 | bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; |
|
342 | bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; | |
289 | } |
|
343 | } | |
290 |
|
344 | |||
@@ -306,12 +360,14 b' MEM_STATIC size_t BIT_getMiddleBits(size' | |||||
306 | # endif |
|
360 | # endif | |
307 | return _bextr_u32(bitContainer, start, nbBits); |
|
361 | return _bextr_u32(bitContainer, start, nbBits); | |
308 | #else |
|
362 | #else | |
|
363 | assert(nbBits < BIT_MASK_SIZE); | |||
309 | return (bitContainer >> start) & BIT_mask[nbBits]; |
|
364 | return (bitContainer >> start) & BIT_mask[nbBits]; | |
310 | #endif |
|
365 | #endif | |
311 | } |
|
366 | } | |
312 |
|
367 | |||
313 | MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) |
|
368 | MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) | |
314 | { |
|
369 | { | |
|
370 | assert(nbBits < BIT_MASK_SIZE); | |||
315 | return bitContainer & BIT_mask[nbBits]; |
|
371 | return bitContainer & BIT_mask[nbBits]; | |
316 | } |
|
372 | } | |
317 |
|
373 | |||
@@ -320,24 +376,24 b' MEM_STATIC size_t BIT_getLowerBits(size_' | |||||
320 | * local register is not modified. |
|
376 | * local register is not modified. | |
321 | * On 32-bits, maxNbBits==24. |
|
377 | * On 32-bits, maxNbBits==24. | |
322 | * On 64-bits, maxNbBits==56. |
|
378 | * On 64-bits, maxNbBits==56. | |
323 |
* |
|
379 | * @return : value extracted */ | |
324 | */ |
|
380 | MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) | |
325 | MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) |
|
|||
326 | { |
|
381 | { | |
327 | #if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */ |
|
382 | #if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */ | |
328 | return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); |
|
383 | return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); | |
329 | #else |
|
384 | #else | |
330 |
U32 const |
|
385 | U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; | |
331 |
return ((bitD->bitContainer << (bitD->bitsConsumed & |
|
386 | return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); | |
332 | #endif |
|
387 | #endif | |
333 | } |
|
388 | } | |
334 |
|
389 | |||
335 | /*! BIT_lookBitsFast() : |
|
390 | /*! BIT_lookBitsFast() : | |
336 |
|
|
391 | * unsafe version; only works if nbBits >= 1 */ | |
337 | MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) |
|
392 | MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) | |
338 | { |
|
393 | { | |
339 |
U32 const |
|
394 | U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; | |
340 | return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); |
|
395 | assert(nbBits >= 1); | |
|
396 | return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); | |||
341 | } |
|
397 | } | |
342 |
|
398 | |||
343 | MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) |
|
399 | MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) | |
@@ -348,8 +404,7 b' MEM_STATIC void BIT_skipBits(BIT_DStream' | |||||
348 | /*! BIT_readBits() : |
|
404 | /*! BIT_readBits() : | |
349 | * Read (consume) next n bits from local register and update. |
|
405 | * Read (consume) next n bits from local register and update. | |
350 | * Pay attention to not read more than nbBits contained into local register. |
|
406 | * Pay attention to not read more than nbBits contained into local register. | |
351 |
* |
|
407 | * @return : extracted value. */ | |
352 | */ |
|
|||
353 | MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) |
|
408 | MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) | |
354 | { |
|
409 | { | |
355 | size_t const value = BIT_lookBits(bitD, nbBits); |
|
410 | size_t const value = BIT_lookBits(bitD, nbBits); | |
@@ -358,25 +413,26 b' MEM_STATIC size_t BIT_readBits(BIT_DStre' | |||||
358 | } |
|
413 | } | |
359 |
|
414 | |||
360 | /*! BIT_readBitsFast() : |
|
415 | /*! BIT_readBitsFast() : | |
361 |
|
|
416 | * unsafe version; only works only if nbBits >= 1 */ | |
362 | MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) |
|
417 | MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) | |
363 | { |
|
418 | { | |
364 | size_t const value = BIT_lookBitsFast(bitD, nbBits); |
|
419 | size_t const value = BIT_lookBitsFast(bitD, nbBits); | |
|
420 | assert(nbBits >= 1); | |||
365 | BIT_skipBits(bitD, nbBits); |
|
421 | BIT_skipBits(bitD, nbBits); | |
366 | return value; |
|
422 | return value; | |
367 | } |
|
423 | } | |
368 |
|
424 | |||
369 | /*! BIT_reloadDStream() : |
|
425 | /*! BIT_reloadDStream() : | |
370 |
|
|
426 | * Refill `bitD` from buffer previously set in BIT_initDStream() . | |
371 |
|
|
427 | * This function is safe, it guarantees it will not read beyond src buffer. | |
372 |
|
|
428 | * @return : status of `BIT_DStream_t` internal register. | |
373 |
|
|
429 | * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ | |
374 | MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) |
|
430 | MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) | |
375 | { |
|
431 | { | |
376 |
|
|
432 | if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ | |
377 |
|
|
433 | return BIT_DStream_overflow; | |
378 |
|
434 | |||
379 |
if (bitD->ptr >= bitD-> |
|
435 | if (bitD->ptr >= bitD->limitPtr) { | |
380 | bitD->ptr -= bitD->bitsConsumed >> 3; |
|
436 | bitD->ptr -= bitD->bitsConsumed >> 3; | |
381 | bitD->bitsConsumed &= 7; |
|
437 | bitD->bitsConsumed &= 7; | |
382 | bitD->bitContainer = MEM_readLEST(bitD->ptr); |
|
438 | bitD->bitContainer = MEM_readLEST(bitD->ptr); | |
@@ -386,6 +442,7 b' MEM_STATIC BIT_DStream_status BIT_reload' | |||||
386 | if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; |
|
442 | if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; | |
387 | return BIT_DStream_completed; |
|
443 | return BIT_DStream_completed; | |
388 | } |
|
444 | } | |
|
445 | /* start < ptr < limitPtr */ | |||
389 | { U32 nbBytes = bitD->bitsConsumed >> 3; |
|
446 | { U32 nbBytes = bitD->bitsConsumed >> 3; | |
390 | BIT_DStream_status result = BIT_DStream_unfinished; |
|
447 | BIT_DStream_status result = BIT_DStream_unfinished; | |
391 | if (bitD->ptr - nbBytes < bitD->start) { |
|
448 | if (bitD->ptr - nbBytes < bitD->start) { | |
@@ -394,14 +451,14 b' MEM_STATIC BIT_DStream_status BIT_reload' | |||||
394 | } |
|
451 | } | |
395 | bitD->ptr -= nbBytes; |
|
452 | bitD->ptr -= nbBytes; | |
396 | bitD->bitsConsumed -= nbBytes*8; |
|
453 | bitD->bitsConsumed -= nbBytes*8; | |
397 | bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ |
|
454 | bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ | |
398 | return result; |
|
455 | return result; | |
399 | } |
|
456 | } | |
400 | } |
|
457 | } | |
401 |
|
458 | |||
402 | /*! BIT_endOfDStream() : |
|
459 | /*! BIT_endOfDStream() : | |
403 |
|
|
460 | * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). | |
404 | */ |
|
461 | */ | |
405 | MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) |
|
462 | MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) | |
406 | { |
|
463 | { | |
407 | return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); |
|
464 | return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); |
@@ -43,27 +43,21 b''
 #include "huf.h"


-/*-****************************************
-*  FSE Error Management
-******************************************/
+/*=== Version ===*/
+unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
+
+
+/*=== Error Management ===*/
 unsigned FSE_isError(size_t code) { return ERR_isError(code); }
-
 const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }

-
-/* **************************************************************
-*  HUF Error Management
-****************************************************************/
 unsigned HUF_isError(size_t code) { return ERR_isError(code); }
-
 const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }


 /*-**************************************************************
 *  FSE NCount encoding-decoding
 ****************************************************************/
-static short FSE_abs(short a) { return (short)(a<0 ? -a : a); }
-
 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
                  const void* headerBuffer, size_t hbSize)
 {
@@ -117,21 +111,21 b' size_t FSE_readNCount (short* normalized'
             } else {
                 bitStream >>= 2;
         }   }
-        {   short const max = (short)((2*threshold-1)-remaining);
-            short count;
+        {   int const max = (2*threshold-1) - remaining;
+            int count;

             if ((bitStream & (threshold-1)) < (U32)max) {
-                count = (short)(bitStream & (threshold-1));
-                bitCount   += nbBits-1;
+                count = bitStream & (threshold-1);
+                bitCount += nbBits-1;
             } else {
-                count = (short)(bitStream & (2*threshold-1));
+                count = bitStream & (2*threshold-1);
                 if (count >= threshold) count -= max;
-                bitCount   += nbBits;
+                bitCount += nbBits;
             }

             count--;   /* extra accuracy */
-            remaining -= FSE_abs(count);
-            normalizedCounter[charnum++] = count;
+            remaining -= count < 0 ? -count : count;   /* -1 means +1 */
+            normalizedCounter[charnum++] = (short)count;
             previous0 = !count;
             while (remaining < threshold) {
                 nbBits--;
@@ -1,10 +1,11 b''
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */

 /* The purpose of this file is to have a single list of error strings embedded in binary */
@@ -20,23 +21,27 b' const char* ERR_getErrorString(ERR_enum '
     case PREFIX(GENERIC): return "Error (generic)";
     case PREFIX(prefix_unknown): return "Unknown frame descriptor";
     case PREFIX(version_unsupported): return "Version not supported";
-    case PREFIX(parameter_unknown): return "Unknown parameter type";
     case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
-    case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
     case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
-    case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound";
+    case PREFIX(corruption_detected): return "Corrupted block detected";
+    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
+    case PREFIX(parameter_unsupported): return "Unsupported parameter";
+    case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
     case PREFIX(init_missing): return "Context should be init first";
     case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+    case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough";
     case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
-    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
-    case PREFIX(srcSize_wrong): return "Src size incorrect";
-    case PREFIX(corruption_detected): return "Corrupted block detected";
-    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
     case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
     case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
     case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
     case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
     case PREFIX(dictionary_wrong): return "Dictionary mismatch";
+    case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
+    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
+    case PREFIX(srcSize_wrong): return "Src size is incorrect";
+    /* following error codes are not stable and may be removed or changed in a future version */
+    case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
+    case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
     case PREFIX(maxCode):
     default: return notErrorCode;
     }
@@ -1,10 +1,11 b''
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */

 /* Note : this module is expected to remain private, do not expose it */
@@ -48,10 +49,9 b' typedef ZSTD_ErrorCode ERR_enum;'
 /*-****************************************
 *  Error codes handling
 ******************************************/
-#ifdef ERROR
-#  undef ERROR   /* reported already defined on VS 2015 (Rich Geldreich) */
-#endif
-#define ERROR(name) ((size_t)-PREFIX(name))
+#undef ERROR   /* reported already defined on VS 2015 (Rich Geldreich) */
+#define ERROR(name) ZSTD_ERROR(name)
+#define ZSTD_ERROR(name) ((size_t)-PREFIX(name))

 ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }

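After this reshuffle, ERROR(name) still expands to the error code negated and cast to size_t, so ERR_isError() can keep treating any value above ERROR(maxCode) as an error. A minimal sketch of that convention, using made-up enum values rather than the real ZSTD_ErrorCode list:

/* Illustration of the ((size_t)-code) error convention; the enum values are hypothetical. */
#include <stddef.h>
#include <stdio.h>

typedef enum { err_no_error = 0, err_srcSize_wrong = 72, err_maxCode = 120 } demo_error;

#define DEMO_ERROR(name)  ((size_t)-(err_##name))
static unsigned demo_isError(size_t code) { return code > DEMO_ERROR(maxCode); }

int main(void)
{
    size_t ok  = 42;                        /* an ordinary size result */
    size_t bad = DEMO_ERROR(srcSize_wrong); /* an encoded error */
    printf("%u %u\n", demo_isError(ok), demo_isError(bad));   /* prints: 0 1 */
    return 0;
}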
@@ -31,13 +31,14 b''
   You can contact the author at :
   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
 ****************************************************************** */
-#ifndef FSE_H
-#define FSE_H

 #if defined (__cplusplus)
 extern "C" {
 #endif

+#ifndef FSE_H
+#define FSE_H
+

 /*-*****************************************
 *  Dependencies
@@ -45,6 +46,32 b' extern "C" {'
 #include <stddef.h>    /* size_t, ptrdiff_t */


+/*-*****************************************
+*  FSE_PUBLIC_API : control library symbols visibility
+******************************************/
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define FSE_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define FSE_PUBLIC_API
+#endif
+
+/*------ Version ------*/
+#define FSE_VERSION_MAJOR    0
+#define FSE_VERSION_MINOR    9
+#define FSE_VERSION_RELEASE  0
+
+#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
+#define FSE_QUOTE(str) #str
+#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
+#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
+
+#define FSE_VERSION_NUMBER  (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
+FSE_PUBLIC_API unsigned FSE_versionNumber(void);   /**< library version number; to be used when checking dll version */
+
 /*-****************************************
 *  FSE simple functions
 ******************************************/
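The new version macros let a caller compare the library it linked against with the header it compiled against, which matters mostly for DLL builds. A hedged usage sketch:

/* Illustration: runtime vs compile-time version check. */
#include <stdio.h>
#include "fse.h"

int check_fse_version(void)
{
    unsigned const linked = FSE_versionNumber();   /* version of the linked library */
    if (linked != FSE_VERSION_NUMBER) {            /* version this code was compiled against */
        fprintf(stderr, "fse version mismatch: compiled %u, linked %u\n",
                (unsigned)FSE_VERSION_NUMBER, linked);
        return 1;
    }
    return 0;
}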
@@ -56,8 +83,8 b' extern "C" {'
     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
     if FSE_isError(return), compression failed (more details using FSE_getErrorName())
 */
-size_t FSE_compress(void* dst, size_t dstCapacity,
+FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
               const void* src, size_t srcSize);

 /*! FSE_decompress():
     Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
@@ -69,18 +96,18 b' size_t FSE_compress(void* dst, size_t ds'
     Why ? : making this distinction requires a header.
     Header management is intentionally delegated to the user layer, which can better manage special cases.
 */
-size_t FSE_decompress(void* dst, size_t dstCapacity,
+FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
                 const void* cSrc, size_t cSrcSize);


 /*-*****************************************
 *  Tool functions
 ******************************************/
-size_t FSE_compressBound(size_t size);       /* maximum compressed size */
+FSE_PUBLIC_API size_t FSE_compressBound(size_t size);       /* maximum compressed size */

 /* Error Management */
-unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
-const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+FSE_PUBLIC_API unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
+FSE_PUBLIC_API const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */


 /*-*****************************************
@@ -94,7 +121,7 b' const char* FSE_getErrorName(size_t code'
     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
     if FSE_isError(return), it's an error code.
 */
-size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);


 /*-*****************************************
@@ -127,50 +154,50 b' or to save and provide normalized distri'
 @return : the count of the most frequent symbol (which is not identified).
           if return == srcSize, there is only one symbol.
           Can also return an error code, which can be tested with FSE_isError(). */
-size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+FSE_PUBLIC_API size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);

 /*! FSE_optimalTableLog():
     dynamically downsize 'tableLog' when conditions are met.
     It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
     @return : recommended tableLog (necessarily <= 'maxTableLog') */
-unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);

 /*! FSE_normalizeCount():
     normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
     'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
     @return : tableLog,
               or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
+FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);

 /*! FSE_NCountWriteBound():
     Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
     Typically useful for allocation purpose. */
-size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);

 /*! FSE_writeNCount():
     Compactly save 'normalizedCounter' into 'buffer'.
     @return : size of the compressed table,
               or an errorCode, which can be tested using FSE_isError(). */
-size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);


 /*! Constructor and Destructor of FSE_CTable.
     Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
 typedef unsigned FSE_CTable;   /* don't allocate that. It's only meant to be more restrictive than void* */
-FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
-void  FSE_freeCTable (FSE_CTable* ct);
+FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API void        FSE_freeCTable (FSE_CTable* ct);

 /*! FSE_buildCTable():
     Builds `ct`, which must be already allocated, using FSE_createCTable().
     @return : 0, or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);

 /*! FSE_compress_usingCTable():
     Compress `src` using `ct` into `dst` which must be already allocated.
     @return : size of compressed data (<= `dstCapacity`),
               or 0 if compressed data could not fit into `dst`,
               or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
+FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);

 /*!
 Tutorial :
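The declarations above form the advanced compression pipeline: count symbols, normalize the distribution, serialize the table header, build the CTable, then encode. A sketch assembled only from the signatures shown in this hunk; error handling and the 0/1 special return values are deliberately ignored, so this is an illustration rather than production code:

/* Sketch of the count -> normalize -> writeNCount -> buildCTable -> encode flow. */
#include "fse.h"

size_t demo_fse_compress(void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize)
{
    unsigned count[256];
    short    norm[256];
    unsigned maxSymbolValue = 255;          /* in/out parameter of FSE_count() */
    unsigned tableLog = 12;                 /* hypothetical starting point */

    FSE_count(count, &maxSymbolValue, src, srcSize);
    tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
    FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue);

    {   FSE_CTable* const ct = FSE_createCTable(maxSymbolValue, tableLog);
        size_t const hSize = FSE_writeNCount(dst, dstCapacity, norm, maxSymbolValue, tableLog);
        size_t cSize;
        FSE_buildCTable(ct, norm, maxSymbolValue, tableLog);
        cSize = FSE_compress_usingCTable((char*)dst + hSize, dstCapacity - hSize,
                                         src, srcSize, ct);
        FSE_freeCTable(ct);
        return hSize + cSize;               /* error checks omitted for brevity */
    }
}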
@@ -223,25 +250,25 b' If there is an error, the function will '
 @return : size read from 'rBuffer',
           or an errorCode, which can be tested using FSE_isError().
           maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
-size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);

 /*! Constructor and Destructor of FSE_DTable.
     Note that its size depends on 'tableLog' */
 typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
-FSE_DTable* FSE_createDTable(unsigned tableLog);
-void        FSE_freeDTable(FSE_DTable* dt);
+FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
+FSE_PUBLIC_API void        FSE_freeDTable(FSE_DTable* dt);

 /*! FSE_buildDTable():
     Builds 'dt', which must be already allocated, using FSE_createDTable().
     return : 0, or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);

 /*! FSE_decompress_usingDTable():
     Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
     into `dst` which must be already allocated.
     @return : size of regenerated data (necessarily <= `dstCapacity`),
               or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);

 /*!
 Tutorial :
@@ -271,8 +298,10 b' FSE_decompress_usingDTable() result will'
 If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
 */

+#endif  /* FSE_H */

-#ifdef FSE_STATIC_LINKING_ONLY
+#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
+#define FSE_H_FSE_STATIC_LINKING_ONLY

 /* *** Dependency *** */
 #include "bitstream.h"
@@ -290,6 +319,10 b' If there is an error, the function will '
 #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
 #define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))

+/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
+#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue)   (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
+#define FSE_DTABLE_SIZE(maxTableLog)                   (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))
+

 /* *****************************************
 *  FSE advanced API
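The new FSE_CTABLE_SIZE()/FSE_DTABLE_SIZE() macros give the table footprint in bytes (for malloc()), while the existing *_U32 variants remain the natural fit for statically sized arrays. A small sketch, with hypothetical tableLog and maxSymbolValue values:

/* Illustration: sizing FSE tables statically (U32 elements) or dynamically (bytes). */
#include <stdlib.h>
#define FSE_STATIC_LINKING_ONLY   /* these macros live in the static-linking-only section */
#include "fse.h"

#define DEMO_TABLELOG        12
#define DEMO_MAXSYMBOLVALUE 255

static FSE_DTable staticDTable[FSE_DTABLE_SIZE_U32(DEMO_TABLELOG)];

size_t demo_static_dtable_bytes(void) { return sizeof(staticDTable); }

FSE_CTable* demo_alloc_ctable(void)
{
    /* FSE_CTABLE_SIZE() already multiplies by sizeof(FSE_CTable) */
    return (FSE_CTable*)malloc(FSE_CTABLE_SIZE(DEMO_TABLELOG, DEMO_MAXSYMBOLVALUE));
}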
@@ -312,7 +345,7 b' size_t FSE_countFast(unsigned* count, un'
  */
 size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);

-/*! FSE_count_simple
+/*! FSE_count_simple() :
  * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
  * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
  */
@@ -327,7 +360,7 b' unsigned FSE_optimalTableLog_internal(un'
  * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
  * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
  */
-#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + (
+#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);

 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
@@ -351,6 +384,11 b' size_t FSE_buildDTable_rle (FSE_DTable* '
 size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
 /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */

+typedef enum {
+   FSE_repeat_none,  /**< Cannot use the previous table */
+   FSE_repeat_check, /**< Can use the previous table but it must be checked */
+   FSE_repeat_valid  /**< Can use the previous table and it is asumed to be valid */
+ } FSE_repeat;

 /* *****************************************
 *  FSE symbol compression API
@@ -524,9 +562,9 b' MEM_STATIC void FSE_initCState2(FSE_CSta'

 MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
 {
-    const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+    FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
     const U16* const stateTable = (const U16*)(statePtr->stateTable);
-    U32 nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
+    U32 const nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
     BIT_addBits(bitC,  statePtr->value, nbBitsOut);
     statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
 }
@@ -664,5 +702,3 b' MEM_STATIC unsigned FSE_endOfDState(cons'
 #if defined (__cplusplus)
 }
 #endif
-
-#endif  /* FSE_H */
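fse.h is now split into a stable public section and an experimental section that is only exposed when FSE_STATIC_LINKING_ONLY is defined before inclusion, the same pattern the library's own sources use in the next hunk's include block. Opting in looks like this:

/* Illustration: opting in to the experimental declarations of fse.h. */
#define FSE_STATIC_LINKING_ONLY   /* must be defined before the include */
#include "fse.h"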
@@ -34,35 +34,15 b''


 /* **************************************************************
-*  Compiler specifics
-****************************************************************/
-#ifdef _MSC_VER    /* Visual Studio */
-#  define FORCE_INLINE static __forceinline
-#  include <intrin.h>                    /* For Visual 2005 */
-#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
-#else
-#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-#    ifdef __GNUC__
-#      define FORCE_INLINE static inline __attribute__((always_inline))
-#    else
-#      define FORCE_INLINE static inline
-#    endif
-#  else
-#    define FORCE_INLINE static
-#  endif /* __STDC_VERSION__ */
-#endif
-
-
-/* **************************************************************
 *  Includes
 ****************************************************************/
 #include <stdlib.h>     /* malloc, free, qsort */
 #include <string.h>     /* memcpy, memset */
-#include <stdio.h>      /* printf (debug) */
 #include "bitstream.h"
+#include "compiler.h"
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
+#include "error_private.h"


 /* **************************************************************
@@ -159,8 +139,8 b' size_t FSE_buildDTable(FSE_DTable* dt, c'
     {   U32 u;
         for (u=0; u<tableSize; u++) {
             FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
-            U16 nextState = symbolNext[symbol]++;
-            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+            U32 const nextState = symbolNext[symbol]++;
+            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
             tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
     }   }

@@ -217,7 +197,7 b' size_t FSE_buildDTable_raw (FSE_DTable* '
     return 0;
 }

-FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
           void* dst, size_t maxDstSize,
     const void* cSrc, size_t cSrcSize,
     const FSE_DTable* dt, const unsigned fast)
@@ -31,81 +31,114 b''
   You can contact the author at :
   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
 ****************************************************************** */
-#ifndef HUF_H_298734234
-#define HUF_H_298734234

 #if defined (__cplusplus)
 extern "C" {
 #endif

+#ifndef HUF_H_298734234
+#define HUF_H_298734234

 /* *** Dependencies *** */
 #include <stddef.h>    /* size_t */


-/* *** simple functions *** */
-/**
-HUF_compress() :
-    Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
-    'dst' buffer must be already allocated.
-    Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
-    `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
-    @return : size of compressed data (<= `dstCapacity`).
-    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
-                     if return == 1, srcData is a single repeated byte symbol (RLE compression).
-                     if HUF_isError(return), compression failed (more details using HUF_getErrorName())
-*/
-size_t HUF_compress(void* dst, size_t dstCapacity,
-              const void* src, size_t srcSize);
+/* *** library symbols visibility *** */
+/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
+ *        HUF symbols remain "private" (internal symbols for library only).
+ *        Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define HUF_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define HUF_PUBLIC_API __declspec(dllimport)  /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
+#else
+#  define HUF_PUBLIC_API
+#endif
+
+
+/* ========================== */
+/* ***  simple functions  *** */
+/* ========================== */

-/**
-HUF_decompress() :
-    Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
-    into already allocated buffer 'dst', of minimum size 'dstSize'.
-    `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
-    Note : in contrast with FSE, HUF_decompress can regenerate
-           RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
-           because it knows size to regenerate.
-    @return : size of regenerated data (== originalSize),
-              or an error code, which can be tested using HUF_isError()
-*/
-size_t HUF_decompress(void* dst,  size_t originalSize,
-                const void* cSrc, size_t cSrcSize);
+/** HUF_compress() :
+ *  Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
+ *  'dst' buffer must be already allocated.
+ *  Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
+ *  `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
+ * @return : size of compressed data (<= `dstCapacity`).
+ *           Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+ *                            if HUF_isError(return), compression failed (more details using HUF_getErrorName())
+ */
+HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
+
+/** HUF_decompress() :
+ *  Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
+ *  into already allocated buffer 'dst', of minimum size 'dstSize'.
+ *  `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
+ *  Note : in contrast with FSE, HUF_decompress can regenerate
+ *         RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
+ *         because it knows size to regenerate (originalSize).
+ * @return : size of regenerated data (== originalSize),
+ *           or an error code, which can be tested using HUF_isError()
+ */
+HUF_PUBLIC_API size_t HUF_decompress(void* dst,  size_t originalSize,
+                               const void* cSrc, size_t cSrcSize);


 /* ***   Tool functions *** */
 #define HUF_BLOCKSIZE_MAX (128 * 1024)       /**< maximum input size for a single block compressed with HUF_compress */
-size_t HUF_compressBound(size_t size);
+HUF_PUBLIC_API size_t HUF_compressBound(size_t size);   /**< maximum compressed size (worst case) */

 /* Error Management */
-unsigned    HUF_isError(size_t code);
-const char* HUF_getErrorName(size_t code);
+HUF_PUBLIC_API unsigned    HUF_isError(size_t code);       /**< tells if a return value is an error code */
+HUF_PUBLIC_API const char* HUF_getErrorName(size_t code);  /**< provides error code string (useful for debugging) */


 /* ***   Advanced function   *** */

 /** HUF_compress2() :
- *   Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` .
- *   `tableLog` must be <= HUF_TABLELOG_MAX . */
-size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+ *  Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
+ *  `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
+ *  `tableLog` must be `<= HUF_TABLELOG_MAX` . */
+HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               unsigned maxSymbolValue, unsigned tableLog);

 /** HUF_compress4X_wksp() :
- *   Same as HUF_compress2(), but uses externally allocated `workSpace` */
-size_t HUF_compress4X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /**< `workSpace` must be a table of at least 1024 unsigned */
+ *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
+ *  `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
+#define HUF_WORKSPACE_SIZE (6 << 10)
+#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
+HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     unsigned maxSymbolValue, unsigned tableLog,
+                                     void* workSpace, size_t wkspSize);

-
+#endif   /* HUF_H_298734234 */

-#ifdef HUF_STATIC_LINKING_ONLY
+/* ******************************************************************
+ *  WARNING !!
+ *  The following section contains advanced and experimental definitions
+ *  which shall never be used in the context of a dynamic library,
+ *  because they are not guaranteed to remain stable in the future.
+ *  Only consider them in association with static linking.
+ * *****************************************************************/
+#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
+#define HUF_H_HUF_STATIC_LINKING_ONLY

 /* *** Dependencies *** */
 #include "mem.h"   /* U32 */


 /* *** Constants *** */
-#define HUF_TABLELOG_ABSOLUTEMAX  15   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
-#define HUF_TABLELOG_MAX  12   /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
-#define HUF_TABLELOG_DEFAULT  11   /* tableLog by default, when not specified */
-#define HUF_SYMBOLVALUE_MAX 255
+#define HUF_TABLELOG_MAX      12      /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_TABLELOG_DEFAULT  11      /* default tableLog value when none specified */
+#define HUF_SYMBOLVALUE_MAX  255
+
+#define HUF_TABLELOG_ABSOLUTEMAX  15  /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
 #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
 #  error "HUF_TABLELOG_MAX is too large !"
 #endif
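HUF_compress4X_wksp() now documents its workspace requirement through HUF_WORKSPACE_SIZE and HUF_WORKSPACE_SIZE_U32 instead of an informal "1024 unsigned" table. A hedged usage sketch; 255 and 11 below are simply the HUF_SYMBOLVALUE_MAX and HUF_TABLELOG_DEFAULT values from this header:

/* Illustration: calling HUF_compress4X_wksp() with a stack workspace of the documented size. */
#include "huf.h"

size_t demo_huf_compress(void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize)
{
    /* an array of unsigned is 4-byte aligned and HUF_WORKSPACE_SIZE bytes large */
    unsigned workSpace[HUF_WORKSPACE_SIZE / sizeof(unsigned)];
    return HUF_compress4X_wksp(dst, dstCapacity, src, srcSize,
                               255 /*maxSymbolValue*/, 11 /*tableLog*/,
                               workSpace, sizeof(workSpace));
}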
@@ -116,12 +149,14 b' size_t HUF_compress4X_wksp (void* dst, s'
 ******************************************/
 /* HUF buffer bounds */
 #define HUF_CTABLEBOUND 129
-#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic */
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true when incompressible is pre-filtered with fast heuristic */
 #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */

 /* static allocation of HUF's Compression Table */
+#define HUF_CTABLE_SIZE_U32(maxSymbolValue)   ((maxSymbolValue)+1)   /* Use tables of U32, for proper alignment */
+#define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
 #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
-    U32 name##hb[maxSymbolValue+1]; \
+    U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
     void* name##hv = &(name##hb); \
     HUF_CElt* name = (HUF_CElt*)(name##hv)   /* no final ; */

@@ -142,97 +177,151 b' size_t HUF_decompress4X4 (void* dst, siz'

 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< decodes RLE and uncompressed */
 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< single-symbol decoder */
 size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< double-symbols decoder */


 /* ****************************************
 *  HUF detailed API
-******************************************/
-/*!
-HUF_compress() does the following:
-1. count symbol occurrence from source[] into table count[] using FSE_count()
-2. (optional) refine tableLog using HUF_optimalTableLog()
-3. build Huffman table from count using HUF_buildCTable()
-4. save Huffman table to memory buffer using HUF_writeCTable()
-5. encode the data stream using HUF_compress4X_usingCTable()
+* ****************************************/

-The following API allows targeting specific sub-functions for advanced tasks.
-For example, it's possible to compress several blocks using the same 'CTable',
-or to save and regenerate 'CTable' using external methods.
-*/
-/* FSE_count() : find it within "fse.h" */
+/*! HUF_compress() does the following:
+ * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
+ * 2. (optional) refine tableLog using HUF_optimalTableLog()
+ * 3. build Huffman table from count using HUF_buildCTable()
+ * 4. save Huffman table to memory buffer using HUF_writeCTable()
+ * 5. encode the data stream using HUF_compress4X_usingCTable()
+ *
+ * The following API allows targeting specific sub-functions for advanced tasks.
+ * For example, it's possible to compress several blocks using the same 'CTable',
+ * or to save and regenerate 'CTable' using external methods.
+ */
 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
 typedef struct HUF_CElt_s HUF_CElt;   /* incomplete type */
-size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);
+size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);   /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);

+typedef enum {
+   HUF_repeat_none,  /**< Cannot use the previous table */
+   HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
+   HUF_repeat_valid  /**< Can use the previous table and it is asumed to be valid */
+ } HUF_repeat;
+/** HUF_compress4X_repeat() :
+*   Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+*   If it uses hufTable it does not modify hufTable or repeat.
+*   If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+*   If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
+                       const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned tableLog,
+                       void* workSpace, size_t wkspSize,    /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);

 /** HUF_buildCTable_wksp() :
  *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
+ *  `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
  */
+#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
+#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
 size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);

 /*! HUF_readStats() :
-    Read compact Huffman tree, saved by HUF_writeCTable().
-    `huffWeight` is destination buffer.
-    @return : size read from `src` , or an error Code .
-    Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
-size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
-                     U32* nbSymbolsPtr, U32* tableLogPtr,
+ *  Read compact Huffman tree, saved by HUF_writeCTable().
+ *  `huffWeight` is destination buffer.
+ * @return : size read from `src` , or an error Code .
+ *  Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
+                     U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
                      const void* src, size_t srcSize);

 /** HUF_readCTable() :
-*   Loading a CTable saved with HUF_writeCTable() */
-size_t HUF_readCTable (HUF_CElt* CTable, unsigned maxSymbolValue, const void* src, size_t srcSize);
+ *  Loading a CTable saved with HUF_writeCTable() */
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);


 /*
-HUF_decompress() does the following:
-1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
-2. build Huffman table from save, using HUF_readDTableXn()
-3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
+ * HUF_decompress() does the following:
+ * 1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
+ * 2. build Huffman table from save, using HUF_readDTableX?()
+ * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
 */

 /** HUF_selectDecoder() :
-*   Tells which decoder is likely to decode faster,
-*   based on a set of pre-computed metrics.
-*   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
-*   Assumption : 0 < dstSize <= 128 KB */
+ *  Tells which decoder is likely to decode faster,
+ *  based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
+ *  Assumption : 0 < dstSize <= 128 KB */
 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);

+/**
+ *  The minimum workspace size for the `workSpace` used in
+ *  HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp().
+ *
+ *  The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
+ *  HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.
+ *  Buffer overflow errors may potentially occur if code modifications result in
+ *  a required workspace size greater than that specified in the following
+ *  macro.
+ */
+#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
+#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
+
 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
 size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX4_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);

 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
 size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);


+/* ====================== */
 /* single stream variants */
+/* ====================== */

 size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
-size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /**< `workSpace` must be a table of at least 1024 unsigned */
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+/** HUF_compress1X_repeat() :
+ *  Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+ *  If it uses hufTable it does not modify hufTable or repeat.
+ *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+ *  If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
+                       const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned tableLog,
+                       void* workSpace, size_t wkspSize,   /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);

 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
|
301 | size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ | |
221 | size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ |
|
302 | size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ | |
222 |
|
303 | |||
223 | size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); |
|
304 | size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); | |
|
305 | size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); | |||
224 | size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ |
|
306 | size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ | |
|
307 | size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ | |||
225 | size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ |
|
308 | size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ | |
|
309 | size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ | |||
226 |
|
310 | |||
227 | size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ |
|
311 | size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ | |
228 | size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); |
|
312 | size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | |
229 | size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); |
|
313 | size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | |
230 |
|
314 | |||
|
315 | /* BMI2 variants. | |||
|
316 | * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. | |||
|
317 | */ | |||
|
318 | size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); | |||
|
319 | size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); | |||
|
320 | size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); | |||
|
321 | size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); | |||
|
322 | ||||
231 | #endif /* HUF_STATIC_LINKING_ONLY */ |
|
323 | #endif /* HUF_STATIC_LINKING_ONLY */ | |
232 |
|
324 | |||
233 |
|
||||
234 | #if defined (__cplusplus) |
|
325 | #if defined (__cplusplus) | |
235 | } |
|
326 | } | |
236 | #endif |
|
327 | #endif | |
237 |
|
||||
238 | #endif /* HUF_H_298734234 */ |
|
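The workspace macros added above make the scratch-memory contract of the `_wksp` entry points explicit. As a quick sanity check of the arithmetic (not part of the patch, and assuming HUF_SYMBOLVALUE_MAX is 255 as defined earlier in huf.h), HUF_CTABLE_WORKSPACE_SIZE_U32 works out to 2*255 + 1 + 1 = 512 unsigned values, i.e. 2048 bytes on common targets, while HUF_DECOMPRESS_WORKSPACE_SIZE is a flat (2 << 10) = 2048 bytes. A caller can therefore keep both scratch areas on the stack instead of allocating:

/* Illustrative sketch only: stack-allocated scratch areas sized by the new
 * macros. Assumes the static-linking-only section of huf.h is available and
 * that HUF_SYMBOLVALUE_MAX is 255 (defined earlier in huf.h). */
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#include "mem.h"    /* U32 */
#include <stdio.h>

static void show_workspace_sizes(void)
{
    unsigned ctableWksp[HUF_CTABLE_WORKSPACE_SIZE_U32];      /* for HUF_buildCTable_wksp() */
    U32      dtableWksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];  /* for HUF_readDTableX?_wksp() */

    printf("CTable workspace: %u bytes\n", (unsigned)sizeof(ctableWksp));  /* 2048 on typical targets */
    printf("DTable workspace: %u bytes\n", (unsigned)sizeof(dtableWksp));  /* 2048 = 2 << 10 */
}

The same pattern applies to the `_bmi2` variants: the caller detects BMI2 support once and threads the resulting 0/1 flag through these prototypes; the decoding calls themselves are otherwise unchanged.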
@@ -1,10 +1,11 @@
 /*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
 #ifndef MEM_H_MODULE
@@ -48,14 +49,13 @@ MEM_STATIC void MEM_check(void) { MEM_ST
 *****************************************************************/
 #if  !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
 # include <stdint.h>
   typedef  uint8_t BYTE;
   typedef uint16_t U16;
   typedef  int16_t S16;
   typedef uint32_t U32;
   typedef  int32_t S32;
   typedef uint64_t U64;
   typedef  int64_t S64;
-  typedef intptr_t iPtrDiff;
 #else
   typedef unsigned char      BYTE;
   typedef unsigned short     U16;
@@ -64,7 +64,6 @@ MEM_STATIC void MEM_check(void) { MEM_ST
   typedef   signed int       S32;
   typedef unsigned long long U64;
   typedef   signed long long S64;
-  typedef ptrdiff_t          iPtrDiff;
 #endif
 
 
@@ -76,19 +75,18 @@ MEM_STATIC void MEM_check(void) { MEM_ST
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The below switch allow to select different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
-* Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+* Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method is portable but violate C standard.
 *            It can generate buggy code on targets depending on alignment.
-*            In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+*            In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
 * Prefer these methods in priority order (0 > 1 > 2)
 */
 #ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
 #  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
 #    define MEM_FORCE_MEMORY_ACCESS 2
-#  elif defined(__INTEL_COMPILER) || \
-  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#  elif defined(__INTEL_COMPILER) || defined(__GNUC__)
 #    define MEM_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
@@ -109,7 +107,7 @@ Only use if no other choice to achieve b
 MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
 MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
 MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
 MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
 
 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
@@ -120,21 +118,27 @@ MEM_STATIC void MEM_write64(void* memPtr
 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
 /* currently only defined for gcc and icc */
 #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
     __pragma( pack(push, 1) )
-    typedef union { U16 u16; U32 u32; U64 u64; size_t st; } unalign;
+    typedef struct { U16 v; } unalign16;
+    typedef struct { U32 v; } unalign32;
+    typedef struct { U64 v; } unalign64;
+    typedef struct { size_t v; } unalignArch;
    __pragma( pack(pop) )
 #else
-    typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
+    typedef struct { U16 v; } __attribute__((packed)) unalign16;
+    typedef struct { U32 v; } __attribute__((packed)) unalign32;
+    typedef struct { U64 v; } __attribute__((packed)) unalign64;
+    typedef struct { size_t v; } __attribute__((packed)) unalignArch;
 #endif
 
-MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
-MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
-MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
-MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; }
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; }
+MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; }
 
-MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
-MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
-MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; }
 
 #else
 
@@ -182,7 +186,7 @@ MEM_STATIC U32 MEM_swap32(U32 in)
 {
 #if defined(_MSC_VER)     /* Visual Studio */
     return _byteswap_ulong(in);
-#elif defined (__GNUC__)
+#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
     return __builtin_bswap32(in);
 #else
     return  ((in << 24) & 0xff000000 ) |
@@ -196,7 +200,7 @@ MEM_STATIC U64 MEM_swap64(U64 in)
 {
 #if defined(_MSC_VER)     /* Visual Studio */
     return _byteswap_uint64(in);
-#elif defined (__GNUC__)
+#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
     return __builtin_bswap64(in);
 #else
     return  ((in << 56) & 0xff00000000000000ULL) |
@@ -351,20 +355,6 @@ MEM_STATIC void MEM_writeBEST(void* memP
 }
 
 
-/* function safe only for comparisons */
-MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length)
-{
-    switch (length)
-    {
-    default :
-    case 4 : return MEM_read32(memPtr);
-    case 3 : if (MEM_isLittleEndian())
-                return MEM_read32(memPtr)<<8;
-             else
-                return MEM_read32(memPtr)>>8;
-    }
-}
-
 #if defined (__cplusplus)
 }
 #endif
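For readers unfamiliar with the three access methods described in the mem.h comment above, the default (method 0) boils down to a memcpy-based read. The sketch below is not from the patch; it is a plain-C illustration of why method 0 is the safe default (no alignment assumption, and optimizing compilers typically lower it to a single load where the target allows it) and of the direct cast that method 2 performs at the cost of undefined behaviour on misaligned pointers.

#include <stdint.h>
#include <string.h>

/* Method 0 in miniature: an unaligned-safe 32-bit read built on memcpy(). */
static uint32_t read32_any_alignment(const void* ptr)
{
    uint32_t v;
    memcpy(&v, ptr, sizeof(v));   /* defined behaviour for any alignment of ptr */
    return v;
}

/* Method 2 in miniature: the direct cast the comment warns about.
 * Fast where it works, undefined behaviour if ptr is not 4-byte aligned. */
static uint32_t read32_direct(const void* ptr)
{
    return *(const uint32_t*)ptr;
}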
@@ -1,17 +1,18 @@
 /*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
 
 /* ======   Dependencies   ======= */
 #include <stddef.h>    /* size_t */
-#include <stdlib.h>    /* malloc, calloc, free */
 #include "pool.h"
+#include "zstd_internal.h"  /* ZSTD_malloc, ZSTD_free */
 
 /* ======   Compiler specifics   ====== */
 #if defined(_MSC_VER)
@@ -25,13 +26,14 @@
 
 /* A job is a function and an opaque argument */
 typedef struct POOL_job_s {
   POOL_function function;
   void *opaque;
 } POOL_job;
 
 struct POOL_ctx_s {
+    ZSTD_customMem customMem;
     /* Keep track of the threads */
-    pthread_t *threads;
+    ZSTD_pthread_t *threads;
     size_t numThreads;
 
     /* The queue is a circular buffer */
@@ -39,12 +41,18 @@ struct POOL_ctx_s {
     size_t queueHead;
     size_t queueTail;
     size_t queueSize;
+
+    /* The number of threads working on jobs */
+    size_t numThreadsBusy;
+    /* Indicates if the queue is empty */
+    int queueEmpty;
+
     /* The mutex protects the queue */
-    pthread_mutex_t queueMutex;
+    ZSTD_pthread_mutex_t queueMutex;
     /* Condition variable for pushers to wait on when the queue is full */
-    pthread_cond_t queuePushCond;
+    ZSTD_pthread_cond_t queuePushCond;
     /* Condition variables for poppers to wait on when the queue is empty */
-    pthread_cond_t queuePopCond;
+    ZSTD_pthread_cond_t queuePopCond;
     /* Indicates if the queue is shutting down */
     int shutdown;
 };
@@ -59,55 +67,73 @@ static void* POOL_thread(void* opaque) {
     if (!ctx) { return NULL; }
     for (;;) {
         /* Lock the mutex and wait for a non-empty queue or until shutdown */
-        pthread_mutex_lock(&ctx->queueMutex);
-        while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) {
-            pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
+        ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+
+        while (ctx->queueEmpty && !ctx->shutdown) {
+            ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
         }
         /* empty => shutting down: so stop */
-        if (ctx->queueHead == ctx->queueTail) {
-            pthread_mutex_unlock(&ctx->queueMutex);
+        if (ctx->queueEmpty) {
+            ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
            return opaque;
        }
        /* Pop a job off the queue */
        {   POOL_job const job = ctx->queue[ctx->queueHead];
            ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
+            ctx->numThreadsBusy++;
+            ctx->queueEmpty = ctx->queueHead == ctx->queueTail;
            /* Unlock the mutex, signal a pusher, and run the job */
-            pthread_mutex_unlock(&ctx->queueMutex);
-            pthread_cond_signal(&ctx->queuePushCond);
+            ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+            ZSTD_pthread_cond_signal(&ctx->queuePushCond);
+
            job.function(job.opaque);
-        }
-    }
+
+            /* If the intended queue size was 0, signal after finishing job */
+            if (ctx->queueSize == 1) {
+                ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+                ctx->numThreadsBusy--;
+                ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+                ZSTD_pthread_cond_signal(&ctx->queuePushCond);
+        }   }
+    }  /* for (;;) */
    /* Unreachable */
 }
 
-POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
-    POOL_ctx *ctx;
+POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
+    return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
+}
+
+POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
+    POOL_ctx* ctx;
    /* Check the parameters */
-    if (!numThreads || !queueSize) { return NULL; }
+    if (!numThreads) { return NULL; }
    /* Allocate the context and zero initialize */
-    ctx = (POOL_ctx *)calloc(1, sizeof(POOL_ctx));
+    ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem);
    if (!ctx) { return NULL; }
    /* Initialize the job queue.
     * It needs one extra space since one space is wasted to differentiate empty
     * and full queues.
     */
    ctx->queueSize = queueSize + 1;
-    ctx->queue = (POOL_job *)malloc(ctx->queueSize * sizeof(POOL_job));
+    ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem);
    ctx->queueHead = 0;
    ctx->queueTail = 0;
-    pthread_mutex_init(&ctx->queueMutex, NULL);
-    pthread_cond_init(&ctx->queuePushCond, NULL);
-    pthread_cond_init(&ctx->queuePopCond, NULL);
+    ctx->numThreadsBusy = 0;
+    ctx->queueEmpty = 1;
+    (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
+    (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
+    (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
    ctx->shutdown = 0;
    /* Allocate space for the thread handles */
-    ctx->threads = (pthread_t *)malloc(numThreads * sizeof(pthread_t));
+    ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
    ctx->numThreads = 0;
+    ctx->customMem = customMem;
    /* Check for errors */
    if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
    /* Initialize the threads */
    {   size_t i;
        for (i = 0; i < numThreads; ++i) {
-            if (pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
+            if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
                ctx->numThreads = i;
                POOL_free(ctx);
                return NULL;
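The queue initialized above is the classic ring buffer with one deliberately wasted slot: allocating queueSize + 1 entries is what lets "head == tail" mean empty while a full queue stops one slot short, and the new numThreadsBusy/queueEmpty fields exist because a pool created with an intended queue size of 0 collapses to a one-slot ring where that trick alone is not enough. A minimal illustration of the invariant (not part of the patch):

#include <stddef.h>

/* Ring-buffer bookkeeping in isolation: with capacity N+1 for N usable slots,
 * "empty" and "full" stay distinguishable without a separate counter. */
static int ring_is_empty(size_t head, size_t tail)
{
    return head == tail;
}

static int ring_is_full(size_t head, size_t tail, size_t size)
{
    return ((tail + 1) % size) == head;   /* the next push would collide with head */
}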
@@ -120,75 +146,138 @@ POOL_ctx *POOL_create(size_t numThreads,
 /*! POOL_join() :
     Shutdown the queue, wake any sleeping threads, and join all of the threads.
 */
-static void POOL_join(POOL_ctx *ctx) {
+static void POOL_join(POOL_ctx* ctx) {
     /* Shut down the queue */
-    pthread_mutex_lock(&ctx->queueMutex);
+    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
     ctx->shutdown = 1;
-    pthread_mutex_unlock(&ctx->queueMutex);
+    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
     /* Wake up sleeping threads */
-    pthread_cond_broadcast(&ctx->queuePushCond);
-    pthread_cond_broadcast(&ctx->queuePopCond);
+    ZSTD_pthread_cond_broadcast(&ctx->queuePushCond);
+    ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
     /* Join all of the threads */
     {   size_t i;
         for (i = 0; i < ctx->numThreads; ++i) {
-            pthread_join(ctx->threads[i], NULL);
+            ZSTD_pthread_join(ctx->threads[i], NULL);
     }   }
 }
 
 void POOL_free(POOL_ctx *ctx) {
     if (!ctx) { return; }
     POOL_join(ctx);
-    pthread_mutex_destroy(&ctx->queueMutex);
-    pthread_cond_destroy(&ctx->queuePushCond);
-    pthread_cond_destroy(&ctx->queuePopCond);
-    if (ctx->queue) free(ctx->queue);
-    if (ctx->threads) free(ctx->threads);
-    free(ctx);
+    ZSTD_pthread_mutex_destroy(&ctx->queueMutex);
+    ZSTD_pthread_cond_destroy(&ctx->queuePushCond);
+    ZSTD_pthread_cond_destroy(&ctx->queuePopCond);
+    ZSTD_free(ctx->queue, ctx->customMem);
+    ZSTD_free(ctx->threads, ctx->customMem);
+    ZSTD_free(ctx, ctx->customMem);
+}
+
+size_t POOL_sizeof(POOL_ctx *ctx) {
+    if (ctx==NULL) return 0;  /* supports sizeof NULL */
+    return sizeof(*ctx)
+        + ctx->queueSize * sizeof(POOL_job)
+        + ctx->numThreads * sizeof(ZSTD_pthread_t);
+}
+
+/**
+ * Returns 1 if the queue is full and 0 otherwise.
+ *
+ * If the queueSize is 1 (the pool was created with an intended queueSize of 0),
+ * then a queue is empty if there is a thread free and no job is waiting.
+ */
+static int isQueueFull(POOL_ctx const* ctx) {
+    if (ctx->queueSize > 1) {
+        return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize);
+    } else {
+        return ctx->numThreadsBusy == ctx->numThreads ||
+               !ctx->queueEmpty;
+    }
+}
+
+
+static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
+{
+    POOL_job const job = {function, opaque};
+    assert(ctx != NULL);
+    if (ctx->shutdown) return;
+
+    ctx->queueEmpty = 0;
+    ctx->queue[ctx->queueTail] = job;
+    ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize;
+    ZSTD_pthread_cond_signal(&ctx->queuePopCond);
+}
+
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque)
+{
+    assert(ctx != NULL);
+    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+    /* Wait until there is space in the queue for the new job */
+    while (isQueueFull(ctx) && (!ctx->shutdown)) {
+        ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
+    }
+    POOL_add_internal(ctx, function, opaque);
+    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
 }
 
-void POOL_add(void *ctxVoid, POOL_function function, void *opaque) {
-    POOL_ctx *ctx = (POOL_ctx *)ctxVoid;
-    if (!ctx) { return; }
 
-    pthread_mutex_lock(&ctx->queueMutex);
-    {   POOL_job const job = {function, opaque};
-
-        /* Wait until there is space in the queue for the new job */
-        size_t newTail = (ctx->queueTail + 1) % ctx->queueSize;
-        while (ctx->queueHead == newTail && !ctx->shutdown) {
-            pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
-            newTail = (ctx->queueTail + 1) % ctx->queueSize;
-        }
-        /* The queue is still going => there is space */
-        if (!ctx->shutdown) {
-            ctx->queue[ctx->queueTail] = job;
-            ctx->queueTail = newTail;
-        }
-    }
-    pthread_mutex_unlock(&ctx->queueMutex);
-    pthread_cond_signal(&ctx->queuePopCond);
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque)
+{
+    assert(ctx != NULL);
+    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+    if (isQueueFull(ctx)) {
+        ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+        return 0;
+    }
+    POOL_add_internal(ctx, function, opaque);
+    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+    return 1;
 }
 
+
 #else  /* ZSTD_MULTITHREAD not defined */
+
+/* ========================== */
 /* No multi-threading support */
+/* ========================== */
 
+
-/* We don't need any data, but if it is empty malloc() might return NULL. */
+/* We don't need any data, but if it is empty, malloc() might return NULL. */
 struct POOL_ctx_s {
-    int data;
+    int dummy;
 };
+static POOL_ctx g_ctx;
 
-POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
-    (void)numThreads;
-    (void)queueSize;
-    return (POOL_ctx *)malloc(sizeof(POOL_ctx));
+POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
+    return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
 }
 
-void POOL_free(POOL_ctx *ctx) {
-    if (ctx) free(ctx);
+POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
+    (void)numThreads;
+    (void)queueSize;
+    (void)customMem;
+    return &g_ctx;
+}
+
+void POOL_free(POOL_ctx* ctx) {
+    assert(!ctx || ctx == &g_ctx);
+    (void)ctx;
 }
 
-void POOL_add(void *ctx, POOL_function function, void *opaque) {
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) {
     (void)ctx;
     function(opaque);
+}
+
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) {
+    (void)ctx;
+    function(opaque);
+    return 1;
+}
+
+size_t POOL_sizeof(POOL_ctx* ctx) {
+    if (ctx==NULL) return 0;  /* supports sizeof NULL */
+    assert(ctx == &g_ctx);
+    return sizeof(*ctx);
 }
 
 #endif  /* ZSTD_MULTITHREAD */
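POOL_tryAdd(), added above, never blocks: it returns 1 if the job was queued and 0 if the queue is full (or, for a zero-size queue, if every worker is busy). One way a caller might use it is to fall back to running the job on the current thread; this pattern is an illustration under that reading of the declared semantics, not code from the patch. The demo_job type and demo_run worker below are hypothetical names introduced only for the sketch.

#include "pool.h"

/* Hypothetical job payload and worker, for illustration only. */
typedef struct { int id; } demo_job;

static void demo_run(void* opaque)
{
    demo_job* const job = (demo_job*)opaque;
    (void)job;   /* real work would go here */
}

/* Queue the job if a worker can take it; otherwise do it synchronously. */
static void run_now_or_in_pool(POOL_ctx* pool, demo_job* job)
{
    if (!POOL_tryAdd(pool, demo_run, job)) {
        demo_run(job);   /* no room in the pool: run on the calling thread */
    }
}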
@@ -1,11 +1,13 @@
 /*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
+
 #ifndef POOL_H
 #define POOL_H
 
@@ -15,38 +17,54 @@ extern "C" {
 
 
 #include <stddef.h>   /* size_t */
+#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_customMem */
+#include "zstd.h"
 
 typedef struct POOL_ctx_s POOL_ctx;
 
 /*! POOL_create() :
-    Create a thread pool with at most `numThreads` threads.
-    `numThreads` must be at least 1.
-    The maximum number of queued jobs before blocking is `queueSize`.
-    `queueSize` must be at least 1.
-    @return : The POOL_ctx pointer on success else NULL.
+ *  Create a thread pool with at most `numThreads` threads.
+ *  `numThreads` must be at least 1.
+ *  The maximum number of queued jobs before blocking is `queueSize`.
+ * @return : POOL_ctx pointer on success, else NULL.
 */
-POOL_ctx *POOL_create(size_t numThreads, size_t queueSize);
+POOL_ctx* POOL_create(size_t numThreads, size_t queueSize);
+
+POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem);
 
 /*! POOL_free() :
     Free a thread pool returned by POOL_create().
 */
-void POOL_free(POOL_ctx *ctx);
+void POOL_free(POOL_ctx* ctx);
+
+/*! POOL_sizeof() :
+    return memory usage of pool returned by POOL_create().
+*/
+size_t POOL_sizeof(POOL_ctx* ctx);
 
 /*! POOL_function :
     The function type that can be added to a thread pool.
 */
-typedef void (*POOL_function)(void *);
+typedef void (*POOL_function)(void*);
 /*! POOL_add_function :
     The function type for a generic thread pool add function.
 */
-typedef void (*POOL_add_function)(void *, POOL_function, void *);
+typedef void (*POOL_add_function)(void*, POOL_function, void*);
 
 /*! POOL_add() :
-    Add the job `function(opaque)` to the thread pool.
+    Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
     Possibly blocks until there is room in the queue.
     Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
 */
-void POOL_add(void *ctx, POOL_function function, void *opaque);
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
+
+
+/*! POOL_tryAdd() :
+    Add the job `function(opaque)` to the thread pool if a worker is available.
+    return immediately otherwise.
+   @return : 1 if successful, 0 if not.
+*/
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);
 
 
 #if defined (__cplusplus)
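Taken together, the public surface of pool.h after this change stays small: create a pool, add jobs, free it (POOL_free() joins the workers first, as pool.c above shows). A minimal usage sketch under those assumptions, not code from the patch:

#include <stdio.h>
#include "pool.h"

/* A trivial job: print the id it was handed via the opaque pointer. */
static void say_hello(void* opaque)
{
    printf("hello from job %d\n", *(int*)opaque);
}

int main(void)
{
    int ids[4] = {0, 1, 2, 3};
    POOL_ctx* const pool = POOL_create(2 /* threads */, 8 /* queued jobs */);
    if (pool == NULL) return 1;

    {   int i;
        for (i = 0; i < 4; ++i)
            POOL_add(pool, say_hello, &ids[i]);   /* may block if the queue is full */
    }

    POOL_free(pool);   /* shuts down the queue and joins the workers */
    return 0;
}

Note that `ids` outlives the pool, which satisfies the documented requirement that `opaque` stay alive until each job has completed; without ZSTD_MULTITHREAD the same code still works because POOL_add() simply runs the job synchronously.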
@@ -1,11 +1,10 @@
-
 /**
  * Copyright (c) 2016 Tino Reichardt
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  *
  * You can contact the author at:
  * - zstdmt source repository: https://github.com/mcmilk/zstdmt
@@ -15,11 +14,8 @@
  * This file will hold wrapper for systems, which do not support pthreads
  */
 
-/* ======   Compiler specifics   ====== */
-#if defined(_MSC_VER)
-#  pragma warning(disable : 4206)  /* disable: C4206: translation unit is empty (when ZSTD_MULTITHREAD is not defined) */
-#endif
-
+/* create fake symbol to avoid empty trnaslation unit warning */
+int g_ZSTD_threading_useles_symbol;
 
 #if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
 
@@ -39,12 +35,12 @@
 
 static unsigned __stdcall worker(void *arg)
 {
-    pthread_t* const thread = (pthread_t*) arg;
+    ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg;
     thread->arg = thread->start_routine(thread->arg);
     return 0;
 }
 
-int pthread_create(pthread_t* thread, const void* unused,
+int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
             void* (*start_routine) (void*), void* arg)
 {
     (void)unused;
@@ -58,16 +54,16 @@ int pthread_create(pthread_t* thread, co
     return 0;
 }
 
-int _pthread_join(pthread_t * thread, void **value_ptr)
+int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
 {
     DWORD result;
 
-    if (!thread->handle) return 0;
+    if (!thread.handle) return 0;
 
-    result = WaitForSingleObject(thread->handle, INFINITE);
+    result = WaitForSingleObject(thread.handle, INFINITE);
     switch (result) {
     case WAIT_OBJECT_0:
-        if (value_ptr) *value_ptr = thread->arg;
+        if (value_ptr) *value_ptr = thread.arg;
         return 0;
     case WAIT_ABANDONED:
         return EINVAL;
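The Win32 branch above now receives ZSTD_pthread_t by value in ZSTD_pthread_join(), which is why the `->` member accesses became `.` accesses. A hedged sketch of the calling convention, assuming the matching declarations live in threading.h from the same patch:

#include "threading.h"   /* ZSTD_pthread_t, ZSTD_pthread_create, ZSTD_pthread_join (assumed declared here) */

/* A deliberately boring worker so the sketch stays self-contained. */
static void* count_to_a_million(void* arg)
{
    volatile unsigned n = 0;
    while (n < 1000000u) n++;
    (void)arg;
    return NULL;
}

static int spawn_and_wait(void)
{
    ZSTD_pthread_t t;
    if (ZSTD_pthread_create(&t, NULL, count_to_a_million, NULL))
        return -1;                       /* non-zero return means creation failed */
    return ZSTD_pthread_join(t, NULL);   /* note: the handle is passed by value */
}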