zstandard: vendor python-zstandard 0.9.0...
Gregory Szorc
r37513:b1fb341d default

The requested changes are too big and content was truncated.

@@ -0,0 +1,405 b''
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8
9 #include "python-zstandard.h"
10
11 extern PyObject* ZstdError;
12
13 static void set_unsupported_operation(void) {
14 PyObject* iomod;
15 PyObject* exc;
16
17 iomod = PyImport_ImportModule("io");
18 if (NULL == iomod) {
19 return;
20 }
21
22 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
23 if (NULL == exc) {
24 Py_DECREF(iomod);
25 return;
26 }
27
28 PyErr_SetNone(exc);
29 Py_DECREF(exc);
30 Py_DECREF(iomod);
31 }
32
33 static void reader_dealloc(ZstdCompressionReader* self) {
34 Py_XDECREF(self->compressor);
35 Py_XDECREF(self->reader);
36
37 if (self->buffer.buf) {
38 PyBuffer_Release(&self->buffer);
39 memset(&self->buffer, 0, sizeof(self->buffer));
40 }
41
42 PyObject_Del(self);
43 }
44
45 static ZstdCompressionReader* reader_enter(ZstdCompressionReader* self) {
46 size_t zresult;
47
48 if (self->entered) {
49 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
50 return NULL;
51 }
52
53 zresult = ZSTD_CCtx_setPledgedSrcSize(self->compressor->cctx, self->sourceSize);
54 if (ZSTD_isError(zresult)) {
55 PyErr_Format(ZstdError, "error setting source size: %s",
56 ZSTD_getErrorName(zresult));
57 return NULL;
58 }
59
60 self->entered = 1;
61
62 Py_INCREF(self);
63 return self;
64 }
65
66 static PyObject* reader_exit(ZstdCompressionReader* self, PyObject* args) {
67 PyObject* exc_type;
68 PyObject* exc_value;
69 PyObject* exc_tb;
70
71 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
72 return NULL;
73 }
74
75 self->entered = 0;
76 self->closed = 1;
77
78 /* Release resources associated with source. */
79 Py_CLEAR(self->reader);
80 if (self->buffer.buf) {
81 PyBuffer_Release(&self->buffer);
82 memset(&self->buffer, 0, sizeof(self->buffer));
83 }
84
85 Py_CLEAR(self->compressor);
86
87 Py_RETURN_FALSE;
88 }
89
90 static PyObject* reader_readable(ZstdCompressionReader* self) {
91 Py_RETURN_TRUE;
92 }
93
94 static PyObject* reader_writable(ZstdCompressionReader* self) {
95 Py_RETURN_FALSE;
96 }
97
98 static PyObject* reader_seekable(ZstdCompressionReader* self) {
99 Py_RETURN_FALSE;
100 }
101
102 static PyObject* reader_readline(PyObject* self, PyObject* args) {
103 set_unsupported_operation();
104 return NULL;
105 }
106
107 static PyObject* reader_readlines(PyObject* self, PyObject* args) {
108 set_unsupported_operation();
109 return NULL;
110 }
111
112 static PyObject* reader_write(PyObject* self, PyObject* args) {
113 PyErr_SetString(PyExc_OSError, "stream is not writable");
114 return NULL;
115 }
116
117 static PyObject* reader_writelines(PyObject* self, PyObject* args) {
118 PyErr_SetString(PyExc_OSError, "stream is not writable");
119 return NULL;
120 }
121
122 static PyObject* reader_isatty(PyObject* self) {
123 Py_RETURN_FALSE;
124 }
125
126 static PyObject* reader_flush(PyObject* self) {
127 Py_RETURN_NONE;
128 }
129
130 static PyObject* reader_close(ZstdCompressionReader* self) {
131 self->closed = 1;
132 Py_RETURN_NONE;
133 }
134
135 static PyObject* reader_closed(ZstdCompressionReader* self) {
136 if (self->closed) {
137 Py_RETURN_TRUE;
138 }
139 else {
140 Py_RETURN_FALSE;
141 }
142 }
143
144 static PyObject* reader_tell(ZstdCompressionReader* self) {
145 /* TODO should this raise OSError since stream isn't seekable? */
146 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
147 }
148
149 static PyObject* reader_read(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
150 static char* kwlist[] = {
151 "size",
152 NULL
153 };
154
155 Py_ssize_t size = -1;
156 PyObject* result = NULL;
157 char* resultBuffer;
158 Py_ssize_t resultSize;
159 size_t zresult;
160 size_t oldPos;
161
162 if (!self->entered) {
163 PyErr_SetString(ZstdError, "read() must be called from an active context manager");
164 return NULL;
165 }
166
167 if (self->closed) {
168 PyErr_SetString(PyExc_ValueError, "stream is closed");
169 return NULL;
170 }
171
172 if (self->finishedOutput) {
173 return PyBytes_FromStringAndSize("", 0);
174 }
175
176 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "n", kwlist, &size)) {
177 return NULL;
178 }
179
180 if (size < 1) {
181 PyErr_SetString(PyExc_ValueError, "cannot read negative or size 0 amounts");
182 return NULL;
183 }
184
185 result = PyBytes_FromStringAndSize(NULL, size);
186 if (NULL == result) {
187 return NULL;
188 }
189
190 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
191
192 self->output.dst = resultBuffer;
193 self->output.size = resultSize;
194 self->output.pos = 0;
195
196 readinput:
197
198 /* If we have data left over, consume it. */
199 if (self->input.pos < self->input.size) {
200 oldPos = self->output.pos;
201
202 Py_BEGIN_ALLOW_THREADS
203 zresult = ZSTD_compress_generic(self->compressor->cctx,
204 &self->output, &self->input, ZSTD_e_continue);
205
206 Py_END_ALLOW_THREADS
207
208 self->bytesCompressed += self->output.pos - oldPos;
209
210 /* Input exhausted. Clear out state tracking. */
211 if (self->input.pos == self->input.size) {
212 memset(&self->input, 0, sizeof(self->input));
213 Py_CLEAR(self->readResult);
214
215 if (self->buffer.buf) {
216 self->finishedInput = 1;
217 }
218 }
219
220 if (ZSTD_isError(zresult)) {
221 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
222 return NULL;
223 }
224
225 if (self->output.pos) {
226 /* If no more room in output, emit it. */
227 if (self->output.pos == self->output.size) {
228 memset(&self->output, 0, sizeof(self->output));
229 return result;
230 }
231
232 /*
233 * There is room in the output. We fall through to below, which will either
234 * get more input for us or will attempt to end the stream.
235 */
236 }
237
238 /* Fall through to gather more input. */
239 }
240
241 if (!self->finishedInput) {
242 if (self->reader) {
243 Py_buffer buffer;
244
245 assert(self->readResult == NULL);
246 self->readResult = PyObject_CallMethod(self->reader, "read",
247 "k", self->readSize);
248 if (self->readResult == NULL) {
249 return NULL;
250 }
251
252 memset(&buffer, 0, sizeof(buffer));
253
254 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
255 return NULL;
256 }
257
258 /* EOF */
259 if (0 == buffer.len) {
260 self->finishedInput = 1;
261 Py_CLEAR(self->readResult);
262 }
263 else {
264 self->input.src = buffer.buf;
265 self->input.size = buffer.len;
266 self->input.pos = 0;
267 }
268
269 PyBuffer_Release(&buffer);
270 }
271 else {
272 assert(self->buffer.buf);
273
274 self->input.src = self->buffer.buf;
275 self->input.size = self->buffer.len;
276 self->input.pos = 0;
277 }
278 }
279
280 if (self->input.size) {
281 goto readinput;
282 }
283
284 /* Else EOF */
285 oldPos = self->output.pos;
286
287 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
288 &self->input, ZSTD_e_end);
289
290 self->bytesCompressed += self->output.pos - oldPos;
291
292 if (ZSTD_isError(zresult)) {
293 PyErr_Format(ZstdError, "error ending compression stream: %s",
294 ZSTD_getErrorName(zresult));
295 return NULL;
296 }
297
298 assert(self->output.pos);
299
300 if (0 == zresult) {
301 self->finishedOutput = 1;
302 }
303
304 if (safe_pybytes_resize(&result, self->output.pos)) {
305 Py_XDECREF(result);
306 return NULL;
307 }
308
309 memset(&self->output, 0, sizeof(self->output));
310
311 return result;
312 }
313
314 static PyObject* reader_readall(PyObject* self) {
315 PyErr_SetNone(PyExc_NotImplementedError);
316 return NULL;
317 }
318
319 static PyObject* reader_iter(PyObject* self) {
320 set_unsupported_operation();
321 return NULL;
322 }
323
324 static PyObject* reader_iternext(PyObject* self) {
325 set_unsupported_operation();
326 return NULL;
327 }
328
329 static PyMethodDef reader_methods[] = {
330 { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
331 PyDoc_STR("Enter a compression context") },
332 { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
333 PyDoc_STR("Exit a compression context") },
334 { "close", (PyCFunction)reader_close, METH_NOARGS,
335 PyDoc_STR("Close the stream so it cannot perform any more operations") },
336 { "closed", (PyCFunction)reader_closed, METH_NOARGS,
337 PyDoc_STR("Whether stream is closed") },
338 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
339 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
340 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
341 PyDoc_STR("Returns True") },
342 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data") },
343 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
344 { "readline", (PyCFunction)reader_readline, METH_VARARGS, PyDoc_STR("Not implemented") },
345 { "readlines", (PyCFunction)reader_readlines, METH_VARARGS, PyDoc_STR("Not implemented") },
346 { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
347 PyDoc_STR("Returns False") },
348 { "tell", (PyCFunction)reader_tell, METH_NOARGS,
349 PyDoc_STR("Returns current number of bytes compressed") },
350 { "writable", (PyCFunction)reader_writable, METH_NOARGS,
351 PyDoc_STR("Returns False") },
352 { "write", reader_write, METH_VARARGS, PyDoc_STR("Raises OSError") },
353 { "writelines", reader_writelines, METH_VARARGS, PyDoc_STR("Not implemented") },
354 { NULL, NULL }
355 };
356
357 PyTypeObject ZstdCompressionReaderType = {
358 PyVarObject_HEAD_INIT(NULL, 0)
359 "zstd.ZstdCompressionReader", /* tp_name */
360 sizeof(ZstdCompressionReader), /* tp_basicsize */
361 0, /* tp_itemsize */
362 (destructor)reader_dealloc, /* tp_dealloc */
363 0, /* tp_print */
364 0, /* tp_getattr */
365 0, /* tp_setattr */
366 0, /* tp_compare */
367 0, /* tp_repr */
368 0, /* tp_as_number */
369 0, /* tp_as_sequence */
370 0, /* tp_as_mapping */
371 0, /* tp_hash */
372 0, /* tp_call */
373 0, /* tp_str */
374 0, /* tp_getattro */
375 0, /* tp_setattro */
376 0, /* tp_as_buffer */
377 Py_TPFLAGS_DEFAULT, /* tp_flags */
378 0, /* tp_doc */
379 0, /* tp_traverse */
380 0, /* tp_clear */
381 0, /* tp_richcompare */
382 0, /* tp_weaklistoffset */
383 reader_iter, /* tp_iter */
384 reader_iternext, /* tp_iternext */
385 reader_methods, /* tp_methods */
386 0, /* tp_members */
387 0, /* tp_getset */
388 0, /* tp_base */
389 0, /* tp_dict */
390 0, /* tp_descr_get */
391 0, /* tp_descr_set */
392 0, /* tp_dictoffset */
393 0, /* tp_init */
394 0, /* tp_alloc */
395 PyType_GenericNew, /* tp_new */
396 };
397
398 void compressionreader_module_init(PyObject* mod) {
399 /* TODO make reader a sub-class of io.RawIOBase */
400
401 Py_TYPE(&ZstdCompressionReaderType) = &PyType_Type;
402 if (PyType_Ready(&ZstdCompressionReaderType) < 0) {
403 return;
404 }
405 }
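
For orientation, the following is a minimal usage sketch of the ZstdCompressionReader type implemented above, as exercised from Python. It is hedged: it assumes the ``ZstdCompressor.stream_reader()`` constructor provided elsewhere in python-zstandard 0.9.0, and the input data and read size are purely illustrative. Note that, per the checks in ``reader_read()``, ``read()`` is only valid while inside the context manager.

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor(level=3)
    source = io.BytesIO(b"data to compress" * 1000)  # illustrative input

    chunks = []
    # __enter__ sets the pledged source size and activates the reader.
    with cctx.stream_reader(source) as reader:
        while True:
            # Each read() drives the compressor until the requested number of
            # compressed bytes is produced or the source is exhausted.
            chunk = reader.read(16384)
            if not chunk:
                break
            chunks.append(chunk)

    compressed = b"".join(chunks)
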
@@ -0,0 +1,459 b''
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8
9 #include "python-zstandard.h"
10
11 extern PyObject* ZstdError;
12
13 static void set_unsupported_operation(void) {
14 PyObject* iomod;
15 PyObject* exc;
16
17 iomod = PyImport_ImportModule("io");
18 if (NULL == iomod) {
19 return;
20 }
21
22 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
23 if (NULL == exc) {
24 Py_DECREF(iomod);
25 return;
26 }
27
28 PyErr_SetNone(exc);
29 Py_DECREF(exc);
30 Py_DECREF(iomod);
31 }
32
33 static void reader_dealloc(ZstdDecompressionReader* self) {
34 Py_XDECREF(self->decompressor);
35 Py_XDECREF(self->reader);
36
37 if (self->buffer.buf) {
38 PyBuffer_Release(&self->buffer);
39 }
40
41 PyObject_Del(self);
42 }
43
44 static ZstdDecompressionReader* reader_enter(ZstdDecompressionReader* self) {
45 if (self->entered) {
46 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
47 return NULL;
48 }
49
50 if (ensure_dctx(self->decompressor, 1)) {
51 return NULL;
52 }
53
54 self->entered = 1;
55
56 Py_INCREF(self);
57 return self;
58 }
59
60 static PyObject* reader_exit(ZstdDecompressionReader* self, PyObject* args) {
61 PyObject* exc_type;
62 PyObject* exc_value;
63 PyObject* exc_tb;
64
65 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
66 return NULL;
67 }
68
69 self->entered = 0;
70 self->closed = 1;
71
72 /* Release resources. */
73 Py_CLEAR(self->reader);
74 if (self->buffer.buf) {
75 PyBuffer_Release(&self->buffer);
76 memset(&self->buffer, 0, sizeof(self->buffer));
77 }
78
79 Py_CLEAR(self->decompressor);
80
81 Py_RETURN_FALSE;
82 }
83
84 static PyObject* reader_readable(PyObject* self) {
85 Py_RETURN_TRUE;
86 }
87
88 static PyObject* reader_writable(PyObject* self) {
89 Py_RETURN_FALSE;
90 }
91
92 static PyObject* reader_seekable(PyObject* self) {
93 Py_RETURN_TRUE;
94 }
95
96 static PyObject* reader_close(ZstdDecompressionReader* self) {
97 self->closed = 1;
98 Py_RETURN_NONE;
99 }
100
101 static PyObject* reader_closed(ZstdDecompressionReader* self) {
102 if (self->closed) {
103 Py_RETURN_TRUE;
104 }
105 else {
106 Py_RETURN_FALSE;
107 }
108 }
109
110 static PyObject* reader_flush(PyObject* self) {
111 Py_RETURN_NONE;
112 }
113
114 static PyObject* reader_isatty(PyObject* self) {
115 Py_RETURN_FALSE;
116 }
117
118 static PyObject* reader_read(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
119 static char* kwlist[] = {
120 "size",
121 NULL
122 };
123
124 Py_ssize_t size = -1;
125 PyObject* result = NULL;
126 char* resultBuffer;
127 Py_ssize_t resultSize;
128 ZSTD_outBuffer output;
129 size_t zresult;
130
131 if (!self->entered) {
132 PyErr_SetString(ZstdError, "read() must be called from an active context manager");
133 return NULL;
134 }
135
136 if (self->closed) {
137 PyErr_SetString(PyExc_ValueError, "stream is closed");
138 return NULL;
139 }
140
141 if (self->finishedOutput) {
142 return PyBytes_FromStringAndSize("", 0);
143 }
144
145 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "n", kwlist, &size)) {
146 return NULL;
147 }
148
149 if (size < 1) {
150 PyErr_SetString(PyExc_ValueError, "cannot read negative or size 0 amounts");
151 return NULL;
152 }
153
154 result = PyBytes_FromStringAndSize(NULL, size);
155 if (NULL == result) {
156 return NULL;
157 }
158
159 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
160
161 output.dst = resultBuffer;
162 output.size = resultSize;
163 output.pos = 0;
164
165 readinput:
166
167 /* Consume input data left over from last time. */
168 if (self->input.pos < self->input.size) {
169 Py_BEGIN_ALLOW_THREADS
170 zresult = ZSTD_decompress_generic(self->decompressor->dctx,
171 &output, &self->input);
172 Py_END_ALLOW_THREADS
173
174 /* Input exhausted. Clear our state tracking. */
175 if (self->input.pos == self->input.size) {
176 memset(&self->input, 0, sizeof(self->input));
177 Py_CLEAR(self->readResult);
178
179 if (self->buffer.buf) {
180 self->finishedInput = 1;
181 }
182 }
183
184 if (ZSTD_isError(zresult)) {
185 PyErr_Format(ZstdError, "zstd decompress error: %s", ZSTD_getErrorName(zresult));
186 return NULL;
187 }
188 else if (0 == zresult) {
189 self->finishedOutput = 1;
190 }
191
192 /* We fulfilled the full read request. Emit it. */
193 if (output.pos && output.pos == output.size) {
194 self->bytesDecompressed += output.size;
195 return result;
196 }
197
198 /*
199 * There is more room in the output. Fall through to try to collect
200 * more data so we can try to fill the output.
201 */
202 }
203
204 if (!self->finishedInput) {
205 if (self->reader) {
206 Py_buffer buffer;
207
208 assert(self->readResult == NULL);
209 self->readResult = PyObject_CallMethod(self->reader, "read",
210 "k", self->readSize);
211 if (NULL == self->readResult) {
212 return NULL;
213 }
214
215 memset(&buffer, 0, sizeof(buffer));
216
217 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
218 return NULL;
219 }
220
221 /* EOF */
222 if (0 == buffer.len) {
223 self->finishedInput = 1;
224 Py_CLEAR(self->readResult);
225 }
226 else {
227 self->input.src = buffer.buf;
228 self->input.size = buffer.len;
229 self->input.pos = 0;
230 }
231
232 PyBuffer_Release(&buffer);
233 }
234 else {
235 assert(self->buffer.buf);
236 /*
237 * We should only get here once since above block will exhaust
238 * source buffer until finishedInput is set.
239 */
240 assert(self->input.src == NULL);
241
242 self->input.src = self->buffer.buf;
243 self->input.size = self->buffer.len;
244 self->input.pos = 0;
245 }
246 }
247
248 if (self->input.size) {
249 goto readinput;
250 }
251
252 /* EOF */
253 self->bytesDecompressed += output.pos;
254
255 if (safe_pybytes_resize(&result, output.pos)) {
256 Py_XDECREF(result);
257 return NULL;
258 }
259
260 return result;
261 }
262
263 static PyObject* reader_readall(PyObject* self) {
264 PyErr_SetNone(PyExc_NotImplementedError);
265 return NULL;
266 }
267
268 static PyObject* reader_readline(PyObject* self) {
269 PyErr_SetNone(PyExc_NotImplementedError);
270 return NULL;
271 }
272
273 static PyObject* reader_readlines(PyObject* self) {
274 PyErr_SetNone(PyExc_NotImplementedError);
275 return NULL;
276 }
277
278 static PyObject* reader_seek(ZstdDecompressionReader* self, PyObject* args) {
279 Py_ssize_t pos;
280 int whence = 0;
281 unsigned long long readAmount = 0;
282 size_t defaultOutSize = ZSTD_DStreamOutSize();
283
284 if (!self->entered) {
285 PyErr_SetString(ZstdError, "seek() must be called from an active context manager");
286 return NULL;
287 }
288
289 if (self->closed) {
290 PyErr_SetString(PyExc_ValueError, "stream is closed");
291 return NULL;
292 }
293
294 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &whence)) {
295 return NULL;
296 }
297
298 if (whence == SEEK_SET) {
299 if (pos < 0) {
300 PyErr_SetString(PyExc_ValueError,
301 "cannot seek to negative position with SEEK_SET");
302 return NULL;
303 }
304
305 if ((unsigned long long)pos < self->bytesDecompressed) {
306 PyErr_SetString(PyExc_ValueError,
307 "cannot seek zstd decompression stream backwards");
308 return NULL;
309 }
310
311 readAmount = pos - self->bytesDecompressed;
312 }
313 else if (whence == SEEK_CUR) {
314 if (pos < 0) {
315 PyErr_SetString(PyExc_ValueError,
316 "cannot seek zstd decompression stream backwards");
317 return NULL;
318 }
319
320 readAmount = pos;
321 }
322 else if (whence == SEEK_END) {
323 /* We /could/ support this with pos==0. But let's not do that until someone
324 needs it. */
325 PyErr_SetString(PyExc_ValueError,
326 "zstd decompression streams cannot be seeked with SEEK_END");
327 return NULL;
328 }
329
330 /* It is a bit inefficient to do this via the Python API. But since there
331 is a bit of state tracking involved to read from this type, it is the
332 easiest to implement. */
333 while (readAmount) {
334 Py_ssize_t readSize;
335 PyObject* readResult = PyObject_CallMethod((PyObject*)self, "read", "K",
336 readAmount < defaultOutSize ? readAmount : defaultOutSize);
337
338 if (!readResult) {
339 return NULL;
340 }
341
342 readSize = PyBytes_GET_SIZE(readResult);
343
344 /* Empty read means EOF. */
345 if (!readSize) {
346 break;
347 }
348
349 readAmount -= readSize;
350 }
351
352 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
353 }
354
355 static PyObject* reader_tell(ZstdDecompressionReader* self) {
356 /* TODO should this raise OSError since stream isn't seekable? */
357 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
358 }
359
360 static PyObject* reader_write(PyObject* self, PyObject* args) {
361 set_unsupported_operation();
362 return NULL;
363 }
364
365 static PyObject* reader_writelines(PyObject* self, PyObject* args) {
366 set_unsupported_operation();
367 return NULL;
368 }
369
370 static PyObject* reader_iter(PyObject* self) {
371 PyErr_SetNone(PyExc_NotImplementedError);
372 return NULL;
373 }
374
375 static PyObject* reader_iternext(PyObject* self) {
376 PyErr_SetNone(PyExc_NotImplementedError);
377 return NULL;
378 }
379
380 static PyMethodDef reader_methods[] = {
381 { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
382 PyDoc_STR("Enter a compression context") },
383 { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
384 PyDoc_STR("Exit a compression context") },
385 { "close", (PyCFunction)reader_close, METH_NOARGS,
386 PyDoc_STR("Close the stream so it cannot perform any more operations") },
387 { "closed", (PyCFunction)reader_closed, METH_NOARGS,
388 PyDoc_STR("Whether stream is closed") },
389 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
390 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
391 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
392 PyDoc_STR("Returns True") },
393 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS,
394 PyDoc_STR("read compressed data") },
395 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
396 { "readline", (PyCFunction)reader_readline, METH_NOARGS, PyDoc_STR("Not implemented") },
397 { "readlines", (PyCFunction)reader_readlines, METH_NOARGS, PyDoc_STR("Not implemented") },
398 { "seek", (PyCFunction)reader_seek, METH_VARARGS, PyDoc_STR("Seek the stream") },
399 { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
400 PyDoc_STR("Returns True") },
401 { "tell", (PyCFunction)reader_tell, METH_NOARGS,
402 PyDoc_STR("Returns current number of bytes compressed") },
403 { "writable", (PyCFunction)reader_writable, METH_NOARGS,
404 PyDoc_STR("Returns False") },
405 { "write", (PyCFunction)reader_write, METH_VARARGS, PyDoc_STR("unsupported operation") },
406 { "writelines", (PyCFunction)reader_writelines, METH_VARARGS, PyDoc_STR("unsupported operation") },
407 { NULL, NULL }
408 };
409
410 PyTypeObject ZstdDecompressionReaderType = {
411 PyVarObject_HEAD_INIT(NULL, 0)
412 "zstd.ZstdDecompressionReader", /* tp_name */
413 sizeof(ZstdDecompressionReader), /* tp_basicsize */
414 0, /* tp_itemsize */
415 (destructor)reader_dealloc, /* tp_dealloc */
416 0, /* tp_print */
417 0, /* tp_getattr */
418 0, /* tp_setattr */
419 0, /* tp_compare */
420 0, /* tp_repr */
421 0, /* tp_as_number */
422 0, /* tp_as_sequence */
423 0, /* tp_as_mapping */
424 0, /* tp_hash */
425 0, /* tp_call */
426 0, /* tp_str */
427 0, /* tp_getattro */
428 0, /* tp_setattro */
429 0, /* tp_as_buffer */
430 Py_TPFLAGS_DEFAULT, /* tp_flags */
431 0, /* tp_doc */
432 0, /* tp_traverse */
433 0, /* tp_clear */
434 0, /* tp_richcompare */
435 0, /* tp_weaklistoffset */
436 reader_iter, /* tp_iter */
437 reader_iternext, /* tp_iternext */
438 reader_methods, /* tp_methods */
439 0, /* tp_members */
440 0, /* tp_getset */
441 0, /* tp_base */
442 0, /* tp_dict */
443 0, /* tp_descr_get */
444 0, /* tp_descr_set */
445 0, /* tp_dictoffset */
446 0, /* tp_init */
447 0, /* tp_alloc */
448 PyType_GenericNew, /* tp_new */
449 };
450
451
452 void decompressionreader_module_init(PyObject* mod) {
453 /* TODO make reader a sub-class of io.RawIOBase */
454
455 Py_TYPE(&ZstdDecompressionReaderType) = &PyType_Type;
456 if (PyType_Ready(&ZstdDecompressionReaderType) < 0) {
457 return;
458 }
459 }
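
Similarly, a hedged usage sketch of the ZstdDecompressionReader type above, assuming the ``ZstdDecompressor.stream_reader()`` constructor; ``compressed`` stands in for a zstd frame such as the one produced in the previous sketch. It illustrates the forward-only seek semantics implemented in ``reader_seek()``.

    import io
    import zstandard as zstd

    dctx = zstd.ZstdDecompressor()

    with dctx.stream_reader(io.BytesIO(compressed)) as reader:
        header = reader.read(64)

        # Forward seeks are emulated by reading and discarding decompressed
        # bytes; seeking backwards or using SEEK_END raises ValueError.
        reader.seek(4096, io.SEEK_CUR)

        # tell() reports how many decompressed bytes have been produced so far.
        position = reader.tell()
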
@@ -0,0 +1,62 b''
1 # Copyright (c) 2017-present, Gregory Szorc
2 # All rights reserved.
3 #
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
6
7 """Python interface to the Zstandard (zstd) compression library."""
8
9 from __future__ import absolute_import, unicode_literals
10
11 # This module serves 2 roles:
12 #
13 # 1) Export the C or CFFI "backend" through a central module.
14 # 2) Implement additional functionality built on top of C or CFFI backend.
15
16 import os
17 import platform
18
19 # Some Python implementations don't support C extensions. That's why we have
20 # a CFFI implementation in the first place. The code here imports one of our
21 # "backends" then re-exports the symbols from this module. For convenience,
22 # we support falling back to the CFFI backend if the C extension can't be
23 # imported. But for performance reasons, we only do this on unknown Python
24 # implementations. Notably, for CPython we require the C extension by default.
25 # Because someone will inevitably want special behavior, the behavior is
26 # configurable via an environment variable. A potentially better way to handle
27 # this is to import a special ``__importpolicy__`` module or something
28 # defining a variable and `setup.py` could write the file with whatever
29 # policy was specified at build time. Until someone needs it, we go with
30 # the hacky but simple environment variable approach.
31 _module_policy = os.environ.get('PYTHON_ZSTANDARD_IMPORT_POLICY', 'default')
32
33 if _module_policy == 'default':
34 if platform.python_implementation() in ('CPython',):
35 from zstd import *
36 backend = 'cext'
37 elif platform.python_implementation() in ('PyPy',):
38 from zstd_cffi import *
39 backend = 'cffi'
40 else:
41 try:
42 from zstd import *
43 backend = 'cext'
44 except ImportError:
45 from zstd_cffi import *
46 backend = 'cffi'
47 elif _module_policy == 'cffi_fallback':
48 try:
49 from zstd import *
50 backend = 'cext'
51 except ImportError:
52 from zstd_cffi import *
53 backend = 'cffi'
54 elif _module_policy == 'cext':
55 from zstd import *
56 backend = 'cext'
57 elif _module_policy == 'cffi':
58 from zstd_cffi import *
59 backend = 'cffi'
60 else:
61 raise ImportError('unknown module import policy: %s; use default, cffi_fallback, '
62 'cext, or cffi' % _module_policy)
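
The import policy described in the comment above can be exercised as follows. This is an illustrative sketch: it assumes the package is importable as ``zstandard`` and that the chosen backend (here CFFI) is actually installed.

    import os

    # Must be set before the package is first imported, since the policy is
    # evaluated at import time.
    os.environ['PYTHON_ZSTANDARD_IMPORT_POLICY'] = 'cffi'

    import zstandard

    # `backend` records which implementation was loaded: 'cext' or 'cffi'.
    print(zstandard.backend)  # -> 'cffi'
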
@@ -0,0 +1,339 b''
1 GNU GENERAL PUBLIC LICENSE
2 Version 2, June 1991
3
4 Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 Everyone is permitted to copy and distribute verbatim copies
7 of this license document, but changing it is not allowed.
8
9 Preamble
10
11 The licenses for most software are designed to take away your
12 freedom to share and change it. By contrast, the GNU General Public
13 License is intended to guarantee your freedom to share and change free
14 software--to make sure the software is free for all its users. This
15 General Public License applies to most of the Free Software
16 Foundation's software and to any other program whose authors commit to
17 using it. (Some other Free Software Foundation software is covered by
18 the GNU Lesser General Public License instead.) You can apply it to
19 your programs, too.
20
21 When we speak of free software, we are referring to freedom, not
22 price. Our General Public Licenses are designed to make sure that you
23 have the freedom to distribute copies of free software (and charge for
24 this service if you wish), that you receive source code or can get it
25 if you want it, that you can change the software or use pieces of it
26 in new free programs; and that you know you can do these things.
27
28 To protect your rights, we need to make restrictions that forbid
29 anyone to deny you these rights or to ask you to surrender the rights.
30 These restrictions translate to certain responsibilities for you if you
31 distribute copies of the software, or if you modify it.
32
33 For example, if you distribute copies of such a program, whether
34 gratis or for a fee, you must give the recipients all the rights that
35 you have. You must make sure that they, too, receive or can get the
36 source code. And you must show them these terms so they know their
37 rights.
38
39 We protect your rights with two steps: (1) copyright the software, and
40 (2) offer you this license which gives you legal permission to copy,
41 distribute and/or modify the software.
42
43 Also, for each author's protection and ours, we want to make certain
44 that everyone understands that there is no warranty for this free
45 software. If the software is modified by someone else and passed on, we
46 want its recipients to know that what they have is not the original, so
47 that any problems introduced by others will not reflect on the original
48 authors' reputations.
49
50 Finally, any free program is threatened constantly by software
51 patents. We wish to avoid the danger that redistributors of a free
52 program will individually obtain patent licenses, in effect making the
53 program proprietary. To prevent this, we have made it clear that any
54 patent must be licensed for everyone's free use or not licensed at all.
55
56 The precise terms and conditions for copying, distribution and
57 modification follow.
58
59 GNU GENERAL PUBLIC LICENSE
60 TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61
62 0. This License applies to any program or other work which contains
63 a notice placed by the copyright holder saying it may be distributed
64 under the terms of this General Public License. The "Program", below,
65 refers to any such program or work, and a "work based on the Program"
66 means either the Program or any derivative work under copyright law:
67 that is to say, a work containing the Program or a portion of it,
68 either verbatim or with modifications and/or translated into another
69 language. (Hereinafter, translation is included without limitation in
70 the term "modification".) Each licensee is addressed as "you".
71
72 Activities other than copying, distribution and modification are not
73 covered by this License; they are outside its scope. The act of
74 running the Program is not restricted, and the output from the Program
75 is covered only if its contents constitute a work based on the
76 Program (independent of having been made by running the Program).
77 Whether that is true depends on what the Program does.
78
79 1. You may copy and distribute verbatim copies of the Program's
80 source code as you receive it, in any medium, provided that you
81 conspicuously and appropriately publish on each copy an appropriate
82 copyright notice and disclaimer of warranty; keep intact all the
83 notices that refer to this License and to the absence of any warranty;
84 and give any other recipients of the Program a copy of this License
85 along with the Program.
86
87 You may charge a fee for the physical act of transferring a copy, and
88 you may at your option offer warranty protection in exchange for a fee.
89
90 2. You may modify your copy or copies of the Program or any portion
91 of it, thus forming a work based on the Program, and copy and
92 distribute such modifications or work under the terms of Section 1
93 above, provided that you also meet all of these conditions:
94
95 a) You must cause the modified files to carry prominent notices
96 stating that you changed the files and the date of any change.
97
98 b) You must cause any work that you distribute or publish, that in
99 whole or in part contains or is derived from the Program or any
100 part thereof, to be licensed as a whole at no charge to all third
101 parties under the terms of this License.
102
103 c) If the modified program normally reads commands interactively
104 when run, you must cause it, when started running for such
105 interactive use in the most ordinary way, to print or display an
106 announcement including an appropriate copyright notice and a
107 notice that there is no warranty (or else, saying that you provide
108 a warranty) and that users may redistribute the program under
109 these conditions, and telling the user how to view a copy of this
110 License. (Exception: if the Program itself is interactive but
111 does not normally print such an announcement, your work based on
112 the Program is not required to print an announcement.)
113
114 These requirements apply to the modified work as a whole. If
115 identifiable sections of that work are not derived from the Program,
116 and can be reasonably considered independent and separate works in
117 themselves, then this License, and its terms, do not apply to those
118 sections when you distribute them as separate works. But when you
119 distribute the same sections as part of a whole which is a work based
120 on the Program, the distribution of the whole must be on the terms of
121 this License, whose permissions for other licensees extend to the
122 entire whole, and thus to each and every part regardless of who wrote it.
123
124 Thus, it is not the intent of this section to claim rights or contest
125 your rights to work written entirely by you; rather, the intent is to
126 exercise the right to control the distribution of derivative or
127 collective works based on the Program.
128
129 In addition, mere aggregation of another work not based on the Program
130 with the Program (or with a work based on the Program) on a volume of
131 a storage or distribution medium does not bring the other work under
132 the scope of this License.
133
134 3. You may copy and distribute the Program (or a work based on it,
135 under Section 2) in object code or executable form under the terms of
136 Sections 1 and 2 above provided that you also do one of the following:
137
138 a) Accompany it with the complete corresponding machine-readable
139 source code, which must be distributed under the terms of Sections
140 1 and 2 above on a medium customarily used for software interchange; or,
141
142 b) Accompany it with a written offer, valid for at least three
143 years, to give any third party, for a charge no more than your
144 cost of physically performing source distribution, a complete
145 machine-readable copy of the corresponding source code, to be
146 distributed under the terms of Sections 1 and 2 above on a medium
147 customarily used for software interchange; or,
148
149 c) Accompany it with the information you received as to the offer
150 to distribute corresponding source code. (This alternative is
151 allowed only for noncommercial distribution and only if you
152 received the program in object code or executable form with such
153 an offer, in accord with Subsection b above.)
154
155 The source code for a work means the preferred form of the work for
156 making modifications to it. For an executable work, complete source
157 code means all the source code for all modules it contains, plus any
158 associated interface definition files, plus the scripts used to
159 control compilation and installation of the executable. However, as a
160 special exception, the source code distributed need not include
161 anything that is normally distributed (in either source or binary
162 form) with the major components (compiler, kernel, and so on) of the
163 operating system on which the executable runs, unless that component
164 itself accompanies the executable.
165
166 If distribution of executable or object code is made by offering
167 access to copy from a designated place, then offering equivalent
168 access to copy the source code from the same place counts as
169 distribution of the source code, even though third parties are not
170 compelled to copy the source along with the object code.
171
172 4. You may not copy, modify, sublicense, or distribute the Program
173 except as expressly provided under this License. Any attempt
174 otherwise to copy, modify, sublicense or distribute the Program is
175 void, and will automatically terminate your rights under this License.
176 However, parties who have received copies, or rights, from you under
177 this License will not have their licenses terminated so long as such
178 parties remain in full compliance.
179
180 5. You are not required to accept this License, since you have not
181 signed it. However, nothing else grants you permission to modify or
182 distribute the Program or its derivative works. These actions are
183 prohibited by law if you do not accept this License. Therefore, by
184 modifying or distributing the Program (or any work based on the
185 Program), you indicate your acceptance of this License to do so, and
186 all its terms and conditions for copying, distributing or modifying
187 the Program or works based on it.
188
189 6. Each time you redistribute the Program (or any work based on the
190 Program), the recipient automatically receives a license from the
191 original licensor to copy, distribute or modify the Program subject to
192 these terms and conditions. You may not impose any further
193 restrictions on the recipients' exercise of the rights granted herein.
194 You are not responsible for enforcing compliance by third parties to
195 this License.
196
197 7. If, as a consequence of a court judgment or allegation of patent
198 infringement or for any other reason (not limited to patent issues),
199 conditions are imposed on you (whether by court order, agreement or
200 otherwise) that contradict the conditions of this License, they do not
201 excuse you from the conditions of this License. If you cannot
202 distribute so as to satisfy simultaneously your obligations under this
203 License and any other pertinent obligations, then as a consequence you
204 may not distribute the Program at all. For example, if a patent
205 license would not permit royalty-free redistribution of the Program by
206 all those who receive copies directly or indirectly through you, then
207 the only way you could satisfy both it and this License would be to
208 refrain entirely from distribution of the Program.
209
210 If any portion of this section is held invalid or unenforceable under
211 any particular circumstance, the balance of the section is intended to
212 apply and the section as a whole is intended to apply in other
213 circumstances.
214
215 It is not the purpose of this section to induce you to infringe any
216 patents or other property right claims or to contest validity of any
217 such claims; this section has the sole purpose of protecting the
218 integrity of the free software distribution system, which is
219 implemented by public license practices. Many people have made
220 generous contributions to the wide range of software distributed
221 through that system in reliance on consistent application of that
222 system; it is up to the author/donor to decide if he or she is willing
223 to distribute software through any other system and a licensee cannot
224 impose that choice.
225
226 This section is intended to make thoroughly clear what is believed to
227 be a consequence of the rest of this License.
228
229 8. If the distribution and/or use of the Program is restricted in
230 certain countries either by patents or by copyrighted interfaces, the
231 original copyright holder who places the Program under this License
232 may add an explicit geographical distribution limitation excluding
233 those countries, so that distribution is permitted only in or among
234 countries not thus excluded. In such case, this License incorporates
235 the limitation as if written in the body of this License.
236
237 9. The Free Software Foundation may publish revised and/or new versions
238 of the General Public License from time to time. Such new versions will
239 be similar in spirit to the present version, but may differ in detail to
240 address new problems or concerns.
241
242 Each version is given a distinguishing version number. If the Program
243 specifies a version number of this License which applies to it and "any
244 later version", you have the option of following the terms and conditions
245 either of that version or of any later version published by the Free
246 Software Foundation. If the Program does not specify a version number of
247 this License, you may choose any version ever published by the Free Software
248 Foundation.
249
250 10. If you wish to incorporate parts of the Program into other free
251 programs whose distribution conditions are different, write to the author
252 to ask for permission. For software which is copyrighted by the Free
253 Software Foundation, write to the Free Software Foundation; we sometimes
254 make exceptions for this. Our decision will be guided by the two goals
255 of preserving the free status of all derivatives of our free software and
256 of promoting the sharing and reuse of software generally.
257
258 NO WARRANTY
259
260 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 REPAIR OR CORRECTION.
269
270 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 POSSIBILITY OF SUCH DAMAGES.
279
280 END OF TERMS AND CONDITIONS
281
282 How to Apply These Terms to Your New Programs
283
284 If you develop a new program, and you want it to be of the greatest
285 possible use to the public, the best way to achieve this is to make it
286 free software which everyone can redistribute and change under these terms.
287
288 To do so, attach the following notices to the program. It is safest
289 to attach them to the start of each source file to most effectively
290 convey the exclusion of warranty; and each file should have at least
291 the "copyright" line and a pointer to where the full notice is found.
292
293 <one line to give the program's name and a brief idea of what it does.>
294 Copyright (C) <year> <name of author>
295
296 This program is free software; you can redistribute it and/or modify
297 it under the terms of the GNU General Public License as published by
298 the Free Software Foundation; either version 2 of the License, or
299 (at your option) any later version.
300
301 This program is distributed in the hope that it will be useful,
302 but WITHOUT ANY WARRANTY; without even the implied warranty of
303 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 GNU General Public License for more details.
305
306 You should have received a copy of the GNU General Public License along
307 with this program; if not, write to the Free Software Foundation, Inc.,
308 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309
310 Also add information on how to contact you by electronic and paper mail.
311
312 If the program is interactive, make it output a short notice like this
313 when it starts in an interactive mode:
314
315 Gnomovision version 69, Copyright (C) year name of author
316 Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 This is free software, and you are welcome to redistribute it
318 under certain conditions; type `show c' for details.
319
320 The hypothetical commands `show w' and `show c' should show the appropriate
321 parts of the General Public License. Of course, the commands you use may
322 be called something other than `show w' and `show c'; they could even be
323 mouse-clicks or menu items--whatever suits your program.
324
325 You should also get your employer (if you work as a programmer) or your
326 school, if any, to sign a "copyright disclaimer" for the program, if
327 necessary. Here is a sample; alter the names:
328
329 Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 `Gnomovision' (which makes passes at compilers) written by James Hacker.
331
332 <signature of Ty Coon>, 1 April 1989
333 Ty Coon, President of Vice
334
335 This General Public License does not permit incorporating your program into
336 proprietary programs. If your program is a subroutine library, you may
337 consider it more useful to permit linking proprietary applications with the
338 library. If this is what you want to do, use the GNU Lesser General
339 Public License instead of this License. No newline at end of file
@@ -0,0 +1,111 b''
1 /*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 #ifndef ZSTD_COMPILER_H
12 #define ZSTD_COMPILER_H
13
14 /*-*******************************************************
15 * Compiler specifics
16 *********************************************************/
17 /* force inlining */
18 #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
19 # define INLINE_KEYWORD inline
20 #else
21 # define INLINE_KEYWORD
22 #endif
23
24 #if defined(__GNUC__)
25 # define FORCE_INLINE_ATTR __attribute__((always_inline))
26 #elif defined(_MSC_VER)
27 # define FORCE_INLINE_ATTR __forceinline
28 #else
29 # define FORCE_INLINE_ATTR
30 #endif
31
32 /**
33 * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
34 * parameters. They must be inlined for the compiler to eliminate the constant
35 * branches.
36 */
37 #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
38 /**
39 * HINT_INLINE is used to help the compiler generate better code. It is *not*
40 * used for "templates", so it can be tweaked based on the compiler's
41 * performance.
42 *
43 * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
44 * always_inline attribute.
45 *
46 * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline
47 * attribute.
48 */
49 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
50 # define HINT_INLINE static INLINE_KEYWORD
51 #else
52 # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
53 #endif
54
55 /* force no inlining */
56 #ifdef _MSC_VER
57 # define FORCE_NOINLINE static __declspec(noinline)
58 #else
59 # ifdef __GNUC__
60 # define FORCE_NOINLINE static __attribute__((__noinline__))
61 # else
62 # define FORCE_NOINLINE static
63 # endif
64 #endif
65
66 /* target attribute */
67 #ifndef __has_attribute
68 #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
69 #endif
70 #if defined(__GNUC__)
71 # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
72 #else
73 # define TARGET_ATTRIBUTE(target)
74 #endif
75
76 /* Enable runtime BMI2 dispatch based on the CPU.
77 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
78 */
79 #ifndef DYNAMIC_BMI2
80 #if (defined(__clang__) && __has_attribute(__target__)) \
81 || (defined(__GNUC__) \
82 && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) \
83 && (defined(__x86_64__) || defined(_M_X86)) \
84 && !defined(__BMI2__)
85 # define DYNAMIC_BMI2 1
86 #else
87 # define DYNAMIC_BMI2 0
88 #endif
89 #endif
90
91 /* prefetch */
92 #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
93 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
94 # define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
95 #elif defined(__GNUC__)
96 # define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
97 #else
98 # define PREFETCH(ptr) /* disabled */
99 #endif
100
101 /* disable warnings */
102 #ifdef _MSC_VER /* Visual Studio */
103 # include <intrin.h> /* For Visual 2005 */
104 # pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */
105 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
106 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
107 # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
108 # pragma warning(disable : 4324) /* disable: C4324: padded structure */
109 #endif
110
111 #endif /* ZSTD_COMPILER_H */
@@ -0,0 +1,216 b''
1 /*
2 * Copyright (c) 2018-present, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 #ifndef ZSTD_COMMON_CPU_H
12 #define ZSTD_COMMON_CPU_H
13
14 /**
15 * Implementation taken from folly/CpuId.h
16 * https://github.com/facebook/folly/blob/master/folly/CpuId.h
17 */
18
19 #include <string.h>
20
21 #include "mem.h"
22
23 #ifdef _MSC_VER
24 #include <intrin.h>
25 #endif
26
27 typedef struct {
28 U32 f1c;
29 U32 f1d;
30 U32 f7b;
31 U32 f7c;
32 } ZSTD_cpuid_t;
33
34 MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
35 U32 f1c = 0;
36 U32 f1d = 0;
37 U32 f7b = 0;
38 U32 f7c = 0;
39 #ifdef _MSC_VER
40 int reg[4];
41 __cpuid((int*)reg, 0);
42 {
43 int const n = reg[0];
44 if (n >= 1) {
45 __cpuid((int*)reg, 1);
46 f1c = (U32)reg[2];
47 f1d = (U32)reg[3];
48 }
49 if (n >= 7) {
50 __cpuidex((int*)reg, 7, 0);
51 f7b = (U32)reg[1];
52 f7c = (U32)reg[2];
53 }
54 }
55 #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
56 /* The following block is like the normal cpuid branch below, but gcc
57 * reserves ebx for use of its pic register so we must specially
58 * handle the save and restore to avoid clobbering the register
59 */
60 U32 n;
61 __asm__(
62 "pushl %%ebx\n\t"
63 "cpuid\n\t"
64 "popl %%ebx\n\t"
65 : "=a"(n)
66 : "a"(0)
67 : "ecx", "edx");
68 if (n >= 1) {
69 U32 f1a;
70 __asm__(
71 "pushl %%ebx\n\t"
72 "cpuid\n\t"
73 "popl %%ebx\n\t"
74 : "=a"(f1a), "=c"(f1c), "=d"(f1d)
75 : "a"(1)
76 :);
77 }
78 if (n >= 7) {
79 __asm__(
80 "pushl %%ebx\n\t"
81 "cpuid\n\t"
82 "movl %%ebx, %%eax\n\r"
83 "popl %%ebx"
84 : "=a"(f7b), "=c"(f7c)
85 : "a"(7), "c"(0)
86 : "edx");
87 }
88 #elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
89 U32 n;
90 __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
91 if (n >= 1) {
92 U32 f1a;
93 __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
94 }
95 if (n >= 7) {
96 U32 f7a;
97 __asm__("cpuid"
98 : "=a"(f7a), "=b"(f7b), "=c"(f7c)
99 : "a"(7), "c"(0)
100 : "edx");
101 }
102 #endif
103 {
104 ZSTD_cpuid_t cpuid;
105 cpuid.f1c = f1c;
106 cpuid.f1d = f1d;
107 cpuid.f7b = f7b;
108 cpuid.f7c = f7c;
109 return cpuid;
110 }
111 }
112
113 #define X(name, r, bit) \
114 MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
115 return ((cpuid.r) & (1U << bit)) != 0; \
116 }
117
118 /* cpuid(1): Processor Info and Feature Bits. */
119 #define C(name, bit) X(name, f1c, bit)
120 C(sse3, 0)
121 C(pclmuldq, 1)
122 C(dtes64, 2)
123 C(monitor, 3)
124 C(dscpl, 4)
125 C(vmx, 5)
126 C(smx, 6)
127 C(eist, 7)
128 C(tm2, 8)
129 C(ssse3, 9)
130 C(cnxtid, 10)
131 C(fma, 12)
132 C(cx16, 13)
133 C(xtpr, 14)
134 C(pdcm, 15)
135 C(pcid, 17)
136 C(dca, 18)
137 C(sse41, 19)
138 C(sse42, 20)
139 C(x2apic, 21)
140 C(movbe, 22)
141 C(popcnt, 23)
142 C(tscdeadline, 24)
143 C(aes, 25)
144 C(xsave, 26)
145 C(osxsave, 27)
146 C(avx, 28)
147 C(f16c, 29)
148 C(rdrand, 30)
149 #undef C
150 #define D(name, bit) X(name, f1d, bit)
151 D(fpu, 0)
152 D(vme, 1)
153 D(de, 2)
154 D(pse, 3)
155 D(tsc, 4)
156 D(msr, 5)
157 D(pae, 6)
158 D(mce, 7)
159 D(cx8, 8)
160 D(apic, 9)
161 D(sep, 11)
162 D(mtrr, 12)
163 D(pge, 13)
164 D(mca, 14)
165 D(cmov, 15)
166 D(pat, 16)
167 D(pse36, 17)
168 D(psn, 18)
169 D(clfsh, 19)
170 D(ds, 21)
171 D(acpi, 22)
172 D(mmx, 23)
173 D(fxsr, 24)
174 D(sse, 25)
175 D(sse2, 26)
176 D(ss, 27)
177 D(htt, 28)
178 D(tm, 29)
179 D(pbe, 31)
180 #undef D
181
182 /* cpuid(7): Extended Features. */
183 #define B(name, bit) X(name, f7b, bit)
184 B(bmi1, 3)
185 B(hle, 4)
186 B(avx2, 5)
187 B(smep, 7)
188 B(bmi2, 8)
189 B(erms, 9)
190 B(invpcid, 10)
191 B(rtm, 11)
192 B(mpx, 14)
193 B(avx512f, 16)
194 B(avx512dq, 17)
195 B(rdseed, 18)
196 B(adx, 19)
197 B(smap, 20)
198 B(avx512ifma, 21)
199 B(pcommit, 22)
200 B(clflushopt, 23)
201 B(clwb, 24)
202 B(avx512pf, 26)
203 B(avx512er, 27)
204 B(avx512cd, 28)
205 B(sha, 29)
206 B(avx512bw, 30)
207 B(avx512vl, 31)
208 #undef B
209 #define C(name, bit) X(name, f7c, bit)
210 C(prefetchwt1, 0)
211 C(avx512vbmi, 1)
212 #undef C
213
214 #undef X
215
216 #endif /* ZSTD_COMMON_CPU_H */
(The remaining new files in this commit are too big and their content was truncated in this view.)
@@ -8,11 +8,13 b' mercurial/cext/revlog.c'
 contrib/python-zstandard/c-ext/bufferutil.c
 contrib/python-zstandard/c-ext/compressiondict.c
 contrib/python-zstandard/c-ext/compressionparams.c
+contrib/python-zstandard/c-ext/compressionreader.c
 contrib/python-zstandard/c-ext/compressionwriter.c
 contrib/python-zstandard/c-ext/compressobj.c
 contrib/python-zstandard/c-ext/compressor.c
 contrib/python-zstandard/c-ext/compressoriterator.c
 contrib/python-zstandard/c-ext/constants.c
+contrib/python-zstandard/c-ext/decompressionreader.c
 contrib/python-zstandard/c-ext/decompressionwriter.c
 contrib/python-zstandard/c-ext/decompressobj.c
 contrib/python-zstandard/c-ext/decompressor.c
@@ -21,11 +23,13 b' contrib/python-zstandard/c-ext/framepara'
 contrib/python-zstandard/c-ext/python-zstandard.h
 contrib/python-zstandard/zstd.c
 contrib/python-zstandard/zstd/common/bitstream.h
+contrib/python-zstandard/zstd/common/compiler.h
+contrib/python-zstandard/zstd/common/cpu.h
 contrib/python-zstandard/zstd/common/entropy_common.c
 contrib/python-zstandard/zstd/common/error_private.c
 contrib/python-zstandard/zstd/common/error_private.h
+contrib/python-zstandard/zstd/common/fse_decompress.c
 contrib/python-zstandard/zstd/common/fse.h
-contrib/python-zstandard/zstd/common/fse_decompress.c
 contrib/python-zstandard/zstd/common/huf.h
 contrib/python-zstandard/zstd/common/mem.h
 contrib/python-zstandard/zstd/common/pool.c
@@ -40,11 +44,25 b' contrib/python-zstandard/zstd/common/zst'
40 contrib/python-zstandard/zstd/compress/fse_compress.c
44 contrib/python-zstandard/zstd/compress/fse_compress.c
41 contrib/python-zstandard/zstd/compress/huf_compress.c
45 contrib/python-zstandard/zstd/compress/huf_compress.c
42 contrib/python-zstandard/zstd/compress/zstd_compress.c
46 contrib/python-zstandard/zstd/compress/zstd_compress.c
43 contrib/python-zstandard/zstd/compress/zstd_opt.h
47 contrib/python-zstandard/zstd/compress/zstd_compress_internal.h
48 contrib/python-zstandard/zstd/compress/zstd_double_fast.c
49 contrib/python-zstandard/zstd/compress/zstd_double_fast.h
50 contrib/python-zstandard/zstd/compress/zstd_fast.c
51 contrib/python-zstandard/zstd/compress/zstd_fast.h
52 contrib/python-zstandard/zstd/compress/zstd_lazy.c
53 contrib/python-zstandard/zstd/compress/zstd_lazy.h
54 contrib/python-zstandard/zstd/compress/zstd_ldm.c
55 contrib/python-zstandard/zstd/compress/zstd_ldm.h
44 contrib/python-zstandard/zstd/compress/zstdmt_compress.c
56 contrib/python-zstandard/zstd/compress/zstdmt_compress.c
45 contrib/python-zstandard/zstd/compress/zstdmt_compress.h
57 contrib/python-zstandard/zstd/compress/zstdmt_compress.h
58 contrib/python-zstandard/zstd/compress/zstd_opt.c
59 contrib/python-zstandard/zstd/compress/zstd_opt.h
46 contrib/python-zstandard/zstd/decompress/huf_decompress.c
60 contrib/python-zstandard/zstd/decompress/huf_decompress.c
47 contrib/python-zstandard/zstd/decompress/zstd_decompress.c
61 contrib/python-zstandard/zstd/decompress/zstd_decompress.c
62 contrib/python-zstandard/zstd/deprecated/zbuff_common.c
63 contrib/python-zstandard/zstd/deprecated/zbuff_compress.c
64 contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c
65 contrib/python-zstandard/zstd/deprecated/zbuff.h
48 contrib/python-zstandard/zstd/dictBuilder/cover.c
66 contrib/python-zstandard/zstd/dictBuilder/cover.c
49 contrib/python-zstandard/zstd/dictBuilder/divsufsort.c
67 contrib/python-zstandard/zstd/dictBuilder/divsufsort.c
50 contrib/python-zstandard/zstd/dictBuilder/divsufsort.h
68 contrib/python-zstandard/zstd/dictBuilder/divsufsort.h
@@ -1,5 +1,7 b''
1 graft c-ext
1 graft c-ext
2 graft zstd
2 graft zstd
3 graft tests
3 include make_cffi.py
4 include make_cffi.py
4 include setup_zstd.py
5 include setup_zstd.py
5 include zstd.c
6 include zstd.c
7 include LICENSE
@@ -1,13 +1,201 b''
1 ===============
1 Version History
2 Version History
2 ===============
3 ===============
3
4
5 1.0.0 (not yet released)
6 ========================
7
8 Actions Blocking Release
9 ------------------------
10
11 * compression and decompression APIs that support the ``io.RawIOBase`` interface
12 (#13).
13 * Refactor module names so C and CFFI extensions live under ``zstandard``
14 package.
15 * Overall API design review.
16 * Use Python allocator where possible.
17 * Figure out what to do about experimental APIs not implemented by CFFI.
18 * APIs for auto adjusting compression parameters based on input size. e.g.
19 clamping the window log so it isn't too large for input.
20 * Consider allowing compressor and decompressor instances to be thread safe,
21 support concurrent operations. Or track when an operation is in progress and
22 refuse to let concurrent operations use the same instance.
23 * Support for magic-less frames for all decompression operations (``decompress()``
24 doesn't work due to sniffing the content size and the lack of a ZSTD API to
25 sniff magic-less frames - this should be fixed in 1.3.5.).
26 * Audit for complete flushing when ending compression streams.
27 * Deprecate legacy APIs.
28 * Audit for ability to control read/write sizes on all APIs.
29 * Detect memory leaks via bench.py.
30 * Remove low-level compression parameters from ``ZstdCompressor.__init__`` and
31 require use of ``CompressionParameters``.
32 * Expose ``ZSTD_getFrameProgression()`` from more compressor types.
33
34 Other Actions Not Blocking Release
35 ---------------------------------------
36
37 * Support for block compression APIs.
38 * API for ensuring max memory ceiling isn't exceeded.
39 * Move off nose for testing.
40
41 0.9.0 (released 2018-04-08)
42 ===========================
43
44 Backwards Compatibility Notes
45 -----------------------------
46
47 * CFFI 1.11 or newer is now required (previous requirement was 1.8).
48 * The primary module is now ``zstandard``. Please change imports of ``zstd``
49 and ``zstd_cffi`` to ``import zstandard``. See the README for more. Support
50 for importing the old names will be dropped in the next release.
51 * ``ZstdCompressor.read_from()`` and ``ZstdDecompressor.read_from()`` have
52 been renamed to ``read_to_iter()``. ``read_from()`` is aliased to the new
53 name and will be deleted in a future release.
54 * Support for Python 2.6 has been removed.
55 * Support for Python 3.3 has been removed.
56 * The ``selectivity`` argument to ``train_dictionary()`` has been removed, as
57 the feature disappeared from zstd 1.3.
58 * Support for legacy dictionaries has been removed. Cover dictionaries are now
59 the default. ``train_cover_dictionary()`` has effectively been renamed to
60 ``train_dictionary()``.
61 * The ``allow_empty`` argument from ``ZstdCompressor.compress()`` has been
62 deleted and the method now allows empty inputs to be compressed by default.
63 * ``estimate_compression_context_size()`` has been removed. Use
64 ``CompressionParameters.estimated_compression_context_size()`` instead.
65 * ``get_compression_parameters()`` has been removed. Use
66 ``CompressionParameters.from_level()`` instead.
67 * The arguments to ``CompressionParameters.__init__()`` have changed. If you
68 were using positional arguments before, the positions now map to different
69 arguments. It is recommended to use keyword arguments to construct
70 ``CompressionParameters`` instances.
71 * ``TARGETLENGTH_MAX`` constant has been removed (it disappeared from zstandard
72 1.3.4).
73 * ``ZstdCompressor.write_to()`` and ``ZstdDecompressor.write_to()`` have been
74 renamed to ``ZstdCompressor.stream_writer()`` and
75 ``ZstdDecompressor.stream_writer()``, respectively. The old names are still
76 aliased, but will be removed in the next major release.
77 * Content sizes are written into frame headers by default
78 (``ZstdCompressor(write_content_size=True)`` is now the default).
79 * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``
80 for consistency with other types. The old name is an alias and will be removed
81 in the next major release.
82
83 Bug Fixes
84 ---------
85
86 * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40) (from 0.8.2).
87 * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35) (from 0.8.2).
88 * Fixed memory leak of ``ZSTD_DDict`` instances in CFFI's ``ZstdDecompressor``.
89
90 New Features
91 ------------
92
93 * Bundled zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various
94 bug fixes and performance improvements. It also gives us access to newer
95 features.
96 * Support for negative compression levels.
97 * Support for *long distance matching* (facilitates compression ratios that approach
98 LZMA).
99 * Support for reading empty zstandard frames (with an embedded content size
100 of 0).
101 * Support for writing and partial support for reading zstandard frames without a
102 magic header.
103 * New ``stream_reader()`` API that exposes the ``io.RawIOBase`` interface (allows
104 you to ``.read()`` from a file-like object).
105 * Several minor features, bug fixes, and performance enhancements.
106 * Wheels for Linux and macOS are now provided with releases.
107
108 Changes
109 -------
110
111 * Functions accepting bytes data now use the buffer protocol and can accept
112 more types (like ``memoryview`` and ``bytearray``) (#26).
113 * Add #includes so compilation on OS X and BSDs works (#20).
114 * New ``ZstdDecompressor.stream_reader()`` API to obtain a read-only i/o stream
115 of decompressed data for a source.
116 * New ``ZstdCompressor.stream_reader()`` API to obtain a read-only i/o stream of
117 compressed data for a source.
118 * Renamed ``ZstdDecompressor.read_from()`` to ``ZstdDecompressor.read_to_iter()``.
119 The old name is still available.
120 * Renamed ``ZstdCompressor.read_from()`` to ``ZstdCompressor.read_to_iter()``.
121 ``read_from()`` is still available at its old location.
122 * Introduce the ``zstandard`` module to import and re-export the C or CFFI
123 *backend* as appropriate. Behavior can be controlled via the
124 ``PYTHON_ZSTANDARD_IMPORT_POLICY`` environment variable. See README for
125 usage info.
126 * Vendored version of zstd upgraded to 1.3.4.
127 * Added module constants ``CONTENTSIZE_UNKNOWN`` and ``CONTENTSIZE_ERROR``.
128 * Add ``STRATEGY_BTULTRA`` compression strategy constant.
129 * Switch from deprecated ``ZSTD_getDecompressedSize()`` to
130 ``ZSTD_getFrameContentSize()`` replacement.
131 * ``ZstdCompressor.compress()`` can now compress empty inputs without requiring
132 special handling.
133 * ``ZstdCompressor`` and ``ZstdDecompressor`` now have a ``memory_size()``
134 method for determining the current memory utilization of the underlying zstd
135 primitive.
136 * ``train_dictionary()`` has new arguments and functionality for trying multiple
137 variations of COVER parameters and selecting the best one.
138 * Added module constants ``LDM_MINMATCH_MIN``, ``LDM_MINMATCH_MAX``, and
139 ``LDM_BUCKETSIZELOG_MAX``.
140 * Converted all consumers to the zstandard *new advanced API*, which uses
141 ``ZSTD_compress_generic()``.
142 * ``CompressionParameters.__init__`` now accepts several more arguments,
143 including support for *long distance matching*.
144 * ``ZstdCompressionDict.__init__`` now accepts a ``dict_type`` argument that
145 controls how the dictionary should be interpreted. This can be used to
146 force the use of *content-only* dictionaries or to require the presence
147 of the dictionary magic header.
148 * ``ZstdCompressionDict.precompute_compress()`` can be used to precompute the
149 compression dictionary so it can efficiently be used with multiple
150 ``ZstdCompressor`` instances.
151 * Digested dictionaries are now stored in ``ZstdCompressionDict`` instances,
152 created automatically on first use, and automatically reused by all
153 ``ZstdDecompressor`` instances bound to that dictionary.
154 * All meaningful functions now accept keyword arguments.
155 * ``ZstdDecompressor.decompressobj()`` now accepts a ``write_size`` argument
156 to control how much work to perform on every decompressor invocation.
157 * ``ZstdCompressor.write_to()`` now exposes a ``tell()``, which returns the
158 total number of bytes written so far.
159 * ``ZstdDecompressor.stream_reader()`` now supports ``seek()`` when moving
160 forward in the stream.
161 * Removed ``TARGETLENGTH_MAX`` constant.
162 * Added ``frame_header_size(data)`` function.
163 * Added ``frame_content_size(data)`` function.
164 * Consumers of ``ZSTD_decompress*`` have been switched to the new *advanced
165 decompression* API.
166 * ``ZstdCompressor`` and ``ZstdCompressionParams`` can now be constructed with
167 negative compression levels.
168 * ``ZstdDecompressor`` now accepts a ``max_window_size`` argument to limit the
169 amount of memory required for decompression operations.
170 * ``FORMAT_ZSTD1`` and ``FORMAT_ZSTD1_MAGICLESS`` constants to be used with
171 the ``format`` compression parameter to control whether the frame magic
172 header is written.
173 * ``ZstdDecompressor`` now accepts a ``format`` argument to control the
174 expected frame format.
175 * ``ZstdCompressor`` now has a ``frame_progression()`` method to return
176 information about the current compression operation.
177 * Error messages in CFFI no longer have ``b''`` literals.
178 * Compiler warnings and underlying overflow issues on 32-bit platforms have been
179 fixed.
180 * Builds in CI now build with compiler warnings as errors. This should hopefully
181 prevent new compiler warnings from being introduced.
182 * Make ``ZstdCompressor(write_content_size=True)`` and
183 ``CompressionParameters(write_content_size=True)`` the default.
184 * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``.
185
186 0.8.2 (released 2018-02-22)
187 ---------------------------
188
189 * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40).
190 * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35).
191
4 0.8.1 (released 2017-04-08)
192 0.8.1 (released 2017-04-08)
5 ---------------------------
193 ---------------------------
6
194
7 * Add #includes so compilation on OS X and BSDs works (#20).
195 * Add #includes so compilation on OS X and BSDs works (#20).
8
196
9 0.8.0 (released 2017-03-08)
197 0.8.0 (released 2017-03-08)
10 ---------------------------
198 ===========================
11
199
12 * CompressionParameters now has an estimated_compression_context_size() method.
200 * CompressionParameters now has an estimated_compression_context_size() method.
13 zstd.estimate_compression_context_size() is now deprecated and slated for
201 zstd.estimate_compression_context_size() is now deprecated and slated for
@@ -35,7 +223,7 b' 0.8.0 (released 2017-03-08)'
35 DictParameters instance to control dictionary generation.
223 DictParameters instance to control dictionary generation.
36
224
37 0.7.0 (released 2017-02-07)
225 0.7.0 (released 2017-02-07)
38 ---------------------------
226 ===========================
39
227
40 * Added zstd.get_frame_parameters() to obtain info about a zstd frame.
228 * Added zstd.get_frame_parameters() to obtain info about a zstd frame.
41 * Added ZstdDecompressor.decompress_content_dict_chain() for efficient
229 * Added ZstdDecompressor.decompress_content_dict_chain() for efficient
@@ -62,7 +250,7 b' 0.7.0 (released 2017-02-07)'
62 * DictParameters instances now expose their values as attributes.
250 * DictParameters instances now expose their values as attributes.
63
251
64 0.6.0 (released 2017-01-14)
252 0.6.0 (released 2017-01-14)
65 ---------------------------
253 ===========================
66
254
67 * Support for legacy zstd protocols (build time opt in feature).
255 * Support for legacy zstd protocols (build time opt in feature).
68 * Automation improvements to test against Python 3.6, latest versions
256 * Automation improvements to test against Python 3.6, latest versions
@@ -79,17 +267,17 b' 0.6.0 (released 2017-01-14)'
79 * Disallow compress(b'') when writing content sizes by default (issue #11).
267 * Disallow compress(b'') when writing content sizes by default (issue #11).
80
268
81 0.5.2 (released 2016-11-12)
269 0.5.2 (released 2016-11-12)
82 ---------------------------
270 ===========================
83
271
84 * more packaging fixes for source distribution
272 * more packaging fixes for source distribution
85
273
86 0.5.1 (released 2016-11-12)
274 0.5.1 (released 2016-11-12)
87 ---------------------------
275 ===========================
88
276
89 * setup_zstd.py is included in the source distribution
277 * setup_zstd.py is included in the source distribution
90
278
91 0.5.0 (released 2016-11-10)
279 0.5.0 (released 2016-11-10)
92 ---------------------------
280 ===========================
93
281
94 * Vendored version of zstd updated to 1.1.1.
282 * Vendored version of zstd updated to 1.1.1.
95 * Continuous integration for Python 3.6 and 3.7
283 * Continuous integration for Python 3.6 and 3.7
@@ -114,8 +302,8 b' 0.5.0 (released 2016-11-10)'
114 * The monolithic ``zstd.c`` file has been split into a header file defining
302 * The monolithic ``zstd.c`` file has been split into a header file defining
115 types and separate ``.c`` source files for the implementation.
303 types and separate ``.c`` source files for the implementation.
116
304
117 History of the Project
305 Older History
118 ======================
306 =============
119
307
120 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a
308 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a
121 Python extension for use by the Mercurial project. A very hacky prototype
309 Python extension for use by the Mercurial project. A very hacky prototype
This diff has been collapsed as it changes many lines (971 lines changed).
@@ -11,69 +11,18 b' underlying C API through a Pythonic inte'
11 performance. This means exposing most of the features and flexibility
11 performance. This means exposing most of the features and flexibility
12 of the C API while not sacrificing usability or safety that Python provides.
12 of the C API while not sacrificing usability or safety that Python provides.
13
13
14 The canonical home for this project is
14 The canonical home for this project lives in a Mercurial repository run by
15 the author. For convenience, that repository is frequently synchronized to
15 https://github.com/indygreg/python-zstandard.
16 https://github.com/indygreg/python-zstandard.
16
17
17 | |ci-status| |win-ci-status|
18 | |ci-status| |win-ci-status|
18
19
19 State of Project
20 ================
21
22 The project is officially in beta state. The author is reasonably satisfied
23 that functionality works as advertised. **There will be some backwards
24 incompatible changes before 1.0, probably in the 0.9 release.** This may
25 involve renaming the main module from *zstd* to *zstandard* and renaming
26 various types and methods. Pin the package version to prevent unwanted
27 breakage when this change occurs!
28
29 This project is vendored and distributed with Mercurial 4.1, where it is
30 used in a production capacity.
31
32 There is continuous integration for Python versions 2.6, 2.7, and 3.3+
33 on Linux x86_x64 and Windows x86 and x86_64. The author is reasonably
34 confident the extension is stable and works as advertised on these
35 platforms.
36
37 The CFFI bindings are mostly feature complete. Where a feature is implemented
38 in CFFI, unit tests run against both C extension and CFFI implementation to
39 ensure behavior parity.
40
41 Expected Changes
42 ----------------
43
44 The author is reasonably confident in the current state of what's
45 implemented on the ``ZstdCompressor`` and ``ZstdDecompressor`` types.
46 Those APIs likely won't change significantly. Some low-level behavior
47 (such as naming and types expected by arguments) may change.
48
49 There will likely be arguments added to control the input and output
50 buffer sizes (currently, certain operations read and write in chunk
51 sizes using zstd's preferred defaults).
52
53 There should be an API that accepts an object that conforms to the buffer
54 interface and returns an iterator over compressed or decompressed output.
55
56 There should be an API that exposes an ``io.RawIOBase`` interface to
57 compressor and decompressor streams, like how ``gzip.GzipFile`` from
58 the standard library works (issue 13).
59
60 The author is on the fence as to whether to support the extremely
61 low level compression and decompression APIs. It could be useful to
62 support compression without the framing headers. But the author doesn't
63 believe it a high priority at this time.
64
65 There will likely be a refactoring of the module names. Currently,
66 ``zstd`` is a C extension and ``zstd_cffi`` is the CFFI interface.
67 This means that all code for the C extension must be implemented in
68 C. ``zstd`` may be converted to a Python module so code can be reused
69 between CFFI and C and so not all code in the C extension has to be C.
70
71 Requirements
20 Requirements
72 ============
21 ============
73
22
74 This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, 3.5, and
23 This extension is designed to run with Python 2.7, 3.4, 3.5, and 3.6
75 3.6 on common platforms (Linux, Windows, and OS X). Only x86_64 is
24 on common platforms (Linux, Windows, and OS X). x86 and x86_64 are well-tested
76 currently well-tested as an architecture.
25 on Windows. Only x86_64 is well-tested on Linux and macOS.
77
26
78 Installing
27 Installing
79 ==========
28 ==========
@@ -96,114 +45,82 b' this package with ``conda``.'
96 Performance
45 Performance
97 ===========
46 ===========
98
47
99 Very crude and non-scientific benchmarking (most benchmarks fall in this
48 zstandard is a highly tunable compression algorithm. In its default settings
100 category because proper benchmarking is hard) show that the Python bindings
49 (compression level 3), it will be faster at compression and decompression and
101 perform within 10% of the native C implementation.
50 will have better compression ratios than zlib on most data sets. When tuned
102
51 for speed, it approaches lz4's speed and ratios. When tuned for compression
103 The following table compares the performance of compressing and decompressing
52 ratio, it approaches lzma ratios and compression speed, but decompression
104 a 1.1 GB tar file comprised of the files in a Firefox source checkout. Values
53 speed is much faster. See the official zstandard documentation for more.
105 obtained with the ``zstd`` program are on the left. The remaining columns detail
106 performance of various compression APIs in the Python bindings.
107
54
108 +-------+-----------------+-----------------+-----------------+---------------+
55 zstandard and this library support multi-threaded compression. There is a
109 | Level | Native | Simple | Stream In | Stream Out |
56 mechanism to compress large inputs using multiple threads.
110 | | Comp / Decomp | Comp / Decomp | Comp / Decomp | Comp |
111 +=======+=================+=================+=================+===============+
112 | 1 | 490 / 1338 MB/s | 458 / 1266 MB/s | 407 / 1156 MB/s | 405 MB/s |
113 +-------+-----------------+-----------------+-----------------+---------------+
114 | 2 | 412 / 1288 MB/s | 381 / 1203 MB/s | 345 / 1128 MB/s | 349 MB/s |
115 +-------+-----------------+-----------------+-----------------+---------------+
116 | 3 | 342 / 1312 MB/s | 319 / 1182 MB/s | 285 / 1165 MB/s | 287 MB/s |
117 +-------+-----------------+-----------------+-----------------+---------------+
118 | 11 | 64 / 1506 MB/s | 66 / 1436 MB/s | 56 / 1342 MB/s | 57 MB/s |
119 +-------+-----------------+-----------------+-----------------+---------------+
120
121 Again, these are very unscientific. But it shows that Python is capable of
122 compressing at several hundred MB/s and decompressing at over 1 GB/s.
123
124 Comparison to Other Python Bindings
125 ===================================
126
127 https://pypi.python.org/pypi/zstd is an alternate Python binding to
128 Zstandard. At the time this was written, the latest release of that
129 package (1.1.2) only exposed the simple APIs for compression and decompression.
130 This package exposes much more of the zstd API, including streaming and
131 dictionary compression. This package also has CFFI support.
132
133 Bundling of Zstandard Source Code
134 =================================
135
136 The source repository for this project contains a vendored copy of the
137 Zstandard source code. This is done for a few reasons.
138
57
139 First, Zstandard is relatively new and not yet widely available as a system
58 The performance of this library is usually very similar to what the zstandard
140 package. Providing a copy of the source code enables the Python C extension
59 C API can deliver. Overhead in this library is due to general Python overhead
141 to be compiled without requiring the user to obtain the Zstandard source code
60 and can't easily be avoided by *any* zstandard Python binding. This library
142 separately.
61 exposes multiple APIs for performing compression and decompression so callers
143
62 can pick an API suitable for their need. Contrast with the compression
144 Second, Zstandard has both a stable *public* API and an *experimental* API.
63 modules in Python's standard library (like ``zlib``), which only offer limited
145 The *experimental* API is actually quite useful (contains functionality for
64 mechanisms for performing operations. The API flexibility means consumers can
146 training dictionaries for example), so it is something we wish to expose to
65 choose to use APIs that facilitate zero copying or minimize Python object
147 Python. However, the *experimental* API is only available via static linking.
66 creation and garbage collection overhead.
148 Furthermore, the *experimental* API can change at any time. So, control over
149 the exact version of the Zstandard library linked against is important to
150 ensure known behavior.
151
152 Instructions for Building and Testing
153 =====================================
154
155 Once you have the source code, the extension can be built via setup.py::
156
157 $ python setup.py build_ext
158
159 We recommend testing with ``nose``::
160
161 $ nosetests
162
67
163 A Tox configuration is present to test against multiple Python versions::
68 This library is capable of single-threaded throughputs well over 1 GB/s. For
164
69 exact numbers, measure yourself. The source code repository has a ``bench.py``
165 $ tox
70 script that can be used to measure things.
166
167 Tests use the ``hypothesis`` Python package to perform fuzzing. If you
168 don't have it, those tests won't run. Since the fuzzing tests take longer
169 to execute than normal tests, you'll need to opt in to running them by
170 setting the ``ZSTD_SLOW_TESTS`` environment variable. This is set
171 automatically when using ``tox``.
172
173 The ``cffi`` Python package needs to be installed in order to build the CFFI
174 bindings. If it isn't present, the CFFI bindings won't be built.
175
176 To create a virtualenv with all development dependencies, do something
177 like the following::
178
179 # Python 2
180 $ virtualenv venv
181
182 # Python 3
183 $ python3 -m venv venv
184
185 $ source venv/bin/activate
186 $ pip install cffi hypothesis nose tox
187
71
188 API
72 API
189 ===
73 ===
190
74
191 The compiled C extension provides a ``zstd`` Python module. The CFFI
75 To interface with Zstandard, simply import the ``zstandard`` module::
192 bindings provide a ``zstd_cffi`` module. Both provide an identical API
76
193 interface. The types, functions, and attributes exposed by these modules
77 import zstandard
78
79 It is a popular convention to alias the module as a different name for
80 brevity::
81
82 import zstandard as zstd
83
84 This module attempts to import and use either the C extension or CFFI
85 implementation. On Python platforms known to support C extensions (like
86 CPython), it raises an ImportError if the C extension cannot be imported.
87 On Python platforms known to not support C extensions (like PyPy), it only
88 attempts to import the CFFI implementation and raises ImportError if that
89 can't be done. On other platforms, it first tries to import the C extension
90 then falls back to CFFI if that fails and raises ImportError if CFFI fails.
91
92 To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY``
93 environment variable can be set. The following values are accepted:
94
95 default
96 The behavior described above.
97 cffi_fallback
98 Always try to import the C extension then fall back to CFFI if that
99 fails.
100 cext
101 Only attempt to import the C extension.
102 cffi
103 Only attempt to import the CFFI implementation.
104
105 In addition, the ``zstandard`` module exports a ``backend`` attribute
106 containing the string name of the backend being used. It will be one
107 of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively).
108
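For example, a caller could check which backend was loaded (a minimal sketch;
the result is shown as a comment)::

    import zstandard as zstd

    # One of 'cext' or 'cffi', depending on which implementation was imported.
    print(zstd.backend)
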
109 The types, functions, and attributes exposed by the ``zstandard`` module
194 are documented in the sections below.
110 are documented in the sections below.
195
111
196 .. note::
112 .. note::
197
113
198 The documentation in this section makes references to various zstd
114 The documentation in this section makes references to various zstd
199 concepts and functionality. The ``Concepts`` section below explains
115 concepts and functionality. The source repository contains a
200 these concepts in more detail.
116 ``docs/concepts.rst`` file explaining these in more detail.
201
117
202 ZstdCompressor
118 ZstdCompressor
203 --------------
119 --------------
204
120
205 The ``ZstdCompressor`` class provides an interface for performing
121 The ``ZstdCompressor`` class provides an interface for performing
206 compression operations.
122 compression operations. Each instance is essentially a wrapper around a
123 ``ZSTD_CCtx`` from the C API.
207
124
208 Each instance is associated with parameters that control compression
125 Each instance is associated with parameters that control compression
209 behavior. These come from the following named arguments (all optional):
126 behavior. These come from the following named arguments (all optional):
@@ -214,21 +131,21 b' dict_data'
214 Compression dictionary to use.
131 Compression dictionary to use.
215
132
216 Note: When using dictionary data and ``compress()`` is called multiple
133 Note: When using dictionary data and ``compress()`` is called multiple
217 times, the ``CompressionParameters`` derived from an integer compression
134 times, the ``ZstdCompressionParameters`` derived from an integer
218 ``level`` and the first compressed data's size will be reused for all
135 compression ``level`` and the first compressed data's size will be reused
219 subsequent operations. This may not be desirable if source data size
136 for all subsequent operations. This may not be desirable if source data
220 varies significantly.
137 size varies significantly.
221 compression_params
138 compression_params
222 A ``CompressionParameters`` instance (overrides the ``level`` value).
139 A ``ZstdCompressionParameters`` instance defining compression settings.
223 write_checksum
140 write_checksum
224 Whether a 4 byte checksum should be written with the compressed data.
141 Whether a 4 byte checksum should be written with the compressed data.
225 Defaults to False. If True, the decompressor can verify that decompressed
142 Defaults to False. If True, the decompressor can verify that decompressed
226 data matches the original input data.
143 data matches the original input data.
227 write_content_size
144 write_content_size
228 Whether the size of the uncompressed data will be written into the
145 Whether the size of the uncompressed data will be written into the
229 header of compressed data. Defaults to False. The data will only be
146 header of compressed data. Defaults to True. The data will only be
230 written if the compressor knows the size of the input data. This is
147 written if the compressor knows the size of the input data. This is
231 likely not true for streaming compression.
148 often not true for streaming compression.
232 write_dict_id
149 write_dict_id
233 Whether to write the dictionary ID into the compressed data.
150 Whether to write the dictionary ID into the compressed data.
234 Defaults to True. The dictionary ID is only written if a dictionary
151 Defaults to True. The dictionary ID is only written if a dictionary
@@ -242,10 +159,25 b' threads'
242 data. APIs that spawn multiple threads for working on multiple pieces of
159 data. APIs that spawn multiple threads for working on multiple pieces of
243 data have their own ``threads`` argument.
160 data have their own ``threads`` argument.
244
161
162 ``compression_params`` is mutually exclusive with ``level``, ``write_checksum``,
163 ``write_content_size``, ``write_dict_id``, and ``threads``.
164
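As a sketch of the two mutually exclusive construction styles (using the
``ZstdCompressionParameters.from_level()`` helper described in the changelog
above)::

    # Derive parameters from an integer level plus feature flags...
    cctx = zstd.ZstdCompressor(level=10, write_checksum=True)

    # ...or supply an explicit parameters object, but never both at once.
    params = zstd.ZstdCompressionParameters.from_level(10)
    cctx = zstd.ZstdCompressor(compression_params=params)
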
245 Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
165 Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
246 instances can be called from multiple Python threads simultaneously. In other
166 instances can be called from multiple Python threads simultaneously. In other
247 words, assume instances are not thread safe unless stated otherwise.
167 words, assume instances are not thread safe unless stated otherwise.
248
168
169 Utility Methods
170 ^^^^^^^^^^^^^^^
171
172 ``frame_progression()`` returns a 3-tuple containing the number of bytes
173 ingested, consumed, and produced by the current compression operation.
174
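For example, progression could be polled while feeding a streaming compressor
(a sketch assuming ``fh`` is an open, writable file object)::

    cctx = zstd.ZstdCompressor()
    with cctx.stream_writer(fh) as compressor:
        compressor.write(b'data to compress')
        # 3-tuple of bytes ingested, consumed, and produced so far.
        ingested, consumed, produced = cctx.frame_progression()
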
175 ``memory_size()`` obtains the memory utilization of the underlying zstd
176 compression context, in bytes.::
177
178 cctx = zstd.ZstdCompressor()
179 memory = cctx.memory_size()
180
249 Simple API
181 Simple API
250 ^^^^^^^^^^
182 ^^^^^^^^^^
251
183
@@ -256,40 +188,75 b' Simple API'
256
188
257 The ``data`` argument can be any object that implements the *buffer protocol*.
189 The ``data`` argument can be any object that implements the *buffer protocol*.
258
190
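For instance, a ``memoryview`` or ``bytearray`` can be passed directly
(a minimal sketch)::

    cctx = zstd.ZstdCompressor()
    compressed = cctx.compress(memoryview(b'data to compress'))
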
259 Unless ``compression_params`` or ``dict_data`` are passed to the
191 Stream Reader API
260 ``ZstdCompressor``, each invocation of ``compress()`` will calculate the
192 ^^^^^^^^^^^^^^^^^
261 optimal compression parameters for the configured compression ``level`` and
193
262 input data size (some parameters are fine-tuned for small input sizes).
194 ``stream_reader(source)`` can be used to obtain an object conforming to the
195 ``io.RawIOBase`` interface for reading compressed output as a stream::
196
197 with open(path, 'rb') as fh:
198 cctx = zstd.ZstdCompressor()
199 with cctx.stream_reader(fh) as reader:
200 while True:
201 chunk = reader.read(16384)
202 if not chunk:
203 break
204
205 # Do something with compressed chunk.
206
207 The stream can only be read within a context manager. When the context
208 manager exits, the stream is closed and the underlying resource is
209 released and future operations against the compression stream will fail.
210
211 The ``source`` argument to ``stream_reader()`` can be any object with a
212 ``read(size)`` method or any object implementing the *buffer protocol*.
263
213
264 If a compression dictionary is being used, the compression parameters
214 ``stream_reader()`` accepts a ``size`` argument specifying how large the input
265 determined from the first input's size will be reused for subsequent
215 stream is. This is used to adjust compression parameters so they are
266 operations.
216 tailored to the source size.::
217
218 with open(path, 'rb') as fh:
219 cctx = zstd.ZstdCompressor()
220 with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader:
221 ...
222
223 If the ``source`` is a stream, you can specify how large ``read()`` requests
224 to that stream should be via the ``read_size`` argument. It defaults to
225 ``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.::
267
226
268 There is currently a deficiency in zstd's C APIs that makes it difficult
227 with open(path, 'rb') as fh:
269 to round trip empty inputs when ``write_content_size=True``. Attempting
228 cctx = zstd.ZstdCompressor()
270 this will raise a ``ValueError`` unless ``allow_empty=True`` is passed
229 # Will perform fh.read(8192) when obtaining data to feed into the
271 to ``compress()``.
230 # compressor.
231 with cctx.stream_reader(fh, read_size=8192) as reader:
232 ...
233
234 The stream returned by ``stream_reader()`` is neither writable nor seekable
235 (even if the underlying source is seekable). ``readline()`` and
236 ``readlines()`` are not implemented because they don't make sense for
237 compressed data. ``tell()`` returns the number of compressed bytes
238 emitted so far.
272
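A short sketch of tracking compressed output with ``tell()`` (assuming the
reader setup shown above)::

    with open(path, 'rb') as fh:
        cctx = zstd.ZstdCompressor()
        with cctx.stream_reader(fh) as reader:
            reader.read(16384)
            # Number of compressed bytes emitted so far.
            emitted = reader.tell()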
239
273 Streaming Input API
240 Streaming Input API
274 ^^^^^^^^^^^^^^^^^^^
241 ^^^^^^^^^^^^^^^^^^^
275
242
276 ``write_to(fh)`` (which behaves as a context manager) allows you to *stream*
243 ``stream_writer(fh)`` (which behaves as a context manager) allows you to *stream*
277 data into a compressor.::
244 data into a compressor.::
278
245
279 cctx = zstd.ZstdCompressor(level=10)
246 cctx = zstd.ZstdCompressor(level=10)
280 with cctx.write_to(fh) as compressor:
247 with cctx.stream_writer(fh) as compressor:
281 compressor.write(b'chunk 0')
248 compressor.write(b'chunk 0')
282 compressor.write(b'chunk 1')
249 compressor.write(b'chunk 1')
283 ...
250 ...
284
251
285 The argument to ``write_to()`` must have a ``write(data)`` method. As
252 The argument to ``stream_writer()`` must have a ``write(data)`` method. As
286 compressed data is available, ``write()`` will be called with the compressed
253 compressed data is available, ``write()`` will be called with the compressed
287 data as its argument. Many common Python types implement ``write()``, including
254 data as its argument. Many common Python types implement ``write()``, including
288 open file handles and ``io.BytesIO``.
255 open file handles and ``io.BytesIO``.
289
256
290 ``write_to()`` returns an object representing a streaming compressor instance.
257 ``stream_writer()`` returns an object representing a streaming compressor
291 It **must** be used as a context manager. That object's ``write(data)`` method
258 instance. It **must** be used as a context manager. That object's
292 is used to feed data into the compressor.
259 ``write(data)`` method is used to feed data into the compressor.
293
260
294 A ``flush()`` method can be called to evict whatever data remains within the
261 A ``flush()`` method can be called to evict whatever data remains within the
295 compressor's internal state into the output object. This may result in 0 or
262 compressor's internal state into the output object. This may result in 0 or
@@ -303,7 +270,7 b' If the size of the data being fed to thi'
303 you can declare it before compression begins::
270 you can declare it before compression begins::
304
271
305 cctx = zstd.ZstdCompressor()
272 cctx = zstd.ZstdCompressor()
306 with cctx.write_to(fh, size=data_len) as compressor:
273 with cctx.stream_writer(fh, size=data_len) as compressor:
307 compressor.write(chunk0)
274 compressor.write(chunk0)
308 compressor.write(chunk1)
275 compressor.write(chunk1)
309 ...
276 ...
@@ -315,29 +282,35 b' content size being written into the fram'
315 The size of chunks being ``write()`` to the destination can be specified::
282 The size of chunks being ``write()`` to the destination can be specified::
316
283
317 cctx = zstd.ZstdCompressor()
284 cctx = zstd.ZstdCompressor()
318 with cctx.write_to(fh, write_size=32768) as compressor:
285 with cctx.stream_writer(fh, write_size=32768) as compressor:
319 ...
286 ...
320
287
321 To see how much memory is being used by the streaming compressor::
288 To see how much memory is being used by the streaming compressor::
322
289
323 cctx = zstd.ZstdCompressor()
290 cctx = zstd.ZstdCompressor()
324 with cctx.write_to(fh) as compressor:
291 with cctx.stream_writer(fh) as compressor:
325 ...
292 ...
326 byte_size = compressor.memory_size()
293 byte_size = compressor.memory_size()
327
294
295 The total number of bytes written so far is exposed via ``tell()``::
296
297 cctx = zstd.ZstdCompressor()
298 with cctx.stream_writer(fh) as compressor:
299 ...
300 total_written = compressor.tell()
301
328 Streaming Output API
302 Streaming Output API
329 ^^^^^^^^^^^^^^^^^^^^
303 ^^^^^^^^^^^^^^^^^^^^
330
304
331 ``read_from(reader)`` provides a mechanism to stream data out of a compressor
305 ``read_to_iter(reader)`` provides a mechanism to stream data out of a
332 as an iterator of data chunks.::
306 compressor as an iterator of data chunks.::
333
307
334 cctx = zstd.ZstdCompressor()
308 cctx = zstd.ZstdCompressor()
335 for chunk in cctx.read_from(fh):
309 for chunk in cctx.read_to_iter(fh):
336 # Do something with emitted data.
310 # Do something with emitted data.
337
311
338 ``read_from()`` accepts an object that has a ``read(size)`` method or conforms
312 ``read_to_iter()`` accepts an object that has a ``read(size)`` method or
339 to the buffer protocol. (``bytes`` and ``memoryview`` are 2 common types that
313 conforms to the buffer protocol.
340 provide the buffer protocol.)
341
314
342 Uncompressed data is fetched from the source either by calling ``read(size)``
315 Uncompressed data is fetched from the source either by calling ``read(size)``
343 or by fetching a slice of data from the object directly (in the case where
316 or by fetching a slice of data from the object directly (in the case where
@@ -348,23 +321,24 b' If reading from the source via ``read()`'
348 it raises or returns an empty bytes (``b''``). It is perfectly valid for
321 it raises or returns an empty bytes (``b''``). It is perfectly valid for
349 the source to deliver fewer bytes than were requested by ``read(size)``.
322 the source to deliver fewer bytes than were requested by ``read(size)``.
350
323
351 Like ``write_to()``, ``read_from()`` also accepts a ``size`` argument
324 Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument
352 declaring the size of the input stream::
325 declaring the size of the input stream::
353
326
354 cctx = zstd.ZstdCompressor()
327 cctx = zstd.ZstdCompressor()
355 for chunk in cctx.read_from(fh, size=some_int):
328 for chunk in cctx.read_to_iter(fh, size=some_int):
356 pass
329 pass
357
330
358 You can also control the size that data is ``read()`` from the source and
331 You can also control the size that data is ``read()`` from the source and
359 the ideal size of output chunks::
332 the ideal size of output chunks::
360
333
361 cctx = zstd.ZstdCompressor()
334 cctx = zstd.ZstdCompressor()
362 for chunk in cctx.read_from(fh, read_size=16384, write_size=8192):
335 for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192):
363 pass
336 pass
364
337
365 Unlike ``write_to()``, ``read_from()`` does not give direct control over the
338 Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control
366 sizes of chunks fed into the compressor. Instead, chunk sizes will be whatever
339 over the sizes of chunks fed into the compressor. Instead, chunk sizes will
367 the object being read from delivers. These will often be of a uniform size.
340 be whatever the object being read from delivers. These will often be of a
341 uniform size.
368
342
369 Stream Copying API
343 Stream Copying API
370 ^^^^^^^^^^^^^^^^^^
344 ^^^^^^^^^^^^^^^^^^
@@ -404,7 +378,7 b' Compressor API'
404 ``flush()`` methods. Each returns compressed data or an empty bytes.
378 ``flush()`` methods. Each returns compressed data or an empty bytes.
405
379
406 The purpose of ``compressobj()`` is to provide an API-compatible interface
380 The purpose of ``compressobj()`` is to provide an API-compatible interface
407 with ``zlib.compressobj`` and ``bz2.BZ2Compressor``. This allows callers to
381 with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to
408 swap in different compressor objects while using the same API.
382 swap in different compressor objects while using the same API.
409
383
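A minimal sketch of incremental compression through this interface::

    cctx = zstd.ZstdCompressor()
    cobj = cctx.compressobj()
    data = cobj.compress(b'chunk 0')
    data += cobj.compress(b'chunk 1')
    # Evict whatever remains buffered inside the compressor.
    data += cobj.flush()
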
410 ``flush()`` accepts an optional argument indicating how to end the stream.
384 ``flush()`` accepts an optional argument indicating how to end the stream.
@@ -485,13 +459,23 b' ZstdDecompressor'
485 ----------------
459 ----------------
486
460
487 The ``ZstdDecompressor`` class provides an interface for performing
461 The ``ZstdDecompressor`` class provides an interface for performing
488 decompression.
462 decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from
463 the C API.
489
464
490 Each instance is associated with parameters that control decompression. These
465 Each instance is associated with parameters that control decompression. These
491 come from the following named arguments (all optional):
466 come from the following named arguments (all optional):
492
467
493 dict_data
468 dict_data
494 Compression dictionary to use.
469 Compression dictionary to use.
470 max_window_size
471 Sets an upper limit on the window size for decompression operations in
472 kibibytes. This setting can be used to prevent large memory allocations
473 for inputs using large compression windows.
474 format
475 Set the format of data for the decoder. By default, this is
476 ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to
477 allow decoding frames without the 4 byte magic header. Not all decompression
478 APIs support this mode.
495
479
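For illustration, a decompressor tuned via these arguments might be
constructed as follows (a sketch; the values are arbitrary)::

    dctx = zstd.ZstdDecompressor(
        max_window_size=2048,                # cap the window size (see above)
        format=zstd.FORMAT_ZSTD1_MAGICLESS,  # accept magic-less frames
    )
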
496 The interface of this class is very similar to ``ZstdCompressor`` (by design).
480 The interface of this class is very similar to ``ZstdCompressor`` (by design).
497
481
@@ -499,6 +483,15 b' Unless specified otherwise, assume that '
499 instances can be called from multiple Python threads simultaneously. In other
483 instances can be called from multiple Python threads simultaneously. In other
500 words, assume instances are not thread safe unless stated otherwise.
484 words, assume instances are not thread safe unless stated otherwise.
501
485
486 Utility Methods
487 ^^^^^^^^^^^^^^^
488
489 ``memory_size()`` obtains the size of the underlying zstd decompression context,
490 in bytes.::
491
492 dctx = zstd.ZstdDecompressor()
493 size = dctx.memory_size()
494
502 Simple API
495 Simple API
503 ^^^^^^^^^^
496 ^^^^^^^^^^
504
497
@@ -509,9 +502,10 b' frame in a single operation.::'
509 decompressed = dctx.decompress(data)
502 decompressed = dctx.decompress(data)
510
503
511 By default, ``decompress(data)`` will only work on data written with the content
504 By default, ``decompress(data)`` will only work on data written with the content
512 size encoded in its header. This can be achieved by creating a
505 size encoded in its header (this is the default behavior of
513 ``ZstdCompressor`` with ``write_content_size=True``. If compressed data without
506 ``ZstdCompressor().compress()`` but may not be true for streaming compression). If
514 an embedded content size is seen, ``zstd.ZstdError`` will be raised.
507 compressed data without an embedded content size is seen, ``zstd.ZstdError`` will
508 be raised.
515
509
516 If the compressed data doesn't have its content size embedded within it,
510 If the compressed data doesn't have its content size embedded within it,
517 decompression can be attempted by specifying the ``max_output_size``
511 decompression can be attempted by specifying the ``max_output_size``
@@ -534,17 +528,67 b' performed every time the method is calle'
534 result in a lot of work for the memory allocator and may result in
528 result in a lot of work for the memory allocator and may result in
535 ``MemoryError`` being raised if the allocation fails.
529 ``MemoryError`` being raised if the allocation fails.
536
530
537 If the exact size of decompressed data is unknown, it is **strongly**
531 .. important::
538 recommended to use a streaming API.
532
533 If the exact size of decompressed data is unknown (not passed in explicitly
534 and not stored in the zstandard frame), for performance reasons it is
535 encouraged to use a streaming API.
536
537 Stream Reader API
538 ^^^^^^^^^^^^^^^^^
539
540 ``stream_reader(source)`` can be used to obtain an object conforming to the
541 ``io.RawIOBase`` interface for reading decompressed output as a stream::
542
543 with open(path, 'rb') as fh:
544 dctx = zstd.ZstdDecompressor()
545 with dctx.stream_reader(fh) as reader:
546 while True:
547 chunk = reader.read(16384)
548 if not chunk:
549 break
550
551 # Do something with decompressed chunk.
552
553 The stream can only be read within a context manager. When the context
554 manager exits, the stream is closed and the underlying resource is
555 released and future operations against the stream will fail.
556
557 The ``source`` argument to ``stream_reader()`` can be any object with a
558 ``read(size)`` method or any object implementing the *buffer protocol*.
559
560 If the ``source`` is a stream, you can specify how large ``read()`` requests
561 to that stream should be via the ``read_size`` argument. It defaults to
562 ``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.::
563
564 with open(path, 'rb') as fh:
565 dctx = zstd.ZstdDecompressor()
566 # Will perform fh.read(8192) when obtaining data for the decompressor.
567 with dctx.stream_reader(fh, read_size=8192) as reader:
568 ...
569
570 The stream returned by ``stream_reader()`` is not writable.
571
572 The stream returned by ``stream_reader()`` is *partially* seekable.
573 Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward
574 of the current position are allowed. Offsets behind the current read
575 position and offsets relative to the end of stream are not allowed and
576 will raise ``ValueError`` if attempted.
577
578 ``tell()`` returns the number of decompressed bytes read so far.
579
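A sketch of seeking forward within the decompressed stream::

    import os

    with open(path, 'rb') as fh:
        dctx = zstd.ZstdDecompressor()
        with dctx.stream_reader(fh) as reader:
            reader.read(16)
            # Only offsets forward of the current position are allowed.
            reader.seek(1024, os.SEEK_CUR)
            position = reader.tell()
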
580 Not all I/O methods are implemented. Notably missing is support for
581 ``readline()``, ``readlines()``, and linewise iteration support. Support for
582 these is planned for a future release.
539
583
540 Streaming Input API
584 Streaming Input API
541 ^^^^^^^^^^^^^^^^^^^
585 ^^^^^^^^^^^^^^^^^^^
542
586
543 ``write_to(fh)`` can be used to incrementally send compressed data to a
587 ``stream_writer(fh)`` can be used to incrementally send compressed data to a
544 decompressor.::
588 decompressor.::
545
589
546 dctx = zstd.ZstdDecompressor()
590 dctx = zstd.ZstdDecompressor()
547 with dctx.write_to(fh) as decompressor:
591 with dctx.stream_writer(fh) as decompressor:
548 decompressor.write(compressed_data)
592 decompressor.write(compressed_data)
549
593
550 This behaves similarly to ``zstd.ZstdCompressor``: compressed data is written to
594 This behaves similarly to ``zstd.ZstdCompressor``: compressed data is written to
@@ -558,54 +602,56 b' of ``0`` are possible.'
558 The size of chunks being ``write()`` to the destination can be specified::
602 The size of chunks being ``write()`` to the destination can be specified::
559
603
560 dctx = zstd.ZstdDecompressor()
604 dctx = zstd.ZstdDecompressor()
561 with dctx.write_to(fh, write_size=16384) as decompressor:
605 with dctx.stream_writer(fh, write_size=16384) as decompressor:
562 pass
606 pass
563
607
564 You can see how much memory is being used by the decompressor::
608 You can see how much memory is being used by the decompressor::
565
609
566 dctx = zstd.ZstdDecompressor()
610 dctx = zstd.ZstdDecompressor()
567 with dctx.write_to(fh) as decompressor:
611 with dctx.stream_writer(fh) as decompressor:
568 byte_size = decompressor.memory_size()
612 byte_size = decompressor.memory_size()
569
613
570 Streaming Output API
614 Streaming Output API
571 ^^^^^^^^^^^^^^^^^^^^
615 ^^^^^^^^^^^^^^^^^^^^
572
616
573 ``read_from(fh)`` provides a mechanism to stream decompressed data out of a
617 ``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a
574 compressed source as an iterator of data chunks.::
618 compressed source as an iterator of data chunks.::
575
619
576 dctx = zstd.ZstdDecompressor()
620 dctx = zstd.ZstdDecompressor()
577 for chunk in dctx.read_from(fh):
621 for chunk in dctx.read_to_iter(fh):
578 # Do something with original data.
622 # Do something with original data.
579
623
580 ``read_from()`` accepts a) an object with a ``read(size)`` method that will
624 ``read_to_iter()`` accepts an object with a ``read(size)`` method that will
581 return compressed bytes b) an object conforming to the buffer protocol that
625 return compressed bytes or an object conforming to the buffer protocol that
582 can expose its data as a contiguous range of bytes. The ``bytes`` and
626 can expose its data as a contiguous range of bytes.
583 ``memoryview`` types expose this buffer protocol.
584
627
585 ``read_from()`` returns an iterator whose elements are chunks of the
628 ``read_to_iter()`` returns an iterator whose elements are chunks of the
586 decompressed data.
629 decompressed data.
587
630
588 The size of requested ``read()`` from the source can be specified::
631 The size of requested ``read()`` from the source can be specified::
589
632
590 dctx = zstd.ZstdDecompressor()
633 dctx = zstd.ZstdDecompressor()
591 for chunk in dctx.read_from(fh, read_size=16384):
634 for chunk in dctx.read_to_iter(fh, read_size=16384):
592 pass
635 pass
593
636
594 It is also possible to skip leading bytes in the input data::
637 It is also possible to skip leading bytes in the input data::
595
638
596 dctx = zstd.ZstdDecompressor()
639 dctx = zstd.ZstdDecompressor()
597 for chunk in dctx.read_from(fh, skip_bytes=1):
640 for chunk in dctx.read_to_iter(fh, skip_bytes=1):
598 pass
641 pass
599
642
600 Skipping leading bytes is useful if the source data contains extra
643 .. tip::
601 *header* data but you want to avoid the overhead of making a buffer copy
602 or allocating a new ``memoryview`` object in order to decompress the data.
603
644
604 Similarly to ``ZstdCompressor.read_from()``, the consumer of the iterator
645 Skipping leading bytes is useful if the source data contains extra
646 *header* data. Traditionally, you would need to create a slice or
647 ``memoryview`` of the data you want to decompress. This would create
648 overhead. It is more efficient to pass the offset into this API.
649
650 Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator
605 controls when data is decompressed. If the iterator isn't consumed,
651 controls when data is decompressed. If the iterator isn't consumed,
606 decompression is put on hold.
652 decompression is put on hold.
607
653
608 When ``read_from()`` is passed an object conforming to the buffer protocol,
654 When ``read_to_iter()`` is passed an object conforming to the buffer protocol,
609 the behavior may seem similar to what occurs when the simple decompression
655 the behavior may seem similar to what occurs when the simple decompression
610 API is used. However, this API works when the decompressed size is unknown.
656 API is used. However, this API works when the decompressed size is unknown.
611 Furthermore, if feeding large inputs, the decompressor will work in chunks
657 Furthermore, if feeding large inputs, the decompressor will work in chunks
@@ -636,7 +682,7 b' Decompressor API'
636 ^^^^^^^^^^^^^^^^
682 ^^^^^^^^^^^^^^^^
637
683
638 ``decompressobj()`` returns an object that exposes a ``decompress(data)``
684 ``decompressobj()`` returns an object that exposes a ``decompress(data)``
639 methods. Compressed data chunks are fed into ``decompress(data)`` and
685 method. Compressed data chunks are fed into ``decompress(data)`` and
640 uncompressed output (or an empty bytes) is returned. Output from subsequent
686 uncompressed output (or an empty bytes) is returned. Output from subsequent
641 calls needs to be concatenated to reassemble the full decompressed byte
687 calls needs to be concatenated to reassemble the full decompressed byte
642 sequence.
688 sequence.
@@ -650,11 +696,25 b' can no longer be called.'
650
696
651 Here is how this API should be used::
697 Here is how this API should be used::
652
698
653 dctx = zstd.ZstdDeompressor()
699 dctx = zstd.ZstdDecompressor()
654 dobj = cctx.decompressobj()
700 dobj = dctx.decompressobj()
655 data = dobj.decompress(compressed_chunk_0)
701 data = dobj.decompress(compressed_chunk_0)
656 data = dobj.decompress(compressed_chunk_1)
702 data = dobj.decompress(compressed_chunk_1)
657
703
704 By default, calls to ``decompress()`` write output data in chunks of size
705 ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated
706 before being returned to the caller. It is possible to define the size of
707 these temporary chunks by passing ``write_size`` to ``decompressobj()``::
708
709 dctx = zstd.ZstdDecompressor()
710 dobj = dctx.decompressobj(write_size=1048576)
711
712 .. note::
713
714 Because calls to ``decompress()`` may need to perform multiple
715 memory (re)allocations, this streaming decompression API isn't as
716 efficient as other APIs.
717
658 Batch Decompression API
718 Batch Decompression API
659 ^^^^^^^^^^^^^^^^^^^^^^^
719 ^^^^^^^^^^^^^^^^^^^^^^^
660
720
@@ -671,9 +731,12 b' conform to the buffer protocol. For best'
671 minimal input validation will be done for that type. If calling from
731 minimal input validation will be done for that type. If calling from
672 Python (as opposed to C), constructing one of these instances may add
732 Python (as opposed to C), constructing one of these instances may add
673 overhead cancelling out the performance overhead of validation for list
733 overhead cancelling out the performance overhead of validation for list
674 inputs.
734 inputs.::
675
735
676 The decompressed size of each frame must be discoverable. It can either be
736 dctx = zstd.ZstdDecompressor()
737 results = dctx.multi_decompress_to_buffer([b'...', b'...'])
738
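As a sketch of consuming the result (the segment access shown here assumes the
behavior described in the ``Buffer Types`` section later in this document)::

   # Each decompressed frame is exposed as a segment of the returned
   # collection; tobytes() copies a segment out as a bytes object.
   for i in range(len(results)):
       frame_data = results[i].tobytes()
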
739 The decompressed size of each frame MUST be discoverable. It can either be
677 embedded within the zstd frame (``write_content_size=True`` argument to
740 embedded within the zstd frame (``write_content_size=True`` argument to
678 ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument.
741 ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument.
679
742
@@ -681,7 +744,13 b' The ``decompressed_sizes`` argument is a'
681 protocol which holds an array of 64-bit unsigned integers in the machine's
744 protocol which holds an array of 64-bit unsigned integers in the machine's
682 native format defining the decompressed sizes of each frame. If this argument
745 native format defining the decompressed sizes of each frame. If this argument
683 is passed, it avoids having to scan each frame for its decompressed size.
746 is passed, it avoids having to scan each frame for its decompressed size.
684 This frame scanning can add noticeable overhead in some scenarios.
747 This frame scanning can add noticeable overhead in some scenarios.::
748
749 frames = [...]
750 sizes = struct.pack('=QQQQ', len0, len1, len2, len3)
751
752 dctx = zstd.ZstdDecompressor()
753 results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
685
754
686 The ``threads`` argument controls the number of threads to use to perform
755 The ``threads`` argument controls the number of threads to use to perform
687 decompression operations. The default (``0``) or the value ``1`` means to
756 decompression operations. The default (``0``) or the value ``1`` means to
@@ -701,22 +770,23 b' This function exists to perform decompre'
701 as possible by having as little overhead as possible. Since decompression is
770 as possible by having as little overhead as possible. Since decompression is
702 performed as a single operation and since the decompressed output is stored in
771 performed as a single operation and since the decompressed output is stored in
703 a single buffer, extra memory allocations, Python objects, and Python function
772 a single buffer, extra memory allocations, Python objects, and Python function
704 calls are avoided. This is ideal for scenarios where callers need to access
773 calls are avoided. This is ideal for scenarios where callers know up front that
705 decompressed data for multiple frames.
774 they need to access data for multiple frames, such as when *delta chains* are
775 being used.
706
776
707 Currently, the implementation always spawns multiple threads when requested,
777 Currently, the implementation always spawns multiple threads when requested,
708 even if the amount of work to do is small. In the future, it will be smarter
778 even if the amount of work to do is small. In the future, it will be smarter
709 about avoiding threads and their associated overhead when the amount of
779 about avoiding threads and their associated overhead when the amount of
710 work to do is small.
780 work to do is small.
711
781
712 Content-Only Dictionary Chain Decompression
782 Prefix Dictionary Chain Decompression
713 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
783 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
714
784
715 ``decompress_content_dict_chain(frames)`` performs decompression of a list of
785 ``decompress_content_dict_chain(frames)`` performs decompression of a list of
716 zstd frames produced using chained *content-only* dictionary compression. Such
786 zstd frames produced using chained *prefix* dictionary compression. Such
717 a list of frames is produced by compressing discrete inputs where each
787 a list of frames is produced by compressing discrete inputs where each
718 non-initial input is compressed with a *content-only* dictionary consisting
788 non-initial input is compressed with a *prefix* dictionary consisting of the
719 of the content of the previous input.
789 content of the previous input.
720
790
721 For example, say you have the following inputs::
791 For example, say you have the following inputs::
722
792
@@ -725,25 +795,25 b' For example, say you have the following '
725 The zstd frame chain consists of:
795 The zstd frame chain consists of:
726
796
727 1. ``b'input 1'`` compressed in standalone/discrete mode
797 1. ``b'input 1'`` compressed in standalone/discrete mode
728 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *content-only* dictionary
798 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary
729 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *content-only* dictionary
799 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary
730
800
731 Each zstd frame **must** have the content size written.
801 Each zstd frame **must** have the content size written.
732
802
733 The following Python code can be used to produce a *content-only dictionary
803 The following Python code can be used to produce a *prefix dictionary chain*::
734 chain*::
735
804
736 def make_chain(inputs):
805 def make_chain(inputs):
737 frames = []
806 frames = []
738
807
739 # First frame is compressed in standalone/discrete mode.
808 # First frame is compressed in standalone/discrete mode.
740 zctx = zstd.ZstdCompressor(write_content_size=True)
809 zctx = zstd.ZstdCompressor()
741 frames.append(zctx.compress(inputs[0]))
810 frames.append(zctx.compress(inputs[0]))
742
811
743 # Subsequent frames use the previous fulltext as a content-only dictionary
812 # Subsequent frames use the previous fulltext as a prefix dictionary
744 for i, raw in enumerate(inputs[1:]):
813 for i, raw in enumerate(inputs[1:]):
745 dict_data = zstd.ZstdCompressionDict(inputs[i])
814 dict_data = zstd.ZstdCompressionDict(
746 zctx = zstd.ZstdCompressor(write_content_size=True, dict_data=dict_data)
815 inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT)
816 zctx = zstd.ZstdCompressor(dict_data=dict_data)
747 frames.append(zctx.compress(raw))
817 frames.append(zctx.compress(raw))
748
818
749 return frames
819 return frames
@@ -751,10 +821,13 b' chain*::'
751 ``decompress_content_dict_chain()`` returns the uncompressed data of the last
821 ``decompress_content_dict_chain()`` returns the uncompressed data of the last
752 element in the input chain.
822 element in the input chain.
753
823
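For illustration, a minimal sketch of consuming such a chain (assuming
``frames`` is a list produced by something like the ``make_chain()`` helper
above)::

   dctx = zstd.ZstdDecompressor()

   # Returns the fulltext of the final entry in the chain.
   last_fulltext = dctx.decompress_content_dict_chain(frames)
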
754 It is possible to implement *content-only dictionary chain* decompression
824
755 on top of other Python APIs. However, this function will likely be significantly
825 .. note::
756 faster, especially for long input chains, as it avoids the overhead of
826
757 instantiating and passing around intermediate objects between C and Python.
827 It is possible to implement *prefix dictionary chain* decompression
828 on top of other APIs. However, this function will likely be faster -
829 especially for long input chains - as it avoids the overhead of instantiating
830 and passing around intermediate objects between C and Python.
758
831
759 Multi-Threaded Compression
832 Multi-Threaded Compression
760 --------------------------
833 --------------------------
@@ -764,9 +837,15 b' of threads to use for compression. The w'
764 into segments and each segment is fed into a worker pool for compression. Once
837 into segments and each segment is fed into a worker pool for compression. Once
765 a segment is compressed, it is flushed/appended to the output.
838 a segment is compressed, it is flushed/appended to the output.
766
839
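As an illustrative sketch (the file names here are hypothetical; ``threads=4``
simply requests 4 worker threads as described in this section)::

   cctx = zstd.ZstdCompressor(level=3, threads=4)

   # Compress a (potentially large) file using 4 worker threads.
   with open('input.bin', 'rb') as ifh, open('output.zst', 'wb') as ofh:
       cctx.copy_stream(ifh, ofh)
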
840 .. note::
841
842 These threads are created at the C layer and are not Python threads. So they
843 work outside the GIL. It is therefore possible to CPU saturate multiple cores
844 from Python.
845
767 The segment size for multi-threaded compression is chosen from the window size
846 The segment size for multi-threaded compression is chosen from the window size
768 of the compressor. This is derived from the ``window_log`` attribute of a
847 of the compressor. This is derived from the ``window_log`` attribute of a
769 ``CompressionParameters`` instance. By default, segment sizes are in the 1+MB
848 ``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB
770 range.
849 range.
771
850
772 If multi-threaded compression is requested and the input is smaller than the
851 If multi-threaded compression is requested and the input is smaller than the
@@ -785,31 +864,33 b' than non-multi-threaded compression. The'
785 there is a CPU/wall time versus size trade off that may warrant investigation.
864 there is a CPU/wall time versus size trade off that may warrant investigation.
786
865
787 Output from multi-threaded compression does not require any special handling
866 Output from multi-threaded compression does not require any special handling
788 on the decompression side. In other words, any zstd decompressor should be able
867 on the decompression side. To the decompressor, data generated with a single-
789 to consume data produced with multi-threaded compression.
868 threaded compressor looks the same as data generated by a multi-threaded
869 compressor and does not require any special handling or additional resource
870 requirements.
790
871
791 Dictionary Creation and Management
872 Dictionary Creation and Management
792 ----------------------------------
873 ----------------------------------
793
874
794 Compression dictionaries are represented as the ``ZstdCompressionDict`` type.
875 Compression dictionaries are represented with the ``ZstdCompressionDict`` type.
795
876
796 Instances can be constructed from bytes::
877 Instances can be constructed from bytes::
797
878
798 dict_data = zstd.ZstdCompressionDict(data)
879 dict_data = zstd.ZstdCompressionDict(data)
799
880
800 It is possible to construct a dictionary from *any* data. Unless the
881 It is possible to construct a dictionary from *any* data. If the data doesn't
801 data begins with a magic header, the dictionary will be treated as
882 begin with a magic header, it will be treated as a *prefix* dictionary.
802 *content-only*. *Content-only* dictionaries allow compression operations
883 *Prefix* dictionaries allow compression operations to reference raw data
803 that follow to reference raw data within the content. For one use of
884 within the dictionary.
804 *content-only* dictionaries, see
805 ``ZstdDecompressor.decompress_content_dict_chain()``.
806
885
807 More interestingly, instances can be created by *training* on sample data::
886 It is possible to force the use of *prefix* dictionaries or to require a
887 dictionary header::
808
888
809 dict_data = zstd.train_dictionary(size, samples)
889 dict_data = zstd.ZstdCompressionDict(data,
890 dict_type=zstd.DICT_TYPE_RAWCONTENT)
810
891
811 This takes a list of bytes instances and creates and returns a
892 dict_data = zstd.ZstdCompressionDict(data,
812 ``ZstdCompressionDict``.
893 dict_type=zstd.DICT_TYPE_FULLDICT)
813
894
814 You can see how many bytes are in the dictionary by calling ``len()``::
895 You can see how many bytes are in the dictionary by calling ``len()``::
815
896
@@ -819,7 +900,7 b' You can see how many bytes are in the di'
819 Once you have a dictionary, you can pass it to the objects performing
900 Once you have a dictionary, you can pass it to the objects performing
820 compression and decompression::
901 compression and decompression::
821
902
822 dict_data = zstd.train_dictionary(16384, samples)
903 dict_data = zstd.train_dictionary(131072, samples)
823
904
824 cctx = zstd.ZstdCompressor(dict_data=dict_data)
905 cctx = zstd.ZstdCompressor(dict_data=dict_data)
825 for source_data in input_data:
906 for source_data in input_data:
@@ -829,7 +910,7 b' compression and decompression::'
829 dctx = zstd.ZstdDecompressor(dict_data=dict_data)
910 dctx = zstd.ZstdDecompressor(dict_data=dict_data)
830 for compressed_data in input_data:
911 for compressed_data in input_data:
831 buffer = io.BytesIO()
912 buffer = io.BytesIO()
832 with dctx.write_to(buffer) as decompressor:
913 with dctx.stream_writer(buffer) as decompressor:
833 decompressor.write(compressed_data)
914 decompressor.write(compressed_data)
834 # Do something with raw data in ``buffer``.
915 # Do something with raw data in ``buffer``.
835
916
@@ -843,56 +924,69 b' a ``ZstdCompressionDict`` later) via ``a'
843 dict_data = zstd.train_dictionary(size, samples)
924 dict_data = zstd.train_dictionary(size, samples)
844 raw_data = dict_data.as_bytes()
925 raw_data = dict_data.as_bytes()
845
926
846 The following named arguments to ``train_dictionary`` can also be used
927 By default, when a ``ZstdCompressionDict`` is *attached* to a
847 to further control dictionary generation.
928 ``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the
929 dictionary for use. This is fine if only 1 compression operation is being
930 performed or if the ``ZstdCompressor`` is being reused for multiple operations.
931 But if multiple ``ZstdCompressor`` instances are being used with the dictionary,
932 this can add overhead.
848
933
849 selectivity
934 It is possible to *precompute* the dictionary so it can readily be consumed
850 Integer selectivity level. Default is 9. Larger values yield more data in
935 by multiple ``ZstdCompressor`` instances::
851 dictionary.
936
852 level
937 d = zstd.ZstdCompressionDict(data)
853 Integer compression level. Default is 6.
854 dict_id
855 Integer dictionary ID for the produced dictionary. Default is 0, which
856 means to use a random value.
857 notifications
858 Controls writing of informational messages to ``stderr``. ``0`` (the
859 default) means to write nothing. ``1`` writes errors. ``2`` writes
860 progression info. ``3`` writes more details. And ``4`` writes all info.
861
938
862 Cover Dictionaries
939 # Precompute for compression level 3.
863 ^^^^^^^^^^^^^^^^^^
940 d.precompute_compress(level=3)
864
941
865 An alternate dictionary training mechanism named *cover* is also available.
942 # Precompute with specific compression parameters.
866 More details about this training mechanism are available in the paper
943 params = zstd.ZstdCompressionParameters(...)
867 *Effective Construction of Relative Lempel-Ziv Dictionaries* (authors:
944 d.precompute_compress(compression_params=params)
868 Liao, Petri, Moffat, Wirth).
869
870 To use this mechanism, use ``zstd.train_cover_dictionary()`` instead of
871 ``zstd.train_dictionary()``. The function behaves nearly the same except
872 its arguments are different and the returned dictionary will contain ``k``
873 and ``d`` attributes reflecting the parameters to the cover algorithm.
874
945
875 .. note::
946 .. note::
876
947
877 The ``k`` and ``d`` attributes are only populated on dictionary
948 When a dictionary is precomputed, the compression parameters used to
878 instances created by this function. If a ``ZstdCompressionDict`` is
949 precompute the dictionary overwrite some of the compression parameters
879 constructed from raw bytes data, the ``k`` and ``d`` attributes will
950 specified to ``ZstdCompressor.__init__``.
880 be ``0``.
951
952 Training Dictionaries
953 ^^^^^^^^^^^^^^^^^^^^^
954
955 Unless using *prefix* dictionaries, dictionary data is produced by *training*
956 on existing data::
957
958 dict_data = zstd.train_dictionary(size, samples)
959
960 This takes a target dictionary size and list of bytes instances and creates and
961 returns a ``ZstdCompressionDict``.
962
963 The dictionary training mechanism is known as *cover*. More details about it are
964 available in the paper *Effective Construction of Relative Lempel-Ziv
965 Dictionaries* (authors: Liao, Petri, Moffat, Wirth).
966
967 The cover algorithm takes parameters ``k`` and ``d``. These are the
968 *segment size* and *dmer size*, respectively. The returned dictionary
969 instance created by this function has ``k`` and ``d`` attributes
970 containing the values for these parameters. If a ``ZstdCompressionDict``
971 is constructed from raw bytes data (a content-only dictionary), the
972 ``k`` and ``d`` attributes will be ``0``.
881
973
882 The segment and dmer size parameters to the cover algorithm can either be
974 The segment and dmer size parameters to the cover algorithm can either be
883 specified manually or you can ask ``train_cover_dictionary()`` to try
975 specified manually or ``train_dictionary()`` can try multiple values
884 multiple values and pick the best one, where *best* means the smallest
976 and pick the best one, where *best* means the smallest compressed data size.
885 compressed data size.
977 This latter mode is called *optimization* mode.
886
887 In manual mode, the ``k`` and ``d`` arguments must be specified or a
888 ``ZstdError`` will be raised.
889
978
890 In automatic mode (triggered by specifying ``optimize=True``), ``k``
979 If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``,
891 and ``d`` are optional. If a value isn't specified, then default values for
980 or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t``
892 both are tested. The ``steps`` argument can control the number of steps
981 struct) are defined, *optimization* mode is used with default parameter
893 through ``k`` values. The ``level`` argument defines the compression level
982 values.
894 that will be used when testing the compressed size. And ``threads`` can
983
895 specify the number of threads to use for concurrent operation.
984 If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged
985 with explicit control over those parameters. Specifying ``threads=0`` or
986 ``threads=1`` can be used to engage *optimization* mode if other parameters
987 are not defined.
988
989 Otherwise, non-*optimization* mode is used with the parameters specified.
896
990
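For illustration, a sketch of both modes (the ``k``, ``d``, and size values
below are arbitrary placeholders, not recommendations)::

   # Non-optimization mode: segment size and dmer size are given explicitly.
   dict_data = zstd.train_dictionary(131072, samples, k=64, d=8)

   # Optimization mode: try parameter variations using all logical CPUs.
   dict_data = zstd.train_dictionary(131072, samples, threads=-1)
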
897 This function takes the following arguments:
991 This function takes the following arguments:
898
992
@@ -909,64 +1003,92 b' d'
909 dict_id
1003 dict_id
910 Integer dictionary ID for the produced dictionary. Default is 0, which uses
1004 Integer dictionary ID for the produced dictionary. Default is 0, which uses
911 a random value.
1005 a random value.
912 optimize
1006 steps
913 When true, test dictionary generation with multiple parameters.
1007 Number of steps through ``k`` values to perform when trying parameter
1008 variations.
1009 threads
1010 Number of threads to use when trying parameter variations. Default is 0,
1011 which means to use a single thread. A negative value can be specified to
1012 use as many threads as there are detected logical CPUs.
914 level
1013 level
915 Integer target compression level when testing compression with
1014 Integer target compression level when trying parameter variations.
916 ``optimize=True``. Default is 1.
917 steps
918 Number of steps through ``k`` values to perform when ``optimize=True``.
919 Default is 32.
920 threads
921 Number of threads to use when ``optimize=True``. Default is 0, which means
922 to use a single thread. A negative value can be specified to use as many
923 threads as there are detected logical CPUs.
924 notifications
1015 notifications
925 Controls writing of informational messages to ``stderr``. See the
1016 Controls writing of informational messages to ``stderr``. ``0`` (the
926 documentation for ``train_dictionary()`` for more.
1017 default) means to write nothing. ``1`` writes errors. ``2`` writes
1018 progression info. ``3`` writes more details. And ``4`` writes all info.
927
1019
928 Explicit Compression Parameters
1020 Explicit Compression Parameters
929 -------------------------------
1021 -------------------------------
930
1022
931 Zstandard's integer compression levels along with the input size and dictionary
1023 Zstandard offers a high-level *compression level* that maps to lower-level
932 size are converted into a data structure defining multiple parameters to tune
1024 compression parameters. For many consumers, this numeric level is the only
933 behavior of the compression algorithm. It is possible to use define this
1025 compression setting you'll need to touch.
934 data structure explicitly to have lower-level control over compression behavior.
1026
1027 But for advanced use cases, it might be desirable to tweak these lower-level
1028 settings.
935
1029
936 The ``zstd.CompressionParameters`` type represents this data structure.
1030 The ``ZstdCompressionParameters`` type represents these low-level compression
937 You can see how Zstandard converts compression levels to this data structure
1031 settings.
938 by calling ``zstd.get_compression_parameters()``. e.g.::
939
1032
940 params = zstd.get_compression_parameters(5)
1033 Instances of this type can be constructed from a myriad of keyword arguments
1034 (defined below) for complete low-level control over each adjustable
1035 compression setting.
1036
1037 From a higher level, one can construct a ``ZstdCompressionParameters`` instance
1038 given a desired compression level and target input and dictionary size
1039 using ``ZstdCompressionParameters.from_level()``. e.g.::
941
1040
942 This function also accepts the uncompressed data size and dictionary size
1041 # Derive compression settings for compression level 7.
943 to adjust parameters::
1042 params = zstd.ZstdCompressionParameters.from_level(7)
944
1043
945 params = zstd.get_compression_parameters(3, source_size=len(data), dict_size=len(dict_data))
1044 # With an input size of 1MB
1045 params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576)
1046
1047 Using ``from_level()``, it is also possible to override individual compression
1048 parameters or to define additional settings that aren't automatically derived.
1049 e.g.::
946
1050
947 You can also construct compression parameters from their low-level components::
1051 params = zstd.ZstdCompressionParameters.from_level(4, window_log=10)
1052 params = zstd.ZstdCompressionParameters.from_level(5, threads=4)
1053
1054 Or you can define low-level compression settings directly::
948
1055
949 params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST)
1056 params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True)
950
1057
951 You can then configure a compressor to use the custom parameters::
1058 Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to
1059 configure a compressor::
952
1060
953 cctx = zstd.ZstdCompressor(compression_params=params)
1061 cctx = zstd.ZstdCompressor(compression_params=params)
954
1062
955 The members/attributes of ``CompressionParameters`` instances are as follows::
1063 The named arguments and attributes of ``ZstdCompressionParameters`` are as
1064 follows:
956
1065
1066 * format
1067 * compression_level
957 * window_log
1068 * window_log
1069 * hash_log
958 * chain_log
1070 * chain_log
959 * hash_log
960 * search_log
1071 * search_log
961 * search_length
1072 * min_match
962 * target_length
1073 * target_length
963 * strategy
1074 * compression_strategy
1075 * write_content_size
1076 * write_checksum
1077 * write_dict_id
1078 * job_size
1079 * overlap_size_log
1080 * compress_literals
1081 * force_max_window
1082 * enable_ldm
1083 * ldm_hash_log
1084 * ldm_min_match
1085 * ldm_bucket_size_log
1086 * ldm_hash_every_log
1087 * threads
964
1088
965 This is the order the arguments are passed to the constructor if not using
1089 Some of these are very low-level settings. It may help to consult the official
966 named arguments.
1090 zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants
967
1091 in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h).
968 You'll need to read the Zstandard documentation for what these parameters
969 do.
970
1092
971 Frame Inspection
1093 Frame Inspection
972 ----------------
1094 ----------------
@@ -1003,15 +1125,17 b' has_checksum'
1003 Bool indicating whether a 4 byte content checksum is stored at the end
1125 Bool indicating whether a 4 byte content checksum is stored at the end
1004 of the frame.
1126 of the frame.
1005
1127
1128 ``zstd.frame_header_size(data)`` returns the size of the zstandard frame
1129 header.
1130
1131 ``zstd.frame_content_size(data)`` returns the content size as parsed from
1132 the frame header. ``-1`` means the content size is unknown. ``0`` means
1133 an empty frame. The content size is usually correct. However, since it is
1134 read from the frame header without decompressing any data, it is not guaranteed to be accurate.
1135
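A small sketch of using these functions (the 18 byte read is an assumption
intended to cover the largest possible frame header; the file name is
hypothetical)::

   with open('data.zst', 'rb') as fh:
       header = fh.read(18)

   print(zstd.frame_header_size(header))
   print(zstd.frame_content_size(header))
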
1006 Misc Functionality
1136 Misc Functionality
1007 ------------------
1137 ------------------
1008
1138
1009 estimate_compression_context_size(CompressionParameters)
1010 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1011
1012 Given a ``CompressionParameters`` struct, estimate the memory size required
1013 to perform compression.
1014
1015 estimate_decompression_context_size()
1139 estimate_decompression_context_size()
1016 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1140 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1017
1141
@@ -1041,6 +1165,11 b' FRAME_HEADER'
1041 MAGIC_NUMBER
1165 MAGIC_NUMBER
1042 Frame header as an integer
1166 Frame header as an integer
1043
1167
1168 CONTENTSIZE_UNKNOWN
1169 Value for content size when the content size is unknown.
1170 CONTENTSIZE_ERROR
1171 Value for content size when content size couldn't be determined.
1172
1044 WINDOWLOG_MIN
1173 WINDOWLOG_MIN
1045 Minimum value for compression parameter
1174 Minimum value for compression parameter
1046 WINDOWLOG_MAX
1175 WINDOWLOG_MAX
@@ -1063,8 +1192,6 b' SEARCHLENGTH_MAX'
1063 Maximum value for compression parameter
1192 Maximum value for compression parameter
1064 TARGETLENGTH_MIN
1193 TARGETLENGTH_MIN
1065 Minimum value for compression parameter
1194 Minimum value for compression parameter
1066 TARGETLENGTH_MAX
1067 Maximum value for compression parameter
1068 STRATEGY_FAST
1195 STRATEGY_FAST
1069 Compression strategy
1196 Compression strategy
1070 STRATEGY_DFAST
1197 STRATEGY_DFAST
@@ -1079,6 +1206,13 b' STRATEGY_BTLAZY2'
1079 Compression strategy
1206 Compression strategy
1080 STRATEGY_BTOPT
1207 STRATEGY_BTOPT
1081 Compression strategy
1208 Compression strategy
1209 STRATEGY_BTULTRA
1210 Compression strategy
1211
1212 FORMAT_ZSTD1
1213 Zstandard frame format
1214 FORMAT_ZSTD1_MAGICLESS
1215 Zstandard frame format without magic header
1082
1216
1083 Performance Considerations
1217 Performance Considerations
1084 --------------------------
1218 --------------------------
@@ -1090,7 +1224,7 b' instantiating a new ``ZstdCompressor`` o'
1090 operation. The differences are magnified as the size of data decreases. For
1224 operation. The differences are magnified as the size of data decreases. For
1091 example, the difference between *context* reuse and non-reuse for 100,000
1225 example, the difference between *context* reuse and non-reuse for 100,000
1092 100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
1226 100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
1093 whereas 10 1,000,000 byte inputs will be more similar in speed (because the
1227 whereas 10 100,000,000 byte inputs will be more similar in speed (because the
1094 time spent doing compression dwarfs time spent creating new *contexts*).
1228 time spent doing compression dwarfs time spent creating new *contexts*).
1095
1229
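A sketch of the reuse pattern (``chunks`` stands in for many small inputs)::

   cctx = zstd.ZstdCompressor(level=3)

   # One context, many operations: setup cost is paid once.
   compressed = [cctx.compress(chunk) for chunk in chunks]
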
1096 Buffer Types
1230 Buffer Types
@@ -1187,9 +1321,8 b' There are multiple APIs for performing c'
1187 because different applications have different needs and the library wants to
1321 because different applications have different needs and the library wants to
1188 facilitate optimal use in as many use cases as possible.
1322 facilitate optimal use in as many use cases as possible.
1189
1323
1190 From a high-level, APIs are divided into *one-shot* and *streaming*. See
1324 From a high level, APIs are divided into *one-shot* and *streaming*: either you
1191 the ``Concepts`` section for a description of how these are different at
1325 are operating on all data at once or you operate on it piecemeal.
1192 the C layer.
1193
1326
1194 The *one-shot* APIs are useful for small data, where the input or output
1327 The *one-shot* APIs are useful for small data, where the input or output
1195 size is known. (The size can come from a buffer length, file size, or
1328 size is known. (The size can come from a buffer length, file size, or
@@ -1222,145 +1355,39 b' There is potential for long pauses as da'
1222 underlying stream (say from interacting with a filesystem or network). This
1355 underlying stream (say from interacting with a filesystem or network). This
1223 could add considerable overhead.
1356 could add considerable overhead.
1224
1357
1225 Concepts
1358 Thread Safety
1226 ========
1359 =============
1227
1228 It is important to have a basic understanding of how Zstandard works in order
1229 to optimally use this library. In addition, there are some low-level Python
1230 concepts that are worth explaining to aid understanding. This section aims to
1231 provide that knowledge.
1232
1233 Zstandard Frames and Compression Format
1234 ---------------------------------------
1235
1236 Compressed zstandard data almost always exists within a container called a
1237 *frame*. (For the technically curious, see the
1238 `specification <https://github.com/facebook/zstd/blob/3bee41a70eaf343fbcae3637b3f6edbe52f35ed8/doc/zstd_compression_format.md>_.)
1239
1240 The frame contains a header and optional trailer. The header contains a
1241 magic number to self-identify as a zstd frame and a description of the
1242 compressed data that follows.
1243
1244 Among other things, the frame *optionally* contains the size of the
1245 decompressed data the frame represents, a 32-bit checksum of the
1246 decompressed data (to facilitate verification during decompression),
1247 and the ID of the dictionary used to compress the data.
1248
1249 Storing the original content size in the frame (``write_content_size=True``
1250 to ``ZstdCompressor``) is important for performance in some scenarios. Having
1251 the decompressed size stored there (or storing it elsewhere) allows
1252 decompression to perform a single memory allocation that is exactly sized to
1253 the output. This is faster than continuously growing a memory buffer to hold
1254 output.
1255
1360
1256 Compression and Decompression Contexts
1361 ``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees
1257 --------------------------------------
1362 about thread safety. Do not operate on the same ``ZstdCompressor`` and
1258
1363 ``ZstdDecompressor`` instance simultaneously from different threads. It is
1259 In order to perform a compression or decompression operation with the zstd
1364 fine to have different threads call into a single instance, just not at the
1260 C API, you need what's called a *context*. A context essentially holds
1365 same time.
1261 configuration and state for a compression or decompression operation. For
1262 example, a compression context holds the configured compression level.
1263
1264 Contexts can be reused for multiple operations. Since creating and
1265 destroying contexts is not free, there are performance advantages to
1266 reusing contexts.
1267
1268 The ``ZstdCompressor`` and ``ZstdDecompressor`` types are essentially
1269 wrappers around these contexts in the zstd C API.
1270
1366
1271 One-shot And Streaming Operations
1367 Some operations require multiple function calls to complete. e.g. streaming
1272 ---------------------------------
1368 operations. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used
1273
1369 for simultaneously active operations. e.g. you must not start a streaming
1274 A compression or decompression operation can either be performed as a
1370 operation when another streaming operation is already active.
1275 single *one-shot* operation or as a continuous *streaming* operation.
1276
1277 In one-shot mode (the *simple* APIs provided by the Python interface),
1278 **all** input is handed to the compressor or decompressor as a single buffer
1279 and **all** output is returned as a single buffer.
1280
1281 In streaming mode, input is delivered to the compressor or decompressor as
1282 a series of chunks via multiple function calls. Likewise, output is
1283 obtained in chunks as well.
1284
1285 Streaming operations require an additional *stream* object to be created
1286 to track the operation. These are logical extensions of *context*
1287 instances.
1288
1371
1289 There are advantages and disadvantages to each mode of operation. There
1372 The C extension releases the GIL during non-trivial calls into the zstd C
1290 are scenarios where certain modes can't be used. See the
1373 API. Non-trivial calls are notably compression and decompression. Trivial
1291 ``Choosing an API`` section for more.
1374 calls are things like parsing frame parameters. Where the GIL is released
1292
1375 is considered an implementation detail and can change in any release.
1293 Dictionaries
1294 ------------
1295
1296 A compression *dictionary* is essentially data used to seed the compressor
1297 state so it can achieve better compression. The idea is that if you are
1298 compressing a lot of similar pieces of data (e.g. JSON documents or anything
1299 sharing similar structure), then you can find common patterns across multiple
1300 objects then leverage those common patterns during compression and
1301 decompression operations to achieve better compression ratios.
1302
1303 Dictionary compression is generally only useful for small inputs - data no
1304 larger than a few kilobytes. The upper bound on this range is highly dependent
1305 on the input data and the dictionary.
1306
1307 Python Buffer Protocol
1308 ----------------------
1309
1310 Many functions in the library operate on objects that implement Python's
1311 `buffer protocol <https://docs.python.org/3.6/c-api/buffer.html>`_.
1312
1313 The *buffer protocol* is an internal implementation detail of a Python
1314 type that allows instances of that type (objects) to be exposed as a raw
1315 pointer (or buffer) in the C API. In other words, it allows objects to be
1316 exposed as an array of bytes.
1317
1376
1318 From the perspective of the C API, objects implementing the *buffer protocol*
1377 APIs that accept bytes-like objects don't enforce that the underlying object
1319 all look the same: they are just a pointer to a memory address of a defined
1378 is read-only. However, it is assumed that the passed object is read-only for
1320 length. This allows the C API to be largely type agnostic when accessing their
1379 the duration of the function call. It is possible to pass a mutable object
1321 data. This allows custom types to be passed in without first converting them
1380 (like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL
1322 to a specific type.
1381 released, and mutate the object from another thread. Such a race condition
1323
1382 is a bug in the consumer of python-zstandard. Most Python data types are
1324 Many Python types implement the buffer protocol. These include ``bytes``
1383 immutable, so unless you are doing something fancy, you don't need to
1325 (``str`` on Python 2), ``bytearray``, ``array.array``, ``io.BytesIO``,
1384 worry about this.
1326 ``mmap.mmap``, and ``memoryview``.
1327
1328 ``python-zstandard`` APIs that accept objects conforming to the buffer
1329 protocol require that the buffer is *C contiguous* and has a single
1330 dimension (``ndim==1``). This is usually the case. An example of where it
1331 is not is a Numpy matrix type.
1332
1333 Requiring Output Sizes for Non-Streaming Decompression APIs
1334 -----------------------------------------------------------
1335
1336 Non-streaming decompression APIs require that either the output size is
1337 explicitly defined (either in the zstd frame header or passed into the
1338 function) or that a max output size is specified. This restriction is for
1339 your safety.
1340
1341 The *one-shot* decompression APIs store the decompressed result in a
1342 single buffer. This means that a buffer needs to be pre-allocated to hold
1343 the result. If the decompressed size is not known, then there is no universal
1344 good default size to use. Any default will fail or will be highly sub-optimal
1345 in some scenarios (it will either be too small or will put stress on the
1346 memory allocator to allocate a too large block).
1347
1348 A *helpful* API may retry decompression with buffers of increasing size.
1349 While useful, there are obvious performance disadvantages, namely redoing
1350 decompression N times until it works. In addition, there is a security
1351 concern. Say the input came from highly compressible data, like 1 GB of the
1352 same byte value. The output size could be several magnitudes larger than the
1353 input size. An input of <100KB could decompress to >1GB. Without a bounds
1354 restriction on the decompressed size, certain inputs could exhaust all system
1355 memory. That's not good and is why the maximum output size is limited.
1356
1385
1357 Note on Zstandard's *Experimental* API
1386 Note on Zstandard's *Experimental* API
1358 ======================================
1387 ======================================
1359
1388
1360 Many of the Zstandard APIs used by this module are marked as *experimental*
1389 Many of the Zstandard APIs used by this module are marked as *experimental*
1361 within the Zstandard project. This includes a large number of useful
1390 within the Zstandard project.
1362 features, such as compression and frame parameters and parts of dictionary
1363 compression.
1364
1391
1365 It is unclear how Zstandard's C API will evolve over time, especially with
1392 It is unclear how Zstandard's C API will evolve over time, especially with
1366 regards to this *experimental* functionality. We will try to maintain
1393 regards to this *experimental* functionality. We will try to maintain
@@ -1371,7 +1398,7 b' Since a copy of the Zstandard source cod'
1371 module and since we compile against it, the behavior of a specific
1398 module and since we compile against it, the behavior of a specific
1372 version of this module should be constant for all of time. So if you
1399 version of this module should be constant for all of time. So if you
1373 pin the version of this module used in your projects (which is a Python
1400 pin the version of this module used in your projects (which is a Python
1374 best practice), you should be buffered from unwanted future changes.
1401 best practice), you should be shielded from unwanted future changes.
1375
1402
1376 Donate
1403 Donate
1377 ======
1404 ======
@@ -83,7 +83,7 b' static int BufferWithSegments_init(ZstdB'
83 }
83 }
84
84
85 if (segments.len % sizeof(BufferSegment)) {
85 if (segments.len % sizeof(BufferSegment)) {
86 PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of %lu",
86 PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of %zu",
87 sizeof(BufferSegment));
87 sizeof(BufferSegment));
88 goto except;
88 goto except;
89 }
89 }
@@ -123,7 +123,7 b' except:'
123 PyBuffer_Release(&self->parent);
123 PyBuffer_Release(&self->parent);
124 PyBuffer_Release(&segments);
124 PyBuffer_Release(&segments);
125 return -1;
125 return -1;
126 };
126 }
127
127
128 /**
128 /**
129 * Construct a BufferWithSegments from existing memory and offsets.
129 * Construct a BufferWithSegments from existing memory and offsets.
@@ -188,6 +188,12 b' static ZstdBufferSegment* BufferWithSegm'
188 return NULL;
188 return NULL;
189 }
189 }
190
190
191 if (self->segments[i].length > PY_SSIZE_T_MAX) {
192 PyErr_Format(PyExc_ValueError,
193 "item at offset %zd is too large for this platform", i);
194 return NULL;
195 }
196
191 result = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL);
197 result = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL);
192 if (NULL == result) {
198 if (NULL == result) {
193 return NULL;
199 return NULL;
@@ -197,7 +203,7 b' static ZstdBufferSegment* BufferWithSegm'
197 Py_INCREF(self);
203 Py_INCREF(self);
198
204
199 result->data = (char*)self->data + self->segments[i].offset;
205 result->data = (char*)self->data + self->segments[i].offset;
200 result->dataSize = self->segments[i].length;
206 result->dataSize = (Py_ssize_t)self->segments[i].length;
201 result->offset = self->segments[i].offset;
207 result->offset = self->segments[i].offset;
202
208
203 return result;
209 return result;
@@ -205,7 +211,13 b' static ZstdBufferSegment* BufferWithSegm'
205
211
206 #if PY_MAJOR_VERSION >= 3
212 #if PY_MAJOR_VERSION >= 3
207 static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) {
213 static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) {
208 return PyBuffer_FillInfo(view, (PyObject*)self, self->data, self->dataSize, 1, flags);
214 if (self->dataSize > PY_SSIZE_T_MAX) {
215 view->obj = NULL;
216 PyErr_SetString(PyExc_BufferError, "buffer is too large for this platform");
217 return -1;
218 }
219
220 return PyBuffer_FillInfo(view, (PyObject*)self, self->data, (Py_ssize_t)self->dataSize, 1, flags);
209 }
221 }
210 #else
222 #else
211 static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) {
223 static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) {
@@ -214,8 +226,13 b' static Py_ssize_t BufferWithSegments_get'
214 return -1;
226 return -1;
215 }
227 }
216
228
229 if (self->dataSize > PY_SSIZE_T_MAX) {
230 PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform");
231 return -1;
232 }
233
217 *ptrptr = self->data;
234 *ptrptr = self->data;
218 return self->dataSize;
235 return (Py_ssize_t)self->dataSize;
219 }
236 }
220
237
221 static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) {
238 static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) {
@@ -232,7 +249,12 b' PyDoc_STRVAR(BufferWithSegments_tobytes_'
232 );
249 );
233
250
234 static PyObject* BufferWithSegments_tobytes(ZstdBufferWithSegments* self) {
251 static PyObject* BufferWithSegments_tobytes(ZstdBufferWithSegments* self) {
235 return PyBytes_FromStringAndSize(self->data, self->dataSize);
252 if (self->dataSize > PY_SSIZE_T_MAX) {
253 PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform");
254 return NULL;
255 }
256
257 return PyBytes_FromStringAndSize(self->data, (Py_ssize_t)self->dataSize);
236 }
258 }
237
259
238 PyDoc_STRVAR(BufferWithSegments_segments__doc__,
260 PyDoc_STRVAR(BufferWithSegments_segments__doc__,
@@ -14,125 +14,11 b' ZstdCompressionDict* train_dictionary(Py'
14 static char* kwlist[] = {
14 static char* kwlist[] = {
15 "dict_size",
15 "dict_size",
16 "samples",
16 "samples",
17 "selectivity",
18 "level",
19 "notifications",
20 "dict_id",
21 NULL
22 };
23 size_t capacity;
24 PyObject* samples;
25 Py_ssize_t samplesLen;
26 unsigned selectivity = 0;
27 int level = 0;
28 unsigned notifications = 0;
29 unsigned dictID = 0;
30 ZDICT_params_t zparams;
31 Py_ssize_t sampleIndex;
32 Py_ssize_t sampleSize;
33 PyObject* sampleItem;
34 size_t zresult;
35 void* sampleBuffer = NULL;
36 void* sampleOffset;
37 size_t samplesSize = 0;
38 size_t* sampleSizes = NULL;
39 void* dict = NULL;
40 ZstdCompressionDict* result = NULL;
41
42 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IiII:train_dictionary",
43 kwlist,
44 &capacity,
45 &PyList_Type, &samples,
46 &selectivity, &level, &notifications, &dictID)) {
47 return NULL;
48 }
49
50 memset(&zparams, 0, sizeof(zparams));
51
52 zparams.selectivityLevel = selectivity;
53 zparams.compressionLevel = level;
54 zparams.notificationLevel = notifications;
55 zparams.dictID = dictID;
56
57 /* Figure out the size of the raw samples */
58 samplesLen = PyList_Size(samples);
59 for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
60 sampleItem = PyList_GetItem(samples, sampleIndex);
61 if (!PyBytes_Check(sampleItem)) {
62 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
63 return NULL;
64 }
65 samplesSize += PyBytes_GET_SIZE(sampleItem);
66 }
67
68 /* Now that we know the total size of the raw simples, we can allocate
69 a buffer for the raw data */
70 sampleBuffer = PyMem_Malloc(samplesSize);
71 if (!sampleBuffer) {
72 PyErr_NoMemory();
73 goto finally;
74 }
75 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
76 if (!sampleSizes) {
77 PyErr_NoMemory();
78 goto finally;
79 }
80
81 sampleOffset = sampleBuffer;
82 /* Now iterate again and assemble the samples in the buffer */
83 for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
84 sampleItem = PyList_GetItem(samples, sampleIndex);
85 sampleSize = PyBytes_GET_SIZE(sampleItem);
86 sampleSizes[sampleIndex] = sampleSize;
87 memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
88 sampleOffset = (char*)sampleOffset + sampleSize;
89 }
90
91 dict = PyMem_Malloc(capacity);
92 if (!dict) {
93 PyErr_NoMemory();
94 goto finally;
95 }
96
97 /* TODO consider using dup2() to redirect zstd's stderr writing to a buffer */
98 Py_BEGIN_ALLOW_THREADS
99 zresult = ZDICT_trainFromBuffer_advanced(dict, capacity,
100 sampleBuffer, sampleSizes, (unsigned int)samplesLen,
101 zparams);
102 Py_END_ALLOW_THREADS
103 if (ZDICT_isError(zresult)) {
104 PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult));
105 PyMem_Free(dict);
106 goto finally;
107 }
108
109 result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
110 if (!result) {
111 goto finally;
112 }
113
114 result->dictData = dict;
115 result->dictSize = zresult;
116 result->d = 0;
117 result->k = 0;
118
119 finally:
120 PyMem_Free(sampleBuffer);
121 PyMem_Free(sampleSizes);
122
123 return result;
124 }
125
126 ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
127 static char* kwlist[] = {
128 "dict_size",
129 "samples",
130 "k",
17 "k",
131 "d",
18 "d",
132 "notifications",
19 "notifications",
133 "dict_id",
20 "dict_id",
134 "level",
21 "level",
135 "optimize",
136 "steps",
22 "steps",
137 "threads",
23 "threads",
138 NULL
24 NULL
@@ -145,10 +31,9 b' ZstdCompressionDict* train_cover_diction'
145 unsigned notifications = 0;
31 unsigned notifications = 0;
146 unsigned dictID = 0;
32 unsigned dictID = 0;
147 int level = 0;
33 int level = 0;
148 PyObject* optimize = NULL;
149 unsigned steps = 0;
34 unsigned steps = 0;
150 int threads = 0;
35 int threads = 0;
151 COVER_params_t params;
36 ZDICT_cover_params_t params;
152 Py_ssize_t samplesLen;
37 Py_ssize_t samplesLen;
153 Py_ssize_t i;
38 Py_ssize_t i;
154 size_t samplesSize = 0;
39 size_t samplesSize = 0;
@@ -160,9 +45,9 b' ZstdCompressionDict* train_cover_diction'
160 size_t zresult;
45 size_t zresult;
161 ZstdCompressionDict* result = NULL;
46 ZstdCompressionDict* result = NULL;
162
47
163 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIiOIi:train_cover_dictionary",
48 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIiIi:train_dictionary",
164 kwlist, &capacity, &PyList_Type, &samples,
49 kwlist, &capacity, &PyList_Type, &samples,
165 &k, &d, &notifications, &dictID, &level, &optimize, &steps, &threads)) {
50 &k, &d, &notifications, &dictID, &level, &steps, &threads)) {
166 return NULL;
51 return NULL;
167 }
52 }
168
53
@@ -175,9 +60,9 b' ZstdCompressionDict* train_cover_diction'
175 params.d = d;
60 params.d = d;
176 params.steps = steps;
61 params.steps = steps;
177 params.nbThreads = threads;
62 params.nbThreads = threads;
178 params.notificationLevel = notifications;
63 params.zParams.notificationLevel = notifications;
179 params.dictID = dictID;
64 params.zParams.dictID = dictID;
180 params.compressionLevel = level;
65 params.zParams.compressionLevel = level;
181
66
182 /* Figure out total size of input samples. */
67 /* Figure out total size of input samples. */
183 samplesLen = PyList_Size(samples);
68 samplesLen = PyList_Size(samples);
@@ -219,12 +104,21 b' ZstdCompressionDict* train_cover_diction'
219 }
104 }
220
105
221 Py_BEGIN_ALLOW_THREADS
106 Py_BEGIN_ALLOW_THREADS
222 if (optimize && PyObject_IsTrue(optimize)) {
107 /* No parameters uses the default function, which will use default params
223 zresult = COVER_optimizeTrainFromBuffer(dict, capacity,
108 and call ZDICT_optimizeTrainFromBuffer_cover under the hood. */
109 if (!params.k && !params.d && !params.zParams.compressionLevel
110 && !params.zParams.notificationLevel && !params.zParams.dictID) {
111 zresult = ZDICT_trainFromBuffer(dict, capacity, sampleBuffer,
112 sampleSizes, (unsigned)samplesLen);
113 }
114 /* Use optimize mode if user controlled steps or threads explicitly. */
115 else if (params.steps || params.nbThreads) {
116 zresult = ZDICT_optimizeTrainFromBuffer_cover(dict, capacity,
224 sampleBuffer, sampleSizes, (unsigned)samplesLen, &params);
117 sampleBuffer, sampleSizes, (unsigned)samplesLen, &params);
225 }
118 }
119 /* Non-optimize mode with explicit control. */
226 else {
120 else {
227 zresult = COVER_trainFromBuffer(dict, capacity,
121 zresult = ZDICT_trainFromBuffer_cover(dict, capacity,
228 sampleBuffer, sampleSizes, (unsigned)samplesLen, params);
122 sampleBuffer, sampleSizes, (unsigned)samplesLen, params);
229 }
123 }
230 Py_END_ALLOW_THREADS
124 Py_END_ALLOW_THREADS
@@ -243,8 +137,11 b' ZstdCompressionDict* train_cover_diction'
243
137
244 result->dictData = dict;
138 result->dictData = dict;
245 result->dictSize = zresult;
139 result->dictSize = zresult;
140 result->dictType = ZSTD_dct_fullDict;
246 result->d = params.d;
141 result->d = params.d;
247 result->k = params.k;
142 result->k = params.k;
143 result->cdict = NULL;
144 result->ddict = NULL;
248
145
249 finally:
146 finally:
250 PyMem_Free(sampleBuffer);
147 PyMem_Free(sampleBuffer);
@@ -253,43 +150,99 b' finally:'
253 return result;
150 return result;
254 }
151 }
255
152
153 int ensure_ddict(ZstdCompressionDict* dict) {
154 if (dict->ddict) {
155 return 0;
156 }
157
158 Py_BEGIN_ALLOW_THREADS
159 dict->ddict = ZSTD_createDDict_advanced(dict->dictData, dict->dictSize,
160 ZSTD_dlm_byRef, dict->dictType, ZSTD_defaultCMem);
161 Py_END_ALLOW_THREADS
162 if (!dict->ddict) {
163 PyErr_SetString(ZstdError, "could not create decompression dict");
164 return 1;
165 }
166
167 return 0;
168 }
169
256 PyDoc_STRVAR(ZstdCompressionDict__doc__,
170 PyDoc_STRVAR(ZstdCompressionDict__doc__,
257 "ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
171 "ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
258 "\n"
172 "\n"
259 "This type holds the results of a computed Zstandard compression dictionary.\n"
173 "This type holds the results of a computed Zstandard compression dictionary.\n"
260 "Instances are obtained by calling ``train_dictionary()`` or by passing bytes\n"
174 "Instances are obtained by calling ``train_dictionary()`` or by passing\n"
261 "obtained from another source into the constructor.\n"
175 "bytes obtained from another source into the constructor.\n"
262 );
176 );
263
177
264 static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args) {
178 static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) {
265 const char* source;
179 static char* kwlist[] = {
266 Py_ssize_t sourceSize;
180 "data",
181 "dict_type",
182 NULL
183 };
184
185 int result = -1;
186 Py_buffer source;
187 unsigned dictType = ZSTD_dct_auto;
267
188
268 self->dictData = NULL;
189 self->dictData = NULL;
269 self->dictSize = 0;
190 self->dictSize = 0;
191 self->cdict = NULL;
192 self->ddict = NULL;
270
193
271 #if PY_MAJOR_VERSION >= 3
194 #if PY_MAJOR_VERSION >= 3
272 if (!PyArg_ParseTuple(args, "y#:ZstdCompressionDict",
195 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|I:ZstdCompressionDict",
273 #else
196 #else
274 if (!PyArg_ParseTuple(args, "s#:ZstdCompressionDict",
197 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|I:ZstdCompressionDict",
275 #endif
198 #endif
276 &source, &sourceSize)) {
199 kwlist, &source, &dictType)) {
277 return -1;
200 return -1;
278 }
201 }
279
202
280 self->dictData = PyMem_Malloc(sourceSize);
203 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
204 PyErr_SetString(PyExc_ValueError,
205 "data buffer should be contiguous and have at most one dimension");
206 goto finally;
207 }
208
209 if (dictType != ZSTD_dct_auto && dictType != ZSTD_dct_rawContent
210 && dictType != ZSTD_dct_fullDict) {
211 PyErr_Format(PyExc_ValueError,
212 "invalid dictionary load mode: %d; must use DICT_TYPE_* constants",
213 dictType);
214 goto finally;
215 }
216
217 self->dictType = dictType;
218
219 self->dictData = PyMem_Malloc(source.len);
281 if (!self->dictData) {
220 if (!self->dictData) {
282 PyErr_NoMemory();
221 PyErr_NoMemory();
283 return -1;
222 goto finally;
284 }
223 }
285
224
286 memcpy(self->dictData, source, sourceSize);
225 memcpy(self->dictData, source.buf, source.len);
287 self->dictSize = sourceSize;
226 self->dictSize = source.len;
227
228 result = 0;
288
229
289 return 0;
230 finally:
231 PyBuffer_Release(&source);
232 return result;
233 }
234
235 static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
236 if (self->cdict) {
237 ZSTD_freeCDict(self->cdict);
238 self->cdict = NULL;
290 }
239 }
291
240
292 static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
241 if (self->ddict) {
242 ZSTD_freeDDict(self->ddict);
243 self->ddict = NULL;
244 }
245
293 if (self->dictData) {
246 if (self->dictData) {
294 PyMem_Free(self->dictData);
247 PyMem_Free(self->dictData);
295 self->dictData = NULL;
248 self->dictData = NULL;
@@ -298,6 +251,74 b' static void ZstdCompressionDict_dealloc('
298 PyObject_Del(self);
251 PyObject_Del(self);
299 }
252 }
300
253
254 PyDoc_STRVAR(ZstdCompressionDict_precompute_compress__doc__,
255 "Precompute a dictionary so it can be used by multiple compressors.\n"
256 );
257
258 static PyObject* ZstdCompressionDict_precompute_compress(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) {
259 static char* kwlist[] = {
260 "level",
261 "compression_params",
262 NULL
263 };
264
265 int level = 0;
266 ZstdCompressionParametersObject* compressionParams = NULL;
267 ZSTD_compressionParameters cParams;
268 size_t zresult;
269
270 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!:precompute_compress", kwlist,
271 &level, &ZstdCompressionParametersType, &compressionParams)) {
272 return NULL;
273 }
274
275 if (level && compressionParams) {
276 PyErr_SetString(PyExc_ValueError,
277 "must only specify one of level or compression_params");
278 return NULL;
279 }
280
281 if (!level && !compressionParams) {
282 PyErr_SetString(PyExc_ValueError,
283 "must specify one of level or compression_params");
284 return NULL;
285 }
286
287 if (self->cdict) {
288 zresult = ZSTD_freeCDict(self->cdict);
289 self->cdict = NULL;
290 if (ZSTD_isError(zresult)) {
291 PyErr_Format(ZstdError, "unable to free CDict: %s",
292 ZSTD_getErrorName(zresult));
293 return NULL;
294 }
295 }
296
297 if (level) {
298 cParams = ZSTD_getCParams(level, 0, self->dictSize);
299 }
300 else {
301 cParams.chainLog = compressionParams->chainLog;
302 cParams.hashLog = compressionParams->hashLog;
303 cParams.searchLength = compressionParams->minMatch;
304 cParams.searchLog = compressionParams->searchLog;
305 cParams.strategy = compressionParams->compressionStrategy;
306 cParams.targetLength = compressionParams->targetLength;
307 cParams.windowLog = compressionParams->windowLog;
308 }
309
310 assert(!self->cdict);
311 self->cdict = ZSTD_createCDict_advanced(self->dictData, self->dictSize,
312 ZSTD_dlm_byRef, self->dictType, cParams, ZSTD_defaultCMem);
313
314 if (!self->cdict) {
315 PyErr_SetString(ZstdError, "unable to precompute dictionary");
316 return NULL;
317 }
318
319 Py_RETURN_NONE;
320 }
321
301 static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
322 static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
302 unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);
323 unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);
303
324
@@ -313,6 +334,8 b' static PyMethodDef ZstdCompressionDict_m'
313 PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") },
334 PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") },
314 { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS,
335 { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS,
315 PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") },
336 PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") },
337 { "precompute_compress", (PyCFunction)ZstdCompressionDict_precompute_compress,
338 METH_VARARGS | METH_KEYWORDS, ZstdCompressionDict_precompute_compress__doc__ },
316 { NULL, NULL }
339 { NULL, NULL }
317 };
340 };
318
341
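Reviewer note: for reference, a minimal sketch of how the new precompute_compress() method is exercised from Python. It assumes the module imports as zstd and that the dictionary object is passed to ZstdCompressor via the pre-existing dict_data keyword; only precompute_compress() itself comes from this change, and the dictionary bytes here are illustrative (real ones would come from zstd.train_dictionary()).

    import zstandard as zstd

    # Raw dictionary content; in practice this comes from zstd.train_dictionary().
    dict_data = zstd.ZstdCompressionDict(b"sample dictionary content")

    # Precompute the CDict once so it can be shared by multiple compressors.
    # Exactly one of level= or compression_params= may be given.
    dict_data.precompute_compress(level=3)

    cctx = zstd.ZstdCompressor(dict_data=dict_data)
    compressed = cctx.compress(b"data to compress")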
@@ -8,204 +8,448 b''
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams) {
11 extern PyObject* ZstdError;
12 zparams->windowLog = params->windowLog;
13 zparams->chainLog = params->chainLog;
14 zparams->hashLog = params->hashLog;
15 zparams->searchLog = params->searchLog;
16 zparams->searchLength = params->searchLength;
17 zparams->targetLength = params->targetLength;
18 zparams->strategy = params->strategy;
19 }
20
12
21 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args) {
13 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value) {
22 int compressionLevel;
14 size_t zresult = ZSTD_CCtxParam_setParameter(params, param, value);
23 unsigned PY_LONG_LONG sourceSize = 0;
15 if (ZSTD_isError(zresult)) {
24 Py_ssize_t dictSize = 0;
16 PyErr_Format(ZstdError, "unable to set compression context parameter: %s",
25 ZSTD_compressionParameters params;
17 ZSTD_getErrorName(zresult));
26 CompressionParametersObject* result;
18 return 1;
27
28 if (!PyArg_ParseTuple(args, "i|Kn:get_compression_parameters",
29 &compressionLevel, &sourceSize, &dictSize)) {
30 return NULL;
31 }
19 }
32
20
33 params = ZSTD_getCParams(compressionLevel, sourceSize, dictSize);
21 return 0;
22 }
23
24 #define TRY_SET_PARAMETER(params, param, value) if (set_parameter(params, param, value)) return -1;
34
25
35 result = PyObject_New(CompressionParametersObject, &CompressionParametersType);
26 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) {
36 if (!result) {
27 TRY_SET_PARAMETER(params, ZSTD_p_format, obj->format);
37 return NULL;
28 TRY_SET_PARAMETER(params, ZSTD_p_compressionLevel, (unsigned)obj->compressionLevel);
29 TRY_SET_PARAMETER(params, ZSTD_p_windowLog, obj->windowLog);
30 TRY_SET_PARAMETER(params, ZSTD_p_hashLog, obj->hashLog);
31 TRY_SET_PARAMETER(params, ZSTD_p_chainLog, obj->chainLog);
32 TRY_SET_PARAMETER(params, ZSTD_p_searchLog, obj->searchLog);
33 TRY_SET_PARAMETER(params, ZSTD_p_minMatch, obj->minMatch);
34 TRY_SET_PARAMETER(params, ZSTD_p_targetLength, obj->targetLength);
35 TRY_SET_PARAMETER(params, ZSTD_p_compressionStrategy, obj->compressionStrategy);
36 TRY_SET_PARAMETER(params, ZSTD_p_contentSizeFlag, obj->contentSizeFlag);
37 TRY_SET_PARAMETER(params, ZSTD_p_checksumFlag, obj->checksumFlag);
38 TRY_SET_PARAMETER(params, ZSTD_p_dictIDFlag, obj->dictIDFlag);
39 TRY_SET_PARAMETER(params, ZSTD_p_nbWorkers, obj->threads);
40 TRY_SET_PARAMETER(params, ZSTD_p_jobSize, obj->jobSize);
41 TRY_SET_PARAMETER(params, ZSTD_p_overlapSizeLog, obj->overlapSizeLog);
42 TRY_SET_PARAMETER(params, ZSTD_p_compressLiterals, obj->compressLiterals);
43 TRY_SET_PARAMETER(params, ZSTD_p_forceMaxWindow, obj->forceMaxWindow);
44 TRY_SET_PARAMETER(params, ZSTD_p_enableLongDistanceMatching, obj->enableLongDistanceMatching);
45 TRY_SET_PARAMETER(params, ZSTD_p_ldmHashLog, obj->ldmHashLog);
46 TRY_SET_PARAMETER(params, ZSTD_p_ldmMinMatch, obj->ldmMinMatch);
47 TRY_SET_PARAMETER(params, ZSTD_p_ldmBucketSizeLog, obj->ldmBucketSizeLog);
48 TRY_SET_PARAMETER(params, ZSTD_p_ldmHashEveryLog, obj->ldmHashEveryLog);
49
50 return 0;
51 }
52
53 int reset_params(ZstdCompressionParametersObject* params) {
54 if (params->params) {
55 ZSTD_CCtxParams_reset(params->params);
56 }
57 else {
58 params->params = ZSTD_createCCtxParams();
59 if (!params->params) {
60 PyErr_NoMemory();
61 return 1;
62 }
38 }
63 }
39
64
40 result->windowLog = params.windowLog;
65 return set_parameters(params->params, params);
41 result->chainLog = params.chainLog;
42 result->hashLog = params.hashLog;
43 result->searchLog = params.searchLog;
44 result->searchLength = params.searchLength;
45 result->targetLength = params.targetLength;
46 result->strategy = params.strategy;
47
48 return result;
49 }
66 }
50
67
51 static int CompressionParameters_init(CompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
68 static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
52 static char* kwlist[] = {
69 static char* kwlist[] = {
70 "format",
71 "compression_level",
53 "window_log",
72 "window_log",
54 "chain_log",
55 "hash_log",
73 "hash_log",
74 "chain_log",
56 "search_log",
75 "search_log",
57 "search_length",
76 "min_match",
58 "target_length",
77 "target_length",
59 "strategy",
78 "compression_strategy",
79 "write_content_size",
80 "write_checksum",
81 "write_dict_id",
82 "job_size",
83 "overlap_size_log",
84 "force_max_window",
85 "enable_ldm",
86 "ldm_hash_log",
87 "ldm_min_match",
88 "ldm_bucket_size_log",
89 "ldm_hash_every_log",
90 "threads",
91 "compress_literals",
60 NULL
92 NULL
61 };
93 };
62
94
63 unsigned windowLog;
95 unsigned format = 0;
64 unsigned chainLog;
96 int compressionLevel = 0;
65 unsigned hashLog;
97 unsigned windowLog = 0;
66 unsigned searchLog;
98 unsigned hashLog = 0;
67 unsigned searchLength;
99 unsigned chainLog = 0;
68 unsigned targetLength;
100 unsigned searchLog = 0;
69 unsigned strategy;
101 unsigned minMatch = 0;
70 ZSTD_compressionParameters params;
102 unsigned targetLength = 0;
71 size_t zresult;
103 unsigned compressionStrategy = 0;
104 unsigned contentSizeFlag = 1;
105 unsigned checksumFlag = 0;
106 unsigned dictIDFlag = 0;
107 unsigned jobSize = 0;
108 unsigned overlapSizeLog = 0;
109 unsigned forceMaxWindow = 0;
110 unsigned enableLDM = 0;
111 unsigned ldmHashLog = 0;
112 unsigned ldmMinMatch = 0;
113 unsigned ldmBucketSizeLog = 0;
114 unsigned ldmHashEveryLog = 0;
115 int threads = 0;
72
116
73 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "IIIIIII:CompressionParameters",
117 /* Setting value 0 has the effect of disabling. So we use -1 as a default
74 kwlist, &windowLog, &chainLog, &hashLog, &searchLog, &searchLength,
118 * to detect whether to set. Then we automatically derive the expected value
75 &targetLength, &strategy)) {
119 * based on the level, just like zstandard does itself. */
76 return -1;
120 int compressLiterals = -1;
77 }
78
121
79 if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) {
122 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
80 PyErr_SetString(PyExc_ValueError, "invalid window log value");
123 "|IiIIIIIIIIIIIIIIIIIIii:CompressionParameters",
81 return -1;
124 kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
82 }
125 &searchLog, &minMatch, &targetLength, &compressionStrategy,
83
126 &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapSizeLog,
84 if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) {
127 &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch, &ldmBucketSizeLog,
85 PyErr_SetString(PyExc_ValueError, "invalid chain log value");
128 &ldmHashEveryLog, &threads, &compressLiterals)) {
86 return -1;
87 }
88
89 if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) {
90 PyErr_SetString(PyExc_ValueError, "invalid hash log value");
91 return -1;
129 return -1;
92 }
130 }
93
131
94 if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) {
132 if (threads < 0) {
95 PyErr_SetString(PyExc_ValueError, "invalid search log value");
133 threads = cpu_count();
96 return -1;
97 }
134 }
98
135
99 if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) {
136 if (compressLiterals < 0) {
100 PyErr_SetString(PyExc_ValueError, "invalid search length value");
137 compressLiterals = compressionLevel >= 0;
101 return -1;
102 }
103
104 if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) {
105 PyErr_SetString(PyExc_ValueError, "invalid target length value");
106 return -1;
107 }
138 }
108
139
109 if (strategy < ZSTD_fast || strategy > ZSTD_btopt) {
140 self->format = format;
110 PyErr_SetString(PyExc_ValueError, "invalid strategy value");
141 self->compressionLevel = compressionLevel;
111 return -1;
112 }
113
114 self->windowLog = windowLog;
142 self->windowLog = windowLog;
143 self->hashLog = hashLog;
115 self->chainLog = chainLog;
144 self->chainLog = chainLog;
116 self->hashLog = hashLog;
117 self->searchLog = searchLog;
145 self->searchLog = searchLog;
118 self->searchLength = searchLength;
146 self->minMatch = minMatch;
119 self->targetLength = targetLength;
147 self->targetLength = targetLength;
120 self->strategy = strategy;
148 self->compressionStrategy = compressionStrategy;
149 self->contentSizeFlag = contentSizeFlag;
150 self->checksumFlag = checksumFlag;
151 self->dictIDFlag = dictIDFlag;
152 self->threads = threads;
153 self->jobSize = jobSize;
154 self->overlapSizeLog = overlapSizeLog;
155 self->compressLiterals = compressLiterals;
156 self->forceMaxWindow = forceMaxWindow;
157 self->enableLongDistanceMatching = enableLDM;
158 self->ldmHashLog = ldmHashLog;
159 self->ldmMinMatch = ldmMinMatch;
160 self->ldmBucketSizeLog = ldmBucketSizeLog;
161 self->ldmHashEveryLog = ldmHashEveryLog;
121
162
122 ztopy_compression_parameters(self, &params);
163 if (reset_params(self)) {
123 zresult = ZSTD_checkCParams(params);
124
125 if (ZSTD_isError(zresult)) {
126 PyErr_Format(PyExc_ValueError, "invalid compression parameters: %s",
127 ZSTD_getErrorName(zresult));
128 return -1;
164 return -1;
129 }
165 }
130
166
131 return 0;
167 return 0;
132 }
168 }
133
169
134 PyDoc_STRVAR(CompressionParameters_estimated_compression_context_size__doc__,
170 PyDoc_STRVAR(ZstdCompressionParameters_from_level__doc__,
171 "Create a CompressionParameters from a compression level and target sizes\n"
172 );
173
174 ZstdCompressionParametersObject* CompressionParameters_from_level(PyObject* undef, PyObject* args, PyObject* kwargs) {
175 int managedKwargs = 0;
176 int level;
177 PyObject* sourceSize = NULL;
178 PyObject* dictSize = NULL;
179 unsigned PY_LONG_LONG iSourceSize = 0;
180 Py_ssize_t iDictSize = 0;
181 PyObject* val;
182 ZSTD_compressionParameters params;
183 ZstdCompressionParametersObject* result = NULL;
184 int res;
185
186 if (!PyArg_ParseTuple(args, "i:from_level",
187 &level)) {
188 return NULL;
189 }
190
191 if (!kwargs) {
192 kwargs = PyDict_New();
193 if (!kwargs) {
194 return NULL;
195 }
196 managedKwargs = 1;
197 }
198
199 sourceSize = PyDict_GetItemString(kwargs, "source_size");
200 if (sourceSize) {
201 #if PY_MAJOR_VERSION >= 3
202 iSourceSize = PyLong_AsUnsignedLongLong(sourceSize);
203 if (iSourceSize == (unsigned PY_LONG_LONG)(-1)) {
204 goto cleanup;
205 }
206 #else
207 iSourceSize = PyInt_AsUnsignedLongLongMask(sourceSize);
208 #endif
209
210 PyDict_DelItemString(kwargs, "source_size");
211 }
212
213 dictSize = PyDict_GetItemString(kwargs, "dict_size");
214 if (dictSize) {
215 #if PY_MAJOR_VERSION >= 3
216 iDictSize = PyLong_AsSsize_t(dictSize);
217 #else
218 iDictSize = PyInt_AsSsize_t(dictSize);
219 #endif
220 if (iDictSize == -1) {
221 goto cleanup;
222 }
223
224 PyDict_DelItemString(kwargs, "dict_size");
225 }
226
227
228 params = ZSTD_getCParams(level, iSourceSize, iDictSize);
229
230 /* Values derived from the input level and sizes are passed along to the
231 constructor. But only if a value doesn't already exist. */
232 val = PyDict_GetItemString(kwargs, "window_log");
233 if (!val) {
234 val = PyLong_FromUnsignedLong(params.windowLog);
235 if (!val) {
236 goto cleanup;
237 }
238 PyDict_SetItemString(kwargs, "window_log", val);
239 Py_DECREF(val);
240 }
241
242 val = PyDict_GetItemString(kwargs, "chain_log");
243 if (!val) {
244 val = PyLong_FromUnsignedLong(params.chainLog);
245 if (!val) {
246 goto cleanup;
247 }
248 PyDict_SetItemString(kwargs, "chain_log", val);
249 Py_DECREF(val);
250 }
251
252 val = PyDict_GetItemString(kwargs, "hash_log");
253 if (!val) {
254 val = PyLong_FromUnsignedLong(params.hashLog);
255 if (!val) {
256 goto cleanup;
257 }
258 PyDict_SetItemString(kwargs, "hash_log", val);
259 Py_DECREF(val);
260 }
261
262 val = PyDict_GetItemString(kwargs, "search_log");
263 if (!val) {
264 val = PyLong_FromUnsignedLong(params.searchLog);
265 if (!val) {
266 goto cleanup;
267 }
268 PyDict_SetItemString(kwargs, "search_log", val);
269 Py_DECREF(val);
270 }
271
272 val = PyDict_GetItemString(kwargs, "min_match");
273 if (!val) {
274 val = PyLong_FromUnsignedLong(params.searchLength);
275 if (!val) {
276 goto cleanup;
277 }
278 PyDict_SetItemString(kwargs, "min_match", val);
279 Py_DECREF(val);
280 }
281
282 val = PyDict_GetItemString(kwargs, "target_length");
283 if (!val) {
284 val = PyLong_FromUnsignedLong(params.targetLength);
285 if (!val) {
286 goto cleanup;
287 }
288 PyDict_SetItemString(kwargs, "target_length", val);
289 Py_DECREF(val);
290 }
291
292 val = PyDict_GetItemString(kwargs, "compression_strategy");
293 if (!val) {
294 val = PyLong_FromUnsignedLong(params.strategy);
295 if (!val) {
296 goto cleanup;
297 }
298 PyDict_SetItemString(kwargs, "compression_strategy", val);
299 Py_DECREF(val);
300 }
301
302 val = PyDict_GetItemString(kwargs, "compress_literals");
303 if (!val) {
304 val = PyLong_FromLong(level >= 0 ? 1 : 0);
305 if (!val) {
306 goto cleanup;
307 }
308 PyDict_SetItemString(kwargs, "compress_literals", val);
309 Py_DECREF(val);
310 }
311
312 result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType);
313 if (!result) {
314 goto cleanup;
315 }
316
317 result->params = NULL;
318
319 val = PyTuple_New(0);
320 if (!val) {
321 Py_CLEAR(result);
322 goto cleanup;
323 }
324
325 res = ZstdCompressionParameters_init(result, val, kwargs);
326 Py_DECREF(val);
327
328 if (res) {
329 Py_CLEAR(result);
330 goto cleanup;
331 }
332
333 cleanup:
334 if (managedKwargs) {
335 Py_DECREF(kwargs);
336 }
337
338 return result;
339 }
340
341 PyDoc_STRVAR(ZstdCompressionParameters_estimated_compression_context_size__doc__,
135 "Estimate the size in bytes of a compression context for compression parameters\n"
342 "Estimate the size in bytes of a compression context for compression parameters\n"
136 );
343 );
137
344
138 PyObject* CompressionParameters_estimated_compression_context_size(CompressionParametersObject* self) {
345 PyObject* ZstdCompressionParameters_estimated_compression_context_size(ZstdCompressionParametersObject* self) {
139 ZSTD_compressionParameters params;
346 return PyLong_FromSize_t(ZSTD_estimateCCtxSize_usingCCtxParams(self->params));
140
141 ztopy_compression_parameters(self, &params);
142
143 return PyLong_FromSize_t(ZSTD_estimateCCtxSize(params));
144 }
347 }
145
348
146 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) {
349 PyDoc_STRVAR(ZstdCompressionParameters__doc__,
147 CompressionParametersObject* params;
350 "ZstdCompressionParameters: low-level control over zstd compression");
148 ZSTD_compressionParameters zparams;
149 PyObject* result;
150
351
151 if (!PyArg_ParseTuple(args, "O!:estimate_compression_context_size",
352 static void ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject* self) {
152 &CompressionParametersType, &params)) {
353 if (self->params) {
153 return NULL;
354 ZSTD_freeCCtxParams(self->params);
355 self->params = NULL;
154 }
356 }
155
357
156 ztopy_compression_parameters(params, &zparams);
157 result = PyLong_FromSize_t(ZSTD_estimateCCtxSize(zparams));
158 return result;
159 }
160
161 PyDoc_STRVAR(CompressionParameters__doc__,
162 "CompressionParameters: low-level control over zstd compression");
163
164 static void CompressionParameters_dealloc(PyObject* self) {
165 PyObject_Del(self);
358 PyObject_Del(self);
166 }
359 }
167
360
168 static PyMethodDef CompressionParameters_methods[] = {
361 static PyMethodDef ZstdCompressionParameters_methods[] = {
362 {
363 "from_level",
364 (PyCFunction)CompressionParameters_from_level,
365 METH_VARARGS | METH_KEYWORDS | METH_STATIC,
366 ZstdCompressionParameters_from_level__doc__
367 },
169 {
368 {
170 "estimated_compression_context_size",
369 "estimated_compression_context_size",
171 (PyCFunction)CompressionParameters_estimated_compression_context_size,
370 (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size,
172 METH_NOARGS,
371 METH_NOARGS,
173 CompressionParameters_estimated_compression_context_size__doc__
372 ZstdCompressionParameters_estimated_compression_context_size__doc__
174 },
373 },
175 { NULL, NULL }
374 { NULL, NULL }
176 };
375 };
177
376
178 static PyMemberDef CompressionParameters_members[] = {
377 static PyMemberDef ZstdCompressionParameters_members[] = {
378 { "format", T_UINT,
379 offsetof(ZstdCompressionParametersObject, format), READONLY,
380 "compression format" },
381 { "compression_level", T_INT,
382 offsetof(ZstdCompressionParametersObject, compressionLevel), READONLY,
383 "compression level" },
179 { "window_log", T_UINT,
384 { "window_log", T_UINT,
180 offsetof(CompressionParametersObject, windowLog), READONLY,
385 offsetof(ZstdCompressionParametersObject, windowLog), READONLY,
181 "window log" },
386 "window log" },
182 { "chain_log", T_UINT,
183 offsetof(CompressionParametersObject, chainLog), READONLY,
184 "chain log" },
185 { "hash_log", T_UINT,
387 { "hash_log", T_UINT,
186 offsetof(CompressionParametersObject, hashLog), READONLY,
388 offsetof(ZstdCompressionParametersObject, hashLog), READONLY,
187 "hash log" },
389 "hash log" },
390 { "chain_log", T_UINT,
391 offsetof(ZstdCompressionParametersObject, chainLog), READONLY,
392 "chain log" },
188 { "search_log", T_UINT,
393 { "search_log", T_UINT,
189 offsetof(CompressionParametersObject, searchLog), READONLY,
394 offsetof(ZstdCompressionParametersObject, searchLog), READONLY,
190 "search log" },
395 "search log" },
191 { "search_length", T_UINT,
396 { "min_match", T_UINT,
192 offsetof(CompressionParametersObject, searchLength), READONLY,
397 offsetof(ZstdCompressionParametersObject, minMatch), READONLY,
193 "search length" },
398 "search length" },
194 { "target_length", T_UINT,
399 { "target_length", T_UINT,
195 offsetof(CompressionParametersObject, targetLength), READONLY,
400 offsetof(ZstdCompressionParametersObject, targetLength), READONLY,
196 "target length" },
401 "target length" },
197 { "strategy", T_INT,
402 { "compression_strategy", T_UINT,
198 offsetof(CompressionParametersObject, strategy), READONLY,
403 offsetof(ZstdCompressionParametersObject, compressionStrategy), READONLY,
199 "strategy" },
404 "compression strategy" },
405 { "write_content_size", T_UINT,
406 offsetof(ZstdCompressionParametersObject, contentSizeFlag), READONLY,
407 "whether to write content size in frames" },
408 { "write_checksum", T_UINT,
409 offsetof(ZstdCompressionParametersObject, checksumFlag), READONLY,
410 "whether to write checksum in frames" },
411 { "write_dict_id", T_UINT,
412 offsetof(ZstdCompressionParametersObject, dictIDFlag), READONLY,
413 "whether to write dictionary ID in frames" },
414 { "threads", T_UINT,
415 offsetof(ZstdCompressionParametersObject, threads), READONLY,
416 "number of threads to use" },
417 { "job_size", T_UINT,
418 offsetof(ZstdCompressionParametersObject, jobSize), READONLY,
419 "size of compression job when using multiple threads" },
420 { "overlap_size_log", T_UINT,
421 offsetof(ZstdCompressionParametersObject, overlapSizeLog), READONLY,
422 "Size of previous input reloaded at the beginning of each job" },
423 { "compress_literals", T_UINT,
424 offsetof(ZstdCompressionParametersObject, compressLiterals), READONLY,
425 "whether Huffman compression of literals is in use" },
426 { "force_max_window", T_UINT,
427 offsetof(ZstdCompressionParametersObject, forceMaxWindow), READONLY,
428 "force back references to remain smaller than window size" },
429 { "enable_ldm", T_UINT,
430 offsetof(ZstdCompressionParametersObject, enableLongDistanceMatching), READONLY,
431 "whether to enable long distance matching" },
432 { "ldm_hash_log", T_UINT,
433 offsetof(ZstdCompressionParametersObject, ldmHashLog), READONLY,
434 "Size of the table for long distance matching, as a power of 2" },
435 { "ldm_min_match", T_UINT,
436 offsetof(ZstdCompressionParametersObject, ldmMinMatch), READONLY,
437 "minimum size of searched matches for long distance matcher" },
438 { "ldm_bucket_size_log", T_UINT,
439 offsetof(ZstdCompressionParametersObject, ldmBucketSizeLog), READONLY,
440 "log size of each bucket in the LDM hash table for collision resolution" },
441 { "ldm_hash_every_log", T_UINT,
442 offsetof(ZstdCompressionParametersObject, ldmHashEveryLog), READONLY,
443 "frequency of inserting/looking up entries in the LDM hash table" },
200 { NULL }
444 { NULL }
201 };
445 };
202
446
203 PyTypeObject CompressionParametersType = {
447 PyTypeObject ZstdCompressionParametersType = {
204 PyVarObject_HEAD_INIT(NULL, 0)
448 PyVarObject_HEAD_INIT(NULL, 0)
205 "CompressionParameters", /* tp_name */
449 "ZstdCompressionParameters", /* tp_name */
206 sizeof(CompressionParametersObject), /* tp_basicsize */
450 sizeof(ZstdCompressionParametersObject), /* tp_basicsize */
207 0, /* tp_itemsize */
451 0, /* tp_itemsize */
208 (destructor)CompressionParameters_dealloc, /* tp_dealloc */
452 (destructor)ZstdCompressionParameters_dealloc, /* tp_dealloc */
209 0, /* tp_print */
453 0, /* tp_print */
210 0, /* tp_getattr */
454 0, /* tp_getattr */
211 0, /* tp_setattr */
455 0, /* tp_setattr */
@@ -221,33 +465,38 b' PyTypeObject CompressionParametersType ='
221 0, /* tp_setattro */
465 0, /* tp_setattro */
222 0, /* tp_as_buffer */
466 0, /* tp_as_buffer */
223 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
467 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
224 CompressionParameters__doc__, /* tp_doc */
468 ZstdCompressionParameters__doc__, /* tp_doc */
225 0, /* tp_traverse */
469 0, /* tp_traverse */
226 0, /* tp_clear */
470 0, /* tp_clear */
227 0, /* tp_richcompare */
471 0, /* tp_richcompare */
228 0, /* tp_weaklistoffset */
472 0, /* tp_weaklistoffset */
229 0, /* tp_iter */
473 0, /* tp_iter */
230 0, /* tp_iternext */
474 0, /* tp_iternext */
231 CompressionParameters_methods, /* tp_methods */
475 ZstdCompressionParameters_methods, /* tp_methods */
232 CompressionParameters_members, /* tp_members */
476 ZstdCompressionParameters_members, /* tp_members */
233 0, /* tp_getset */
477 0, /* tp_getset */
234 0, /* tp_base */
478 0, /* tp_base */
235 0, /* tp_dict */
479 0, /* tp_dict */
236 0, /* tp_descr_get */
480 0, /* tp_descr_get */
237 0, /* tp_descr_set */
481 0, /* tp_descr_set */
238 0, /* tp_dictoffset */
482 0, /* tp_dictoffset */
239 (initproc)CompressionParameters_init, /* tp_init */
483 (initproc)ZstdCompressionParameters_init, /* tp_init */
240 0, /* tp_alloc */
484 0, /* tp_alloc */
241 PyType_GenericNew, /* tp_new */
485 PyType_GenericNew, /* tp_new */
242 };
486 };
243
487
244 void compressionparams_module_init(PyObject* mod) {
488 void compressionparams_module_init(PyObject* mod) {
245 Py_TYPE(&CompressionParametersType) = &PyType_Type;
489 Py_TYPE(&ZstdCompressionParametersType) = &PyType_Type;
246 if (PyType_Ready(&CompressionParametersType) < 0) {
490 if (PyType_Ready(&ZstdCompressionParametersType) < 0) {
247 return;
491 return;
248 }
492 }
249
493
250 Py_INCREF(&CompressionParametersType);
494 Py_INCREF(&ZstdCompressionParametersType);
495 PyModule_AddObject(mod, "ZstdCompressionParameters",
496 (PyObject*)&ZstdCompressionParametersType);
497
498 /* TODO remove deprecated alias. */
499 Py_INCREF(&ZstdCompressionParametersType);
251 PyModule_AddObject(mod, "CompressionParameters",
500 PyModule_AddObject(mod, "CompressionParameters",
252 (PyObject*)&CompressionParametersType);
501 (PyObject*)&ZstdCompressionParametersType);
253 }
502 }
@@ -22,20 +22,18 b' static void ZstdCompressionWriter_deallo'
22 }
22 }
23
23
24 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
24 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
25 size_t zresult;
26
25 if (self->entered) {
27 if (self->entered) {
26 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
28 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
27 return NULL;
29 return NULL;
28 }
30 }
29
31
30 if (self->compressor->mtcctx) {
32 zresult = ZSTD_CCtx_setPledgedSrcSize(self->compressor->cctx, self->sourceSize);
31 if (init_mtcstream(self->compressor, self->sourceSize)) {
33 if (ZSTD_isError(zresult)) {
32 return NULL;
34 PyErr_Format(ZstdError, "error setting source size: %s",
33 }
35 ZSTD_getErrorName(zresult));
34 }
36 return NULL;
35 else {
36 if (0 != init_cstream(self->compressor, self->sourceSize)) {
37 return NULL;
38 }
39 }
37 }
40
38
41 self->entered = 1;
39 self->entered = 1;
@@ -59,8 +57,12 b' static PyObject* ZstdCompressionWriter_e'
59
57
60 self->entered = 0;
58 self->entered = 0;
61
59
62 if ((self->compressor->cstream || self->compressor->mtcctx) && exc_type == Py_None
60 if (exc_type == Py_None && exc_value == Py_None && exc_tb == Py_None) {
63 && exc_value == Py_None && exc_tb == Py_None) {
61 ZSTD_inBuffer inBuffer;
62
63 inBuffer.src = NULL;
64 inBuffer.size = 0;
65 inBuffer.pos = 0;
64
66
65 output.dst = PyMem_Malloc(self->outSize);
67 output.dst = PyMem_Malloc(self->outSize);
66 if (!output.dst) {
68 if (!output.dst) {
@@ -70,12 +72,7 b' static PyObject* ZstdCompressionWriter_e'
70 output.pos = 0;
72 output.pos = 0;
71
73
72 while (1) {
74 while (1) {
73 if (self->compressor->mtcctx) {
75 zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &inBuffer, ZSTD_e_end);
74 zresult = ZSTDMT_endStream(self->compressor->mtcctx, &output);
75 }
76 else {
77 zresult = ZSTD_endStream(self->compressor->cstream, &output);
78 }
79 if (ZSTD_isError(zresult)) {
76 if (ZSTD_isError(zresult)) {
80 PyErr_Format(ZstdError, "error ending compression stream: %s",
77 PyErr_Format(ZstdError, "error ending compression stream: %s",
81 ZSTD_getErrorName(zresult));
78 ZSTD_getErrorName(zresult));
@@ -107,18 +104,17 b' static PyObject* ZstdCompressionWriter_e'
107 }
104 }
108
105
109 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
106 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
110 if (!self->compressor->cstream) {
107 return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->compressor->cctx));
111 PyErr_SetString(ZstdError, "cannot determine size of an inactive compressor; "
112 "call when a context manager is active");
113 return NULL;
114 }
115
116 return PyLong_FromSize_t(ZSTD_sizeof_CStream(self->compressor->cstream));
117 }
108 }
118
109
119 static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args) {
110 static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
120 const char* source;
111 static char* kwlist[] = {
121 Py_ssize_t sourceSize;
112 "data",
113 NULL
114 };
115
116 PyObject* result = NULL;
117 Py_buffer source;
122 size_t zresult;
118 size_t zresult;
123 ZSTD_inBuffer input;
119 ZSTD_inBuffer input;
124 ZSTD_outBuffer output;
120 ZSTD_outBuffer output;
@@ -126,44 +122,46 b' static PyObject* ZstdCompressionWriter_w'
126 Py_ssize_t totalWrite = 0;
122 Py_ssize_t totalWrite = 0;
127
123
128 #if PY_MAJOR_VERSION >= 3
124 #if PY_MAJOR_VERSION >= 3
129 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
125 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write",
130 #else
126 #else
131 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
127 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write",
132 #endif
128 #endif
129 kwlist, &source)) {
133 return NULL;
130 return NULL;
134 }
131 }
135
132
136 if (!self->entered) {
133 if (!self->entered) {
137 PyErr_SetString(ZstdError, "compress must be called from an active context manager");
134 PyErr_SetString(ZstdError, "compress must be called from an active context manager");
138 return NULL;
135 goto finally;
136 }
137
138 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
139 PyErr_SetString(PyExc_ValueError,
140 "data buffer should be contiguous and have at most one dimension");
141 goto finally;
139 }
142 }
140
143
141 output.dst = PyMem_Malloc(self->outSize);
144 output.dst = PyMem_Malloc(self->outSize);
142 if (!output.dst) {
145 if (!output.dst) {
143 return PyErr_NoMemory();
146 PyErr_NoMemory();
147 goto finally;
144 }
148 }
145 output.size = self->outSize;
149 output.size = self->outSize;
146 output.pos = 0;
150 output.pos = 0;
147
151
148 input.src = source;
152 input.src = source.buf;
149 input.size = sourceSize;
153 input.size = source.len;
150 input.pos = 0;
154 input.pos = 0;
151
155
152 while ((ssize_t)input.pos < sourceSize) {
156 while ((ssize_t)input.pos < source.len) {
153 Py_BEGIN_ALLOW_THREADS
157 Py_BEGIN_ALLOW_THREADS
154 if (self->compressor->mtcctx) {
158 zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &input, ZSTD_e_continue);
155 zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
156 &output, &input);
157 }
158 else {
159 zresult = ZSTD_compressStream(self->compressor->cstream, &output, &input);
160 }
161 Py_END_ALLOW_THREADS
159 Py_END_ALLOW_THREADS
162
160
163 if (ZSTD_isError(zresult)) {
161 if (ZSTD_isError(zresult)) {
164 PyMem_Free(output.dst);
162 PyMem_Free(output.dst);
165 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
163 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
166 return NULL;
164 goto finally;
167 }
165 }
168
166
169 /* Copy data from output buffer to writer. */
167 /* Copy data from output buffer to writer. */
@@ -176,18 +174,24 b' static PyObject* ZstdCompressionWriter_w'
176 output.dst, output.pos);
174 output.dst, output.pos);
177 Py_XDECREF(res);
175 Py_XDECREF(res);
178 totalWrite += output.pos;
176 totalWrite += output.pos;
177 self->bytesCompressed += output.pos;
179 }
178 }
180 output.pos = 0;
179 output.pos = 0;
181 }
180 }
182
181
183 PyMem_Free(output.dst);
182 PyMem_Free(output.dst);
184
183
185 return PyLong_FromSsize_t(totalWrite);
184 result = PyLong_FromSsize_t(totalWrite);
185
186 finally:
187 PyBuffer_Release(&source);
188 return result;
186 }
189 }
187
190
188 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
191 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
189 size_t zresult;
192 size_t zresult;
190 ZSTD_outBuffer output;
193 ZSTD_outBuffer output;
194 ZSTD_inBuffer input;
191 PyObject* res;
195 PyObject* res;
192 Py_ssize_t totalWrite = 0;
196 Py_ssize_t totalWrite = 0;
193
197
@@ -196,6 +200,10 b' static PyObject* ZstdCompressionWriter_f'
196 return NULL;
200 return NULL;
197 }
201 }
198
202
203 input.src = NULL;
204 input.size = 0;
205 input.pos = 0;
206
199 output.dst = PyMem_Malloc(self->outSize);
207 output.dst = PyMem_Malloc(self->outSize);
200 if (!output.dst) {
208 if (!output.dst) {
201 return PyErr_NoMemory();
209 return PyErr_NoMemory();
@@ -205,12 +213,7 b' static PyObject* ZstdCompressionWriter_f'
205
213
206 while (1) {
214 while (1) {
207 Py_BEGIN_ALLOW_THREADS
215 Py_BEGIN_ALLOW_THREADS
208 if (self->compressor->mtcctx) {
216 zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &input, ZSTD_e_flush);
209 zresult = ZSTDMT_flushStream(self->compressor->mtcctx, &output);
210 }
211 else {
212 zresult = ZSTD_flushStream(self->compressor->cstream, &output);
213 }
214 Py_END_ALLOW_THREADS
217 Py_END_ALLOW_THREADS
215
218
216 if (ZSTD_isError(zresult)) {
219 if (ZSTD_isError(zresult)) {
@@ -233,6 +236,7 b' static PyObject* ZstdCompressionWriter_f'
233 output.dst, output.pos);
236 output.dst, output.pos);
234 Py_XDECREF(res);
237 Py_XDECREF(res);
235 totalWrite += output.pos;
238 totalWrite += output.pos;
239 self->bytesCompressed += output.pos;
236 }
240 }
237 output.pos = 0;
241 output.pos = 0;
238 }
242 }
@@ -242,6 +246,10 b' static PyObject* ZstdCompressionWriter_f'
242 return PyLong_FromSsize_t(totalWrite);
246 return PyLong_FromSsize_t(totalWrite);
243 }
247 }
244
248
249 static PyObject* ZstdCompressionWriter_tell(ZstdCompressionWriter* self) {
250 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
251 }
252
245 static PyMethodDef ZstdCompressionWriter_methods[] = {
253 static PyMethodDef ZstdCompressionWriter_methods[] = {
246 { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
254 { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
247 PyDoc_STR("Enter a compression context.") },
255 PyDoc_STR("Enter a compression context.") },
@@ -249,10 +257,12 b' static PyMethodDef ZstdCompressionWriter'
249 PyDoc_STR("Exit a compression context.") },
257 PyDoc_STR("Exit a compression context.") },
250 { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
258 { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
251 PyDoc_STR("Obtain the memory size of the underlying compressor") },
259 PyDoc_STR("Obtain the memory size of the underlying compressor") },
252 { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS,
260 { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
253 PyDoc_STR("Compress data") },
261 PyDoc_STR("Compress data") },
254 { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_NOARGS,
262 { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_NOARGS,
255 PyDoc_STR("Flush data and finish a zstd frame") },
263 PyDoc_STR("Flush data and finish a zstd frame") },
264 { "tell", (PyCFunction)ZstdCompressionWriter_tell, METH_NOARGS,
265 PyDoc_STR("Returns current number of bytes compressed") },
256 { NULL, NULL }
266 { NULL, NULL }
257 };
267 };
258
268
@@ -23,9 +23,13 b' static void ZstdCompressionObj_dealloc(Z'
23 PyObject_Del(self);
23 PyObject_Del(self);
24 }
24 }
25
25
26 static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args) {
26 static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
27 const char* source;
27 static char* kwlist[] = {
28 Py_ssize_t sourceSize;
28 "data",
29 NULL
30 };
31
32 Py_buffer source;
29 ZSTD_inBuffer input;
33 ZSTD_inBuffer input;
30 size_t zresult;
34 size_t zresult;
31 PyObject* result = NULL;
35 PyObject* result = NULL;
@@ -37,38 +41,43 b' static PyObject* ZstdCompressionObj_comp'
37 }
41 }
38
42
39 #if PY_MAJOR_VERSION >= 3
43 #if PY_MAJOR_VERSION >= 3
40 if (!PyArg_ParseTuple(args, "y#:compress", &source, &sourceSize)) {
44 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
41 #else
45 #else
42 if (!PyArg_ParseTuple(args, "s#:compress", &source, &sourceSize)) {
46 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
43 #endif
47 #endif
48 kwlist, &source)) {
44 return NULL;
49 return NULL;
45 }
50 }
46
51
47 input.src = source;
52 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
48 input.size = sourceSize;
53 PyErr_SetString(PyExc_ValueError,
54 "data buffer should be contiguous and have at most one dimension");
55 goto finally;
56 }
57
58 input.src = source.buf;
59 input.size = source.len;
49 input.pos = 0;
60 input.pos = 0;
50
61
51 while ((ssize_t)input.pos < sourceSize) {
62 while ((ssize_t)input.pos < source.len) {
52 Py_BEGIN_ALLOW_THREADS
63 Py_BEGIN_ALLOW_THREADS
53 if (self->compressor->mtcctx) {
64 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
54 zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
65 &input, ZSTD_e_continue);
55 &self->output, &input);
56 }
57 else {
58 zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, &input);
59 }
60 Py_END_ALLOW_THREADS
66 Py_END_ALLOW_THREADS
61
67
62 if (ZSTD_isError(zresult)) {
68 if (ZSTD_isError(zresult)) {
63 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
69 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
64 return NULL;
70 Py_CLEAR(result);
71 goto finally;
65 }
72 }
66
73
67 if (self->output.pos) {
74 if (self->output.pos) {
68 if (result) {
75 if (result) {
69 resultSize = PyBytes_GET_SIZE(result);
76 resultSize = PyBytes_GET_SIZE(result);
70 if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
77
71 return NULL;
78 if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
79 Py_CLEAR(result);
80 goto finally;
72 }
81 }
73
82
74 memcpy(PyBytes_AS_STRING(result) + resultSize,
83 memcpy(PyBytes_AS_STRING(result) + resultSize,
@@ -77,7 +86,7 b' static PyObject* ZstdCompressionObj_comp'
77 else {
86 else {
78 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
87 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
79 if (!result) {
88 if (!result) {
80 return NULL;
89 goto finally;
81 }
90 }
82 }
91 }
83
92
@@ -85,21 +94,29 b' static PyObject* ZstdCompressionObj_comp'
85 }
94 }
86 }
95 }
87
96
88 if (result) {
97 if (NULL == result) {
89 return result;
98 result = PyBytes_FromString("");
90 }
99 }
91 else {
100
92 return PyBytes_FromString("");
101 finally:
93 }
102 PyBuffer_Release(&source);
103
104 return result;
94 }
105 }
95
106
96 static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args) {
107 static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
108 static char* kwlist[] = {
109 "flush_mode",
110 NULL
111 };
112
97 int flushMode = compressorobj_flush_finish;
113 int flushMode = compressorobj_flush_finish;
98 size_t zresult;
114 size_t zresult;
99 PyObject* result = NULL;
115 PyObject* result = NULL;
100 Py_ssize_t resultSize = 0;
116 Py_ssize_t resultSize = 0;
117 ZSTD_inBuffer input;
101
118
102 if (!PyArg_ParseTuple(args, "|i:flush", &flushMode)) {
119 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist, &flushMode)) {
103 return NULL;
120 return NULL;
104 }
121 }
105
122
@@ -115,16 +132,16 b' static PyObject* ZstdCompressionObj_flus'
115
132
116 assert(self->output.pos == 0);
133 assert(self->output.pos == 0);
117
134
135 input.src = NULL;
136 input.size = 0;
137 input.pos = 0;
138
118 if (flushMode == compressorobj_flush_block) {
139 if (flushMode == compressorobj_flush_block) {
119 /* The output buffer is of size ZSTD_CStreamOutSize(), which is
140 /* The output buffer is of size ZSTD_CStreamOutSize(), which is
120 guaranteed to hold a full block. */
141 guaranteed to hold a full block. */
121 Py_BEGIN_ALLOW_THREADS
142 Py_BEGIN_ALLOW_THREADS
122 if (self->compressor->mtcctx) {
143 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
123 zresult = ZSTDMT_flushStream(self->compressor->mtcctx, &self->output);
144 &input, ZSTD_e_flush);
124 }
125 else {
126 zresult = ZSTD_flushStream(self->compressor->cstream, &self->output);
127 }
128 Py_END_ALLOW_THREADS
145 Py_END_ALLOW_THREADS
129
146
130 if (ZSTD_isError(zresult)) {
147 if (ZSTD_isError(zresult)) {
@@ -156,12 +173,8 b' static PyObject* ZstdCompressionObj_flus'
156 self->finished = 1;
173 self->finished = 1;
157
174
158 while (1) {
175 while (1) {
159 if (self->compressor->mtcctx) {
176 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
160 zresult = ZSTDMT_endStream(self->compressor->mtcctx, &self->output);
177 &input, ZSTD_e_end);
161 }
162 else {
163 zresult = ZSTD_endStream(self->compressor->cstream, &self->output);
164 }
165 if (ZSTD_isError(zresult)) {
178 if (ZSTD_isError(zresult)) {
166 PyErr_Format(ZstdError, "error ending compression stream: %s",
179 PyErr_Format(ZstdError, "error ending compression stream: %s",
167 ZSTD_getErrorName(zresult));
180 ZSTD_getErrorName(zresult));
@@ -171,7 +184,9 b' static PyObject* ZstdCompressionObj_flus'
171 if (self->output.pos) {
184 if (self->output.pos) {
172 if (result) {
185 if (result) {
173 resultSize = PyBytes_GET_SIZE(result);
186 resultSize = PyBytes_GET_SIZE(result);
174 if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
187
188 if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
189 Py_XDECREF(result);
175 return NULL;
190 return NULL;
176 }
191 }
177
192
@@ -202,9 +217,9 b' static PyObject* ZstdCompressionObj_flus'
202 }
217 }
203
218
204 static PyMethodDef ZstdCompressionObj_methods[] = {
219 static PyMethodDef ZstdCompressionObj_methods[] = {
205 { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS,
220 { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS | METH_KEYWORDS,
206 PyDoc_STR("compress data") },
221 PyDoc_STR("compress data") },
207 { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS,
222 { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS | METH_KEYWORDS,
208 PyDoc_STR("finish compression operation") },
223 PyDoc_STR("finish compression operation") },
209 { NULL, NULL }
224 { NULL, NULL }
210 };
225 };
@@ -11,118 +11,78 b''
11
11
12 extern PyObject* ZstdError;
12 extern PyObject* ZstdError;
13
13
14 int populate_cdict(ZstdCompressor* compressor, ZSTD_parameters* zparams) {
14 int ensure_cctx(ZstdCompressor* compressor) {
15 ZSTD_customMem zmem;
15 size_t zresult;
16
17 assert(compressor);
18 assert(compressor->cctx);
19 assert(compressor->params);
16
20
17 if (compressor->cdict || !compressor->dict || !compressor->dict->dictData) {
21 ZSTD_CCtx_reset(compressor->cctx);
18 return 0;
22
23 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, compressor->params);
24 if (ZSTD_isError(zresult)) {
25 PyErr_Format(ZstdError, "could not set compression parameters: %s",
26 ZSTD_getErrorName(zresult));
27 return 1;
19 }
28 }
20
29
21 Py_BEGIN_ALLOW_THREADS
30 if (compressor->dict) {
22 memset(&zmem, 0, sizeof(zmem));
31 if (compressor->dict->cdict) {
23 compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData,
32 zresult = ZSTD_CCtx_refCDict(compressor->cctx, compressor->dict->cdict);
24 compressor->dict->dictSize, 1, *zparams, zmem);
33 }
25 Py_END_ALLOW_THREADS
34 else {
26
35 zresult = ZSTD_CCtx_loadDictionary_advanced(compressor->cctx,
27 if (!compressor->cdict) {
36 compressor->dict->dictData, compressor->dict->dictSize,
28 PyErr_SetString(ZstdError, "could not create compression dictionary");
37 ZSTD_dlm_byRef, compressor->dict->dictType);
29 return 1;
38 }
39 if (ZSTD_isError(zresult)) {
40 PyErr_Format(ZstdError, "could not load compression dictionary: %s",
41 ZSTD_getErrorName(zresult));
42 return 1;
43 }
30 }
44 }
31
45
32 return 0;
46 return 0;
33 }
47 }
34
48
35 /**
49 static PyObject* frame_progression(ZSTD_CCtx* cctx) {
36 * Ensure the ZSTD_CStream on a ZstdCompressor instance is initialized.
50 PyObject* result = NULL;
37 *
51 PyObject* value;
38 * Returns 0 on success. Other value on failure. Will set a Python exception
52 ZSTD_frameProgression progression;
39 * on failure.
40 */
41 int init_cstream(ZstdCompressor* compressor, unsigned long long sourceSize) {
42 ZSTD_parameters zparams;
43 void* dictData = NULL;
44 size_t dictSize = 0;
45 size_t zresult;
46
53
47 if (compressor->cstream) {
54 result = PyTuple_New(3);
48 zresult = ZSTD_resetCStream(compressor->cstream, sourceSize);
55 if (!result) {
49 if (ZSTD_isError(zresult)) {
56 return NULL;
50 PyErr_Format(ZstdError, "could not reset CStream: %s",
51 ZSTD_getErrorName(zresult));
52 return -1;
53 }
54
55 return 0;
56 }
57 }
57
58
58 compressor->cstream = ZSTD_createCStream();
59 progression = ZSTD_getFrameProgression(cctx);
59 if (!compressor->cstream) {
60 PyErr_SetString(ZstdError, "could not create CStream");
61 return -1;
62 }
63
60
64 if (compressor->dict) {
61 value = PyLong_FromUnsignedLongLong(progression.ingested);
65 dictData = compressor->dict->dictData;
62 if (!value) {
66 dictSize = compressor->dict->dictSize;
63 Py_DECREF(result);
67 }
64 return NULL;
68
69 memset(&zparams, 0, sizeof(zparams));
70 if (compressor->cparams) {
71 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
72 /* Do NOT call ZSTD_adjustCParams() here because the compression params
73 come from the user. */
74 }
75 else {
76 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize);
77 }
65 }
78
66
79 zparams.fParams = compressor->fparams;
67 PyTuple_SET_ITEM(result, 0, value);
80
81 zresult = ZSTD_initCStream_advanced(compressor->cstream, dictData, dictSize,
82 zparams, sourceSize);
83
68
84 if (ZSTD_isError(zresult)) {
69 value = PyLong_FromUnsignedLongLong(progression.consumed);
85 ZSTD_freeCStream(compressor->cstream);
70 if (!value) {
86 compressor->cstream = NULL;
71 Py_DECREF(result);
87 PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
72 return NULL;
88 return -1;
89 }
73 }
90
74
91 return 0;;
75 PyTuple_SET_ITEM(result, 1, value);
92 }
93
76
94 int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize) {
77 value = PyLong_FromUnsignedLongLong(progression.produced);
95 size_t zresult;
78 if (!value) {
96 void* dictData = NULL;
79 Py_DECREF(result);
97 size_t dictSize = 0;
80 return NULL;
98 ZSTD_parameters zparams;
99
100 assert(compressor->mtcctx);
101
102 if (compressor->dict) {
103 dictData = compressor->dict->dictData;
104 dictSize = compressor->dict->dictSize;
105 }
81 }
106
82
107 memset(&zparams, 0, sizeof(zparams));
83 PyTuple_SET_ITEM(result, 2, value);
108 if (compressor->cparams) {
109 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
110 }
111 else {
112 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize);
113 }
114
115 zparams.fParams = compressor->fparams;
116
84
117 zresult = ZSTDMT_initCStream_advanced(compressor->mtcctx, dictData, dictSize,
85 return result;
118 zparams, sourceSize);
119
120 if (ZSTD_isError(zresult)) {
121 PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
122 return -1;
123 }
124
125 return 0;
126 }
86 }
127
87
128 PyDoc_STRVAR(ZstdCompressor__doc__,
88 PyDoc_STRVAR(ZstdCompressor__doc__,
@@ -147,9 +107,9 b' PyDoc_STRVAR(ZstdCompressor__doc__,'
147 " If True, a 4 byte content checksum will be written with the compressed\n"
107 " If True, a 4 byte content checksum will be written with the compressed\n"
148 " data, allowing the decompressor to perform content verification.\n"
108 " data, allowing the decompressor to perform content verification.\n"
149 "write_content_size\n"
109 "write_content_size\n"
150 " If True, the decompressed content size will be included in the header of\n"
110 " If True (the default), the decompressed content size will be included in\n"
151 " the compressed data. This data will only be written if the compressor\n"
111 " the header of the compressed data. This data will only be written if the\n"
152 " knows the size of the input data.\n"
112 " compressor knows the size of the input data.\n"
153 "write_dict_id\n"
113 "write_dict_id\n"
154 " Determines whether the dictionary ID will be written into the compressed\n"
114 " Determines whether the dictionary ID will be written into the compressed\n"
155 " data. Defaults to True. Only adds content to the compressed data if\n"
115 " data. Defaults to True. Only adds content to the compressed data if\n"
@@ -175,7 +135,7 b' static int ZstdCompressor_init(ZstdCompr'
175
135
176 int level = 3;
136 int level = 3;
177 ZstdCompressionDict* dict = NULL;
137 ZstdCompressionDict* dict = NULL;
178 CompressionParametersObject* params = NULL;
138 ZstdCompressionParametersObject* params = NULL;
179 PyObject* writeChecksum = NULL;
139 PyObject* writeChecksum = NULL;
180 PyObject* writeContentSize = NULL;
140 PyObject* writeContentSize = NULL;
181 PyObject* writeDictID = NULL;
141 PyObject* writeDictID = NULL;
@@ -183,16 +143,11 b' static int ZstdCompressor_init(ZstdCompr'
183
143
184 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor",
144 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor",
185 kwlist, &level, &ZstdCompressionDictType, &dict,
145 kwlist, &level, &ZstdCompressionDictType, &dict,
186 &CompressionParametersType, &params,
146 &ZstdCompressionParametersType, &params,
187 &writeChecksum, &writeContentSize, &writeDictID, &threads)) {
147 &writeChecksum, &writeContentSize, &writeDictID, &threads)) {
188 return -1;
148 return -1;
189 }
149 }
190
150
191 if (level < 1) {
192 PyErr_SetString(PyExc_ValueError, "level must be greater than 0");
193 return -1;
194 }
195
196 if (level > ZSTD_maxCLevel()) {
151 if (level > ZSTD_maxCLevel()) {
197 PyErr_Format(PyExc_ValueError, "level must be less than %d",
152 PyErr_Format(PyExc_ValueError, "level must be less than %d",
198 ZSTD_maxCLevel() + 1);
153 ZSTD_maxCLevel() + 1);
@@ -203,79 +158,135 b' static int ZstdCompressor_init(ZstdCompr'
203 threads = cpu_count();
158 threads = cpu_count();
204 }
159 }
205
160
206 self->threads = threads;
207
208 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
161 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
209 overhead of each compression operation. */
162 overhead of each compression operation. */
210 if (threads) {
163 self->cctx = ZSTD_createCCtx();
211 self->mtcctx = ZSTDMT_createCCtx(threads);
164 if (!self->cctx) {
212 if (!self->mtcctx) {
165 PyErr_NoMemory();
213 PyErr_NoMemory();
166 return -1;
167 }
168
169 /* TODO stuff the original parameters away somewhere so we can reset later. This
170 will allow us to do things like automatically adjust cparams based on input
171 size (assuming zstd isn't doing that internally). */
172
173 self->params = ZSTD_createCCtxParams();
174 if (!self->params) {
175 PyErr_NoMemory();
176 return -1;
177 }
178
179 if (params && writeChecksum) {
180 PyErr_SetString(PyExc_ValueError,
181 "cannot define compression_params and write_checksum");
182 return -1;
183 }
184
185 if (params && writeContentSize) {
186 PyErr_SetString(PyExc_ValueError,
187 "cannot define compression_params and write_content_size");
188 return -1;
189 }
190
191 if (params && writeDictID) {
192 PyErr_SetString(PyExc_ValueError,
193 "cannot define compression_params and write_dict_id");
194 return -1;
195 }
196
197 if (params && threads) {
198 PyErr_SetString(PyExc_ValueError,
199 "cannot define compression_params and threads");
200 return -1;
201 }
202
203 if (params) {
204 if (set_parameters(self->params, params)) {
214 return -1;
205 return -1;
215 }
206 }
216 }
207 }
217 else {
208 else {
218 self->cctx = ZSTD_createCCtx();
209 if (set_parameter(self->params, ZSTD_p_compressionLevel, level)) {
219 if (!self->cctx) {
210 return -1;
220 PyErr_NoMemory();
211 }
212
213 if (set_parameter(self->params, ZSTD_p_contentSizeFlag,
214 writeContentSize ? PyObject_IsTrue(writeContentSize) : 1)) {
215 return -1;
216 }
217
218 if (set_parameter(self->params, ZSTD_p_checksumFlag,
219 writeChecksum ? PyObject_IsTrue(writeChecksum) : 0)) {
221 return -1;
220 return -1;
222 }
221 }
223 }
222
223 if (set_parameter(self->params, ZSTD_p_dictIDFlag,
224 writeDictID ? PyObject_IsTrue(writeDictID) : 1)) {
225 return -1;
226 }
224
227
225 self->compressionLevel = level;
228 if (threads) {
229 if (set_parameter(self->params, ZSTD_p_nbWorkers, threads)) {
230 return -1;
231 }
232 }
233 }
226
234
227 if (dict) {
235 if (dict) {
228 self->dict = dict;
236 self->dict = dict;
229 Py_INCREF(dict);
237 Py_INCREF(dict);
230 }
238 }
231
239
232 if (params) {
240 if (ensure_cctx(self)) {
233 self->cparams = params;
241 return -1;
234 Py_INCREF(params);
235 }
236
237 memset(&self->fparams, 0, sizeof(self->fparams));
238
239 if (writeChecksum && PyObject_IsTrue(writeChecksum)) {
240 self->fparams.checksumFlag = 1;
241 }
242 if (writeContentSize && PyObject_IsTrue(writeContentSize)) {
243 self->fparams.contentSizeFlag = 1;
244 }
245 if (writeDictID && PyObject_Not(writeDictID)) {
246 self->fparams.noDictIDFlag = 1;
247 }
242 }
248
243
249 return 0;
244 return 0;
250 }
245 }
251
246
252 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
247 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
253 if (self->cstream) {
254 ZSTD_freeCStream(self->cstream);
255 self->cstream = NULL;
256 }
257
258 Py_XDECREF(self->cparams);
259 Py_XDECREF(self->dict);
260
261 if (self->cdict) {
262 ZSTD_freeCDict(self->cdict);
263 self->cdict = NULL;
264 }
265
266 if (self->cctx) {
248 if (self->cctx) {
267 ZSTD_freeCCtx(self->cctx);
249 ZSTD_freeCCtx(self->cctx);
268 self->cctx = NULL;
250 self->cctx = NULL;
269 }
251 }
270
252
271 if (self->mtcctx) {
253 if (self->params) {
272 ZSTDMT_freeCCtx(self->mtcctx);
254 ZSTD_freeCCtxParams(self->params);
273 self->mtcctx = NULL;
255 self->params = NULL;
274 }
256 }
275
257
258 Py_XDECREF(self->dict);
276 PyObject_Del(self);
259 PyObject_Del(self);
277 }
260 }
278
261
262 PyDoc_STRVAR(ZstdCompressor_memory_size__doc__,
263 "memory_size()\n"
264 "\n"
265 "Obtain the memory usage of this compressor, in bytes.\n"
266 );
267
268 static PyObject* ZstdCompressor_memory_size(ZstdCompressor* self) {
269 if (self->cctx) {
270 return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->cctx));
271 }
272 else {
273 PyErr_SetString(ZstdError, "no compressor context found; this should never happen");
274 return NULL;
275 }
276 }
277
278 PyDoc_STRVAR(ZstdCompressor_frame_progression__doc__,
279 "frame_progression()\n"
280 "\n"
281 "Return information on how much work the compressor has done.\n"
282 "\n"
283 "Returns a 3-tuple of (ingested, consumed, produced).\n"
284 );
285
286 static PyObject* ZstdCompressor_frame_progression(ZstdCompressor* self) {
287 return frame_progression(self->cctx);
288 }
289
279 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__,
290 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__,
280 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n"
291 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n"
281 "compress data between streams\n"
292 "compress data between streams\n"
@@ -304,7 +315,7 b' static PyObject* ZstdCompressor_copy_str'
304
315
305 PyObject* source;
316 PyObject* source;
306 PyObject* dest;
317 PyObject* dest;
307 Py_ssize_t sourceSize = 0;
318 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
308 size_t inSize = ZSTD_CStreamInSize();
319 size_t inSize = ZSTD_CStreamInSize();
309 size_t outSize = ZSTD_CStreamOutSize();
320 size_t outSize = ZSTD_CStreamOutSize();
310 ZSTD_inBuffer input;
321 ZSTD_inBuffer input;
@@ -313,14 +324,14 b' static PyObject* ZstdCompressor_copy_str'
313 Py_ssize_t totalWrite = 0;
324 Py_ssize_t totalWrite = 0;
314 char* readBuffer;
325 char* readBuffer;
315 Py_ssize_t readSize;
326 Py_ssize_t readSize;
316 PyObject* readResult;
327 PyObject* readResult = NULL;
317 PyObject* res = NULL;
328 PyObject* res = NULL;
318 size_t zresult;
329 size_t zresult;
319 PyObject* writeResult;
330 PyObject* writeResult;
320 PyObject* totalReadPy;
331 PyObject* totalReadPy;
321 PyObject* totalWritePy;
332 PyObject* totalWritePy;
322
333
323 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk:copy_stream", kwlist,
334 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|Kkk:copy_stream", kwlist,
324 &source, &dest, &sourceSize, &inSize, &outSize)) {
335 &source, &dest, &sourceSize, &inSize, &outSize)) {
325 return NULL;
336 return NULL;
326 }
337 }
@@ -335,22 +346,18 b' static PyObject* ZstdCompressor_copy_str'
335 return NULL;
346 return NULL;
336 }
347 }
337
348
338 /* Prevent free on uninitialized memory in finally. */
349 if (ensure_cctx(self)) {
339 output.dst = NULL;
350 return NULL;
340
341 if (self->mtcctx) {
342 if (init_mtcstream(self, sourceSize)) {
343 res = NULL;
344 goto finally;
345 }
346 }
347 else {
348 if (0 != init_cstream(self, sourceSize)) {
349 res = NULL;
350 goto finally;
351 }
352 }
351 }
353
352
353 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
354 if (ZSTD_isError(zresult)) {
355 PyErr_Format(ZstdError, "error setting source size: %s",
356 ZSTD_getErrorName(zresult));
357 return NULL;
358 }
359
360 /* Prevent free on uninitialized memory in finally. */
354 output.dst = PyMem_Malloc(outSize);
361 output.dst = PyMem_Malloc(outSize);
355 if (!output.dst) {
362 if (!output.dst) {
356 PyErr_NoMemory();
363 PyErr_NoMemory();
@@ -360,6 +367,10 b' static PyObject* ZstdCompressor_copy_str'
360 output.size = outSize;
367 output.size = outSize;
361 output.pos = 0;
368 output.pos = 0;
362
369
370 input.src = NULL;
371 input.size = 0;
372 input.pos = 0;
373
363 while (1) {
374 while (1) {
364 /* Try to read from source stream. */
375 /* Try to read from source stream. */
365 readResult = PyObject_CallMethod(source, "read", "n", inSize);
376 readResult = PyObject_CallMethod(source, "read", "n", inSize);
@@ -384,12 +395,7 b' static PyObject* ZstdCompressor_copy_str'
384
395
385 while (input.pos < input.size) {
396 while (input.pos < input.size) {
386 Py_BEGIN_ALLOW_THREADS
397 Py_BEGIN_ALLOW_THREADS
387 if (self->mtcctx) {
398 zresult = ZSTD_compress_generic(self->cctx, &output, &input, ZSTD_e_continue);
388 zresult = ZSTDMT_compressStream(self->mtcctx, &output, &input);
389 }
390 else {
391 zresult = ZSTD_compressStream(self->cstream, &output, &input);
392 }
393 Py_END_ALLOW_THREADS
399 Py_END_ALLOW_THREADS
394
400
395 if (ZSTD_isError(zresult)) {
401 if (ZSTD_isError(zresult)) {
@@ -410,16 +416,18 b' static PyObject* ZstdCompressor_copy_str'
410 output.pos = 0;
416 output.pos = 0;
411 }
417 }
412 }
418 }
419
420 Py_CLEAR(readResult);
413 }
421 }
414
422
415 /* We've finished reading. Now flush the compressor stream. */
423 /* We've finished reading. Now flush the compressor stream. */
424 assert(input.pos == input.size);
425
416 while (1) {
426 while (1) {
417 if (self->mtcctx) {
427 Py_BEGIN_ALLOW_THREADS
418 zresult = ZSTDMT_endStream(self->mtcctx, &output);
428 zresult = ZSTD_compress_generic(self->cctx, &output, &input, ZSTD_e_end);
419 }
429 Py_END_ALLOW_THREADS
420 else {
430
421 zresult = ZSTD_endStream(self->cstream, &output);
422 }
423 if (ZSTD_isError(zresult)) {
431 if (ZSTD_isError(zresult)) {
424 PyErr_Format(ZstdError, "error ending compression stream: %s",
432 PyErr_Format(ZstdError, "error ending compression stream: %s",
425 ZSTD_getErrorName(zresult));
433 ZSTD_getErrorName(zresult));
@@ -455,11 +463,81 b' finally:'
455 PyMem_Free(output.dst);
463 PyMem_Free(output.dst);
456 }
464 }
457
465
466 Py_XDECREF(readResult);
467
458 return res;
468 return res;
459 }
469 }
460
470
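Editor's note: a minimal sketch of the copy_stream() API documented above, wiring two file-like objects together. The keyword names follow the docstring; the (bytes_read, bytes_written) return pair is an assumption not spelled out in this hunk.

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    src = io.BytesIO(b"raw input data")
    dst = io.BytesIO()

    # size pledges the input size up front; read_size/write_size keep their
    # recommended stream-buffer defaults.
    read_count, write_count = cctx.copy_stream(src, dst, size=14)
    compressed = dst.getvalue()
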
471 PyDoc_STRVAR(ZstdCompressor_stream_reader__doc__,
472 "stream_reader(source, [size=0])\n"
473 "\n"
474 "Obtain an object that behaves like an I/O stream.\n"
475 "\n"
476 "The source object can be any object with a ``read(size)`` method\n"
477 "or an object that conforms to the buffer protocol.\n"
478 );
479
480 static ZstdCompressionReader* ZstdCompressor_stream_reader(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
481 static char* kwlist[] = {
482 "source",
483 "size",
484 "read_size",
485 NULL
486 };
487
488 PyObject* source;
489 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
490 size_t readSize = ZSTD_CStreamInSize();
491 ZstdCompressionReader* result = NULL;
492
493 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist,
494 &source, &sourceSize, &readSize)) {
495 return NULL;
496 }
497
498 result = (ZstdCompressionReader*)PyObject_CallObject((PyObject*)&ZstdCompressionReaderType, NULL);
499 if (!result) {
500 return NULL;
501 }
502
503 if (PyObject_HasAttrString(source, "read")) {
504 result->reader = source;
505 Py_INCREF(source);
506 result->readSize = readSize;
507 }
508 else if (1 == PyObject_CheckBuffer(source)) {
509 if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
510 goto except;
511 }
512
513 assert(result->buffer.len >= 0);
514
515 sourceSize = result->buffer.len;
516 }
517 else {
518 PyErr_SetString(PyExc_TypeError,
519 "must pass an object with a read() method or that conforms to the buffer protocol");
520 goto except;
521 }
522
523 if (ensure_cctx(self)) {
524 goto except;
525 }
526
527 result->compressor = self;
528 Py_INCREF(self);
529 result->sourceSize = sourceSize;
530
531 return result;
532
533 except:
534 Py_CLEAR(result);
535
536 return NULL;
537 }
538
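Editor's note: a sketch of the new stream_reader() API added above. Per the docstring the source may be an object with read() or one supporting the buffer protocol; the context-manager usage and read() call are assumed from the reader type's I/O-stream behaviour.

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    source = io.BytesIO(b"data to compress")

    # The reader produces compressed bytes as it pulls from `source`.
    with cctx.stream_reader(source) as reader:
        chunk = reader.read(16384)
        while chunk:
            # hand `chunk` (compressed data) to a consumer here
            chunk = reader.read(16384)
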
461 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
539 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
462 "compress(data, allow_empty=False)\n"
540 "compress(data)\n"
463 "\n"
541 "\n"
464 "Compress data in a single operation.\n"
542 "Compress data in a single operation.\n"
465 "\n"
543 "\n"
@@ -473,122 +551,79 b' PyDoc_STRVAR(ZstdCompressor_compress__do'
473 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
551 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
474 static char* kwlist[] = {
552 static char* kwlist[] = {
475 "data",
553 "data",
476 "allow_empty",
477 NULL
554 NULL
478 };
555 };
479
556
480 const char* source;
557 Py_buffer source;
481 Py_ssize_t sourceSize;
482 PyObject* allowEmpty = NULL;
483 size_t destSize;
558 size_t destSize;
484 PyObject* output;
559 PyObject* output = NULL;
485 char* dest;
486 void* dictData = NULL;
487 size_t dictSize = 0;
488 size_t zresult;
560 size_t zresult;
489 ZSTD_parameters zparams;
561 ZSTD_outBuffer outBuffer;
562 ZSTD_inBuffer inBuffer;
490
563
491 #if PY_MAJOR_VERSION >= 3
564 #if PY_MAJOR_VERSION >= 3
492 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O:compress",
565 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|O:compress",
493 #else
566 #else
494 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O:compress",
567 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|O:compress",
495 #endif
568 #endif
496 kwlist, &source, &sourceSize, &allowEmpty)) {
569 kwlist, &source)) {
497 return NULL;
498 }
499
500 if (self->threads && self->dict) {
501 PyErr_SetString(ZstdError,
502 "compress() cannot be used with both dictionaries and multi-threaded compression");
503 return NULL;
504 }
505
506 if (self->threads && self->cparams) {
507 PyErr_SetString(ZstdError,
508 "compress() cannot be used with both compression parameters and multi-threaded compression");
509 return NULL;
510 }
511
512 /* Limitation in zstd C API doesn't let decompression side distinguish
513 between content size of 0 and unknown content size. This can make round
514 tripping via Python difficult. Until this is fixed, require a flag
515 to fire the footgun.
516 https://github.com/indygreg/python-zstandard/issues/11 */
517 if (0 == sourceSize && self->fparams.contentSizeFlag
518 && (!allowEmpty || PyObject_Not(allowEmpty))) {
519 PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes");
520 return NULL;
521 }
522
523 destSize = ZSTD_compressBound(sourceSize);
524 output = PyBytes_FromStringAndSize(NULL, destSize);
525 if (!output) {
526 return NULL;
570 return NULL;
527 }
571 }
528
572
529 dest = PyBytes_AsString(output);
573 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
530
574 PyErr_SetString(PyExc_ValueError,
531 if (self->dict) {
575 "data buffer should be contiguous and have at most one dimension");
532 dictData = self->dict->dictData;
576 goto finally;
533 dictSize = self->dict->dictSize;
534 }
577 }
535
578
536 memset(&zparams, 0, sizeof(zparams));
579 if (ensure_cctx(self)) {
537 if (!self->cparams) {
580 goto finally;
538 zparams.cParams = ZSTD_getCParams(self->compressionLevel, sourceSize, dictSize);
539 }
581 }
540 else {
582
541 ztopy_compression_parameters(self->cparams, &zparams.cParams);
583 destSize = ZSTD_compressBound(source.len);
542 /* Do NOT call ZSTD_adjustCParams() here because the compression params
584 output = PyBytes_FromStringAndSize(NULL, destSize);
543 come from the user. */
585 if (!output) {
586 goto finally;
544 }
587 }
545
588
546 zparams.fParams = self->fparams;
589 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, source.len);
547
590 if (ZSTD_isError(zresult)) {
548 /* The raw dict data has to be processed before it can be used. Since this
591 PyErr_Format(ZstdError, "error setting source size: %s",
549 adds overhead - especially if multiple dictionary compression operations
592 ZSTD_getErrorName(zresult));
550 are performed on the same ZstdCompressor instance - we create a
593 Py_CLEAR(output);
551 ZSTD_CDict once and reuse it for all operations.
594 goto finally;
595 }
552
596
553 Note: the compression parameters used for the first invocation (possibly
597 inBuffer.src = source.buf;
554 derived from the source size) will be reused on all subsequent invocations.
598 inBuffer.size = source.len;
555 https://github.com/facebook/zstd/issues/358 contains more info. We could
599 inBuffer.pos = 0;
556 potentially add an argument somewhere to control this behavior.
600
557 */
601 outBuffer.dst = PyBytes_AsString(output);
558 if (0 != populate_cdict(self, &zparams)) {
602 outBuffer.size = destSize;
559 Py_DECREF(output);
603 outBuffer.pos = 0;
560 return NULL;
561 }
562
604
563 Py_BEGIN_ALLOW_THREADS
605 Py_BEGIN_ALLOW_THREADS
564 if (self->mtcctx) {
606 /* By avoiding ZSTD_compress(), we don't necessarily write out content
565 zresult = ZSTDMT_compressCCtx(self->mtcctx, dest, destSize,
607 size. This means the argument to ZstdCompressor to control frame
566 source, sourceSize, self->compressionLevel);
608 parameters is honored. */
567 }
609 zresult = ZSTD_compress_generic(self->cctx, &outBuffer, &inBuffer, ZSTD_e_end);
568 else {
569 /* By avoiding ZSTD_compress(), we don't necessarily write out content
570 size. This means the argument to ZstdCompressor to control frame
571 parameters is honored. */
572 if (self->cdict) {
573 zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize,
574 source, sourceSize, self->cdict);
575 }
576 else {
577 zresult = ZSTD_compress_advanced(self->cctx, dest, destSize,
578 source, sourceSize, dictData, dictSize, zparams);
579 }
580 }
581 Py_END_ALLOW_THREADS
610 Py_END_ALLOW_THREADS
582
611
583 if (ZSTD_isError(zresult)) {
612 if (ZSTD_isError(zresult)) {
584 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
613 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
585 Py_CLEAR(output);
614 Py_CLEAR(output);
586 return NULL;
615 goto finally;
587 }
616 }
588 else {
617 else if (zresult) {
589 Py_SIZE(output) = zresult;
618 PyErr_SetString(ZstdError, "unexpected partial frame flush");
619 Py_CLEAR(output);
620 goto finally;
590 }
621 }
591
622
623 Py_SIZE(output) = outBuffer.pos;
624
625 finally:
626 PyBuffer_Release(&source);
592 return output;
627 return output;
593 }
628 }
594
629
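Editor's note: a minimal sketch of the simplified compress() call above. The allow_empty argument is gone in this version because the one-shot path now goes through ZSTD_compress_generic() with a pledged source size; the import name is the upstream package's.

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    # One-shot compression; the pledged source size is recorded in the frame
    # header, and empty inputs no longer require an allow_empty flag.
    frame = cctx.compress(b"hello world")
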
@@ -608,11 +643,23 b' static ZstdCompressionObj* ZstdCompresso'
608 NULL
643 NULL
609 };
644 };
610
645
611 Py_ssize_t inSize = 0;
646 unsigned long long inSize = ZSTD_CONTENTSIZE_UNKNOWN;
612 size_t outSize = ZSTD_CStreamOutSize();
647 size_t outSize = ZSTD_CStreamOutSize();
613 ZstdCompressionObj* result = NULL;
648 ZstdCompressionObj* result = NULL;
649 size_t zresult;
614
650
615 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:compressobj", kwlist, &inSize)) {
651 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|K:compressobj", kwlist, &inSize)) {
652 return NULL;
653 }
654
655 if (ensure_cctx(self)) {
656 return NULL;
657 }
658
659 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize);
660 if (ZSTD_isError(zresult)) {
661 PyErr_Format(ZstdError, "error setting source size: %s",
662 ZSTD_getErrorName(zresult));
616 return NULL;
663 return NULL;
617 }
664 }
618
665
@@ -621,19 +668,6 b' static ZstdCompressionObj* ZstdCompresso'
621 return NULL;
668 return NULL;
622 }
669 }
623
670
624 if (self->mtcctx) {
625 if (init_mtcstream(self, inSize)) {
626 Py_DECREF(result);
627 return NULL;
628 }
629 }
630 else {
631 if (0 != init_cstream(self, inSize)) {
632 Py_DECREF(result);
633 return NULL;
634 }
635 }
636
637 result->output.dst = PyMem_Malloc(outSize);
671 result->output.dst = PyMem_Malloc(outSize);
638 if (!result->output.dst) {
672 if (!result->output.dst) {
639 PyErr_NoMemory();
673 PyErr_NoMemory();
@@ -647,9 +681,9 b' static ZstdCompressionObj* ZstdCompresso'
647 return result;
681 return result;
648 }
682 }
649
683
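Editor's note: a sketch of compressobj(), whose size argument now defaults to an unknown content size as shown above. The compress()/flush() methods on the returned object are assumed from the zlib-style interface this type provides elsewhere in the library.

    import zstandard as zstd

    data = b"streamed input"
    cctx = zstd.ZstdCompressor()
    cobj = cctx.compressobj(size=len(data))

    out = cobj.compress(data)
    out += cobj.flush()  # finishes the frame (assumed default flush mode)
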
650 PyDoc_STRVAR(ZstdCompressor_read_from__doc__,
684 PyDoc_STRVAR(ZstdCompressor_read_to_iter__doc__,
651 "read_from(reader, [size=0, read_size=default, write_size=default])\n"
685 "read_to_iter(reader, [size=0, read_size=default, write_size=default])\n"
652 "Read uncompress data from a reader and return an iterator\n"
686 "Read uncompressed data from a reader and return an iterator\n"
653 "\n"
687 "\n"
654 "Returns an iterator of compressed data produced from reading from ``reader``.\n"
688 "Returns an iterator of compressed data produced from reading from ``reader``.\n"
655 "\n"
689 "\n"
@@ -667,7 +701,7 b' PyDoc_STRVAR(ZstdCompressor_read_from__d'
667 "not consume from the reader unless the caller consumes from the iterator.\n"
701 "not consume from the reader unless the caller consumes from the iterator.\n"
668 );
702 );
669
703
670 static ZstdCompressorIterator* ZstdCompressor_read_from(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
704 static ZstdCompressorIterator* ZstdCompressor_read_to_iter(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
671 static char* kwlist[] = {
705 static char* kwlist[] = {
672 "reader",
706 "reader",
673 "size",
707 "size",
@@ -677,12 +711,13 b' static ZstdCompressorIterator* ZstdCompr'
677 };
711 };
678
712
679 PyObject* reader;
713 PyObject* reader;
680 Py_ssize_t sourceSize = 0;
714 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
681 size_t inSize = ZSTD_CStreamInSize();
715 size_t inSize = ZSTD_CStreamInSize();
682 size_t outSize = ZSTD_CStreamOutSize();
716 size_t outSize = ZSTD_CStreamOutSize();
683 ZstdCompressorIterator* result;
717 ZstdCompressorIterator* result;
718 size_t zresult;
684
719
685 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk:read_from", kwlist,
720 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kkk:read_to_iter", kwlist,
686 &reader, &sourceSize, &inSize, &outSize)) {
721 &reader, &sourceSize, &inSize, &outSize)) {
687 return NULL;
722 return NULL;
688 }
723 }
@@ -696,18 +731,11 b' static ZstdCompressorIterator* ZstdCompr'
696 Py_INCREF(result->reader);
731 Py_INCREF(result->reader);
697 }
732 }
698 else if (1 == PyObject_CheckBuffer(reader)) {
733 else if (1 == PyObject_CheckBuffer(reader)) {
699 result->buffer = PyMem_Malloc(sizeof(Py_buffer));
734 if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
700 if (!result->buffer) {
701 goto except;
735 goto except;
702 }
736 }
703
737
704 memset(result->buffer, 0, sizeof(Py_buffer));
738 sourceSize = result->buffer.len;
705
706 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
707 goto except;
708 }
709
710 sourceSize = result->buffer->len;
711 }
739 }
712 else {
740 else {
713 PyErr_SetString(PyExc_ValueError,
741 PyErr_SetString(PyExc_ValueError,
@@ -715,22 +743,20 b' static ZstdCompressorIterator* ZstdCompr'
715 goto except;
743 goto except;
716 }
744 }
717
745
746 if (ensure_cctx(self)) {
747 return NULL;
748 }
749
750 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
751 if (ZSTD_isError(zresult)) {
752 PyErr_Format(ZstdError, "error setting source size: %s",
753 ZSTD_getErrorName(zresult));
754 return NULL;
755 }
756
718 result->compressor = self;
757 result->compressor = self;
719 Py_INCREF(result->compressor);
758 Py_INCREF(result->compressor);
720
759
721 result->sourceSize = sourceSize;
722
723 if (self->mtcctx) {
724 if (init_mtcstream(self, sourceSize)) {
725 goto except;
726 }
727 }
728 else {
729 if (0 != init_cstream(self, sourceSize)) {
730 goto except;
731 }
732 }
733
734 result->inSize = inSize;
760 result->inSize = inSize;
735 result->outSize = outSize;
761 result->outSize = outSize;
736
762
@@ -744,16 +770,13 b' static ZstdCompressorIterator* ZstdCompr'
744 goto finally;
770 goto finally;
745
771
746 except:
772 except:
747 Py_XDECREF(result->compressor);
773 Py_CLEAR(result);
748 Py_XDECREF(result->reader);
749 Py_DECREF(result);
750 result = NULL;
751
774
752 finally:
775 finally:
753 return result;
776 return result;
754 }
777 }
755
778
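Editor's note: a sketch of the renamed read_to_iter() API (formerly read_from). Per the docstring the argument may be a reader with read() or a buffer-protocol object, and iterating the result drives compression.

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    source = io.BytesIO(b"data to compress")

    chunks = []
    for chunk in cctx.read_to_iter(source):
        chunks.append(chunk)
    compressed = b"".join(chunks)
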
756 PyDoc_STRVAR(ZstdCompressor_write_to___doc__,
779 PyDoc_STRVAR(ZstdCompressor_stream_writer___doc__,
757 "Create a context manager to write compressed data to an object.\n"
780 "Create a context manager to write compressed data to an object.\n"
758 "\n"
781 "\n"
759 "The passed object must have a ``write()`` method.\n"
782 "The passed object must have a ``write()`` method.\n"
@@ -771,7 +794,7 b' PyDoc_STRVAR(ZstdCompressor_write_to___d'
771 "for a compressor output stream.\n"
794 "for a compressor output stream.\n"
772 );
795 );
773
796
774 static ZstdCompressionWriter* ZstdCompressor_write_to(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
797 static ZstdCompressionWriter* ZstdCompressor_stream_writer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
775 static char* kwlist[] = {
798 static char* kwlist[] = {
776 "writer",
799 "writer",
777 "size",
800 "size",
@@ -781,10 +804,10 b' static ZstdCompressionWriter* ZstdCompre'
781
804
782 PyObject* writer;
805 PyObject* writer;
783 ZstdCompressionWriter* result;
806 ZstdCompressionWriter* result;
784 Py_ssize_t sourceSize = 0;
807 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
785 size_t outSize = ZSTD_CStreamOutSize();
808 size_t outSize = ZSTD_CStreamOutSize();
786
809
787 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk:write_to", kwlist,
810 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_writer", kwlist,
788 &writer, &sourceSize, &outSize)) {
811 &writer, &sourceSize, &outSize)) {
789 return NULL;
812 return NULL;
790 }
813 }
@@ -794,6 +817,10 b' static ZstdCompressionWriter* ZstdCompre'
794 return NULL;
817 return NULL;
795 }
818 }
796
819
820 if (ensure_cctx(self)) {
821 return NULL;
822 }
823
797 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
824 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
798 if (!result) {
825 if (!result) {
799 return NULL;
826 return NULL;
@@ -807,6 +834,7 b' static ZstdCompressionWriter* ZstdCompre'
807
834
808 result->sourceSize = sourceSize;
835 result->sourceSize = sourceSize;
809 result->outSize = outSize;
836 result->outSize = outSize;
837 result->bytesCompressed = 0;
810
838
811 return result;
839 return result;
812 }
840 }
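Editor's note: a sketch of the renamed stream_writer() API (formerly write_to) per the docstring above; the target only needs a write() method, and leaving the context ends the frame.

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    destination = io.BytesIO()

    with cctx.stream_writer(destination) as compressor:
        compressor.write(b"first chunk")
        compressor.write(b"second chunk")
    # Exiting the context flushes the compressor and ends the frame.
    frame = destination.getvalue()
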
@@ -833,6 +861,7 b' typedef enum {'
833 WorkerError_none = 0,
861 WorkerError_none = 0,
834 WorkerError_zstd = 1,
862 WorkerError_zstd = 1,
835 WorkerError_no_memory = 2,
863 WorkerError_no_memory = 2,
864 WorkerError_nospace = 3,
836 } WorkerError;
865 } WorkerError;
837
866
838 /**
867 /**
@@ -841,10 +870,6 b' typedef enum {'
841 typedef struct {
870 typedef struct {
842 /* Used for compression. */
871 /* Used for compression. */
843 ZSTD_CCtx* cctx;
872 ZSTD_CCtx* cctx;
844 ZSTD_CDict* cdict;
845 int cLevel;
846 CompressionParametersObject* cParams;
847 ZSTD_frameParameters fParams;
848
873
849 /* What to compress. */
874 /* What to compress. */
850 DataSource* sources;
875 DataSource* sources;
@@ -868,7 +893,6 b' static void compress_worker(WorkerState*'
868 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
893 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
869 Py_ssize_t currentBufferStartOffset = state->startOffset;
894 Py_ssize_t currentBufferStartOffset = state->startOffset;
870 size_t zresult;
895 size_t zresult;
871 ZSTD_parameters zparams;
872 void* newDest;
896 void* newDest;
873 size_t allocationSize;
897 size_t allocationSize;
874 size_t boundSize;
898 size_t boundSize;
@@ -879,16 +903,10 b' static void compress_worker(WorkerState*'
879 assert(!state->destBuffers);
903 assert(!state->destBuffers);
880 assert(0 == state->destCount);
904 assert(0 == state->destCount);
881
905
882 if (state->cParams) {
883 ztopy_compression_parameters(state->cParams, &zparams.cParams);
884 }
885
886 zparams.fParams = state->fParams;
887
888 /*
906 /*
889 * The total size of the compressed data is unknown until we actually
907 * The total size of the compressed data is unknown until we actually
890 * compress data. That means we can't pre-allocate the exact size we need.
908 * compress data. That means we can't pre-allocate the exact size we need.
891 *
909 *
892 * There is a cost to every allocation and reallocation. So, it is in our
910 * There is a cost to every allocation and reallocation. So, it is in our
893 * interest to minimize the number of allocations.
911 * interest to minimize the number of allocations.
894 *
912 *
@@ -927,7 +945,8 b' static void compress_worker(WorkerState*'
927
945
928 destBuffer->segmentsSize = remainingItems;
946 destBuffer->segmentsSize = remainingItems;
929
947
930 allocationSize = roundpow2(state->totalSourceSize >> 4);
948 assert(state->totalSourceSize <= SIZE_MAX);
949 allocationSize = roundpow2((size_t)state->totalSourceSize >> 4);
931
950
932 /* If the maximum size of the output is larger than that, round up. */
951 /* If the maximum size of the output is larger than that, round up. */
933 boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize);
952 boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize);
@@ -949,6 +968,8 b' static void compress_worker(WorkerState*'
949 size_t sourceSize = sources[inputOffset].sourceSize;
968 size_t sourceSize = sources[inputOffset].sourceSize;
950 size_t destAvailable;
969 size_t destAvailable;
951 void* dest;
970 void* dest;
971 ZSTD_outBuffer opOutBuffer;
972 ZSTD_inBuffer opInBuffer;
952
973
953 destAvailable = destBuffer->destSize - destOffset;
974 destAvailable = destBuffer->destSize - destOffset;
954 boundSize = ZSTD_compressBound(sourceSize);
975 boundSize = ZSTD_compressBound(sourceSize);
@@ -1004,7 +1025,8 b' static void compress_worker(WorkerState*'
1004 * We could dynamically update allocation size based on work done so far.
1025 * We could dynamically update allocation size based on work done so far.
1005 * For now, keep is simple.
1026 * For now, keep is simple.
1006 */
1027 */
1007 allocationSize = roundpow2(state->totalSourceSize >> 4);
1028 assert(state->totalSourceSize <= SIZE_MAX);
1029 allocationSize = roundpow2((size_t)state->totalSourceSize >> 4);
1008
1030
1009 if (boundSize > allocationSize) {
1031 if (boundSize > allocationSize) {
1010 allocationSize = roundpow2(boundSize);
1032 allocationSize = roundpow2(boundSize);
@@ -1032,19 +1054,15 b' static void compress_worker(WorkerState*'
1032
1054
1033 dest = (char*)destBuffer->dest + destOffset;
1055 dest = (char*)destBuffer->dest + destOffset;
1034
1056
1035 if (state->cdict) {
1057 opInBuffer.src = source;
1036 zresult = ZSTD_compress_usingCDict(state->cctx, dest, destAvailable,
1058 opInBuffer.size = sourceSize;
1037 source, sourceSize, state->cdict);
1059 opInBuffer.pos = 0;
1038 }
1039 else {
1040 if (!state->cParams) {
1041 zparams.cParams = ZSTD_getCParams(state->cLevel, sourceSize, 0);
1042 }
1043
1060
1044 zresult = ZSTD_compress_advanced(state->cctx, dest, destAvailable,
1061 opOutBuffer.dst = dest;
1045 source, sourceSize, NULL, 0, zparams);
1062 opOutBuffer.size = destAvailable;
1046 }
1063 opOutBuffer.pos = 0;
1047
1064
1065 zresult = ZSTD_CCtx_setPledgedSrcSize(state->cctx, sourceSize);
1048 if (ZSTD_isError(zresult)) {
1066 if (ZSTD_isError(zresult)) {
1049 state->error = WorkerError_zstd;
1067 state->error = WorkerError_zstd;
1050 state->zresult = zresult;
1068 state->zresult = zresult;
@@ -1052,10 +1070,23 b' static void compress_worker(WorkerState*'
1052 break;
1070 break;
1053 }
1071 }
1054
1072
1073 zresult = ZSTD_compress_generic(state->cctx, &opOutBuffer, &opInBuffer, ZSTD_e_end);
1074 if (ZSTD_isError(zresult)) {
1075 state->error = WorkerError_zstd;
1076 state->zresult = zresult;
1077 state->errorOffset = inputOffset;
1078 break;
1079 }
1080 else if (zresult) {
1081 state->error = WorkerError_nospace;
1082 state->errorOffset = inputOffset;
1083 break;
1084 }
1085
1055 destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset;
1086 destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset;
1056 destBuffer->segments[inputOffset - currentBufferStartOffset].length = zresult;
1087 destBuffer->segments[inputOffset - currentBufferStartOffset].length = opOutBuffer.pos;
1057
1088
1058 destOffset += zresult;
1089 destOffset += opOutBuffer.pos;
1059 remainingItems--;
1090 remainingItems--;
1060 }
1091 }
1061
1092
@@ -1072,15 +1103,14 b' static void compress_worker(WorkerState*'
1072 }
1103 }
1073
1104
1074 ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor,
1105 ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor,
1075 DataSources* sources, unsigned int threadCount) {
1106 DataSources* sources, Py_ssize_t threadCount) {
1076 ZSTD_parameters zparams;
1077 unsigned long long bytesPerWorker;
1107 unsigned long long bytesPerWorker;
1078 POOL_ctx* pool = NULL;
1108 POOL_ctx* pool = NULL;
1079 WorkerState* workerStates = NULL;
1109 WorkerState* workerStates = NULL;
1080 Py_ssize_t i;
1110 Py_ssize_t i;
1081 unsigned long long workerBytes = 0;
1111 unsigned long long workerBytes = 0;
1082 Py_ssize_t workerStartOffset = 0;
1112 Py_ssize_t workerStartOffset = 0;
1083 size_t currentThread = 0;
1113 Py_ssize_t currentThread = 0;
1084 int errored = 0;
1114 int errored = 0;
1085 Py_ssize_t segmentsCount = 0;
1115 Py_ssize_t segmentsCount = 0;
1086 Py_ssize_t segmentIndex;
1116 Py_ssize_t segmentIndex;
@@ -1093,34 +1123,12 b' ZstdBufferWithSegmentsCollection* compre'
1093 assert(threadCount >= 1);
1123 assert(threadCount >= 1);
1094
1124
1095 /* More threads than inputs makes no sense. */
1125 /* More threads than inputs makes no sense. */
1096 threadCount = sources->sourcesSize < threadCount ? (unsigned int)sources->sourcesSize
1126 threadCount = sources->sourcesSize < threadCount ? sources->sourcesSize
1097 : threadCount;
1127 : threadCount;
1098
1128
1099 /* TODO lower thread count when input size is too small and threads would add
1129 /* TODO lower thread count when input size is too small and threads would add
1100 overhead. */
1130 overhead. */
1101
1131
1102 /*
1103 * When dictionaries are used, parameters are derived from the size of the
1104 * first element.
1105 *
1106 * TODO come up with a better mechanism.
1107 */
1108 memset(&zparams, 0, sizeof(zparams));
1109 if (compressor->cparams) {
1110 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
1111 }
1112 else {
1113 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel,
1114 sources->sources[0].sourceSize,
1115 compressor->dict ? compressor->dict->dictSize : 0);
1116 }
1117
1118 zparams.fParams = compressor->fparams;
1119
1120 if (0 != populate_cdict(compressor, &zparams)) {
1121 return NULL;
1122 }
1123
1124 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1132 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1125 if (NULL == workerStates) {
1133 if (NULL == workerStates) {
1126 PyErr_NoMemory();
1134 PyErr_NoMemory();
@@ -1140,16 +1148,42 b' ZstdBufferWithSegmentsCollection* compre'
1140 bytesPerWorker = sources->totalSourceSize / threadCount;
1148 bytesPerWorker = sources->totalSourceSize / threadCount;
1141
1149
1142 for (i = 0; i < threadCount; i++) {
1150 for (i = 0; i < threadCount; i++) {
1151 size_t zresult;
1152
1143 workerStates[i].cctx = ZSTD_createCCtx();
1153 workerStates[i].cctx = ZSTD_createCCtx();
1144 if (!workerStates[i].cctx) {
1154 if (!workerStates[i].cctx) {
1145 PyErr_NoMemory();
1155 PyErr_NoMemory();
1146 goto finally;
1156 goto finally;
1147 }
1157 }
1148
1158
1149 workerStates[i].cdict = compressor->cdict;
1159 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(workerStates[i].cctx,
1150 workerStates[i].cLevel = compressor->compressionLevel;
1160 compressor->params);
1151 workerStates[i].cParams = compressor->cparams;
1161 if (ZSTD_isError(zresult)) {
1152 workerStates[i].fParams = compressor->fparams;
1162 PyErr_Format(ZstdError, "could not set compression parameters: %s",
1163 ZSTD_getErrorName(zresult));
1164 goto finally;
1165 }
1166
1167 if (compressor->dict) {
1168 if (compressor->dict->cdict) {
1169 zresult = ZSTD_CCtx_refCDict(workerStates[i].cctx, compressor->dict->cdict);
1170 }
1171 else {
1172 zresult = ZSTD_CCtx_loadDictionary_advanced(
1173 workerStates[i].cctx,
1174 compressor->dict->dictData,
1175 compressor->dict->dictSize,
1176 ZSTD_dlm_byRef,
1177 compressor->dict->dictType);
1178 }
1179
1180 if (ZSTD_isError(zresult)) {
1181 PyErr_Format(ZstdError, "could not load compression dictionary: %s",
1182 ZSTD_getErrorName(zresult));
1183 goto finally;
1184 }
1185
1186 }
1153
1187
1154 workerStates[i].sources = sources->sources;
1188 workerStates[i].sources = sources->sources;
1155 workerStates[i].sourcesSize = sources->sourcesSize;
1189 workerStates[i].sourcesSize = sources->sourcesSize;
@@ -1221,6 +1255,13 b' ZstdBufferWithSegmentsCollection* compre'
1221 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1255 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1222 errored = 1;
1256 errored = 1;
1223 break;
1257 break;
1258
1259 case WorkerError_nospace:
1260 PyErr_Format(ZstdError, "error compressing item %zd: not enough space in output",
1261 workerStates[i].errorOffset);
1262 errored = 1;
1263 break;
1264
1224 default:
1265 default:
1225 ;
1266 ;
1226 }
1267 }
@@ -1341,12 +1382,6 b' static ZstdBufferWithSegmentsCollection*'
1341 Py_ssize_t sourceCount = 0;
1382 Py_ssize_t sourceCount = 0;
1342 ZstdBufferWithSegmentsCollection* result = NULL;
1383 ZstdBufferWithSegmentsCollection* result = NULL;
1343
1384
1344 if (self->mtcctx) {
1345 PyErr_SetString(ZstdError,
1346 "function cannot be called on ZstdCompressor configured for multi-threaded compression");
1347 return NULL;
1348 }
1349
1350 memset(&sources, 0, sizeof(sources));
1385 memset(&sources, 0, sizeof(sources));
1351
1386
1352 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist,
1387 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist,
@@ -1372,8 +1407,14 b' static ZstdBufferWithSegmentsCollection*'
1372 }
1407 }
1373
1408
1374 for (i = 0; i < buffer->segmentCount; i++) {
1409 for (i = 0; i < buffer->segmentCount; i++) {
1410 if (buffer->segments[i].length > SIZE_MAX) {
1411 PyErr_Format(PyExc_ValueError,
1412 "buffer segment %zd is too large for this platform", i);
1413 goto finally;
1414 }
1415
1375 sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset;
1416 sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset;
1376 sources.sources[i].sourceSize = buffer->segments[i].length;
1417 sources.sources[i].sourceSize = (size_t)buffer->segments[i].length;
1377 sources.totalSourceSize += buffer->segments[i].length;
1418 sources.totalSourceSize += buffer->segments[i].length;
1378 }
1419 }
1379
1420
@@ -1397,8 +1438,15 b' static ZstdBufferWithSegmentsCollection*'
1397 buffer = collection->buffers[i];
1438 buffer = collection->buffers[i];
1398
1439
1399 for (j = 0; j < buffer->segmentCount; j++) {
1440 for (j = 0; j < buffer->segmentCount; j++) {
1441 if (buffer->segments[j].length > SIZE_MAX) {
1442 PyErr_Format(PyExc_ValueError,
1443 "buffer segment %zd in buffer %zd is too large for this platform",
1444 j, i);
1445 goto finally;
1446 }
1447
1400 sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset;
1448 sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset;
1401 sources.sources[offset].sourceSize = buffer->segments[j].length;
1449 sources.sources[offset].sourceSize = (size_t)buffer->segments[j].length;
1402 sources.totalSourceSize += buffer->segments[j].length;
1450 sources.totalSourceSize += buffer->segments[j].length;
1403
1451
1404 offset++;
1452 offset++;
@@ -1416,11 +1464,6 b' static ZstdBufferWithSegmentsCollection*'
1416 goto finally;
1464 goto finally;
1417 }
1465 }
1418
1466
1419 /*
1420 * It isn't clear whether the address referred to by Py_buffer.buf
1421 * is still valid after PyBuffer_Release. We we hold a reference to all
1422 * Py_buffer instances for the duration of the operation.
1423 */
1424 dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer));
1467 dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer));
1425 if (NULL == dataBuffers) {
1468 if (NULL == dataBuffers) {
1426 PyErr_NoMemory();
1469 PyErr_NoMemory();
@@ -1459,6 +1502,11 b' static ZstdBufferWithSegmentsCollection*'
1459 goto finally;
1502 goto finally;
1460 }
1503 }
1461
1504
1505 if (sources.totalSourceSize > SIZE_MAX) {
1506 PyErr_SetString(PyExc_ValueError, "sources are too large for this platform");
1507 goto finally;
1508 }
1509
1462 result = compress_from_datasources(self, &sources, threads);
1510 result = compress_from_datasources(self, &sources, threads);
1463
1511
1464 finally:
1512 finally:
@@ -1482,12 +1530,24 b' static PyMethodDef ZstdCompressor_method'
1482 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
1530 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
1483 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
1531 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
1484 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
1532 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
1485 { "read_from", (PyCFunction)ZstdCompressor_read_from,
1533 { "stream_reader", (PyCFunction)ZstdCompressor_stream_reader,
1486 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_from__doc__ },
1534 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_reader__doc__ },
1487 { "write_to", (PyCFunction)ZstdCompressor_write_to,
1535 { "stream_writer", (PyCFunction)ZstdCompressor_stream_writer,
1488 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_write_to___doc__ },
1536 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
1537 { "read_to_iter", (PyCFunction)ZstdCompressor_read_to_iter,
1538 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
1539 /* TODO Remove deprecated API */
1540 { "read_from", (PyCFunction)ZstdCompressor_read_to_iter,
1541 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
1542 /* TODO remove deprecated API */
1543 { "write_to", (PyCFunction)ZstdCompressor_stream_writer,
1544 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
1489 { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer,
1545 { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer,
1490 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ },
1546 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ },
1547 { "memory_size", (PyCFunction)ZstdCompressor_memory_size,
1548 METH_NOARGS, ZstdCompressor_memory_size__doc__ },
1549 { "frame_progression", (PyCFunction)ZstdCompressor_frame_progression,
1550 METH_NOARGS, ZstdCompressor_frame_progression__doc__ },
1491 { NULL, NULL }
1551 { NULL, NULL }
1492 };
1552 };
1493
1553
@@ -21,10 +21,9 b' static void ZstdCompressorIterator_deall'
21 Py_XDECREF(self->compressor);
21 Py_XDECREF(self->compressor);
22 Py_XDECREF(self->reader);
22 Py_XDECREF(self->reader);
23
23
24 if (self->buffer) {
24 if (self->buffer.buf) {
25 PyBuffer_Release(self->buffer);
25 PyBuffer_Release(&self->buffer);
26 PyMem_FREE(self->buffer);
26 memset(&self->buffer, 0, sizeof(self->buffer));
27 self->buffer = NULL;
28 }
27 }
29
28
30 if (self->output.dst) {
29 if (self->output.dst) {
@@ -58,14 +57,8 b' feedcompressor:'
58 /* If we have data left in the input, consume it. */
57 /* If we have data left in the input, consume it. */
59 if (self->input.pos < self->input.size) {
58 if (self->input.pos < self->input.size) {
60 Py_BEGIN_ALLOW_THREADS
59 Py_BEGIN_ALLOW_THREADS
61 if (self->compressor->mtcctx) {
60 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
62 zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
61 &self->input, ZSTD_e_continue);
63 &self->output, &self->input);
64 }
65 else {
66 zresult = ZSTD_compressStream(self->compressor->cstream, &self->output,
67 &self->input);
68 }
69 Py_END_ALLOW_THREADS
62 Py_END_ALLOW_THREADS
70
63
71 /* Release the Python object holding the input buffer. */
64 /* Release the Python object holding the input buffer. */
@@ -107,14 +100,14 b' feedcompressor:'
107 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
100 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
108 }
101 }
109 else {
102 else {
110 assert(self->buffer && self->buffer->buf);
103 assert(self->buffer.buf);
111
104
112 /* Only support contiguous C arrays. */
105 /* Only support contiguous C arrays. */
113 assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL);
106 assert(self->buffer.strides == NULL && self->buffer.suboffsets == NULL);
114 assert(self->buffer->itemsize == 1);
107 assert(self->buffer.itemsize == 1);
115
108
116 readBuffer = (char*)self->buffer->buf + self->bufferOffset;
109 readBuffer = (char*)self->buffer.buf + self->bufferOffset;
117 bufferRemaining = self->buffer->len - self->bufferOffset;
110 bufferRemaining = self->buffer.len - self->bufferOffset;
118 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
111 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
119 self->bufferOffset += readSize;
112 self->bufferOffset += readSize;
120 }
113 }
@@ -130,12 +123,12 b' feedcompressor:'
130
123
131 /* EOF */
124 /* EOF */
132 if (0 == readSize) {
125 if (0 == readSize) {
133 if (self->compressor->mtcctx) {
126 self->input.src = NULL;
134 zresult = ZSTDMT_endStream(self->compressor->mtcctx, &self->output);
127 self->input.size = 0;
135 }
128 self->input.pos = 0;
136 else {
129
137 zresult = ZSTD_endStream(self->compressor->cstream, &self->output);
130 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
138 }
131 &self->input, ZSTD_e_end);
139 if (ZSTD_isError(zresult)) {
132 if (ZSTD_isError(zresult)) {
140 PyErr_Format(ZstdError, "error ending compression stream: %s",
133 PyErr_Format(ZstdError, "error ending compression stream: %s",
141 ZSTD_getErrorName(zresult));
134 ZSTD_getErrorName(zresult));
@@ -159,13 +152,8 b' feedcompressor:'
159 self->input.pos = 0;
152 self->input.pos = 0;
160
153
161 Py_BEGIN_ALLOW_THREADS
154 Py_BEGIN_ALLOW_THREADS
162 if (self->compressor->mtcctx) {
155 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
163 zresult = ZSTDMT_compressStream(self->compressor->mtcctx, &self->output,
156 &self->input, ZSTD_e_continue);
164 &self->input);
165 }
166 else {
167 zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, &self->input);
168 }
169 Py_END_ALLOW_THREADS
157 Py_END_ALLOW_THREADS
170
158
171 /* The input buffer currently points to memory managed by Python
159 /* The input buffer currently points to memory managed by Python
@@ -52,6 +52,11 b' void constants_module_init(PyObject* mod'
52 PyErr_Format(PyExc_ValueError, "could not create frame header object");
52 PyErr_Format(PyExc_ValueError, "could not create frame header object");
53 }
53 }
54
54
55 PyModule_AddObject(mod, "CONTENTSIZE_UNKNOWN",
56 PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_UNKNOWN));
57 PyModule_AddObject(mod, "CONTENTSIZE_ERROR",
58 PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_ERROR));
59
55 PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel());
60 PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel());
56 PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE",
61 PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE",
57 (long)ZSTD_CStreamInSize());
62 (long)ZSTD_CStreamInSize());
@@ -75,7 +80,9 b' void constants_module_init(PyObject* mod'
75 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN);
80 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN);
76 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX);
81 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX);
77 PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN);
82 PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN);
78 PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX);
83 PyModule_AddIntConstant(mod, "LDM_MINMATCH_MIN", ZSTD_LDM_MINMATCH_MIN);
84 PyModule_AddIntConstant(mod, "LDM_MINMATCH_MAX", ZSTD_LDM_MINMATCH_MAX);
85 PyModule_AddIntConstant(mod, "LDM_BUCKETSIZELOG_MAX", ZSTD_LDM_BUCKETSIZELOG_MAX);
79
86
80 PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast);
87 PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast);
81 PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast);
88 PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast);
@@ -84,4 +91,12 b' void constants_module_init(PyObject* mod'
84 PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2);
91 PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2);
85 PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2);
92 PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2);
86 PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt);
93 PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt);
94 PyModule_AddIntConstant(mod, "STRATEGY_BTULTRA", ZSTD_btultra);
95
96 PyModule_AddIntConstant(mod, "DICT_TYPE_AUTO", ZSTD_dct_auto);
97 PyModule_AddIntConstant(mod, "DICT_TYPE_RAWCONTENT", ZSTD_dct_rawContent);
98 PyModule_AddIntConstant(mod, "DICT_TYPE_FULLDICT", ZSTD_dct_fullDict);
99
100 PyModule_AddIntConstant(mod, "FORMAT_ZSTD1", ZSTD_f_zstd1);
101 PyModule_AddIntConstant(mod, "FORMAT_ZSTD1_MAGICLESS", ZSTD_f_zstd1_magicless);
87 }
102 }
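Editor's note: the new module-level constants registered above are plain attributes; a small sketch (upstream import name assumed).

    import zstandard as zstd

    # Sentinels mirroring ZSTD_CONTENTSIZE_UNKNOWN / ZSTD_CONTENTSIZE_ERROR.
    print(zstd.CONTENTSIZE_UNKNOWN, zstd.CONTENTSIZE_ERROR)

    # New frame-format and long-distance-matching constants.
    print(zstd.FORMAT_ZSTD1, zstd.FORMAT_ZSTD1_MAGICLESS)
    print(zstd.LDM_MINMATCH_MIN, zstd.LDM_MINMATCH_MAX, zstd.LDM_BUCKETSIZELOG_MAX)
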
@@ -27,7 +27,7 b' static PyObject* ZstdDecompressionWriter'
27 return NULL;
27 return NULL;
28 }
28 }
29
29
30 if (0 != init_dstream(self->decompressor)) {
30 if (ensure_dctx(self->decompressor, 1)) {
31 return NULL;
31 return NULL;
32 }
32 }
33
33
@@ -44,18 +44,17 b' static PyObject* ZstdDecompressionWriter'
44 }
44 }
45
45
46 static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) {
46 static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) {
47 if (!self->decompressor->dstream) {
47 return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->decompressor->dctx));
48 PyErr_SetString(ZstdError, "cannot determine size of inactive decompressor; "
49 "call when context manager is active");
50 return NULL;
51 }
52
53 return PyLong_FromSize_t(ZSTD_sizeof_DStream(self->decompressor->dstream));
54 }
48 }
55
49
56 static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args) {
50 static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args, PyObject* kwargs) {
57 const char* source;
51 static char* kwlist[] = {
58 Py_ssize_t sourceSize;
52 "data",
53 NULL
54 };
55
56 PyObject* result = NULL;
57 Py_buffer source;
59 size_t zresult = 0;
58 size_t zresult = 0;
60 ZSTD_inBuffer input;
59 ZSTD_inBuffer input;
61 ZSTD_outBuffer output;
60 ZSTD_outBuffer output;
@@ -63,41 +62,47 b' static PyObject* ZstdDecompressionWriter'
63 Py_ssize_t totalWrite = 0;
62 Py_ssize_t totalWrite = 0;
64
63
65 #if PY_MAJOR_VERSION >= 3
64 #if PY_MAJOR_VERSION >= 3
66 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
65 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write",
67 #else
66 #else
68 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
67 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write",
69 #endif
68 #endif
69 kwlist, &source)) {
70 return NULL;
70 return NULL;
71 }
71 }
72
72
73 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
74 PyErr_SetString(PyExc_ValueError,
75 "data buffer should be contiguous and have at most one dimension");
76 goto finally;
77 }
78
73 if (!self->entered) {
79 if (!self->entered) {
74 PyErr_SetString(ZstdError, "write must be called from an active context manager");
80 PyErr_SetString(ZstdError, "write must be called from an active context manager");
75 return NULL;
81 goto finally;
76 }
82 }
77
83
78 assert(self->decompressor->dstream);
79
80 output.dst = PyMem_Malloc(self->outSize);
84 output.dst = PyMem_Malloc(self->outSize);
81 if (!output.dst) {
85 if (!output.dst) {
82 return PyErr_NoMemory();
86 PyErr_NoMemory();
87 goto finally;
83 }
88 }
84 output.size = self->outSize;
89 output.size = self->outSize;
85 output.pos = 0;
90 output.pos = 0;
86
91
87 input.src = source;
92 input.src = source.buf;
88 input.size = sourceSize;
93 input.size = source.len;
89 input.pos = 0;
94 input.pos = 0;
90
95
91 while ((ssize_t)input.pos < sourceSize) {
96 while ((ssize_t)input.pos < source.len) {
92 Py_BEGIN_ALLOW_THREADS
97 Py_BEGIN_ALLOW_THREADS
93 zresult = ZSTD_decompressStream(self->decompressor->dstream, &output, &input);
98 zresult = ZSTD_decompress_generic(self->decompressor->dctx, &output, &input);
94 Py_END_ALLOW_THREADS
99 Py_END_ALLOW_THREADS
95
100
96 if (ZSTD_isError(zresult)) {
101 if (ZSTD_isError(zresult)) {
97 PyMem_Free(output.dst);
102 PyMem_Free(output.dst);
98 PyErr_Format(ZstdError, "zstd decompress error: %s",
103 PyErr_Format(ZstdError, "zstd decompress error: %s",
99 ZSTD_getErrorName(zresult));
104 ZSTD_getErrorName(zresult));
100 return NULL;
105 goto finally;
101 }
106 }
102
107
103 if (output.pos) {
108 if (output.pos) {
@@ -115,7 +120,11 b' static PyObject* ZstdDecompressionWriter'
115
120
116 PyMem_Free(output.dst);
121 PyMem_Free(output.dst);
117
122
118 return PyLong_FromSsize_t(totalWrite);
123 result = PyLong_FromSsize_t(totalWrite);
124
125 finally:
126 PyBuffer_Release(&source);
127 return result;
119 }
128 }
120
129
121 static PyMethodDef ZstdDecompressionWriter_methods[] = {
130 static PyMethodDef ZstdDecompressionWriter_methods[] = {
@@ -125,7 +134,7 b' static PyMethodDef ZstdDecompressionWrit'
125 PyDoc_STR("Exit a decompression context.") },
134 PyDoc_STR("Exit a decompression context.") },
126 { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS,
135 { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS,
127 PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") },
136 PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") },
128 { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS,
137 { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
129 PyDoc_STR("Compress data") },
138 PyDoc_STR("Compress data") },
130 { NULL, NULL }
139 { NULL, NULL }
131 };
140 };
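Editor's note: a hedged sketch of the decompression writer object whose write() method gains keyword support above. Obtaining it via the decompressor's stream_writer() (née write_to()) factory is assumed from the wider API and is not shown in this hunk.

    import io
    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b"round trip me")

    dctx = zstd.ZstdDecompressor()
    out = io.BytesIO()

    with dctx.stream_writer(out) as decompressor:
        decompressor.write(data=frame)  # keyword form now accepted
    assert out.getvalue() == b"round trip me"
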
@@ -20,56 +20,61 b' static void DecompressionObj_dealloc(Zst'
20 PyObject_Del(self);
20 PyObject_Del(self);
21 }
21 }
22
22
23 static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args) {
23 static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args, PyObject* kwargs) {
24 const char* source;
24 static char* kwlist[] = {
25 Py_ssize_t sourceSize;
25 "data",
26 NULL
27 };
28
29 Py_buffer source;
26 size_t zresult;
30 size_t zresult;
27 ZSTD_inBuffer input;
31 ZSTD_inBuffer input;
28 ZSTD_outBuffer output;
32 ZSTD_outBuffer output;
29 size_t outSize = ZSTD_DStreamOutSize();
30 PyObject* result = NULL;
33 PyObject* result = NULL;
31 Py_ssize_t resultSize = 0;
34 Py_ssize_t resultSize = 0;
32
35
33 /* Constructor should ensure stream is populated. */
34 assert(self->decompressor->dstream);
35
36 if (self->finished) {
36 if (self->finished) {
37 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
37 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
38 return NULL;
38 return NULL;
39 }
39 }
40
40
41 #if PY_MAJOR_VERSION >= 3
41 #if PY_MAJOR_VERSION >= 3
42 if (!PyArg_ParseTuple(args, "y#:decompress",
42 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:decompress",
43 #else
43 #else
44 if (!PyArg_ParseTuple(args, "s#:decompress",
44 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:decompress",
45 #endif
45 #endif
46 &source, &sourceSize)) {
46 kwlist, &source)) {
47 return NULL;
47 return NULL;
48 }
48 }
49
49
50 input.src = source;
50 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
51 input.size = sourceSize;
51 PyErr_SetString(PyExc_ValueError,
52 "data buffer should be contiguous and have at most one dimension");
53 goto finally;
54 }
55
56 input.src = source.buf;
57 input.size = source.len;
52 input.pos = 0;
58 input.pos = 0;
53
59
54 output.dst = PyMem_Malloc(outSize);
60 output.dst = PyMem_Malloc(self->outSize);
55 if (!output.dst) {
61 if (!output.dst) {
56 PyErr_NoMemory();
62 PyErr_NoMemory();
57 return NULL;
63 goto except;
58 }
64 }
59 output.size = outSize;
65 output.size = self->outSize;
60 output.pos = 0;
66 output.pos = 0;
61
67
62 /* Read input until exhausted. */
68 /* Read input until exhausted. */
63 while (input.pos < input.size) {
69 while (input.pos < input.size) {
64 Py_BEGIN_ALLOW_THREADS
70 Py_BEGIN_ALLOW_THREADS
65 zresult = ZSTD_decompressStream(self->decompressor->dstream, &output, &input);
71 zresult = ZSTD_decompress_generic(self->decompressor->dctx, &output, &input);
66 Py_END_ALLOW_THREADS
72 Py_END_ALLOW_THREADS
67
73
68 if (ZSTD_isError(zresult)) {
74 if (ZSTD_isError(zresult)) {
69 PyErr_Format(ZstdError, "zstd decompressor error: %s",
75 PyErr_Format(ZstdError, "zstd decompressor error: %s",
70 ZSTD_getErrorName(zresult));
76 ZSTD_getErrorName(zresult));
71 result = NULL;
77 goto except;
72 goto finally;
73 }
78 }
74
79
75 if (0 == zresult) {
80 if (0 == zresult) {
@@ -79,7 +84,8 b' static PyObject* DecompressionObj_decomp'
79 if (output.pos) {
84 if (output.pos) {
80 if (result) {
85 if (result) {
81 resultSize = PyBytes_GET_SIZE(result);
86 resultSize = PyBytes_GET_SIZE(result);
82 if (-1 == _PyBytes_Resize(&result, resultSize + output.pos)) {
87 if (-1 == safe_pybytes_resize(&result, resultSize + output.pos)) {
88 Py_XDECREF(result);
83 goto except;
89 goto except;
84 }
90 }
85
91
@@ -108,13 +114,14 b' except:'
108
114
109 finally:
115 finally:
110 PyMem_Free(output.dst);
116 PyMem_Free(output.dst);
117 PyBuffer_Release(&source);
111
118
112 return result;
119 return result;
113 }
120 }
114
121
115 static PyMethodDef DecompressionObj_methods[] = {
122 static PyMethodDef DecompressionObj_methods[] = {
116 { "decompress", (PyCFunction)DecompressionObj_decompress,
123 { "decompress", (PyCFunction)DecompressionObj_decompress,
117 METH_VARARGS, PyDoc_STR("decompress data") },
124 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("decompress data") },
118 { NULL, NULL }
125 { NULL, NULL }
119 };
126 };
120
127
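Editor's note: a sketch of the decompressobj() interface whose decompress() method is updated above to accept any contiguous buffer. Obtaining the object via ZstdDecompressor.decompressobj() is assumed from the library's API rather than this hunk.

    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b"streaming payload")

    dctx = zstd.ZstdDecompressor()
    dobj = dctx.decompressobj()

    # memoryview slices work now that the argument is parsed via the buffer protocol.
    out = dobj.decompress(memoryview(frame)[:10])
    out += dobj.decompress(memoryview(frame)[10:])
    assert out == b"streaming payload"
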
@@ -12,54 +12,40 b''
12 extern PyObject* ZstdError;
12 extern PyObject* ZstdError;
13
13
14 /**
14 /**
15 * Ensure the ZSTD_DStream on a ZstdDecompressor is initialized and reset.
15 * Ensure the ZSTD_DCtx on a decompressor is initiated and ready for a new operation.
16 *
16 */
17 * This should be called before starting a decompression operation with a
17 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict) {
18 * ZSTD_DStream on a ZstdDecompressor.
19 */
20 int init_dstream(ZstdDecompressor* decompressor) {
21 void* dictData = NULL;
22 size_t dictSize = 0;
23 size_t zresult;
18 size_t zresult;
24
19
25 /* Simple case of dstream already exists. Just reset it. */
20 ZSTD_DCtx_reset(decompressor->dctx);
26 if (decompressor->dstream) {
21
27 zresult = ZSTD_resetDStream(decompressor->dstream);
22 if (decompressor->maxWindowSize) {
23 zresult = ZSTD_DCtx_setMaxWindowSize(decompressor->dctx, decompressor->maxWindowSize);
28 if (ZSTD_isError(zresult)) {
24 if (ZSTD_isError(zresult)) {
29 PyErr_Format(ZstdError, "could not reset DStream: %s",
25 PyErr_Format(ZstdError, "unable to set max window size: %s",
30 ZSTD_getErrorName(zresult));
26 ZSTD_getErrorName(zresult));
31 return -1;
27 return 1;
32 }
28 }
33
34 return 0;
35 }
29 }
36
30
37 decompressor->dstream = ZSTD_createDStream();
31 zresult = ZSTD_DCtx_setFormat(decompressor->dctx, decompressor->format);
38 if (!decompressor->dstream) {
32 if (ZSTD_isError(zresult)) {
39 PyErr_SetString(ZstdError, "could not create DStream");
33 PyErr_Format(ZstdError, "unable to set decoding format: %s",
40 return -1;
34 ZSTD_getErrorName(zresult));
41 }
35 return 1;
42
43 if (decompressor->dict) {
44 dictData = decompressor->dict->dictData;
45 dictSize = decompressor->dict->dictSize;
46 }
36 }
47
37
48 if (dictData) {
38 if (loadDict && decompressor->dict) {
49 zresult = ZSTD_initDStream_usingDict(decompressor->dstream, dictData, dictSize);
39 if (ensure_ddict(decompressor->dict)) {
50 }
40 return 1;
51 else {
41 }
52 zresult = ZSTD_initDStream(decompressor->dstream);
53 }
54
42
55 if (ZSTD_isError(zresult)) {
43 zresult = ZSTD_DCtx_refDDict(decompressor->dctx, decompressor->dict->ddict);
56 /* Don't leave a reference to an invalid object. */
44 if (ZSTD_isError(zresult)) {
57 ZSTD_freeDStream(decompressor->dstream);
45 PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
58 decompressor->dstream = NULL;
46 ZSTD_getErrorName(zresult));
59
47 return 1;
60 PyErr_Format(ZstdError, "could not initialize DStream: %s",
48 }
61 ZSTD_getErrorName(zresult));
62 return -1;
63 }
49 }
64
50
65 return 0;
51 return 0;
@@ -76,36 +62,46 b' PyDoc_STRVAR(Decompressor__doc__,'
76 static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
62 static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
77 static char* kwlist[] = {
63 static char* kwlist[] = {
78 "dict_data",
64 "dict_data",
65 "max_window_size",
66 "format",
79 NULL
67 NULL
80 };
68 };
81
69
82 ZstdCompressionDict* dict = NULL;
70 ZstdCompressionDict* dict = NULL;
71 size_t maxWindowSize = 0;
72 ZSTD_format_e format = ZSTD_f_zstd1;
83
73
84 self->dctx = NULL;
74 self->dctx = NULL;
85 self->dict = NULL;
75 self->dict = NULL;
86 self->ddict = NULL;
87
76
88 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist,
77 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!II:ZstdDecompressor", kwlist,
89 &ZstdCompressionDictType, &dict)) {
78 &ZstdCompressionDictType, &dict, &maxWindowSize, &format)) {
90 return -1;
79 return -1;
91 }
80 }
92
81
93 /* TODO lazily initialize the reference ZSTD_DCtx on first use since
94 not instances of ZstdDecompressor will use a ZSTD_DCtx. */
95 self->dctx = ZSTD_createDCtx();
82 self->dctx = ZSTD_createDCtx();
96 if (!self->dctx) {
83 if (!self->dctx) {
97 PyErr_NoMemory();
84 PyErr_NoMemory();
98 goto except;
85 goto except;
99 }
86 }
100
87
88 self->maxWindowSize = maxWindowSize;
89 self->format = format;
90
101 if (dict) {
91 if (dict) {
102 self->dict = dict;
92 self->dict = dict;
103 Py_INCREF(dict);
93 Py_INCREF(dict);
104 }
94 }
105
95
96 if (ensure_dctx(self, 1)) {
97 goto except;
98 }
99
106 return 0;
100 return 0;
107
101
108 except:
102 except:
103 Py_CLEAR(self->dict);
104
109 if (self->dctx) {
105 if (self->dctx) {
110 ZSTD_freeDCtx(self->dctx);
106 ZSTD_freeDCtx(self->dctx);
111 self->dctx = NULL;
107 self->dctx = NULL;
@@ -117,16 +113,6 b' except:'
117 static void Decompressor_dealloc(ZstdDecompressor* self) {
113 static void Decompressor_dealloc(ZstdDecompressor* self) {
118 Py_CLEAR(self->dict);
114 Py_CLEAR(self->dict);
119
115
120 if (self->ddict) {
121 ZSTD_freeDDict(self->ddict);
122 self->ddict = NULL;
123 }
124
125 if (self->dstream) {
126 ZSTD_freeDStream(self->dstream);
127 self->dstream = NULL;
128 }
129
130 if (self->dctx) {
116 if (self->dctx) {
131 ZSTD_freeDCtx(self->dctx);
117 ZSTD_freeDCtx(self->dctx);
132 self->dctx = NULL;
118 self->dctx = NULL;
@@ -135,6 +121,20 b' static void Decompressor_dealloc(ZstdDec'
135 PyObject_Del(self);
121 PyObject_Del(self);
136 }
122 }
137
123
124 PyDoc_STRVAR(Decompressor_memory_size__doc__,
125 "memory_size() -- Size of decompression context, in bytes\n"
126 );
127
128 static PyObject* Decompressor_memory_size(ZstdDecompressor* self) {
129 if (self->dctx) {
130 return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->dctx));
131 }
132 else {
133 PyErr_SetString(ZstdError, "no decompressor context found; this should never happen");
134 return NULL;
135 }
136 }
137
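Editor's note: a sketch of the widened ZstdDecompressor constructor and the new memory_size() helper shown above. The keyword names come straight from the kwlist; the import name is assumed.

    import zstandard as zstd

    # max_window_size bounds decompression memory; format selects regular or
    # magicless zstd frames.
    dctx = zstd.ZstdDecompressor(max_window_size=2 ** 27,
                                 format=zstd.FORMAT_ZSTD1)

    print(dctx.memory_size())  # size of the underlying ZSTD_DCtx, in bytes
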
138 PyDoc_STRVAR(Decompressor_copy_stream__doc__,
138 PyDoc_STRVAR(Decompressor_copy_stream__doc__,
139 "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n"
139 "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n"
140 "\n"
140 "\n"
@@ -166,7 +166,7 b' static PyObject* Decompressor_copy_strea'
166 Py_ssize_t totalWrite = 0;
166 Py_ssize_t totalWrite = 0;
167 char* readBuffer;
167 char* readBuffer;
168 Py_ssize_t readSize;
168 Py_ssize_t readSize;
169 PyObject* readResult;
169 PyObject* readResult = NULL;
170 PyObject* res = NULL;
170 PyObject* res = NULL;
171 size_t zresult = 0;
171 size_t zresult = 0;
172 PyObject* writeResult;
172 PyObject* writeResult;
@@ -191,7 +191,7 b' static PyObject* Decompressor_copy_strea'
191 /* Prevent free on uninitialized memory in finally. */
191 /* Prevent free on uninitialized memory in finally. */
192 output.dst = NULL;
192 output.dst = NULL;
193
193
194 if (0 != init_dstream(self)) {
194 if (ensure_dctx(self, 1)) {
195 res = NULL;
195 res = NULL;
196 goto finally;
196 goto finally;
197 }
197 }
@@ -229,7 +229,7 b' static PyObject* Decompressor_copy_strea'
229
229
230 while (input.pos < input.size) {
230 while (input.pos < input.size) {
231 Py_BEGIN_ALLOW_THREADS
231 Py_BEGIN_ALLOW_THREADS
232 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
232 zresult = ZSTD_decompress_generic(self->dctx, &output, &input);
233 Py_END_ALLOW_THREADS
233 Py_END_ALLOW_THREADS
234
234
235 if (ZSTD_isError(zresult)) {
235 if (ZSTD_isError(zresult)) {
@@ -252,6 +252,8 b' static PyObject* Decompressor_copy_strea'
252 output.pos = 0;
252 output.pos = 0;
253 }
253 }
254 }
254 }
255
256 Py_CLEAR(readResult);
255 }
257 }
256
258
257 /* Source stream is exhausted. Finish up. */
259 /* Source stream is exhausted. Finish up. */
@@ -267,6 +269,8 b' finally:'
267 PyMem_Free(output.dst);
269 PyMem_Free(output.dst);
268 }
270 }
269
271
272 Py_XDECREF(readResult);
273
270 return res;
274 return res;
271 }
275 }
272
276
@@ -300,98 +304,114 b' PyObject* Decompressor_decompress(ZstdDe'
300 NULL
304 NULL
301 };
305 };
302
306
303 const char* source;
307 Py_buffer source;
304 Py_ssize_t sourceSize;
305 Py_ssize_t maxOutputSize = 0;
308 Py_ssize_t maxOutputSize = 0;
306 unsigned long long decompressedSize;
309 unsigned long long decompressedSize;
307 size_t destCapacity;
310 size_t destCapacity;
308 PyObject* result = NULL;
311 PyObject* result = NULL;
309 void* dictData = NULL;
310 size_t dictSize = 0;
311 size_t zresult;
312 size_t zresult;
313 ZSTD_outBuffer outBuffer;
314 ZSTD_inBuffer inBuffer;
312
315
313 #if PY_MAJOR_VERSION >= 3
316 #if PY_MAJOR_VERSION >= 3
314 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n:decompress",
317 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|n:decompress",
315 #else
318 #else
316 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n:decompress",
319 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|n:decompress",
317 #endif
320 #endif
318 kwlist, &source, &sourceSize, &maxOutputSize)) {
321 kwlist, &source, &maxOutputSize)) {
319 return NULL;
322 return NULL;
320 }
323 }
321
324
322 if (self->dict) {
325 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
323 dictData = self->dict->dictData;
326 PyErr_SetString(PyExc_ValueError,
324 dictSize = self->dict->dictSize;
327 "data buffer should be contiguous and have at most one dimension");
328 goto finally;
325 }
329 }
326
330
327 if (dictData && !self->ddict) {
331 if (ensure_dctx(self, 1)) {
328 Py_BEGIN_ALLOW_THREADS
332 goto finally;
329 self->ddict = ZSTD_createDDict_byReference(dictData, dictSize);
330 Py_END_ALLOW_THREADS
331
332 if (!self->ddict) {
333 PyErr_SetString(ZstdError, "could not create decompression dict");
334 return NULL;
335 }
336 }
333 }
337
334
338 decompressedSize = ZSTD_getDecompressedSize(source, sourceSize);
335 decompressedSize = ZSTD_getFrameContentSize(source.buf, source.len);
339 /* 0 returned if content size not in the zstd frame header */
336
340 if (0 == decompressedSize) {
337 if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
338 PyErr_SetString(ZstdError, "error determining content size from frame header");
339 goto finally;
340 }
341 /* Special case of empty frame. */
342 else if (0 == decompressedSize) {
343 result = PyBytes_FromStringAndSize("", 0);
344 goto finally;
345 }
346 /* Missing content size in frame header. */
347 if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) {
341 if (0 == maxOutputSize) {
348 if (0 == maxOutputSize) {
342 PyErr_SetString(ZstdError, "input data invalid or missing content size "
349 PyErr_SetString(ZstdError, "could not determine content size in frame header");
343 "in frame header");
350 goto finally;
344 return NULL;
345 }
351 }
346 else {
352
347 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
353 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
348 destCapacity = maxOutputSize;
354 destCapacity = maxOutputSize;
355 decompressedSize = 0;
356 }
357 /* Size is recorded in frame header. */
358 else {
359 assert(SIZE_MAX >= PY_SSIZE_T_MAX);
360 if (decompressedSize > PY_SSIZE_T_MAX) {
361 PyErr_SetString(ZstdError, "frame is too large to decompress on this platform");
362 goto finally;
349 }
363 }
350 }
364
351 else {
365 result = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)decompressedSize);
352 result = PyBytes_FromStringAndSize(NULL, decompressedSize);
366 destCapacity = (size_t)decompressedSize;
353 destCapacity = decompressedSize;
354 }
367 }
355
368
356 if (!result) {
369 if (!result) {
357 return NULL;
370 goto finally;
358 }
371 }
359
372
373 outBuffer.dst = PyBytes_AsString(result);
374 outBuffer.size = destCapacity;
375 outBuffer.pos = 0;
376
377 inBuffer.src = source.buf;
378 inBuffer.size = source.len;
379 inBuffer.pos = 0;
380
360 Py_BEGIN_ALLOW_THREADS
381 Py_BEGIN_ALLOW_THREADS
361 if (self->ddict) {
382 zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer);
362 zresult = ZSTD_decompress_usingDDict(self->dctx,
363 PyBytes_AsString(result), destCapacity,
364 source, sourceSize, self->ddict);
365 }
366 else {
367 zresult = ZSTD_decompressDCtx(self->dctx,
368 PyBytes_AsString(result), destCapacity, source, sourceSize);
369 }
370 Py_END_ALLOW_THREADS
383 Py_END_ALLOW_THREADS
371
384
372 if (ZSTD_isError(zresult)) {
385 if (ZSTD_isError(zresult)) {
373 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
386 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
374 Py_DECREF(result);
387 Py_CLEAR(result);
375 return NULL;
388 goto finally;
376 }
389 }
377 else if (decompressedSize && zresult != decompressedSize) {
390 else if (zresult) {
391 PyErr_Format(ZstdError, "decompression error: did not decompress full frame");
392 Py_CLEAR(result);
393 goto finally;
394 }
395 else if (decompressedSize && outBuffer.pos != decompressedSize) {
378 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
396 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
379 zresult, decompressedSize);
397 zresult, decompressedSize);
380 Py_DECREF(result);
398 Py_CLEAR(result);
381 return NULL;
399 goto finally;
382 }
400 }
383 else if (zresult < destCapacity) {
401 else if (outBuffer.pos < destCapacity) {
384 if (_PyBytes_Resize(&result, zresult)) {
402 if (safe_pybytes_resize(&result, outBuffer.pos)) {
385 Py_DECREF(result);
403 Py_CLEAR(result);
386 return NULL;
404 goto finally;
387 }
405 }
388 }
406 }
389
407
408 finally:
409 PyBuffer_Release(&source);
390 return result;
410 return result;
391 }
411 }
392
412
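A sketch of the one-shot decompress() path this hunk rewrites, assuming `zstd` is the zstandard 0.9.0 module; the `max_output_size` keyword name is taken from the package docs and is only needed when the frame omits its content size:

    import zstandard as zstd

    frame = zstd.ZstdCompressor(write_content_size=True).compress(b"hello")
    dctx = zstd.ZstdDecompressor()
    # The content size is embedded in the frame header, so no size hint is needed.
    assert dctx.decompress(frame) == b"hello"
    # For frames without an embedded content size, an explicit bound is required:
    # dctx.decompress(frame_without_size, max_output_size=1048576)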
393 PyDoc_STRVAR(Decompressor_decompressobj__doc__,
413 PyDoc_STRVAR(Decompressor_decompressobj__doc__,
394 "decompressobj()\n"
414 "decompressobj([write_size=default])\n"
395 "\n"
415 "\n"
396 "Incrementally feed data into a decompressor.\n"
416 "Incrementally feed data into a decompressor.\n"
397 "\n"
417 "\n"
@@ -400,25 +420,43 b' PyDoc_STRVAR(Decompressor_decompressobj_'
400 "callers can swap in the zstd decompressor while using the same API.\n"
420 "callers can swap in the zstd decompressor while using the same API.\n"
401 );
421 );
402
422
403 static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self) {
423 static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
404 ZstdDecompressionObj* result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL);
424 static char* kwlist[] = {
425 "write_size",
426 NULL
427 };
428
429 ZstdDecompressionObj* result = NULL;
430 size_t outSize = ZSTD_DStreamOutSize();
431
432 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|k:decompressobj", kwlist, &outSize)) {
433 return NULL;
434 }
435
436 if (!outSize) {
437 PyErr_SetString(PyExc_ValueError, "write_size must be positive");
438 return NULL;
439 }
440
441 result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL);
405 if (!result) {
442 if (!result) {
406 return NULL;
443 return NULL;
407 }
444 }
408
445
409 if (0 != init_dstream(self)) {
446 if (ensure_dctx(self, 1)) {
410 Py_DECREF(result);
447 Py_DECREF(result);
411 return NULL;
448 return NULL;
412 }
449 }
413
450
414 result->decompressor = self;
451 result->decompressor = self;
415 Py_INCREF(result->decompressor);
452 Py_INCREF(result->decompressor);
453 result->outSize = outSize;
416
454
417 return result;
455 return result;
418 }
456 }
419
457
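A sketch of the decompressobj() API with the new write_size keyword, feeding a frame in two pieces:

    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b"streamed payload")
    dobj = zstd.ZstdDecompressor().decompressobj(write_size=8192)
    # Data may arrive in arbitrary pieces, mirroring zlib's decompressobj API.
    out = dobj.decompress(frame[:10]) + dobj.decompress(frame[10:])
    assert out == b"streamed payload"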
420 PyDoc_STRVAR(Decompressor_read_from__doc__,
458 PyDoc_STRVAR(Decompressor_read_to_iter__doc__,
421 "read_from(reader[, read_size=default, write_size=default, skip_bytes=0])\n"
459 "read_to_iter(reader[, read_size=default, write_size=default, skip_bytes=0])\n"
422 "Read compressed data and return an iterator\n"
460 "Read compressed data and return an iterator\n"
423 "\n"
461 "\n"
424 "Returns an iterator of decompressed data chunks produced from reading from\n"
462 "Returns an iterator of decompressed data chunks produced from reading from\n"
@@ -437,7 +475,7 b' PyDoc_STRVAR(Decompressor_read_from__doc'
437 "the source.\n"
475 "the source.\n"
438 );
476 );
439
477
440 static ZstdDecompressorIterator* Decompressor_read_from(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
478 static ZstdDecompressorIterator* Decompressor_read_to_iter(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
441 static char* kwlist[] = {
479 static char* kwlist[] = {
442 "reader",
480 "reader",
443 "read_size",
481 "read_size",
@@ -452,7 +490,7 b' static ZstdDecompressorIterator* Decompr'
452 ZstdDecompressorIterator* result;
490 ZstdDecompressorIterator* result;
453 size_t skipBytes = 0;
491 size_t skipBytes = 0;
454
492
455 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_from", kwlist,
493 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_to_iter", kwlist,
456 &reader, &inSize, &outSize, &skipBytes)) {
494 &reader, &inSize, &outSize, &skipBytes)) {
457 return NULL;
495 return NULL;
458 }
496 }
@@ -474,14 +512,7 b' static ZstdDecompressorIterator* Decompr'
474 }
512 }
475 else if (1 == PyObject_CheckBuffer(reader)) {
513 else if (1 == PyObject_CheckBuffer(reader)) {
476 /* Object claims it is a buffer. Try to get a handle to it. */
514 /* Object claims it is a buffer. Try to get a handle to it. */
477 result->buffer = PyMem_Malloc(sizeof(Py_buffer));
515 if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
478 if (!result->buffer) {
479 goto except;
480 }
481
482 memset(result->buffer, 0, sizeof(Py_buffer));
483
484 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
485 goto except;
516 goto except;
486 }
517 }
487 }
518 }
@@ -498,7 +529,7 b' static ZstdDecompressorIterator* Decompr'
498 result->outSize = outSize;
529 result->outSize = outSize;
499 result->skipBytes = skipBytes;
530 result->skipBytes = skipBytes;
500
531
501 if (0 != init_dstream(self)) {
532 if (ensure_dctx(self, 1)) {
502 goto except;
533 goto except;
503 }
534 }
504
535
@@ -511,13 +542,6 b' static ZstdDecompressorIterator* Decompr'
511 goto finally;
542 goto finally;
512
543
513 except:
544 except:
514 Py_CLEAR(result->reader);
515
516 if (result->buffer) {
517 PyBuffer_Release(result->buffer);
518 Py_CLEAR(result->buffer);
519 }
520
521 Py_CLEAR(result);
545 Py_CLEAR(result);
522
546
523 finally:
547 finally:
@@ -525,7 +549,62 b' finally:'
525 return result;
549 return result;
526 }
550 }
527
551
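A sketch of read_to_iter(), which yields decompressed chunks while reading from a file-like object (or a buffer-protocol object):

    import io
    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b"x" * 100000)
    dctx = zstd.ZstdDecompressor()
    for chunk in dctx.read_to_iter(io.BytesIO(frame), read_size=16384):
        pass  # each chunk is a bytes object of decompressed data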
528 PyDoc_STRVAR(Decompressor_write_to__doc__,
552 PyDoc_STRVAR(Decompressor_stream_reader__doc__,
553 "stream_reader(source, [read_size=default])\n"
554 "\n"
555 "Obtain an object that behaves like an I/O stream that can be used for\n"
556 "reading decompressed output from an object.\n"
557 "\n"
558 "The source object can be any object with a ``read(size)`` method or that\n"
559 "conforms to the buffer protocol.\n"
560 );
561
562 static ZstdDecompressionReader* Decompressor_stream_reader(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
563 static char* kwlist[] = {
564 "source",
565 "read_size",
566 NULL
567 };
568
569 PyObject* source;
570 size_t readSize = ZSTD_DStreamInSize();
571 ZstdDecompressionReader* result;
572
573 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:stream_reader", kwlist,
574 &source, &readSize)) {
575 return NULL;
576 }
577
578 result = (ZstdDecompressionReader*)PyObject_CallObject((PyObject*)&ZstdDecompressionReaderType, NULL);
579 if (NULL == result) {
580 return NULL;
581 }
582
583 if (PyObject_HasAttrString(source, "read")) {
584 result->reader = source;
585 Py_INCREF(source);
586 result->readSize = readSize;
587 }
588 else if (1 == PyObject_CheckBuffer(source)) {
589 if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
590 Py_CLEAR(result);
591 return NULL;
592 }
593 }
594 else {
595 PyErr_SetString(PyExc_TypeError,
596 "must pass an object with a read() method or that conforms to the buffer protocol");
597 Py_CLEAR(result);
598 return NULL;
599 }
600
601 result->decompressor = self;
602 Py_INCREF(self);
603
604 return result;
605 }
606
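A sketch of the new stream_reader() API, used as a context manager so the reader is properly set up and torn down:

    import io
    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b"streamed data")
    dctx = zstd.ZstdDecompressor()
    with dctx.stream_reader(io.BytesIO(frame)) as reader:
        # read() may need to be called repeatedly until it returns b"".
        data = reader.read(8192)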
607 PyDoc_STRVAR(Decompressor_stream_writer__doc__,
529 "Create a context manager to write decompressed data to an object.\n"
608 "Create a context manager to write decompressed data to an object.\n"
530 "\n"
609 "\n"
531 "The passed object must have a ``write()`` method.\n"
610 "The passed object must have a ``write()`` method.\n"
@@ -538,7 +617,7 b' PyDoc_STRVAR(Decompressor_write_to__doc_'
538 "streaming decompressor.\n"
617 "streaming decompressor.\n"
539 );
618 );
540
619
541 static ZstdDecompressionWriter* Decompressor_write_to(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
620 static ZstdDecompressionWriter* Decompressor_stream_writer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
542 static char* kwlist[] = {
621 static char* kwlist[] = {
543 "writer",
622 "writer",
544 "write_size",
623 "write_size",
@@ -549,7 +628,7 b' static ZstdDecompressionWriter* Decompre'
549 size_t outSize = ZSTD_DStreamOutSize();
628 size_t outSize = ZSTD_DStreamOutSize();
550 ZstdDecompressionWriter* result;
629 ZstdDecompressionWriter* result;
551
630
552 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:write_to", kwlist,
631 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:stream_writer", kwlist,
553 &writer, &outSize)) {
632 &writer, &outSize)) {
554 return NULL;
633 return NULL;
555 }
634 }
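A sketch of stream_writer() (formerly write_to()): compressed bytes go into write(), decompressed bytes land in the wrapped writer:

    import io
    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b"payload")
    dctx = zstd.ZstdDecompressor()
    ofh = io.BytesIO()
    with dctx.stream_writer(ofh) as decompressor:
        decompressor.write(frame)
    assert ofh.getvalue() == b"payload"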
@@ -579,7 +658,7 b' PyDoc_STRVAR(Decompressor_decompress_con'
579 "Decompress a series of chunks using the content dictionary chaining technique\n"
658 "Decompress a series of chunks using the content dictionary chaining technique\n"
580 );
659 );
581
660
582 static PyObject* Decompressor_decompress_content_dict_chain(PyObject* self, PyObject* args, PyObject* kwargs) {
661 static PyObject* Decompressor_decompress_content_dict_chain(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
583 static char* kwlist[] = {
662 static char* kwlist[] = {
584 "frames",
663 "frames",
585 NULL
664 NULL
@@ -592,9 +671,8 b' static PyObject* Decompressor_decompress'
592 PyObject* chunk;
671 PyObject* chunk;
593 char* chunkData;
672 char* chunkData;
594 Py_ssize_t chunkSize;
673 Py_ssize_t chunkSize;
595 ZSTD_DCtx* dctx = NULL;
596 size_t zresult;
674 size_t zresult;
597 ZSTD_frameParams frameParams;
675 ZSTD_frameHeader frameHeader;
598 void* buffer1 = NULL;
676 void* buffer1 = NULL;
599 size_t buffer1Size = 0;
677 size_t buffer1Size = 0;
600 size_t buffer1ContentSize = 0;
678 size_t buffer1ContentSize = 0;
@@ -603,6 +681,8 b' static PyObject* Decompressor_decompress'
603 size_t buffer2ContentSize = 0;
681 size_t buffer2ContentSize = 0;
604 void* destBuffer = NULL;
682 void* destBuffer = NULL;
605 PyObject* result = NULL;
683 PyObject* result = NULL;
684 ZSTD_outBuffer outBuffer;
685 ZSTD_inBuffer inBuffer;
606
686
607 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
687 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
608 kwlist, &PyList_Type, &chunks)) {
688 kwlist, &PyList_Type, &chunks)) {
@@ -624,7 +704,7 b' static PyObject* Decompressor_decompress'
624
704
625 /* We require that all chunks be zstd frames and that they have content size set. */
705 /* We require that all chunks be zstd frames and that they have content size set. */
626 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
706 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
627 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
707 zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize);
628 if (ZSTD_isError(zresult)) {
708 if (ZSTD_isError(zresult)) {
629 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
709 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
630 return NULL;
710 return NULL;
@@ -634,32 +714,56 b' static PyObject* Decompressor_decompress'
634 return NULL;
714 return NULL;
635 }
715 }
636
716
637 if (0 == frameParams.frameContentSize) {
717 if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) {
638 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
718 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
639 return NULL;
719 return NULL;
640 }
720 }
641
721
642 dctx = ZSTD_createDCtx();
722 assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize);
643 if (!dctx) {
723
644 PyErr_NoMemory();
724 /* We check against PY_SSIZE_T_MAX here because we ultimately cast the
725 * result to a Python object and it's length can be no greater than
726 * Py_ssize_t. In theory, we could have an intermediate frame that is
727 * larger. But a) why would this API be used for frames that large b)
728 * it isn't worth the complexity to support. */
729 assert(SIZE_MAX >= PY_SSIZE_T_MAX);
730 if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) {
731 PyErr_SetString(PyExc_ValueError,
732 "chunk 0 is too large to decompress on this platform");
733 return NULL;
734 }
735
736 if (ensure_dctx(self, 0)) {
645 goto finally;
737 goto finally;
646 }
738 }
647
739
648 buffer1Size = frameParams.frameContentSize;
740 buffer1Size = (size_t)frameHeader.frameContentSize;
649 buffer1 = PyMem_Malloc(buffer1Size);
741 buffer1 = PyMem_Malloc(buffer1Size);
650 if (!buffer1) {
742 if (!buffer1) {
651 goto finally;
743 goto finally;
652 }
744 }
653
745
746 outBuffer.dst = buffer1;
747 outBuffer.size = buffer1Size;
748 outBuffer.pos = 0;
749
750 inBuffer.src = chunkData;
751 inBuffer.size = chunkSize;
752 inBuffer.pos = 0;
753
654 Py_BEGIN_ALLOW_THREADS
754 Py_BEGIN_ALLOW_THREADS
655 zresult = ZSTD_decompressDCtx(dctx, buffer1, buffer1Size, chunkData, chunkSize);
755 zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer);
656 Py_END_ALLOW_THREADS
756 Py_END_ALLOW_THREADS
657 if (ZSTD_isError(zresult)) {
757 if (ZSTD_isError(zresult)) {
658 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
758 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
659 goto finally;
759 goto finally;
660 }
760 }
761 else if (zresult) {
762 PyErr_Format(ZstdError, "chunk 0 did not decompress full frame");
763 goto finally;
764 }
661
765
662 buffer1ContentSize = zresult;
766 buffer1ContentSize = outBuffer.pos;
663
767
664 /* Special case of a simple chain. */
768 /* Special case of a simple chain. */
665 if (1 == chunksLen) {
769 if (1 == chunksLen) {
@@ -668,7 +772,7 b' static PyObject* Decompressor_decompress'
668 }
772 }
669
773
670 /* This should ideally look at the next chunk. But this is slightly simpler. */
774 /* This should ideally look at the next chunk. But this is slightly simpler. */
671 buffer2Size = frameParams.frameContentSize;
775 buffer2Size = (size_t)frameHeader.frameContentSize;
672 buffer2 = PyMem_Malloc(buffer2Size);
776 buffer2 = PyMem_Malloc(buffer2Size);
673 if (!buffer2) {
777 if (!buffer2) {
674 goto finally;
778 goto finally;
@@ -688,7 +792,7 b' static PyObject* Decompressor_decompress'
688 }
792 }
689
793
690 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
794 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
691 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
795 zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize);
692 if (ZSTD_isError(zresult)) {
796 if (ZSTD_isError(zresult)) {
693 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
797 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
694 goto finally;
798 goto finally;
@@ -698,18 +802,30 b' static PyObject* Decompressor_decompress'
698 goto finally;
802 goto finally;
699 }
803 }
700
804
701 if (0 == frameParams.frameContentSize) {
805 if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) {
702 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
806 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
703 goto finally;
807 goto finally;
704 }
808 }
705
809
810 assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize);
811
812 if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) {
813 PyErr_Format(PyExc_ValueError,
814 "chunk %zd is too large to decompress on this platform", chunkIndex);
815 goto finally;
816 }
817
818 inBuffer.src = chunkData;
819 inBuffer.size = chunkSize;
820 inBuffer.pos = 0;
821
706 parity = chunkIndex % 2;
822 parity = chunkIndex % 2;
707
823
708 /* This could definitely be abstracted to reduce code duplication. */
824 /* This could definitely be abstracted to reduce code duplication. */
709 if (parity) {
825 if (parity) {
710 /* Resize destination buffer to hold larger content. */
826 /* Resize destination buffer to hold larger content. */
711 if (buffer2Size < frameParams.frameContentSize) {
827 if (buffer2Size < frameHeader.frameContentSize) {
712 buffer2Size = frameParams.frameContentSize;
828 buffer2Size = (size_t)frameHeader.frameContentSize;
713 destBuffer = PyMem_Realloc(buffer2, buffer2Size);
829 destBuffer = PyMem_Realloc(buffer2, buffer2Size);
714 if (!destBuffer) {
830 if (!destBuffer) {
715 goto finally;
831 goto finally;
@@ -718,19 +834,38 b' static PyObject* Decompressor_decompress'
718 }
834 }
719
835
720 Py_BEGIN_ALLOW_THREADS
836 Py_BEGIN_ALLOW_THREADS
721 zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size,
837 zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx,
722 chunkData, chunkSize, buffer1, buffer1ContentSize);
838 buffer1, buffer1ContentSize, ZSTD_dct_rawContent);
839 Py_END_ALLOW_THREADS
840 if (ZSTD_isError(zresult)) {
841 PyErr_Format(ZstdError,
842 "failed to load prefix dictionary at chunk %zd", chunkIndex);
843 goto finally;
844 }
845
846 outBuffer.dst = buffer2;
847 outBuffer.size = buffer2Size;
848 outBuffer.pos = 0;
849
850 Py_BEGIN_ALLOW_THREADS
851 zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer);
723 Py_END_ALLOW_THREADS
852 Py_END_ALLOW_THREADS
724 if (ZSTD_isError(zresult)) {
853 if (ZSTD_isError(zresult)) {
725 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
854 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
726 chunkIndex, ZSTD_getErrorName(zresult));
855 chunkIndex, ZSTD_getErrorName(zresult));
727 goto finally;
856 goto finally;
728 }
857 }
729 buffer2ContentSize = zresult;
858 else if (zresult) {
859 PyErr_Format(ZstdError, "chunk %zd did not decompress full frame",
860 chunkIndex);
861 goto finally;
862 }
863
864 buffer2ContentSize = outBuffer.pos;
730 }
865 }
731 else {
866 else {
732 if (buffer1Size < frameParams.frameContentSize) {
867 if (buffer1Size < frameHeader.frameContentSize) {
733 buffer1Size = frameParams.frameContentSize;
868 buffer1Size = (size_t)frameHeader.frameContentSize;
734 destBuffer = PyMem_Realloc(buffer1, buffer1Size);
869 destBuffer = PyMem_Realloc(buffer1, buffer1Size);
735 if (!destBuffer) {
870 if (!destBuffer) {
736 goto finally;
871 goto finally;
@@ -739,15 +874,34 b' static PyObject* Decompressor_decompress'
739 }
874 }
740
875
741 Py_BEGIN_ALLOW_THREADS
876 Py_BEGIN_ALLOW_THREADS
742 zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size,
877 zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx,
743 chunkData, chunkSize, buffer2, buffer2ContentSize);
878 buffer2, buffer2ContentSize, ZSTD_dct_rawContent);
879 Py_END_ALLOW_THREADS
880 if (ZSTD_isError(zresult)) {
881 PyErr_Format(ZstdError,
882 "failed to load prefix dictionary at chunk %zd", chunkIndex);
883 goto finally;
884 }
885
886 outBuffer.dst = buffer1;
887 outBuffer.size = buffer1Size;
888 outBuffer.pos = 0;
889
890 Py_BEGIN_ALLOW_THREADS
891 zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer);
744 Py_END_ALLOW_THREADS
892 Py_END_ALLOW_THREADS
745 if (ZSTD_isError(zresult)) {
893 if (ZSTD_isError(zresult)) {
746 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
894 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
747 chunkIndex, ZSTD_getErrorName(zresult));
895 chunkIndex, ZSTD_getErrorName(zresult));
748 goto finally;
896 goto finally;
749 }
897 }
750 buffer1ContentSize = zresult;
898 else if (zresult) {
899 PyErr_Format(ZstdError, "chunk %zd did not decompress full frame",
900 chunkIndex);
901 goto finally;
902 }
903
904 buffer1ContentSize = outBuffer.pos;
751 }
905 }
752 }
906 }
753
907
@@ -762,17 +916,13 b' finally:'
762 PyMem_Free(buffer1);
916 PyMem_Free(buffer1);
763 }
917 }
764
918
765 if (dctx) {
766 ZSTD_freeDCtx(dctx);
767 }
768
769 return result;
919 return result;
770 }
920 }
771
921
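A minimal sketch of decompress_content_dict_chain(). A single-frame chain degenerates to plain decompression; longer chains must consist of frames whose dictionary is the previous frame's uncompressed content, and every frame must record its content size:

    import zstandard as zstd

    frame0 = zstd.ZstdCompressor(write_content_size=True).compress(b"base document")
    dctx = zstd.ZstdDecompressor()
    # Returns the uncompressed content of the last frame in the chain.
    assert dctx.decompress_content_dict_chain([frame0]) == b"base document"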
772 typedef struct {
922 typedef struct {
773 void* sourceData;
923 void* sourceData;
774 size_t sourceSize;
924 size_t sourceSize;
775 unsigned long long destSize;
925 size_t destSize;
776 } FramePointer;
926 } FramePointer;
777
927
778 typedef struct {
928 typedef struct {
@@ -806,7 +956,6 b' typedef struct {'
806
956
807 /* Compression state and settings. */
957 /* Compression state and settings. */
808 ZSTD_DCtx* dctx;
958 ZSTD_DCtx* dctx;
809 ZSTD_DDict* ddict;
810 int requireOutputSizes;
959 int requireOutputSizes;
811
960
812 /* Output storage. */
961 /* Output storage. */
@@ -838,6 +987,14 b' static void decompress_worker(WorkerStat'
838 assert(0 == state->destCount);
987 assert(0 == state->destCount);
839 assert(state->endOffset - state->startOffset >= 0);
988 assert(state->endOffset - state->startOffset >= 0);
840
989
990 /* We could get here due to the way work is allocated. Ideally we wouldn't
991 get here. But that would require a bit of a refactor in the caller. */
992 if (state->totalSourceSize > SIZE_MAX) {
993 state->error = WorkerError_memory;
994 state->errorOffset = 0;
995 return;
996 }
997
841 /*
998 /*
842 * We need to allocate a buffer to hold decompressed data. How we do this
999 * We need to allocate a buffer to hold decompressed data. How we do this
843 * depends on what we know about the output. The following scenarios are
1000 * depends on what we know about the output. The following scenarios are
@@ -853,14 +1010,34 b' static void decompress_worker(WorkerStat'
853 /* Resolve ouput segments. */
1010 /* Resolve ouput segments. */
854 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
1011 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
855 FramePointer* fp = &framePointers[frameIndex];
1012 FramePointer* fp = &framePointers[frameIndex];
1013 unsigned long long decompressedSize;
856
1014
857 if (0 == fp->destSize) {
1015 if (0 == fp->destSize) {
858 fp->destSize = ZSTD_getDecompressedSize(fp->sourceData, fp->sourceSize);
1016 decompressedSize = ZSTD_getFrameContentSize(fp->sourceData, fp->sourceSize);
859 if (0 == fp->destSize && state->requireOutputSizes) {
1017
1018 if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
860 state->error = WorkerError_unknownSize;
1019 state->error = WorkerError_unknownSize;
861 state->errorOffset = frameIndex;
1020 state->errorOffset = frameIndex;
862 return;
1021 return;
863 }
1022 }
1023 else if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) {
1024 if (state->requireOutputSizes) {
1025 state->error = WorkerError_unknownSize;
1026 state->errorOffset = frameIndex;
1027 return;
1028 }
1029
1030 /* This will fail the assert for .destSize > 0 below. */
1031 decompressedSize = 0;
1032 }
1033
1034 if (decompressedSize > SIZE_MAX) {
1035 state->error = WorkerError_memory;
1036 state->errorOffset = frameIndex;
1037 return;
1038 }
1039
1040 fp->destSize = (size_t)decompressedSize;
864 }
1041 }
865
1042
866 totalOutputSize += fp->destSize;
1043 totalOutputSize += fp->destSize;
@@ -878,7 +1055,7 b' static void decompress_worker(WorkerStat'
878
1055
879 assert(framePointers[state->startOffset].destSize > 0); /* For now. */
1056 assert(framePointers[state->startOffset].destSize > 0); /* For now. */
880
1057
881 allocationSize = roundpow2(state->totalSourceSize);
1058 allocationSize = roundpow2((size_t)state->totalSourceSize);
882
1059
883 if (framePointers[state->startOffset].destSize > allocationSize) {
1060 if (framePointers[state->startOffset].destSize > allocationSize) {
884 allocationSize = roundpow2(framePointers[state->startOffset].destSize);
1061 allocationSize = roundpow2(framePointers[state->startOffset].destSize);
@@ -902,6 +1079,8 b' static void decompress_worker(WorkerStat'
902 destBuffer->segmentsSize = remainingItems;
1079 destBuffer->segmentsSize = remainingItems;
903
1080
904 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
1081 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
1082 ZSTD_outBuffer outBuffer;
1083 ZSTD_inBuffer inBuffer;
905 const void* source = framePointers[frameIndex].sourceData;
1084 const void* source = framePointers[frameIndex].sourceData;
906 const size_t sourceSize = framePointers[frameIndex].sourceSize;
1085 const size_t sourceSize = framePointers[frameIndex].sourceSize;
907 void* dest;
1086 void* dest;
@@ -956,7 +1135,7 b' static void decompress_worker(WorkerStat'
956 /* Don't take any chances with non-NULL pointers. */
1135 /* Don't take any chances with non-NULL pointers. */
957 memset(destBuffer, 0, sizeof(DestBuffer));
1136 memset(destBuffer, 0, sizeof(DestBuffer));
958
1137
959 allocationSize = roundpow2(state->totalSourceSize);
1138 allocationSize = roundpow2((size_t)state->totalSourceSize);
960
1139
961 if (decompressedSize > allocationSize) {
1140 if (decompressedSize > allocationSize) {
962 allocationSize = roundpow2(decompressedSize);
1141 allocationSize = roundpow2(decompressedSize);
@@ -985,31 +1164,31 b' static void decompress_worker(WorkerStat'
985
1164
986 dest = (char*)destBuffer->dest + destOffset;
1165 dest = (char*)destBuffer->dest + destOffset;
987
1166
988 if (state->ddict) {
1167 outBuffer.dst = dest;
989 zresult = ZSTD_decompress_usingDDict(state->dctx, dest, decompressedSize,
1168 outBuffer.size = decompressedSize;
990 source, sourceSize, state->ddict);
1169 outBuffer.pos = 0;
991 }
992 else {
993 zresult = ZSTD_decompressDCtx(state->dctx, dest, decompressedSize,
994 source, sourceSize);
995 }
996
1170
1171 inBuffer.src = source;
1172 inBuffer.size = sourceSize;
1173 inBuffer.pos = 0;
1174
1175 zresult = ZSTD_decompress_generic(state->dctx, &outBuffer, &inBuffer);
997 if (ZSTD_isError(zresult)) {
1176 if (ZSTD_isError(zresult)) {
998 state->error = WorkerError_zstd;
1177 state->error = WorkerError_zstd;
999 state->zresult = zresult;
1178 state->zresult = zresult;
1000 state->errorOffset = frameIndex;
1179 state->errorOffset = frameIndex;
1001 return;
1180 return;
1002 }
1181 }
1003 else if (zresult != decompressedSize) {
1182 else if (zresult || outBuffer.pos != decompressedSize) {
1004 state->error = WorkerError_sizeMismatch;
1183 state->error = WorkerError_sizeMismatch;
1005 state->zresult = zresult;
1184 state->zresult = outBuffer.pos;
1006 state->errorOffset = frameIndex;
1185 state->errorOffset = frameIndex;
1007 return;
1186 return;
1008 }
1187 }
1009
1188
1010 destBuffer->segments[localOffset].offset = destOffset;
1189 destBuffer->segments[localOffset].offset = destOffset;
1011 destBuffer->segments[localOffset].length = decompressedSize;
1190 destBuffer->segments[localOffset].length = outBuffer.pos;
1012 destOffset += zresult;
1191 destOffset += outBuffer.pos;
1013 localOffset++;
1192 localOffset++;
1014 remainingItems--;
1193 remainingItems--;
1015 }
1194 }
@@ -1027,9 +1206,7 b' static void decompress_worker(WorkerStat'
1027 }
1206 }
1028
1207
1029 ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames,
1208 ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames,
1030 unsigned int threadCount) {
1209 Py_ssize_t threadCount) {
1031 void* dictData = NULL;
1032 size_t dictSize = 0;
1033 Py_ssize_t i = 0;
1210 Py_ssize_t i = 0;
1034 int errored = 0;
1211 int errored = 0;
1035 Py_ssize_t segmentsCount;
1212 Py_ssize_t segmentsCount;
@@ -1039,7 +1216,7 b' ZstdBufferWithSegmentsCollection* decomp'
1039 ZstdBufferWithSegmentsCollection* result = NULL;
1216 ZstdBufferWithSegmentsCollection* result = NULL;
1040 FramePointer* framePointers = frames->frames;
1217 FramePointer* framePointers = frames->frames;
1041 unsigned long long workerBytes = 0;
1218 unsigned long long workerBytes = 0;
1042 int currentThread = 0;
1219 Py_ssize_t currentThread = 0;
1043 Py_ssize_t workerStartOffset = 0;
1220 Py_ssize_t workerStartOffset = 0;
1044 POOL_ctx* pool = NULL;
1221 POOL_ctx* pool = NULL;
1045 WorkerState* workerStates = NULL;
1222 WorkerState* workerStates = NULL;
@@ -1049,24 +1226,14 b' ZstdBufferWithSegmentsCollection* decomp'
1049 assert(threadCount >= 1);
1226 assert(threadCount >= 1);
1050
1227
1051 /* More threads than inputs makes no sense under any conditions. */
1228 /* More threads than inputs makes no sense under any conditions. */
1052 threadCount = frames->framesSize < threadCount ? (unsigned int)frames->framesSize
1229 threadCount = frames->framesSize < threadCount ? frames->framesSize
1053 : threadCount;
1230 : threadCount;
1054
1231
1055 /* TODO lower thread count if input size is too small and threads would just
1232 /* TODO lower thread count if input size is too small and threads would just
1056 add overhead. */
1233 add overhead. */
1057
1234
1058 if (decompressor->dict) {
1235 if (decompressor->dict) {
1059 dictData = decompressor->dict->dictData;
1236 if (ensure_ddict(decompressor->dict)) {
1060 dictSize = decompressor->dict->dictSize;
1061 }
1062
1063 if (dictData && !decompressor->ddict) {
1064 Py_BEGIN_ALLOW_THREADS
1065 decompressor->ddict = ZSTD_createDDict_byReference(dictData, dictSize);
1066 Py_END_ALLOW_THREADS
1067
1068 if (!decompressor->ddict) {
1069 PyErr_SetString(ZstdError, "could not create decompression dict");
1070 return NULL;
1237 return NULL;
1071 }
1238 }
1072 }
1239 }
@@ -1091,7 +1258,14 b' ZstdBufferWithSegmentsCollection* decomp'
1091
1258
1092 bytesPerWorker = frames->compressedSize / threadCount;
1259 bytesPerWorker = frames->compressedSize / threadCount;
1093
1260
1261 if (bytesPerWorker > SIZE_MAX) {
1262 PyErr_SetString(ZstdError, "too much data per worker for this platform");
1263 goto finally;
1264 }
1265
1094 for (i = 0; i < threadCount; i++) {
1266 for (i = 0; i < threadCount; i++) {
1267 size_t zresult;
1268
1095 workerStates[i].dctx = ZSTD_createDCtx();
1269 workerStates[i].dctx = ZSTD_createDCtx();
1096 if (NULL == workerStates[i].dctx) {
1270 if (NULL == workerStates[i].dctx) {
1097 PyErr_NoMemory();
1271 PyErr_NoMemory();
@@ -1100,7 +1274,15 b' ZstdBufferWithSegmentsCollection* decomp'
1100
1274
1101 ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx);
1275 ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx);
1102
1276
1103 workerStates[i].ddict = decompressor->ddict;
1277 if (decompressor->dict) {
1278 zresult = ZSTD_DCtx_refDDict(workerStates[i].dctx, decompressor->dict->ddict);
1279 if (zresult) {
1280 PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
1281 ZSTD_getErrorName(zresult));
1282 goto finally;
1283 }
1284 }
1285
1104 workerStates[i].framePointers = framePointers;
1286 workerStates[i].framePointers = framePointers;
1105 workerStates[i].requireOutputSizes = 1;
1287 workerStates[i].requireOutputSizes = 1;
1106 }
1288 }
@@ -1178,7 +1360,7 b' ZstdBufferWithSegmentsCollection* decomp'
1178 break;
1360 break;
1179
1361
1180 case WorkerError_sizeMismatch:
1362 case WorkerError_sizeMismatch:
1181 PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %llu",
1363 PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %zu",
1182 workerStates[i].errorOffset, workerStates[i].zresult,
1364 workerStates[i].errorOffset, workerStates[i].zresult,
1183 framePointers[workerStates[i].errorOffset].destSize);
1365 framePointers[workerStates[i].errorOffset].destSize);
1184 errored = 1;
1366 errored = 1;
@@ -1388,9 +1570,21 b' static ZstdBufferWithSegmentsCollection*'
1388 decompressedSize = frameSizesP[i];
1570 decompressedSize = frameSizesP[i];
1389 }
1571 }
1390
1572
1573 if (sourceSize > SIZE_MAX) {
1574 PyErr_Format(PyExc_ValueError,
1575 "item %zd is too large for this platform", i);
1576 goto finally;
1577 }
1578
1579 if (decompressedSize > SIZE_MAX) {
1580 PyErr_Format(PyExc_ValueError,
1581 "decompressed size of item %zd is too large for this platform", i);
1582 goto finally;
1583 }
1584
1391 framePointers[i].sourceData = sourceData;
1585 framePointers[i].sourceData = sourceData;
1392 framePointers[i].sourceSize = sourceSize;
1586 framePointers[i].sourceSize = (size_t)sourceSize;
1393 framePointers[i].destSize = decompressedSize;
1587 framePointers[i].destSize = (size_t)decompressedSize;
1394 }
1588 }
1395 }
1589 }
1396 else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) {
1590 else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) {
@@ -1419,17 +1613,33 b' static ZstdBufferWithSegmentsCollection*'
1419 buffer = collection->buffers[i];
1613 buffer = collection->buffers[i];
1420
1614
1421 for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) {
1615 for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) {
1616 unsigned long long decompressedSize = frameSizesP ? frameSizesP[offset] : 0;
1617
1422 if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) {
1618 if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) {
1423 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area",
1619 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area",
1424 offset);
1620 offset);
1425 goto finally;
1621 goto finally;
1426 }
1622 }
1427
1623
1624 if (buffer->segments[segmentIndex].length > SIZE_MAX) {
1625 PyErr_Format(PyExc_ValueError,
1626 "item %zd in buffer %zd is too large for this platform",
1627 segmentIndex, i);
1628 goto finally;
1629 }
1630
1631 if (decompressedSize > SIZE_MAX) {
1632 PyErr_Format(PyExc_ValueError,
1633 "decompressed size of item %zd in buffer %zd is too large for this platform",
1634 segmentIndex, i);
1635 goto finally;
1636 }
1637
1428 totalInputSize += buffer->segments[segmentIndex].length;
1638 totalInputSize += buffer->segments[segmentIndex].length;
1429
1639
1430 framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset;
1640 framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset;
1431 framePointers[offset].sourceSize = buffer->segments[segmentIndex].length;
1641 framePointers[offset].sourceSize = (size_t)buffer->segments[segmentIndex].length;
1432 framePointers[offset].destSize = frameSizesP ? frameSizesP[offset] : 0;
1642 framePointers[offset].destSize = (size_t)decompressedSize;
1433
1643
1434 offset++;
1644 offset++;
1435 }
1645 }
@@ -1450,11 +1660,6 b' static ZstdBufferWithSegmentsCollection*'
1450 goto finally;
1660 goto finally;
1451 }
1661 }
1452
1662
1453 /*
1454 * It is not clear whether Py_buffer.buf is still valid after
1455 * PyBuffer_Release. So, we hold a reference to all Py_buffer instances
1456 * for the duration of the operation.
1457 */
1458 frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer));
1663 frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer));
1459 if (NULL == frameBuffers) {
1664 if (NULL == frameBuffers) {
1460 PyErr_NoMemory();
1665 PyErr_NoMemory();
@@ -1465,6 +1670,8 b' static ZstdBufferWithSegmentsCollection*'
1465
1670
1466 /* Do a pass to assemble info about our input buffers and output sizes. */
1671 /* Do a pass to assemble info about our input buffers and output sizes. */
1467 for (i = 0; i < frameCount; i++) {
1672 for (i = 0; i < frameCount; i++) {
1673 unsigned long long decompressedSize = frameSizesP ? frameSizesP[i] : 0;
1674
1468 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i),
1675 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i),
1469 &frameBuffers[i], PyBUF_CONTIG_RO)) {
1676 &frameBuffers[i], PyBUF_CONTIG_RO)) {
1470 PyErr_Clear();
1677 PyErr_Clear();
@@ -1472,11 +1679,17 b' static ZstdBufferWithSegmentsCollection*'
1472 goto finally;
1679 goto finally;
1473 }
1680 }
1474
1681
1682 if (decompressedSize > SIZE_MAX) {
1683 PyErr_Format(PyExc_ValueError,
1684 "decompressed size of item %zd is too large for this platform", i);
1685 goto finally;
1686 }
1687
1475 totalInputSize += frameBuffers[i].len;
1688 totalInputSize += frameBuffers[i].len;
1476
1689
1477 framePointers[i].sourceData = frameBuffers[i].buf;
1690 framePointers[i].sourceData = frameBuffers[i].buf;
1478 framePointers[i].sourceSize = frameBuffers[i].len;
1691 framePointers[i].sourceSize = frameBuffers[i].len;
1479 framePointers[i].destSize = frameSizesP ? frameSizesP[i] : 0;
1692 framePointers[i].destSize = (size_t)decompressedSize;
1480 }
1693 }
1481 }
1694 }
1482 else {
1695 else {
@@ -1514,16 +1727,26 b' static PyMethodDef Decompressor_methods['
1514 Decompressor_copy_stream__doc__ },
1727 Decompressor_copy_stream__doc__ },
1515 { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS,
1728 { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS,
1516 Decompressor_decompress__doc__ },
1729 Decompressor_decompress__doc__ },
1517 { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_NOARGS,
1730 { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_VARARGS | METH_KEYWORDS,
1518 Decompressor_decompressobj__doc__ },
1731 Decompressor_decompressobj__doc__ },
1519 { "read_from", (PyCFunction)Decompressor_read_from, METH_VARARGS | METH_KEYWORDS,
1732 { "read_to_iter", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS,
1520 Decompressor_read_from__doc__ },
1733 Decompressor_read_to_iter__doc__ },
1521 { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS,
1734 /* TODO Remove deprecated API */
1522 Decompressor_write_to__doc__ },
1735 { "read_from", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS,
1736 Decompressor_read_to_iter__doc__ },
1737 { "stream_reader", (PyCFunction)Decompressor_stream_reader,
1738 METH_VARARGS | METH_KEYWORDS, Decompressor_stream_reader__doc__ },
1739 { "stream_writer", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS,
1740 Decompressor_stream_writer__doc__ },
1741 /* TODO remove deprecated API */
1742 { "write_to", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS,
1743 Decompressor_stream_writer__doc__ },
1523 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
1744 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
1524 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
1745 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
1525 { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer,
1746 { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer,
1526 METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ },
1747 METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ },
1748 { "memory_size", (PyCFunction)Decompressor_memory_size, METH_NOARGS,
1749 Decompressor_memory_size__doc__ },
1527 { NULL, NULL }
1750 { NULL, NULL }
1528 };
1751 };
1529
1752
@@ -20,10 +20,9 b' static void ZstdDecompressorIterator_dea'
20 Py_XDECREF(self->decompressor);
20 Py_XDECREF(self->decompressor);
21 Py_XDECREF(self->reader);
21 Py_XDECREF(self->reader);
22
22
23 if (self->buffer) {
23 if (self->buffer.buf) {
24 PyBuffer_Release(self->buffer);
24 PyBuffer_Release(&self->buffer);
25 PyMem_FREE(self->buffer);
25 memset(&self->buffer, 0, sizeof(self->buffer));
26 self->buffer = NULL;
27 }
26 }
28
27
29 if (self->input.src) {
28 if (self->input.src) {
@@ -45,8 +44,6 b' static DecompressorIteratorResult read_d'
45 DecompressorIteratorResult result;
44 DecompressorIteratorResult result;
46 size_t oldInputPos = self->input.pos;
45 size_t oldInputPos = self->input.pos;
47
46
48 assert(self->decompressor->dstream);
49
50 result.chunk = NULL;
47 result.chunk = NULL;
51
48
52 chunk = PyBytes_FromStringAndSize(NULL, self->outSize);
49 chunk = PyBytes_FromStringAndSize(NULL, self->outSize);
@@ -60,7 +57,7 b' static DecompressorIteratorResult read_d'
60 self->output.pos = 0;
57 self->output.pos = 0;
61
58
62 Py_BEGIN_ALLOW_THREADS
59 Py_BEGIN_ALLOW_THREADS
63 zresult = ZSTD_decompressStream(self->decompressor->dstream, &self->output, &self->input);
60 zresult = ZSTD_decompress_generic(self->decompressor->dctx, &self->output, &self->input);
64 Py_END_ALLOW_THREADS
61 Py_END_ALLOW_THREADS
65
62
66 /* We're done with the pointer. Nullify to prevent anyone from getting a
63 /* We're done with the pointer. Nullify to prevent anyone from getting a
@@ -86,7 +83,8 b' static DecompressorIteratorResult read_d'
86 /* If it produced output data, return it. */
83 /* If it produced output data, return it. */
87 if (self->output.pos) {
84 if (self->output.pos) {
88 if (self->output.pos < self->outSize) {
85 if (self->output.pos < self->outSize) {
89 if (_PyBytes_Resize(&chunk, self->output.pos)) {
86 if (safe_pybytes_resize(&chunk, self->output.pos)) {
87 Py_XDECREF(chunk);
90 result.errored = 1;
88 result.errored = 1;
91 return result;
89 return result;
92 }
90 }
@@ -137,15 +135,15 b' read_from_source:'
137 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
135 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
138 }
136 }
139 else {
137 else {
140 assert(self->buffer && self->buffer->buf);
138 assert(self->buffer.buf);
141
139
142 /* Only support contiguous C arrays for now */
140 /* Only support contiguous C arrays for now */
143 assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL);
141 assert(self->buffer.strides == NULL && self->buffer.suboffsets == NULL);
144 assert(self->buffer->itemsize == 1);
142 assert(self->buffer.itemsize == 1);
145
143
146 /* TODO avoid memcpy() below */
144 /* TODO avoid memcpy() below */
147 readBuffer = (char *)self->buffer->buf + self->bufferOffset;
145 readBuffer = (char *)self->buffer.buf + self->bufferOffset;
148 bufferRemaining = self->buffer->len - self->bufferOffset;
146 bufferRemaining = self->buffer.len - self->bufferOffset;
149 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
147 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
150 self->bufferOffset += readSize;
148 self->bufferOffset += readSize;
151 }
149 }
@@ -13,50 +13,56 b' extern PyObject* ZstdError;'
13 PyDoc_STRVAR(FrameParameters__doc__,
13 PyDoc_STRVAR(FrameParameters__doc__,
14 "FrameParameters: information about a zstd frame");
14 "FrameParameters: information about a zstd frame");
15
15
16 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args) {
16 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs) {
17 const char* source;
17 static char* kwlist[] = {
18 Py_ssize_t sourceSize;
18 "data",
19 ZSTD_frameParams params;
19 NULL
20 };
21
22 Py_buffer source;
23 ZSTD_frameHeader header;
20 FrameParametersObject* result = NULL;
24 FrameParametersObject* result = NULL;
21 size_t zresult;
25 size_t zresult;
22
26
23 #if PY_MAJOR_VERSION >= 3
27 #if PY_MAJOR_VERSION >= 3
24 if (!PyArg_ParseTuple(args, "y#:get_frame_parameters",
28 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:get_frame_parameters",
25 #else
29 #else
26 if (!PyArg_ParseTuple(args, "s#:get_frame_parameters",
30 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:get_frame_parameters",
27 #endif
31 #endif
28 &source, &sourceSize)) {
32 kwlist, &source)) {
29 return NULL;
33 return NULL;
30 }
34 }
31
35
32 /* Needed for Python 2 to reject unicode */
36 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
33 if (!PyBytes_Check(PyTuple_GET_ITEM(args, 0))) {
37 PyErr_SetString(PyExc_ValueError,
34 PyErr_SetString(PyExc_TypeError, "argument must be bytes");
38 "data buffer should be contiguous and have at most one dimension");
35 return NULL;
39 goto finally;
36 }
40 }
37
41
38 zresult = ZSTD_getFrameParams(&params, (void*)source, sourceSize);
42 zresult = ZSTD_getFrameHeader(&header, source.buf, source.len);
39
43
40 if (ZSTD_isError(zresult)) {
44 if (ZSTD_isError(zresult)) {
41 PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult));
45 PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult));
42 return NULL;
46 goto finally;
43 }
47 }
44
48
45 if (zresult) {
49 if (zresult) {
46 PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult);
50 PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult);
47 return NULL;
51 goto finally;
48 }
52 }
49
53
50 result = PyObject_New(FrameParametersObject, &FrameParametersType);
54 result = PyObject_New(FrameParametersObject, &FrameParametersType);
51 if (!result) {
55 if (!result) {
52 return NULL;
56 goto finally;
53 }
57 }
54
58
55 result->frameContentSize = params.frameContentSize;
59 result->frameContentSize = header.frameContentSize;
56 result->windowSize = params.windowSize;
60 result->windowSize = header.windowSize;
57 result->dictID = params.dictID;
61 result->dictID = header.dictID;
58 result->checksumFlag = params.checksumFlag ? 1 : 0;
62 result->checksumFlag = header.checksumFlag ? 1 : 0;
59
63
64 finally:
65 PyBuffer_Release(&source);
60 return result;
66 return result;
61 }
67 }
62
68
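A sketch of the buffer-protocol-based get_frame_parameters() shown above; the printed attributes correspond to the FrameParametersObject members:

    import zstandard as zstd

    frame = zstd.ZstdCompressor(write_content_size=True).compress(b"inspect me")
    params = zstd.get_frame_parameters(frame)
    # window_size is now a 64-bit value to match ZSTD_frameHeader.
    print(params.content_size, params.window_size, params.dict_id)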
@@ -68,7 +74,7 b' static PyMemberDef FrameParameters_membe'
68 { "content_size", T_ULONGLONG,
74 { "content_size", T_ULONGLONG,
69 offsetof(FrameParametersObject, frameContentSize), READONLY,
75 offsetof(FrameParametersObject, frameContentSize), READONLY,
70 "frame content size" },
76 "frame content size" },
71 { "window_size", T_UINT,
77 { "window_size", T_ULONGLONG,
72 offsetof(FrameParametersObject, windowSize), READONLY,
78 offsetof(FrameParametersObject, windowSize), READONLY,
73 "window size" },
79 "window size" },
74 { "dict_id", T_UINT,
80 { "dict_id", T_UINT,
@@ -12,12 +12,10 b''
12
12
13 #define ZSTD_STATIC_LINKING_ONLY
13 #define ZSTD_STATIC_LINKING_ONLY
14 #define ZDICT_STATIC_LINKING_ONLY
14 #define ZDICT_STATIC_LINKING_ONLY
15 #include "mem.h"
15 #include <zstd.h>
16 #include "zstd.h"
16 #include <zdict.h>
17 #include "zdict.h"
18 #include "zstdmt_compress.h"
19
17
20 #define PYTHON_ZSTANDARD_VERSION "0.8.1"
18 #define PYTHON_ZSTANDARD_VERSION "0.9.0"
21
19
22 typedef enum {
20 typedef enum {
23 compressorobj_flush_finish,
21 compressorobj_flush_finish,
@@ -25,22 +23,38 b' typedef enum {'
25 } CompressorObj_Flush;
23 } CompressorObj_Flush;
26
24
27 /*
25 /*
28 Represents a CompressionParameters type.
26 Represents a ZstdCompressionParameters type.
29
27
30 This type is basically a wrapper around ZSTD_compressionParameters.
28 This type holds all the low-level compression parameters that can be set.
31 */
29 */
32 typedef struct {
30 typedef struct {
33 PyObject_HEAD
31 PyObject_HEAD
32 ZSTD_CCtx_params* params;
33 unsigned format;
34 int compressionLevel;
34 unsigned windowLog;
35 unsigned windowLog;
36 unsigned hashLog;
35 unsigned chainLog;
37 unsigned chainLog;
36 unsigned hashLog;
37 unsigned searchLog;
38 unsigned searchLog;
38 unsigned searchLength;
39 unsigned minMatch;
39 unsigned targetLength;
40 unsigned targetLength;
40 ZSTD_strategy strategy;
41 unsigned compressionStrategy;
41 } CompressionParametersObject;
42 unsigned contentSizeFlag;
43 unsigned checksumFlag;
44 unsigned dictIDFlag;
45 unsigned threads;
46 unsigned jobSize;
47 unsigned overlapSizeLog;
48 unsigned compressLiterals;
49 unsigned forceMaxWindow;
50 unsigned enableLongDistanceMatching;
51 unsigned ldmHashLog;
52 unsigned ldmMinMatch;
53 unsigned ldmBucketSizeLog;
54 unsigned ldmHashEveryLog;
55 } ZstdCompressionParametersObject;
42
56
43 extern PyTypeObject CompressionParametersType;
57 extern PyTypeObject ZstdCompressionParametersType;
44
58
45 /*
59 /*
46 Represents a FrameParameters type.
60 Represents a FrameParameters type.
@@ -50,7 +64,7 b' extern PyTypeObject CompressionParameter'
50 typedef struct {
64 typedef struct {
51 PyObject_HEAD
65 PyObject_HEAD
52 unsigned long long frameContentSize;
66 unsigned long long frameContentSize;
53 unsigned windowSize;
67 unsigned long long windowSize;
54 unsigned dictID;
68 unsigned dictID;
55 char checksumFlag;
69 char checksumFlag;
56 } FrameParametersObject;
70 } FrameParametersObject;
@@ -69,10 +83,14 b' typedef struct {'
69 void* dictData;
83 void* dictData;
70 /* Size of dictionary data. */
84 /* Size of dictionary data. */
71 size_t dictSize;
85 size_t dictSize;
86 ZSTD_dictContentType_e dictType;
72 /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */
87 /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */
73 unsigned k;
88 unsigned k;
74 /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */
89 /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */
75 unsigned d;
90 unsigned d;
91 /* Digested dictionary, suitable for reuse. */
92 ZSTD_CDict* cdict;
93 ZSTD_DDict* ddict;
76 } ZstdCompressionDict;
94 } ZstdCompressionDict;
77
95
78 extern PyTypeObject ZstdCompressionDictType;
96 extern PyTypeObject ZstdCompressionDictType;
@@ -83,29 +101,15 b' extern PyTypeObject ZstdCompressionDictT'
83 typedef struct {
101 typedef struct {
84 PyObject_HEAD
102 PyObject_HEAD
85
103
86 /* Configured compression level. Should be always set. */
87 int compressionLevel;
88 /* Number of threads to use for operations. */
104 /* Number of threads to use for operations. */
89 unsigned int threads;
105 unsigned int threads;
90 /* Pointer to compression dictionary to use. NULL if not using dictionary
106 /* Pointer to compression dictionary to use. NULL if not using dictionary
91 compression. */
107 compression. */
92 ZstdCompressionDict* dict;
108 ZstdCompressionDict* dict;
93 /* Compression context to use. Populated during object construction. NULL
109 /* Compression context to use. Populated during object construction. */
94 if using multi-threaded compression. */
95 ZSTD_CCtx* cctx;
110 ZSTD_CCtx* cctx;
96 /* Multi-threaded compression context to use. Populated during object
111 /* Compression parameters in use. */
97 construction. NULL if not using multi-threaded compression. */
112 ZSTD_CCtx_params* params;
98 ZSTDMT_CCtx* mtcctx;
99 /* Digest compression dictionary. NULL initially. Populated on first use. */
100 ZSTD_CDict* cdict;
101 /* Low-level compression parameter control. NULL unless passed to
102 constructor. Takes precedence over `compressionLevel` if defined. */
103 CompressionParametersObject* cparams;
104 /* Controls zstd frame options. */
105 ZSTD_frameParameters fparams;
106 /* Holds state for streaming compression. Shared across all invocation.
107 Populated on first use. */
108 ZSTD_CStream* cstream;
109 } ZstdCompressor;
113 } ZstdCompressor;
110
114
111 extern PyTypeObject ZstdCompressorType;
115 extern PyTypeObject ZstdCompressorType;
@@ -125,9 +129,10 b' typedef struct {'
125
129
126 ZstdCompressor* compressor;
130 ZstdCompressor* compressor;
127 PyObject* writer;
131 PyObject* writer;
128 Py_ssize_t sourceSize;
132 unsigned long long sourceSize;
129 size_t outSize;
133 size_t outSize;
130 int entered;
134 int entered;
135 unsigned long long bytesCompressed;
131 } ZstdCompressionWriter;
136 } ZstdCompressionWriter;
132
137
133 extern PyTypeObject ZstdCompressionWriterType;
138 extern PyTypeObject ZstdCompressionWriterType;
@@ -137,9 +142,8 b' typedef struct {'
137
142
138 ZstdCompressor* compressor;
143 ZstdCompressor* compressor;
139 PyObject* reader;
144 PyObject* reader;
140 Py_buffer* buffer;
145 Py_buffer buffer;
141 Py_ssize_t bufferOffset;
146 Py_ssize_t bufferOffset;
142 Py_ssize_t sourceSize;
143 size_t inSize;
147 size_t inSize;
144 size_t outSize;
148 size_t outSize;
145
149
@@ -155,11 +159,32 b' extern PyTypeObject ZstdCompressorIterat'
155 typedef struct {
159 typedef struct {
156 PyObject_HEAD
160 PyObject_HEAD
157
161
162 ZstdCompressor* compressor;
163 PyObject* reader;
164 Py_buffer buffer;
165 unsigned long long sourceSize;
166 size_t readSize;
167
168 int entered;
169 int closed;
170 unsigned long long bytesCompressed;
171
172 ZSTD_inBuffer input;
173 ZSTD_outBuffer output;
174 int finishedInput;
175 int finishedOutput;
176 PyObject* readResult;
177 } ZstdCompressionReader;
178
179 extern PyTypeObject ZstdCompressionReaderType;
180
181 typedef struct {
182 PyObject_HEAD
183
158 ZSTD_DCtx* dctx;
184 ZSTD_DCtx* dctx;
159
160 ZstdCompressionDict* dict;
185 ZstdCompressionDict* dict;
161 ZSTD_DDict* ddict;
186 size_t maxWindowSize;
162 ZSTD_DStream* dstream;
187 ZSTD_format_e format;
163 } ZstdDecompressor;
188 } ZstdDecompressor;
164
189
165 extern PyTypeObject ZstdDecompressorType;
190 extern PyTypeObject ZstdDecompressorType;
@@ -168,6 +193,7 b' typedef struct {'
168 PyObject_HEAD
193 PyObject_HEAD
169
194
170 ZstdDecompressor* decompressor;
195 ZstdDecompressor* decompressor;
196 size_t outSize;
171 int finished;
197 int finished;
172 } ZstdDecompressionObj;
198 } ZstdDecompressionObj;
173
199
@@ -176,6 +202,40 b' extern PyTypeObject ZstdDecompressionObj'
176 typedef struct {
202 typedef struct {
177 PyObject_HEAD
203 PyObject_HEAD
178
204
205 /* Parent decompressor to which this object is associated. */
206 ZstdDecompressor* decompressor;
207 /* Object to read() from (if reading from a stream). */
208 PyObject* reader;
209 /* Size for read() operations on reader. */
210 size_t readSize;
211 /* Buffer to read from (if reading from a buffer). */
212 Py_buffer buffer;
213
214 /* Whether the context manager is active. */
215 int entered;
216 /* Whether we've closed the stream. */
217 int closed;
218
219 /* Number of bytes decompressed and returned to user. */
220 unsigned long long bytesDecompressed;
221
222 /* Tracks data going into decompressor. */
223 ZSTD_inBuffer input;
224
225 /* Holds output from read() operation on reader. */
226 PyObject* readResult;
227
228 /* Whether all input has been sent to the decompressor. */
229 int finishedInput;
230 /* Whether all output has been flushed from the decompressor. */
231 int finishedOutput;
232 } ZstdDecompressionReader;
233
234 extern PyTypeObject ZstdDecompressionReaderType;
235
236 typedef struct {
237 PyObject_HEAD
238
179 ZstdDecompressor* decompressor;
239 ZstdDecompressor* decompressor;
180 PyObject* writer;
240 PyObject* writer;
181 size_t outSize;
241 size_t outSize;
@@ -189,7 +249,7 b' typedef struct {'
189
249
190 ZstdDecompressor* decompressor;
250 ZstdDecompressor* decompressor;
191 PyObject* reader;
251 PyObject* reader;
192 Py_buffer* buffer;
252 Py_buffer buffer;
193 Py_ssize_t bufferOffset;
253 Py_ssize_t bufferOffset;
194 size_t inSize;
254 size_t inSize;
195 size_t outSize;
255 size_t outSize;
@@ -209,6 +269,9 b' typedef struct {'
209 } DecompressorIteratorResult;
269 } DecompressorIteratorResult;
210
270
211 typedef struct {
271 typedef struct {
272 /* The public API is that these are 64-bit unsigned integers. So these can't
273 * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may
274 * be nonsensical for this platform. */
212 unsigned long long offset;
275 unsigned long long offset;
213 unsigned long long length;
276 unsigned long long length;
214 } BufferSegment;
277 } BufferSegment;
@@ -270,16 +333,14 b' typedef struct {'
270
333
271 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
334 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
272
335
273 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams);
336 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value);
274 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args);
337 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj);
275 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args);
338 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs);
276 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args);
339 int ensure_ddict(ZstdCompressionDict* dict);
277 int init_cstream(ZstdCompressor* compressor, unsigned long long sourceSize);
340 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict);
278 int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize);
279 int init_dstream(ZstdDecompressor* decompressor);
280 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
341 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
281 ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
282 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize);
342 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize);
283 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*);
343 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*);
284 int cpu_count(void);
344 int cpu_count(void);
285 size_t roundpow2(size_t);
345 size_t roundpow2(size_t);
346 int safe_pybytes_resize(PyObject** obj, Py_ssize_t size);
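The header changes above collapse the separate streaming state (ZSTD_CStream, ZSTDMT_CCtx, cached ZSTD_CDict) into a single ZSTD_CCtx plus ZSTD_CCtx_params, and add ZstdCompressionReader/ZstdDecompressionReader structs backing the new read()-style streaming objects. A minimal Python sketch of how those readers are driven, assuming the stream_reader() API exercised by the tests later in this change (the decompressor side mirrors the compressor side; sizes and data are illustrative):

    import io
    import zstandard as zstd

    data = b'data' * 1024
    cctx = zstd.ZstdCompressor()
    with cctx.stream_reader(io.BytesIO(data), size=len(data)) as reader:
        # Pull compressed bytes until the frame is exhausted.
        frame = b''
        while True:
            chunk = reader.read(8192)
            if not chunk:
                break
            frame += chunk

    dctx = zstd.ZstdDecompressor()
    out = b''
    with dctx.stream_reader(io.BytesIO(frame)) as reader:
        while True:
            chunk = reader.read(8192)
            if not chunk:
                break
            out += chunk
    assert out == data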
@@ -27,6 +27,11 b" SOURCES = ['zstd/%s' % p for p in ("
27 'compress/fse_compress.c',
27 'compress/fse_compress.c',
28 'compress/huf_compress.c',
28 'compress/huf_compress.c',
29 'compress/zstd_compress.c',
29 'compress/zstd_compress.c',
30 'compress/zstd_double_fast.c',
31 'compress/zstd_fast.c',
32 'compress/zstd_lazy.c',
33 'compress/zstd_ldm.c',
34 'compress/zstd_opt.c',
30 'compress/zstdmt_compress.c',
35 'compress/zstdmt_compress.c',
31 'decompress/huf_decompress.c',
36 'decompress/huf_decompress.c',
32 'decompress/zstd_decompress.c',
37 'decompress/zstd_decompress.c',
@@ -38,7 +43,6 b" SOURCES = ['zstd/%s' % p for p in ("
38 # Headers whose preprocessed output will be fed into cdef().
43 # Headers whose preprocessed output will be fed into cdef().
39 HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
44 HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
40 ('zstd.h',),
45 ('zstd.h',),
41 ('compress', 'zstdmt_compress.h'),
42 ('dictBuilder', 'zdict.h'),
46 ('dictBuilder', 'zdict.h'),
43 )]
47 )]
44
48
@@ -80,7 +84,9 b' else:'
80 def preprocess(path):
84 def preprocess(path):
81 with open(path, 'rb') as fh:
85 with open(path, 'rb') as fh:
82 lines = []
86 lines = []
83 for l in fh:
87 it = iter(fh)
88
89 for l in it:
84 # zstd.h includes <stddef.h>, which is also included by cffi's
90 # zstd.h includes <stddef.h>, which is also included by cffi's
85 # boilerplate. This can lead to duplicate declarations. So we strip
91 # boilerplate. This can lead to duplicate declarations. So we strip
86 # this include from the preprocessor invocation.
92 # this include from the preprocessor invocation.
@@ -137,18 +143,21 b' def normalize_output(output):'
137
143
138
144
139 ffi = cffi.FFI()
145 ffi = cffi.FFI()
146 # zstd.h uses a possibly undefined MIN(). Define it until
147 # https://github.com/facebook/zstd/issues/976 is fixed.
140 # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning
148 # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning
141 # when cffi uses the function. Since we statically link against zstd, even
149 # when cffi uses the function. Since we statically link against zstd, even
142 # if we use the deprecated functions it shouldn't be a huge problem.
150 # if we use the deprecated functions it shouldn't be a huge problem.
143 ffi.set_source('_zstd_cffi', '''
151 ffi.set_source('_zstd_cffi', '''
144 #include "mem.h"
152 #define MIN(a,b) ((a)<(b) ? (a) : (b))
145 #define ZSTD_STATIC_LINKING_ONLY
153 #define ZSTD_STATIC_LINKING_ONLY
146 #include "zstd.h"
154 #include <zstd.h>
147 #define ZDICT_STATIC_LINKING_ONLY
155 #define ZDICT_STATIC_LINKING_ONLY
148 #define ZDICT_DISABLE_DEPRECATE_WARNINGS
156 #define ZDICT_DISABLE_DEPRECATE_WARNINGS
149 #include "zdict.h"
157 #include <zdict.h>
150 #include "zstdmt_compress.h"
158 ''', sources=SOURCES,
151 ''', sources=SOURCES, include_dirs=INCLUDE_DIRS)
159 include_dirs=INCLUDE_DIRS,
160 extra_compile_args=['-DZSTD_MULTITHREAD'])
152
161
153 DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
162 DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
154
163
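With the MIN() shim added and the zstdmt header dropped from the preprocessed inputs, the cffi bindings now build from zstd.h and zdict.h alone, with ZSTD_MULTITHREAD enabled via extra_compile_args. A brief sketch of how the resulting ffi object is consumed; the distutils_extension() call mirrors setup.py below, while building directly via compile() is an assumption about the standard cffi workflow rather than something this change adds:

    import make_cffi

    # setup.py appends this Extension so _zstd_cffi is built alongside the
    # C extension.
    ext = make_cffi.ffi.distutils_extension()

    # Alternatively (assumed standard cffi API), compile the module in place.
    make_cffi.ffi.compile()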
@@ -5,6 +5,7 b''
5 # This software may be modified and distributed under the terms
5 # This software may be modified and distributed under the terms
6 # of the BSD license. See the LICENSE file for details.
6 # of the BSD license. See the LICENSE file for details.
7
7
8 import os
8 import sys
9 import sys
9 from setuptools import setup
10 from setuptools import setup
10
11
@@ -16,14 +17,32 b' except ImportError:'
16 import setup_zstd
17 import setup_zstd
17
18
18 SUPPORT_LEGACY = False
19 SUPPORT_LEGACY = False
20 SYSTEM_ZSTD = False
21 WARNINGS_AS_ERRORS = False
19
22
20 if "--legacy" in sys.argv:
23 if os.environ.get('ZSTD_WARNINGS_AS_ERRORS', ''):
24 WARNINGS_AS_ERRORS = True
25
26 if '--legacy' in sys.argv:
21 SUPPORT_LEGACY = True
27 SUPPORT_LEGACY = True
22 sys.argv.remove("--legacy")
28 sys.argv.remove('--legacy')
29
30 if '--system-zstd' in sys.argv:
31 SYSTEM_ZSTD = True
32 sys.argv.remove('--system-zstd')
33
34 if '--warnings-as-errors' in sys.argv:
35 WARNINGS_AS_ERRORS = True
36 sys.argv.remove('--warnings-as-errors')
23
37
24 # Code for obtaining the Extension instance is in its own module to
38 # Code for obtaining the Extension instance is in its own module to
25 # facilitate reuse in other projects.
39 # facilitate reuse in other projects.
26 extensions = [setup_zstd.get_c_extension(SUPPORT_LEGACY, 'zstd')]
40 extensions = [
41 setup_zstd.get_c_extension(name='zstd',
42 support_legacy=SUPPORT_LEGACY,
43 system_zstd=SYSTEM_ZSTD,
44 warnings_as_errors=WARNINGS_AS_ERRORS),
45 ]
27
46
28 install_requires = []
47 install_requires = []
29
48
@@ -31,8 +50,11 b' if cffi:'
31 import make_cffi
50 import make_cffi
32 extensions.append(make_cffi.ffi.distutils_extension())
51 extensions.append(make_cffi.ffi.distutils_extension())
33
52
34 # Need change in 1.8 for ffi.from_buffer() behavior.
53 # Need change in 1.10 for ffi.from_buffer() to handle all buffer types
35 install_requires.append('cffi>=1.8')
54 # (like memoryview).
55 # Need feature in 1.11 for ffi.gc() to declare size of objects so we avoid
56 # garbage collection pitfalls.
57 install_requires.append('cffi>=1.11')
36
58
37 version = None
59 version = None
38
60
@@ -62,14 +84,13 b' setup('
62 'Intended Audience :: Developers',
84 'Intended Audience :: Developers',
63 'License :: OSI Approved :: BSD License',
85 'License :: OSI Approved :: BSD License',
64 'Programming Language :: C',
86 'Programming Language :: C',
65 'Programming Language :: Python :: 2.6',
66 'Programming Language :: Python :: 2.7',
87 'Programming Language :: Python :: 2.7',
67 'Programming Language :: Python :: 3.3',
68 'Programming Language :: Python :: 3.4',
88 'Programming Language :: Python :: 3.4',
69 'Programming Language :: Python :: 3.5',
89 'Programming Language :: Python :: 3.5',
70 'Programming Language :: Python :: 3.6',
90 'Programming Language :: Python :: 3.6',
71 ],
91 ],
72 keywords='zstandard zstd compression',
92 keywords='zstandard zstd compression',
93 packages=['zstandard'],
73 ext_modules=extensions,
94 ext_modules=extensions,
74 test_suite='tests',
95 test_suite='tests',
75 install_requires=install_requires,
96 install_requires=install_requires,
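Because the Extension construction lives in setup_zstd.get_c_extension() specifically so other projects can reuse it, here is a minimal sketch of that reuse with the new keyword arguments (the project and extension names are hypothetical):

    from setuptools import setup

    import setup_zstd

    setup(
        name='myproject',
        ext_modules=[
            setup_zstd.get_c_extension(name='myproject.zstd',
                                       support_legacy=True,
                                       system_zstd=False,
                                       warnings_as_errors=False),
        ],
    )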
@@ -4,7 +4,10 b''
4 # This software may be modified and distributed under the terms
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
5 # of the BSD license. See the LICENSE file for details.
6
6
7 import distutils.ccompiler
7 import os
8 import os
9 import sys
10
8 from distutils.extension import Extension
11 from distutils.extension import Extension
9
12
10
13
@@ -19,6 +22,11 b" zstd_sources = ['zstd/%s' % p for p in ("
19 'compress/fse_compress.c',
22 'compress/fse_compress.c',
20 'compress/huf_compress.c',
23 'compress/huf_compress.c',
21 'compress/zstd_compress.c',
24 'compress/zstd_compress.c',
25 'compress/zstd_double_fast.c',
26 'compress/zstd_fast.c',
27 'compress/zstd_lazy.c',
28 'compress/zstd_ldm.c',
29 'compress/zstd_opt.c',
22 'compress/zstdmt_compress.c',
30 'compress/zstdmt_compress.c',
23 'decompress/huf_decompress.c',
31 'decompress/huf_decompress.c',
24 'decompress/zstd_decompress.c',
32 'decompress/zstd_decompress.c',
@@ -41,7 +49,6 b" zstd_sources_legacy = ['zstd/%s' % p for"
41 )]
49 )]
42
50
43 zstd_includes = [
51 zstd_includes = [
44 'c-ext',
45 'zstd',
52 'zstd',
46 'zstd/common',
53 'zstd/common',
47 'zstd/compress',
54 'zstd/compress',
@@ -54,7 +61,14 b' zstd_includes_legacy = ['
54 'zstd/legacy',
61 'zstd/legacy',
55 ]
62 ]
56
63
64 ext_includes = [
65 'c-ext',
66 'zstd/common',
67 ]
68
57 ext_sources = [
69 ext_sources = [
70 'zstd/common/pool.c',
71 'zstd/common/threading.c',
58 'zstd.c',
72 'zstd.c',
59 'c-ext/bufferutil.c',
73 'c-ext/bufferutil.c',
60 'c-ext/compressiondict.c',
74 'c-ext/compressiondict.c',
@@ -62,11 +76,13 b' ext_sources = ['
62 'c-ext/compressor.c',
76 'c-ext/compressor.c',
63 'c-ext/compressoriterator.c',
77 'c-ext/compressoriterator.c',
64 'c-ext/compressionparams.c',
78 'c-ext/compressionparams.c',
79 'c-ext/compressionreader.c',
65 'c-ext/compressionwriter.c',
80 'c-ext/compressionwriter.c',
66 'c-ext/constants.c',
81 'c-ext/constants.c',
67 'c-ext/decompressobj.c',
82 'c-ext/decompressobj.c',
68 'c-ext/decompressor.c',
83 'c-ext/decompressor.c',
69 'c-ext/decompressoriterator.c',
84 'c-ext/decompressoriterator.c',
85 'c-ext/decompressionreader.c',
70 'c-ext/decompressionwriter.c',
86 'c-ext/decompressionwriter.c',
71 'c-ext/frameparams.c',
87 'c-ext/frameparams.c',
72 ]
88 ]
@@ -76,27 +92,67 b' zstd_depends = ['
76 ]
92 ]
77
93
78
94
79 def get_c_extension(support_legacy=False, name='zstd'):
95 def get_c_extension(support_legacy=False, system_zstd=False, name='zstd',
96 warnings_as_errors=False):
80 """Obtain a distutils.extension.Extension for the C extension."""
97 """Obtain a distutils.extension.Extension for the C extension."""
81 root = os.path.abspath(os.path.dirname(__file__))
98 root = os.path.abspath(os.path.dirname(__file__))
82
99
83 sources = [os.path.join(root, p) for p in zstd_sources + ext_sources]
100 sources = set([os.path.join(root, p) for p in ext_sources])
84 if support_legacy:
101 if not system_zstd:
85 sources.extend([os.path.join(root, p) for p in zstd_sources_legacy])
102 sources.update([os.path.join(root, p) for p in zstd_sources])
103 if support_legacy:
104 sources.update([os.path.join(root, p) for p in zstd_sources_legacy])
105 sources = list(sources)
86
106
87 include_dirs = [os.path.join(root, d) for d in zstd_includes]
107 include_dirs = set([os.path.join(root, d) for d in ext_includes])
88 if support_legacy:
108 if not system_zstd:
89 include_dirs.extend([os.path.join(root, d) for d in zstd_includes_legacy])
109 include_dirs.update([os.path.join(root, d) for d in zstd_includes])
110 if support_legacy:
111 include_dirs.update([os.path.join(root, d) for d in zstd_includes_legacy])
112 include_dirs = list(include_dirs)
90
113
91 depends = [os.path.join(root, p) for p in zstd_depends]
114 depends = [os.path.join(root, p) for p in zstd_depends]
92
115
116 compiler = distutils.ccompiler.new_compiler()
117
118 # Needed for MSVC.
119 if hasattr(compiler, 'initialize'):
120 compiler.initialize()
121
122 if compiler.compiler_type == 'unix':
123 compiler_type = 'unix'
124 elif compiler.compiler_type == 'msvc':
125 compiler_type = 'msvc'
126 else:
127 raise Exception('unhandled compiler type: %s' %
128 compiler.compiler_type)
129
93 extra_args = ['-DZSTD_MULTITHREAD']
130 extra_args = ['-DZSTD_MULTITHREAD']
94
131
95 if support_legacy:
132 if not system_zstd:
133 extra_args.append('-DZSTDLIB_VISIBILITY=')
134 extra_args.append('-DZDICTLIB_VISIBILITY=')
135 extra_args.append('-DZSTDERRORLIB_VISIBILITY=')
136
137 if compiler_type == 'unix':
138 extra_args.append('-fvisibility=hidden')
139
140 if not system_zstd and support_legacy:
96 extra_args.append('-DZSTD_LEGACY_SUPPORT=1')
141 extra_args.append('-DZSTD_LEGACY_SUPPORT=1')
97
142
143 if warnings_as_errors:
144 if compiler_type == 'unix':
145 extra_args.append('-Werror')
146 elif compiler_type == 'msvc':
147 extra_args.append('/WX')
148 else:
149 assert False
150
151 libraries = ['zstd'] if system_zstd else []
152
98 # TODO compile with optimizations.
153 # TODO compile with optimizations.
99 return Extension(name, sources,
154 return Extension(name, sources,
100 include_dirs=include_dirs,
155 include_dirs=include_dirs,
101 depends=depends,
156 depends=depends,
102 extra_compile_args=extra_args)
157 extra_compile_args=extra_args,
158 libraries=libraries)
@@ -1,16 +1,48 b''
1 import imp
1 import inspect
2 import inspect
2 import io
3 import io
3 import os
4 import os
4 import types
5 import types
5
6
7 try:
8 import hypothesis
9 except ImportError:
10 hypothesis = None
11
6
12
7 def make_cffi(cls):
13 def make_cffi(cls):
8 """Decorator to add CFFI versions of each test method."""
14 """Decorator to add CFFI versions of each test method."""
9
15
16 # The module containing this class definition should
17 # `import zstandard as zstd`. Otherwise things may blow up.
18 mod = inspect.getmodule(cls)
19 if not hasattr(mod, 'zstd'):
20 raise Exception('test module does not contain "zstd" symbol')
21
22 if not hasattr(mod.zstd, 'backend'):
23 raise Exception('zstd symbol does not have "backend" attribute; did '
24 'you `import zstandard as zstd`?')
25
26 # If `import zstandard` already chose the cffi backend, there is nothing
27 # for us to do: we only add the cffi variation if the default backend
28 # is the C extension.
29 if mod.zstd.backend == 'cffi':
30 return cls
31
32 old_env = dict(os.environ)
33 os.environ['PYTHON_ZSTANDARD_IMPORT_POLICY'] = 'cffi'
10 try:
34 try:
11 import zstd_cffi
35 try:
12 except ImportError:
36 mod_info = imp.find_module('zstandard')
13 return cls
37 mod = imp.load_module('zstandard_cffi', *mod_info)
38 except ImportError:
39 return cls
40 finally:
41 os.environ.clear()
42 os.environ.update(old_env)
43
44 if mod.backend != 'cffi':
45 raise Exception('got the zstandard %s backend instead of cffi' % mod.backend)
14
46
15 # If CFFI version is available, dynamically construct test methods
47 # If CFFI version is available, dynamically construct test methods
16 # that use it.
48 # that use it.
@@ -29,13 +61,13 b' def make_cffi(cls):'
29 # the function object and install it in a new attribute.
61 # the function object and install it in a new attribute.
30 if isinstance(fn, types.FunctionType):
62 if isinstance(fn, types.FunctionType):
31 globs = dict(fn.__globals__)
63 globs = dict(fn.__globals__)
32 globs['zstd'] = zstd_cffi
64 globs['zstd'] = mod
33 new_fn = types.FunctionType(fn.__code__, globs, name,
65 new_fn = types.FunctionType(fn.__code__, globs, name,
34 fn.__defaults__, fn.__closure__)
66 fn.__defaults__, fn.__closure__)
35 new_method = new_fn
67 new_method = new_fn
36 else:
68 else:
37 globs = dict(fn.__func__.func_globals)
69 globs = dict(fn.__func__.func_globals)
38 globs['zstd'] = zstd_cffi
70 globs['zstd'] = mod
39 new_fn = types.FunctionType(fn.__func__.func_code, globs, name,
71 new_fn = types.FunctionType(fn.__func__.func_code, globs, name,
40 fn.__func__.func_defaults,
72 fn.__func__.func_defaults,
41 fn.__func__.func_closure)
73 fn.__func__.func_closure)
@@ -86,3 +118,34 b' def random_input_data():'
86 pass
118 pass
87
119
88 return _source_files
120 return _source_files
121
122
123 def generate_samples():
124 inputs = [
125 b'foo',
126 b'bar',
127 b'abcdef',
128 b'sometext',
129 b'baz',
130 ]
131
132 samples = []
133
134 for i in range(128):
135 samples.append(inputs[i % 5])
136 samples.append(inputs[i % 5] * (i + 3))
137 samples.append(inputs[-(i % 5)] * (i + 2))
138
139 return samples
140
141
142 if hypothesis:
143 default_settings = hypothesis.settings()
144 hypothesis.settings.register_profile('default', default_settings)
145
146 ci_settings = hypothesis.settings(max_examples=2500,
147 max_iterations=2500)
148 hypothesis.settings.register_profile('ci', ci_settings)
149
150 hypothesis.settings.load_profile(
151 os.environ.get('HYPOTHESIS_PROFILE', 'default'))
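The reworked make_cffi decorator re-imports the zstandard package under PYTHON_ZSTANDARD_IMPORT_POLICY=cffi and rebinds the module-level zstd name inside each generated test method, so test modules only need to follow the "import zstandard as zstd" convention it checks for. A minimal sketch of a conforming test module (class name and assertion are illustrative):

    import unittest

    import zstandard as zstd

    from .common import make_cffi


    @make_cffi
    class TestRoundTrip(unittest.TestCase):
        def test_roundtrip(self):
            cctx = zstd.ZstdCompressor(level=1)
            dctx = zstd.ZstdDecompressor()
            data = b'foobar' * 256
            # Default settings write the content size, so decompress() can
            # size the output without an explicit hint.
            self.assertEqual(dctx.decompress(cctx.compress(data)), data)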
@@ -1,11 +1,7 b''
1 import struct
1 import struct
2 import unittest
2
3
3 try:
4 import zstandard as zstd
4 import unittest2 as unittest
5 except ImportError:
6 import unittest
7
8 import zstd
9
5
10 ss = struct.Struct('=QQ')
6 ss = struct.Struct('=QQ')
11
7
@@ -2,13 +2,10 b' import hashlib'
2 import io
2 import io
3 import struct
3 import struct
4 import sys
4 import sys
5 import tarfile
6 import unittest
5
7
6 try:
8 import zstandard as zstd
7 import unittest2 as unittest
8 except ImportError:
9 import unittest
10
11 import zstd
12
9
13 from .common import (
10 from .common import (
14 make_cffi,
11 make_cffi,
@@ -23,7 +20,8 b' else:'
23
20
24
21
25 def multithreaded_chunk_size(level, source_size=0):
22 def multithreaded_chunk_size(level, source_size=0):
26 params = zstd.get_compression_parameters(level, source_size)
23 params = zstd.ZstdCompressionParameters.from_level(level,
24 source_size=source_size)
27
25
28 return 1 << (params.window_log + 2)
26 return 1 << (params.window_log + 2)
29
27
@@ -32,67 +30,82 b' def multithreaded_chunk_size(level, sour'
32 class TestCompressor(unittest.TestCase):
30 class TestCompressor(unittest.TestCase):
33 def test_level_bounds(self):
31 def test_level_bounds(self):
34 with self.assertRaises(ValueError):
32 with self.assertRaises(ValueError):
35 zstd.ZstdCompressor(level=0)
33 zstd.ZstdCompressor(level=23)
36
34
37 with self.assertRaises(ValueError):
35 def test_memory_size(self):
38 zstd.ZstdCompressor(level=23)
36 cctx = zstd.ZstdCompressor(level=1)
37 self.assertGreater(cctx.memory_size(), 100)
39
38
40
39
41 @make_cffi
40 @make_cffi
42 class TestCompressor_compress(unittest.TestCase):
41 class TestCompressor_compress(unittest.TestCase):
43 def test_multithreaded_unsupported(self):
44 samples = []
45 for i in range(128):
46 samples.append(b'foo' * 64)
47 samples.append(b'bar' * 64)
48
49 d = zstd.train_dictionary(8192, samples)
50
51 cctx = zstd.ZstdCompressor(dict_data=d, threads=2)
52
53 with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both dictionaries and multi-threaded compression'):
54 cctx.compress(b'foo')
55
56 params = zstd.get_compression_parameters(3)
57 cctx = zstd.ZstdCompressor(compression_params=params, threads=2)
58 with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both compression parameters and multi-threaded compression'):
59 cctx.compress(b'foo')
60
61 def test_compress_empty(self):
42 def test_compress_empty(self):
62 cctx = zstd.ZstdCompressor(level=1)
43 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
63 result = cctx.compress(b'')
44 result = cctx.compress(b'')
64 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
45 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
65 params = zstd.get_frame_parameters(result)
46 params = zstd.get_frame_parameters(result)
66 self.assertEqual(params.content_size, 0)
47 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
67 self.assertEqual(params.window_size, 524288)
48 self.assertEqual(params.window_size, 524288)
68 self.assertEqual(params.dict_id, 0)
49 self.assertEqual(params.dict_id, 0)
69 self.assertFalse(params.has_checksum, 0)
50 self.assertFalse(params.has_checksum, 0)
70
51
71 # TODO should be temporary until https://github.com/facebook/zstd/issues/506
52 cctx = zstd.ZstdCompressor()
72 # is fixed.
53 result = cctx.compress(b'')
73 cctx = zstd.ZstdCompressor(write_content_size=True)
54 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00')
74 with self.assertRaises(ValueError):
55 params = zstd.get_frame_parameters(result)
75 cctx.compress(b'')
56 self.assertEqual(params.content_size, 0)
57
58 def test_input_types(self):
59 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
60 expected = b'\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f'
76
61
77 cctx.compress(b'', allow_empty=True)
62 mutable_array = bytearray(3)
63 mutable_array[:] = b'foo'
64
65 sources = [
66 memoryview(b'foo'),
67 bytearray(b'foo'),
68 mutable_array,
69 ]
70
71 for source in sources:
72 self.assertEqual(cctx.compress(source), expected)
78
73
79 def test_compress_large(self):
74 def test_compress_large(self):
80 chunks = []
75 chunks = []
81 for i in range(255):
76 for i in range(255):
82 chunks.append(struct.Struct('>B').pack(i) * 16384)
77 chunks.append(struct.Struct('>B').pack(i) * 16384)
83
78
84 cctx = zstd.ZstdCompressor(level=3)
79 cctx = zstd.ZstdCompressor(level=3, write_content_size=False)
85 result = cctx.compress(b''.join(chunks))
80 result = cctx.compress(b''.join(chunks))
86 self.assertEqual(len(result), 999)
81 self.assertEqual(len(result), 999)
87 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
82 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
88
83
89 # This matches the test for read_from() below.
84 # This matches the test for read_to_iter() below.
90 cctx = zstd.ZstdCompressor(level=1)
85 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
91 result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
86 result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
92 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
87 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
93 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
88 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
94 b'\x02\x09\x00\x00\x6f')
89 b'\x02\x09\x00\x00\x6f')
95
90
91 def test_negative_level(self):
92 cctx = zstd.ZstdCompressor(level=-4)
93 result = cctx.compress(b'foo' * 256)
94
95 def test_no_magic(self):
96 params = zstd.ZstdCompressionParameters.from_level(
97 1, format=zstd.FORMAT_ZSTD1)
98 cctx = zstd.ZstdCompressor(compression_params=params)
99 magic = cctx.compress(b'foobar')
100
101 params = zstd.ZstdCompressionParameters.from_level(
102 1, format=zstd.FORMAT_ZSTD1_MAGICLESS)
103 cctx = zstd.ZstdCompressor(compression_params=params)
104 no_magic = cctx.compress(b'foobar')
105
106 self.assertEqual(magic[0:4], b'\x28\xb5\x2f\xfd')
107 self.assertEqual(magic[4:], no_magic)
108
96 def test_write_checksum(self):
109 def test_write_checksum(self):
97 cctx = zstd.ZstdCompressor(level=1)
110 cctx = zstd.ZstdCompressor(level=1)
98 no_checksum = cctx.compress(b'foobar')
111 no_checksum = cctx.compress(b'foobar')
@@ -109,15 +122,15 b' class TestCompressor_compress(unittest.T'
109
122
110 def test_write_content_size(self):
123 def test_write_content_size(self):
111 cctx = zstd.ZstdCompressor(level=1)
124 cctx = zstd.ZstdCompressor(level=1)
125 with_size = cctx.compress(b'foobar' * 256)
126 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
112 no_size = cctx.compress(b'foobar' * 256)
127 no_size = cctx.compress(b'foobar' * 256)
113 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
114 with_size = cctx.compress(b'foobar' * 256)
115
128
116 self.assertEqual(len(with_size), len(no_size) + 1)
129 self.assertEqual(len(with_size), len(no_size) + 1)
117
130
118 no_params = zstd.get_frame_parameters(no_size)
131 no_params = zstd.get_frame_parameters(no_size)
119 with_params = zstd.get_frame_parameters(with_size)
132 with_params = zstd.get_frame_parameters(with_size)
120 self.assertEqual(no_params.content_size, 0)
133 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
121 self.assertEqual(with_params.content_size, 1536)
134 self.assertEqual(with_params.content_size, 1536)
122
135
123 def test_no_dict_id(self):
136 def test_no_dict_id(self):
@@ -140,7 +153,7 b' class TestCompressor_compress(unittest.T'
140 no_params = zstd.get_frame_parameters(no_dict_id)
153 no_params = zstd.get_frame_parameters(no_dict_id)
141 with_params = zstd.get_frame_parameters(with_dict_id)
154 with_params = zstd.get_frame_parameters(with_dict_id)
142 self.assertEqual(no_params.dict_id, 0)
155 self.assertEqual(no_params.dict_id, 0)
143 self.assertEqual(with_params.dict_id, 1584102229)
156 self.assertEqual(with_params.dict_id, 1387616518)
144
157
145 def test_compress_dict_multiple(self):
158 def test_compress_dict_multiple(self):
146 samples = []
159 samples = []
@@ -156,6 +169,21 b' class TestCompressor_compress(unittest.T'
156 for i in range(32):
169 for i in range(32):
157 cctx.compress(b'foo bar foobar foo bar foobar')
170 cctx.compress(b'foo bar foobar foo bar foobar')
158
171
172 def test_dict_precompute(self):
173 samples = []
174 for i in range(128):
175 samples.append(b'foo' * 64)
176 samples.append(b'bar' * 64)
177 samples.append(b'foobar' * 64)
178
179 d = zstd.train_dictionary(8192, samples)
180 d.precompute_compress(level=1)
181
182 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
183
184 for i in range(32):
185 cctx.compress(b'foo bar foobar foo bar foobar')
186
159 def test_multithreaded(self):
187 def test_multithreaded(self):
160 chunk_size = multithreaded_chunk_size(1)
188 chunk_size = multithreaded_chunk_size(1)
161 source = b''.join([b'x' * chunk_size, b'y' * chunk_size])
189 source = b''.join([b'x' * chunk_size, b'y' * chunk_size])
@@ -171,16 +199,65 b' class TestCompressor_compress(unittest.T'
171 dctx = zstd.ZstdDecompressor()
199 dctx = zstd.ZstdDecompressor()
172 self.assertEqual(dctx.decompress(compressed), source)
200 self.assertEqual(dctx.decompress(compressed), source)
173
201
202 def test_multithreaded_dict(self):
203 samples = []
204 for i in range(128):
205 samples.append(b'foo' * 64)
206 samples.append(b'bar' * 64)
207 samples.append(b'foobar' * 64)
208
209 d = zstd.train_dictionary(1024, samples)
210
211 cctx = zstd.ZstdCompressor(dict_data=d, threads=2)
212
213 result = cctx.compress(b'foo')
214 params = zstd.get_frame_parameters(result)
215 self.assertEqual(params.content_size, 3)
216 self.assertEqual(params.dict_id, d.dict_id())
217
218 self.assertEqual(result,
219 b'\x28\xb5\x2f\xfd\x23\x06\x59\xb5\x52\x03\x19\x00\x00'
220 b'\x66\x6f\x6f')
221
222 def test_multithreaded_compression_params(self):
223 params = zstd.ZstdCompressionParameters.from_level(0, threads=2)
224 cctx = zstd.ZstdCompressor(compression_params=params)
225
226 result = cctx.compress(b'foo')
227 params = zstd.get_frame_parameters(result)
228 self.assertEqual(params.content_size, 3)
229
230 self.assertEqual(result,
231 b'\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f')
232
174
233
175 @make_cffi
234 @make_cffi
176 class TestCompressor_compressobj(unittest.TestCase):
235 class TestCompressor_compressobj(unittest.TestCase):
177 def test_compressobj_empty(self):
236 def test_compressobj_empty(self):
178 cctx = zstd.ZstdCompressor(level=1)
237 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
179 cobj = cctx.compressobj()
238 cobj = cctx.compressobj()
180 self.assertEqual(cobj.compress(b''), b'')
239 self.assertEqual(cobj.compress(b''), b'')
181 self.assertEqual(cobj.flush(),
240 self.assertEqual(cobj.flush(),
182 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
241 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
183
242
243 def test_input_types(self):
244 expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f'
245 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
246
247 mutable_array = bytearray(3)
248 mutable_array[:] = b'foo'
249
250 sources = [
251 memoryview(b'foo'),
252 bytearray(b'foo'),
253 mutable_array,
254 ]
255
256 for source in sources:
257 cobj = cctx.compressobj()
258 self.assertEqual(cobj.compress(source), b'')
259 self.assertEqual(cobj.flush(), expected)
260
184 def test_compressobj_large(self):
261 def test_compressobj_large(self):
185 chunks = []
262 chunks = []
186 for i in range(255):
263 for i in range(255):
@@ -194,7 +271,7 b' class TestCompressor_compressobj(unittes'
194 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
271 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
195
272
196 params = zstd.get_frame_parameters(result)
273 params = zstd.get_frame_parameters(result)
197 self.assertEqual(params.content_size, 0)
274 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
198 self.assertEqual(params.window_size, 1048576)
275 self.assertEqual(params.window_size, 1048576)
199 self.assertEqual(params.dict_id, 0)
276 self.assertEqual(params.dict_id, 0)
200 self.assertFalse(params.has_checksum)
277 self.assertFalse(params.has_checksum)
@@ -209,8 +286,8 b' class TestCompressor_compressobj(unittes'
209
286
210 no_params = zstd.get_frame_parameters(no_checksum)
287 no_params = zstd.get_frame_parameters(no_checksum)
211 with_params = zstd.get_frame_parameters(with_checksum)
288 with_params = zstd.get_frame_parameters(with_checksum)
212 self.assertEqual(no_params.content_size, 0)
289 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
213 self.assertEqual(with_params.content_size, 0)
290 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
214 self.assertEqual(no_params.dict_id, 0)
291 self.assertEqual(no_params.dict_id, 0)
215 self.assertEqual(with_params.dict_id, 0)
292 self.assertEqual(with_params.dict_id, 0)
216 self.assertFalse(no_params.has_checksum)
293 self.assertFalse(no_params.has_checksum)
@@ -221,14 +298,14 b' class TestCompressor_compressobj(unittes'
221 def test_write_content_size(self):
298 def test_write_content_size(self):
222 cctx = zstd.ZstdCompressor(level=1)
299 cctx = zstd.ZstdCompressor(level=1)
223 cobj = cctx.compressobj(size=len(b'foobar' * 256))
300 cobj = cctx.compressobj(size=len(b'foobar' * 256))
301 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
302 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
303 cobj = cctx.compressobj(size=len(b'foobar' * 256))
224 no_size = cobj.compress(b'foobar' * 256) + cobj.flush()
304 no_size = cobj.compress(b'foobar' * 256) + cobj.flush()
225 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
226 cobj = cctx.compressobj(size=len(b'foobar' * 256))
227 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
228
305
229 no_params = zstd.get_frame_parameters(no_size)
306 no_params = zstd.get_frame_parameters(no_size)
230 with_params = zstd.get_frame_parameters(with_size)
307 with_params = zstd.get_frame_parameters(with_size)
231 self.assertEqual(no_params.content_size, 0)
308 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
232 self.assertEqual(with_params.content_size, 1536)
309 self.assertEqual(with_params.content_size, 1536)
233 self.assertEqual(no_params.dict_id, 0)
310 self.assertEqual(no_params.dict_id, 0)
234 self.assertEqual(with_params.dict_id, 0)
311 self.assertEqual(with_params.dict_id, 0)
@@ -300,6 +377,34 b' class TestCompressor_compressobj(unittes'
300
377
301 self.assertEqual(len(compressed), 295)
378 self.assertEqual(len(compressed), 295)
302
379
380 def test_frame_progression(self):
381 cctx = zstd.ZstdCompressor()
382
383 self.assertEqual(cctx.frame_progression(), (0, 0, 0))
384
385 cobj = cctx.compressobj()
386
387 cobj.compress(b'foobar')
388 self.assertEqual(cctx.frame_progression(), (6, 0, 0))
389
390 cobj.flush()
391 self.assertEqual(cctx.frame_progression(), (6, 6, 15))
392
393 def test_bad_size(self):
394 cctx = zstd.ZstdCompressor()
395
396 cobj = cctx.compressobj(size=2)
397 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
398 cobj.compress(b'foo')
399
400 # Try another operation on this instance.
401 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
402 cobj.compress(b'aa')
403
404 # Try another operation on the compressor.
405 cctx.compressobj(size=4)
406 cctx.compress(b'foobar')
407
303
408
304 @make_cffi
409 @make_cffi
305 class TestCompressor_copy_stream(unittest.TestCase):
410 class TestCompressor_copy_stream(unittest.TestCase):
@@ -323,7 +428,7 b' class TestCompressor_copy_stream(unittes'
323 source = io.BytesIO()
428 source = io.BytesIO()
324 dest = io.BytesIO()
429 dest = io.BytesIO()
325
430
326 cctx = zstd.ZstdCompressor(level=1)
431 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
327 r, w = cctx.copy_stream(source, dest)
432 r, w = cctx.copy_stream(source, dest)
328 self.assertEqual(int(r), 0)
433 self.assertEqual(int(r), 0)
329 self.assertEqual(w, 9)
434 self.assertEqual(w, 9)
@@ -345,7 +450,7 b' class TestCompressor_copy_stream(unittes'
345 self.assertEqual(w, 999)
450 self.assertEqual(w, 999)
346
451
347 params = zstd.get_frame_parameters(dest.getvalue())
452 params = zstd.get_frame_parameters(dest.getvalue())
348 self.assertEqual(params.content_size, 0)
453 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
349 self.assertEqual(params.window_size, 1048576)
454 self.assertEqual(params.window_size, 1048576)
350 self.assertEqual(params.dict_id, 0)
455 self.assertEqual(params.dict_id, 0)
351 self.assertFalse(params.has_checksum)
456 self.assertFalse(params.has_checksum)
@@ -367,8 +472,8 b' class TestCompressor_copy_stream(unittes'
367
472
368 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
473 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
369 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
474 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
370 self.assertEqual(no_params.content_size, 0)
475 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
371 self.assertEqual(with_params.content_size, 0)
476 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
372 self.assertEqual(no_params.dict_id, 0)
477 self.assertEqual(no_params.dict_id, 0)
373 self.assertEqual(with_params.dict_id, 0)
478 self.assertEqual(with_params.dict_id, 0)
374 self.assertFalse(no_params.has_checksum)
479 self.assertFalse(no_params.has_checksum)
@@ -378,12 +483,12 b' class TestCompressor_copy_stream(unittes'
378 source = io.BytesIO(b'foobar' * 256)
483 source = io.BytesIO(b'foobar' * 256)
379 no_size = io.BytesIO()
484 no_size = io.BytesIO()
380
485
381 cctx = zstd.ZstdCompressor(level=1)
486 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
382 cctx.copy_stream(source, no_size)
487 cctx.copy_stream(source, no_size)
383
488
384 source.seek(0)
489 source.seek(0)
385 with_size = io.BytesIO()
490 with_size = io.BytesIO()
386 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
491 cctx = zstd.ZstdCompressor(level=1)
387 cctx.copy_stream(source, with_size)
492 cctx.copy_stream(source, with_size)
388
493
389 # Source content size is unknown, so no content size written.
494 # Source content size is unknown, so no content size written.
@@ -400,7 +505,7 b' class TestCompressor_copy_stream(unittes'
400
505
401 no_params = zstd.get_frame_parameters(no_size.getvalue())
506 no_params = zstd.get_frame_parameters(no_size.getvalue())
402 with_params = zstd.get_frame_parameters(with_size.getvalue())
507 with_params = zstd.get_frame_parameters(with_size.getvalue())
403 self.assertEqual(no_params.content_size, 0)
508 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
404 self.assertEqual(with_params.content_size, 1536)
509 self.assertEqual(with_params.content_size, 1536)
405 self.assertEqual(no_params.dict_id, 0)
510 self.assertEqual(no_params.dict_id, 0)
406 self.assertEqual(with_params.dict_id, 0)
511 self.assertEqual(with_params.dict_id, 0)
@@ -426,19 +531,18 b' class TestCompressor_copy_stream(unittes'
426 source.seek(0)
531 source.seek(0)
427
532
428 dest = io.BytesIO()
533 dest = io.BytesIO()
429 cctx = zstd.ZstdCompressor(threads=2)
534 cctx = zstd.ZstdCompressor(threads=2, write_content_size=False)
430 r, w = cctx.copy_stream(source, dest)
535 r, w = cctx.copy_stream(source, dest)
431 self.assertEqual(r, 3145728)
536 self.assertEqual(r, 3145728)
432 self.assertEqual(w, 295)
537 self.assertEqual(w, 295)
433
538
434 params = zstd.get_frame_parameters(dest.getvalue())
539 params = zstd.get_frame_parameters(dest.getvalue())
435 self.assertEqual(params.content_size, 0)
540 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
436 self.assertEqual(params.dict_id, 0)
541 self.assertEqual(params.dict_id, 0)
437 self.assertFalse(params.has_checksum)
542 self.assertFalse(params.has_checksum)
438
543
439 # Writing content size and checksum works.
544 # Writing content size and checksum works.
440 cctx = zstd.ZstdCompressor(threads=2, write_content_size=True,
545 cctx = zstd.ZstdCompressor(threads=2, write_checksum=True)
441 write_checksum=True)
442 dest = io.BytesIO()
546 dest = io.BytesIO()
443 source.seek(0)
547 source.seek(0)
444 cctx.copy_stream(source, dest, size=len(source.getvalue()))
548 cctx.copy_stream(source, dest, size=len(source.getvalue()))
@@ -448,31 +552,227 b' class TestCompressor_copy_stream(unittes'
448 self.assertEqual(params.dict_id, 0)
552 self.assertEqual(params.dict_id, 0)
449 self.assertTrue(params.has_checksum)
553 self.assertTrue(params.has_checksum)
450
554
555 def test_bad_size(self):
556 source = io.BytesIO()
557 source.write(b'a' * 32768)
558 source.write(b'b' * 32768)
559 source.seek(0)
451
560
452 def compress(data, level):
561 dest = io.BytesIO()
453 buffer = io.BytesIO()
562
454 cctx = zstd.ZstdCompressor(level=level)
563 cctx = zstd.ZstdCompressor()
455 with cctx.write_to(buffer) as compressor:
564
456 compressor.write(data)
565 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
457 return buffer.getvalue()
566 cctx.copy_stream(source, dest, size=42)
567
568 # Try another operation on this compressor.
569 source.seek(0)
570 dest = io.BytesIO()
571 cctx.copy_stream(source, dest)
458
572
459
573
460 @make_cffi
574 @make_cffi
461 class TestCompressor_write_to(unittest.TestCase):
575 class TestCompressor_stream_reader(unittest.TestCase):
576 def test_context_manager(self):
577 cctx = zstd.ZstdCompressor()
578
579 reader = cctx.stream_reader(b'foo' * 60)
580 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
581 reader.read(10)
582
583 with cctx.stream_reader(b'foo') as reader:
584 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
585 with reader as reader2:
586 pass
587
588 def test_not_implemented(self):
589 cctx = zstd.ZstdCompressor()
590
591 with cctx.stream_reader(b'foo' * 60) as reader:
592 with self.assertRaises(io.UnsupportedOperation):
593 reader.readline()
594
595 with self.assertRaises(io.UnsupportedOperation):
596 reader.readlines()
597
598 # This could probably be implemented someday.
599 with self.assertRaises(NotImplementedError):
600 reader.readall()
601
602 with self.assertRaises(io.UnsupportedOperation):
603 iter(reader)
604
605 with self.assertRaises(io.UnsupportedOperation):
606 next(reader)
607
608 with self.assertRaises(OSError):
609 reader.writelines([])
610
611 with self.assertRaises(OSError):
612 reader.write(b'foo')
613
614 def test_constant_methods(self):
615 cctx = zstd.ZstdCompressor()
616
617 with cctx.stream_reader(b'boo') as reader:
618 self.assertTrue(reader.readable())
619 self.assertFalse(reader.writable())
620 self.assertFalse(reader.seekable())
621 self.assertFalse(reader.isatty())
622 self.assertIsNone(reader.flush())
623
624 def test_read_closed(self):
625 cctx = zstd.ZstdCompressor()
626
627 with cctx.stream_reader(b'foo' * 60) as reader:
628 reader.close()
629 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
630 reader.read(10)
631
632 def test_read_bad_size(self):
633 cctx = zstd.ZstdCompressor()
634
635 with cctx.stream_reader(b'foo') as reader:
636 with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'):
637 reader.read(-1)
638
639 with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'):
640 reader.read(0)
641
642 def test_read_buffer(self):
643 cctx = zstd.ZstdCompressor()
644
645 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
646 frame = cctx.compress(source)
647
648 with cctx.stream_reader(source) as reader:
649 self.assertEqual(reader.tell(), 0)
650
651 # We should get entire frame in one read.
652 result = reader.read(8192)
653 self.assertEqual(result, frame)
654 self.assertEqual(reader.tell(), len(result))
655 self.assertEqual(reader.read(), b'')
656 self.assertEqual(reader.tell(), len(result))
657
658 def test_read_buffer_small_chunks(self):
659 cctx = zstd.ZstdCompressor()
660
661 source = b'foo' * 60
662 chunks = []
663
664 with cctx.stream_reader(source) as reader:
665 self.assertEqual(reader.tell(), 0)
666
667 while True:
668 chunk = reader.read(1)
669 if not chunk:
670 break
671
672 chunks.append(chunk)
673 self.assertEqual(reader.tell(), sum(map(len, chunks)))
674
675 self.assertEqual(b''.join(chunks), cctx.compress(source))
676
677 def test_read_stream(self):
678 cctx = zstd.ZstdCompressor()
679
680 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
681 frame = cctx.compress(source)
682
683 with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
684 self.assertEqual(reader.tell(), 0)
685
686 chunk = reader.read(8192)
687 self.assertEqual(chunk, frame)
688 self.assertEqual(reader.tell(), len(chunk))
689 self.assertEqual(reader.read(), b'')
690 self.assertEqual(reader.tell(), len(chunk))
691
692 def test_read_stream_small_chunks(self):
693 cctx = zstd.ZstdCompressor()
694
695 source = b'foo' * 60
696 chunks = []
697
698 with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
699 self.assertEqual(reader.tell(), 0)
700
701 while True:
702 chunk = reader.read(1)
703 if not chunk:
704 break
705
706 chunks.append(chunk)
707 self.assertEqual(reader.tell(), sum(map(len, chunks)))
708
709 self.assertEqual(b''.join(chunks), cctx.compress(source))
710
711 def test_read_after_exit(self):
712 cctx = zstd.ZstdCompressor()
713
714 with cctx.stream_reader(b'foo' * 60) as reader:
715 while reader.read(8192):
716 pass
717
718 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
719 reader.read(10)
720
721 def test_bad_size(self):
722 cctx = zstd.ZstdCompressor()
723
724 source = io.BytesIO(b'foobar')
725
726 with cctx.stream_reader(source, size=2) as reader:
727 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
728 reader.read(10)
729
730 # Try another compression operation.
731 with cctx.stream_reader(source, size=42):
732 pass
733
734
735 @make_cffi
736 class TestCompressor_stream_writer(unittest.TestCase):
462 def test_empty(self):
737 def test_empty(self):
463 result = compress(b'', 1)
738 buffer = io.BytesIO()
739 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
740 with cctx.stream_writer(buffer) as compressor:
741 compressor.write(b'')
742
743 result = buffer.getvalue()
464 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
744 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
465
745
466 params = zstd.get_frame_parameters(result)
746 params = zstd.get_frame_parameters(result)
467 self.assertEqual(params.content_size, 0)
747 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
468 self.assertEqual(params.window_size, 524288)
748 self.assertEqual(params.window_size, 524288)
469 self.assertEqual(params.dict_id, 0)
749 self.assertEqual(params.dict_id, 0)
470 self.assertFalse(params.has_checksum)
750 self.assertFalse(params.has_checksum)
471
751
752 def test_input_types(self):
753 expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f'
754 cctx = zstd.ZstdCompressor(level=1)
755
756 mutable_array = bytearray(3)
757 mutable_array[:] = b'foo'
758
759 sources = [
760 memoryview(b'foo'),
761 bytearray(b'foo'),
762 mutable_array,
763 ]
764
765 for source in sources:
766 buffer = io.BytesIO()
767 with cctx.stream_writer(buffer) as compressor:
768 compressor.write(source)
769
770 self.assertEqual(buffer.getvalue(), expected)
771
472 def test_multiple_compress(self):
772 def test_multiple_compress(self):
473 buffer = io.BytesIO()
773 buffer = io.BytesIO()
474 cctx = zstd.ZstdCompressor(level=5)
774 cctx = zstd.ZstdCompressor(level=5)
475 with cctx.write_to(buffer) as compressor:
775 with cctx.stream_writer(buffer) as compressor:
476 self.assertEqual(compressor.write(b'foo'), 0)
776 self.assertEqual(compressor.write(b'foo'), 0)
477 self.assertEqual(compressor.write(b'bar'), 0)
777 self.assertEqual(compressor.write(b'bar'), 0)
478 self.assertEqual(compressor.write(b'x' * 8192), 0)
778 self.assertEqual(compressor.write(b'x' * 8192), 0)
@@ -491,35 +791,40 b' class TestCompressor_write_to(unittest.T'
491
791
492 d = zstd.train_dictionary(8192, samples)
792 d = zstd.train_dictionary(8192, samples)
493
793
794 h = hashlib.sha1(d.as_bytes()).hexdigest()
795 self.assertEqual(h, '3040faa0ddc37d50e71a4dd28052cb8db5d9d027')
796
494 buffer = io.BytesIO()
797 buffer = io.BytesIO()
495 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
798 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
496 with cctx.write_to(buffer) as compressor:
799 with cctx.stream_writer(buffer) as compressor:
497 self.assertEqual(compressor.write(b'foo'), 0)
800 self.assertEqual(compressor.write(b'foo'), 0)
498 self.assertEqual(compressor.write(b'bar'), 0)
801 self.assertEqual(compressor.write(b'bar'), 0)
499 self.assertEqual(compressor.write(b'foo' * 16384), 634)
802 self.assertEqual(compressor.write(b'foo' * 16384), 0)
500
803
501 compressed = buffer.getvalue()
804 compressed = buffer.getvalue()
502
805
503 params = zstd.get_frame_parameters(compressed)
806 params = zstd.get_frame_parameters(compressed)
504 self.assertEqual(params.content_size, 0)
807 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
505 self.assertEqual(params.window_size, 1024)
808 self.assertEqual(params.window_size, 2097152)
506 self.assertEqual(params.dict_id, d.dict_id())
809 self.assertEqual(params.dict_id, d.dict_id())
507 self.assertFalse(params.has_checksum)
810 self.assertFalse(params.has_checksum)
508
811 self.assertEqual(compressed,
509 self.assertEqual(compressed[0:32],
812 b'\x28\xb5\x2f\xfd\x03\x58\x06\x59\xb5\x52\x5d\x00'
510 b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00'
813 b'\x00\x00\x02\xfc\x3d\x3f\xd9\xb0\x51\x03\x45\x89')
511 b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54'
512 b'\x00\x00\x18\x6f\x6f\x66\x01\x00')
513
514 h = hashlib.sha1(compressed).hexdigest()
515 self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92')
516
814
517 def test_compression_params(self):
815 def test_compression_params(self):
518 params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST)
816 params = zstd.ZstdCompressionParameters(
817 window_log=20,
818 chain_log=6,
819 hash_log=12,
820 min_match=5,
821 search_log=4,
822 target_length=10,
823 compression_strategy=zstd.STRATEGY_FAST)
519
824
520 buffer = io.BytesIO()
825 buffer = io.BytesIO()
521 cctx = zstd.ZstdCompressor(compression_params=params)
826 cctx = zstd.ZstdCompressor(compression_params=params)
522 with cctx.write_to(buffer) as compressor:
827 with cctx.stream_writer(buffer) as compressor:
523 self.assertEqual(compressor.write(b'foo'), 0)
828 self.assertEqual(compressor.write(b'foo'), 0)
524 self.assertEqual(compressor.write(b'bar'), 0)
829 self.assertEqual(compressor.write(b'bar'), 0)
525 self.assertEqual(compressor.write(b'foobar' * 16384), 0)
830 self.assertEqual(compressor.write(b'foobar' * 16384), 0)
@@ -527,29 +832,29 b' class TestCompressor_write_to(unittest.T'
527 compressed = buffer.getvalue()
832 compressed = buffer.getvalue()
528
833
529 params = zstd.get_frame_parameters(compressed)
834 params = zstd.get_frame_parameters(compressed)
530 self.assertEqual(params.content_size, 0)
835 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
531 self.assertEqual(params.window_size, 1048576)
836 self.assertEqual(params.window_size, 1048576)
532 self.assertEqual(params.dict_id, 0)
837 self.assertEqual(params.dict_id, 0)
533 self.assertFalse(params.has_checksum)
838 self.assertFalse(params.has_checksum)
534
839
535 h = hashlib.sha1(compressed).hexdigest()
840 h = hashlib.sha1(compressed).hexdigest()
536 self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99')
841 self.assertEqual(h, '2a8111d72eb5004cdcecbdac37da9f26720d30ef')
537
842
538 def test_write_checksum(self):
843 def test_write_checksum(self):
539 no_checksum = io.BytesIO()
844 no_checksum = io.BytesIO()
540 cctx = zstd.ZstdCompressor(level=1)
845 cctx = zstd.ZstdCompressor(level=1)
541 with cctx.write_to(no_checksum) as compressor:
846 with cctx.stream_writer(no_checksum) as compressor:
542 self.assertEqual(compressor.write(b'foobar'), 0)
847 self.assertEqual(compressor.write(b'foobar'), 0)
543
848
544 with_checksum = io.BytesIO()
849 with_checksum = io.BytesIO()
545 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
850 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
546 with cctx.write_to(with_checksum) as compressor:
851 with cctx.stream_writer(with_checksum) as compressor:
547 self.assertEqual(compressor.write(b'foobar'), 0)
852 self.assertEqual(compressor.write(b'foobar'), 0)
548
853
549 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
854 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
550 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
855 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
551 self.assertEqual(no_params.content_size, 0)
856 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
552 self.assertEqual(with_params.content_size, 0)
857 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
553 self.assertEqual(no_params.dict_id, 0)
858 self.assertEqual(no_params.dict_id, 0)
554 self.assertEqual(with_params.dict_id, 0)
859 self.assertEqual(with_params.dict_id, 0)
555 self.assertFalse(no_params.has_checksum)
860 self.assertFalse(no_params.has_checksum)
@@ -560,13 +865,13 b' class TestCompressor_write_to(unittest.T'
560
865
561 def test_write_content_size(self):
866 def test_write_content_size(self):
562 no_size = io.BytesIO()
867 no_size = io.BytesIO()
563 cctx = zstd.ZstdCompressor(level=1)
868 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
564 with cctx.write_to(no_size) as compressor:
869 with cctx.stream_writer(no_size) as compressor:
565 self.assertEqual(compressor.write(b'foobar' * 256), 0)
870 self.assertEqual(compressor.write(b'foobar' * 256), 0)
566
871
567 with_size = io.BytesIO()
872 with_size = io.BytesIO()
568 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
873 cctx = zstd.ZstdCompressor(level=1)
569 with cctx.write_to(with_size) as compressor:
874 with cctx.stream_writer(with_size) as compressor:
570 self.assertEqual(compressor.write(b'foobar' * 256), 0)
875 self.assertEqual(compressor.write(b'foobar' * 256), 0)
571
876
572 # Source size is not known in streaming mode, so header not
877 # Source size is not known in streaming mode, so header not
@@ -576,12 +881,12 b' class TestCompressor_write_to(unittest.T'
576
881
577 # Declaring size will write the header.
882 # Declaring size will write the header.
578 with_size = io.BytesIO()
883 with_size = io.BytesIO()
579 with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor:
884 with cctx.stream_writer(with_size, size=len(b'foobar' * 256)) as compressor:
580 self.assertEqual(compressor.write(b'foobar' * 256), 0)
885 self.assertEqual(compressor.write(b'foobar' * 256), 0)
581
886
582 no_params = zstd.get_frame_parameters(no_size.getvalue())
887 no_params = zstd.get_frame_parameters(no_size.getvalue())
583 with_params = zstd.get_frame_parameters(with_size.getvalue())
888 with_params = zstd.get_frame_parameters(with_size.getvalue())
584 self.assertEqual(no_params.content_size, 0)
889 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
585 self.assertEqual(with_params.content_size, 1536)
890 self.assertEqual(with_params.content_size, 1536)
586 self.assertEqual(no_params.dict_id, 0)
891 self.assertEqual(no_params.dict_id, 0)
587 self.assertEqual(with_params.dict_id, 0)
892 self.assertEqual(with_params.dict_id, 0)
@@ -602,18 +907,22 b' class TestCompressor_write_to(unittest.T'
602
907
603 with_dict_id = io.BytesIO()
908 with_dict_id = io.BytesIO()
604 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
909 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
605 with cctx.write_to(with_dict_id) as compressor:
910 with cctx.stream_writer(with_dict_id) as compressor:
606 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
911 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
607
912
913 self.assertEqual(with_dict_id.getvalue()[4:5], b'\x03')
914
608 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
915 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
609 no_dict_id = io.BytesIO()
916 no_dict_id = io.BytesIO()
610 with cctx.write_to(no_dict_id) as compressor:
917 with cctx.stream_writer(no_dict_id) as compressor:
611 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
918 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
612
919
920 self.assertEqual(no_dict_id.getvalue()[4:5], b'\x00')
921
613 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
922 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
614 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
923 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
615 self.assertEqual(no_params.content_size, 0)
924 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
616 self.assertEqual(with_params.content_size, 0)
925 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
617 self.assertEqual(no_params.dict_id, 0)
926 self.assertEqual(no_params.dict_id, 0)
618 self.assertEqual(with_params.dict_id, d.dict_id())
927 self.assertEqual(with_params.dict_id, d.dict_id())
619 self.assertFalse(no_params.has_checksum)
928 self.assertFalse(no_params.has_checksum)
@@ -625,7 +934,8 b' class TestCompressor_write_to(unittest.T'
625 def test_memory_size(self):
934 def test_memory_size(self):
626 cctx = zstd.ZstdCompressor(level=3)
935 cctx = zstd.ZstdCompressor(level=3)
627 buffer = io.BytesIO()
936 buffer = io.BytesIO()
628 with cctx.write_to(buffer) as compressor:
937 with cctx.stream_writer(buffer) as compressor:
938 compressor.write(b'foo')
629 size = compressor.memory_size()
939 size = compressor.memory_size()
630
940
631 self.assertGreater(size, 100000)
941 self.assertGreater(size, 100000)
@@ -633,7 +943,7 b' class TestCompressor_write_to(unittest.T'
633 def test_write_size(self):
943 def test_write_size(self):
634 cctx = zstd.ZstdCompressor(level=3)
944 cctx = zstd.ZstdCompressor(level=3)
635 dest = OpCountingBytesIO()
945 dest = OpCountingBytesIO()
636 with cctx.write_to(dest, write_size=1) as compressor:
946 with cctx.stream_writer(dest, write_size=1) as compressor:
637 self.assertEqual(compressor.write(b'foo'), 0)
947 self.assertEqual(compressor.write(b'foo'), 0)
638 self.assertEqual(compressor.write(b'bar'), 0)
948 self.assertEqual(compressor.write(b'bar'), 0)
639 self.assertEqual(compressor.write(b'foobar'), 0)
949 self.assertEqual(compressor.write(b'foobar'), 0)
@@ -643,7 +953,7 b' class TestCompressor_write_to(unittest.T'
643 def test_flush_repeated(self):
953 def test_flush_repeated(self):
644 cctx = zstd.ZstdCompressor(level=3)
954 cctx = zstd.ZstdCompressor(level=3)
645 dest = OpCountingBytesIO()
955 dest = OpCountingBytesIO()
646 with cctx.write_to(dest) as compressor:
956 with cctx.stream_writer(dest) as compressor:
647 self.assertEqual(compressor.write(b'foo'), 0)
957 self.assertEqual(compressor.write(b'foo'), 0)
648 self.assertEqual(dest._write_count, 0)
958 self.assertEqual(dest._write_count, 0)
649 self.assertEqual(compressor.flush(), 12)
959 self.assertEqual(compressor.flush(), 12)
@@ -659,7 +969,7 b' class TestCompressor_write_to(unittest.T'
659 def test_flush_empty_block(self):
969 def test_flush_empty_block(self):
660 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
970 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
661 dest = OpCountingBytesIO()
971 dest = OpCountingBytesIO()
662 with cctx.write_to(dest) as compressor:
972 with cctx.stream_writer(dest) as compressor:
663 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
973 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
664 count = dest._write_count
974 count = dest._write_count
665 offset = dest.tell()
975 offset = dest.tell()
@@ -680,50 +990,89 b' class TestCompressor_write_to(unittest.T'
680 def test_multithreaded(self):
990 def test_multithreaded(self):
681 dest = io.BytesIO()
991 dest = io.BytesIO()
682 cctx = zstd.ZstdCompressor(threads=2)
992 cctx = zstd.ZstdCompressor(threads=2)
683 with cctx.write_to(dest) as compressor:
993 with cctx.stream_writer(dest) as compressor:
684 compressor.write(b'a' * 1048576)
994 compressor.write(b'a' * 1048576)
685 compressor.write(b'b' * 1048576)
995 compressor.write(b'b' * 1048576)
686 compressor.write(b'c' * 1048576)
996 compressor.write(b'c' * 1048576)
687
997
688 self.assertEqual(len(dest.getvalue()), 295)
998 self.assertEqual(len(dest.getvalue()), 295)
689
999
1000 def test_tell(self):
1001 dest = io.BytesIO()
1002 cctx = zstd.ZstdCompressor()
1003 with cctx.stream_writer(dest) as compressor:
1004 self.assertEqual(compressor.tell(), 0)
1005
1006 for i in range(256):
1007 compressor.write(b'foo' * (i + 1))
1008 self.assertEqual(compressor.tell(), dest.tell())
1009
1010 def test_bad_size(self):
1011 cctx = zstd.ZstdCompressor()
1012
1013 dest = io.BytesIO()
1014
1015 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
1016 with cctx.stream_writer(dest, size=2) as compressor:
1017 compressor.write(b'foo')
1018
1019 # Test another operation.
1020 with cctx.stream_writer(dest, size=42):
1021 pass
1022
1023 def test_tarfile_compat(self):
1024 raise unittest.SkipTest('not yet fully working')
1025
1026 dest = io.BytesIO()
1027 cctx = zstd.ZstdCompressor()
1028 with cctx.stream_writer(dest) as compressor:
1029 with tarfile.open('tf', mode='w', fileobj=compressor) as tf:
1030 tf.add(__file__, 'test_compressor.py')
1031
1032 dest.seek(0)
1033
1034 dctx = zstd.ZstdDecompressor()
1035 with dctx.stream_reader(dest) as reader:
1036 with tarfile.open(mode='r:', fileobj=reader) as tf:
1037 for member in tf:
1038 self.assertEqual(member.name, 'test_compressor.py')
690
1039
691 @make_cffi
1040 @make_cffi
692 class TestCompressor_read_from(unittest.TestCase):
1041 class TestCompressor_read_to_iter(unittest.TestCase):
693 def test_type_validation(self):
1042 def test_type_validation(self):
694 cctx = zstd.ZstdCompressor()
1043 cctx = zstd.ZstdCompressor()
695
1044
696 # Object with read() works.
1045 # Object with read() works.
697 for chunk in cctx.read_from(io.BytesIO()):
1046 for chunk in cctx.read_to_iter(io.BytesIO()):
698 pass
1047 pass
699
1048
700 # Buffer protocol works.
1049 # Buffer protocol works.
701 for chunk in cctx.read_from(b'foobar'):
1050 for chunk in cctx.read_to_iter(b'foobar'):
702 pass
1051 pass
703
1052
704 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
1053 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
705 for chunk in cctx.read_from(True):
1054 for chunk in cctx.read_to_iter(True):
706 pass
1055 pass
707
1056
708 def test_read_empty(self):
1057 def test_read_empty(self):
709 cctx = zstd.ZstdCompressor(level=1)
1058 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
710
1059
711 source = io.BytesIO()
1060 source = io.BytesIO()
712 it = cctx.read_from(source)
1061 it = cctx.read_to_iter(source)
713 chunks = list(it)
1062 chunks = list(it)
714 self.assertEqual(len(chunks), 1)
1063 self.assertEqual(len(chunks), 1)
715 compressed = b''.join(chunks)
1064 compressed = b''.join(chunks)
716 self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
1065 self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
717
1066
718 # And again with the buffer protocol.
1067 # And again with the buffer protocol.
719 it = cctx.read_from(b'')
1068 it = cctx.read_to_iter(b'')
720 chunks = list(it)
1069 chunks = list(it)
721 self.assertEqual(len(chunks), 1)
1070 self.assertEqual(len(chunks), 1)
722 compressed2 = b''.join(chunks)
1071 compressed2 = b''.join(chunks)
723 self.assertEqual(compressed2, compressed)
1072 self.assertEqual(compressed2, compressed)
724
1073
725 def test_read_large(self):
1074 def test_read_large(self):
726 cctx = zstd.ZstdCompressor(level=1)
1075 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
727
1076
728 source = io.BytesIO()
1077 source = io.BytesIO()
729 source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
1078 source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
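read_from() becomes read_to_iter(): it pulls from a file object or buffer and yields compressed chunks, and as the test above checks, it does no work until the first chunk is requested. A small sketch under that API (the chunk sizes themselves are unspecified):

import io
import zstandard as zstd

cctx = zstd.ZstdCompressor(level=1)
source = io.BytesIO(b'f' * 1048576)

it = cctx.read_to_iter(source, size=1048576, read_size=131072)

# Creating the iterator reads nothing yet; compression is lazy.
assert source.tell() == 0

frame = b''.join(it)

# Round-trip through a decompressobj, which does not need a content size header.
dctx = zstd.ZstdDecompressor()
assert dctx.decompressobj().decompress(frame) == b'f' * 1048576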
@@ -732,7 +1081,7 b' class TestCompressor_read_from(unittest.'
732
1081
733 # Creating an iterator should not perform any compression until
1082 # Creating an iterator should not perform any compression until
734 # first read.
1083 # first read.
735 it = cctx.read_from(source, size=len(source.getvalue()))
1084 it = cctx.read_to_iter(source, size=len(source.getvalue()))
736 self.assertEqual(source.tell(), 0)
1085 self.assertEqual(source.tell(), 0)
737
1086
738 # We should have exactly 2 output chunks.
1087 # We should have exactly 2 output chunks.
@@ -758,21 +1107,28 b' class TestCompressor_read_from(unittest.'
758 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
1107 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
759
1108
760 params = zstd.get_frame_parameters(b''.join(chunks))
1109 params = zstd.get_frame_parameters(b''.join(chunks))
761 self.assertEqual(params.content_size, 0)
1110 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
762 self.assertEqual(params.window_size, 262144)
1111 self.assertEqual(params.window_size, 262144)
763 self.assertEqual(params.dict_id, 0)
1112 self.assertEqual(params.dict_id, 0)
764 self.assertFalse(params.has_checksum)
1113 self.assertFalse(params.has_checksum)
765
1114
766 # Now check the buffer protocol.
1115 # Now check the buffer protocol.
767 it = cctx.read_from(source.getvalue())
1116 it = cctx.read_to_iter(source.getvalue())
768 chunks = list(it)
1117 chunks = list(it)
769 self.assertEqual(len(chunks), 2)
1118 self.assertEqual(len(chunks), 2)
1119
1120 params = zstd.get_frame_parameters(b''.join(chunks))
1121 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1122 #self.assertEqual(params.window_size, 262144)
1123 self.assertEqual(params.dict_id, 0)
1124 self.assertFalse(params.has_checksum)
1125
770 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
1126 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
771
1127
772 def test_read_write_size(self):
1128 def test_read_write_size(self):
773 source = OpCountingBytesIO(b'foobarfoobar')
1129 source = OpCountingBytesIO(b'foobarfoobar')
774 cctx = zstd.ZstdCompressor(level=3)
1130 cctx = zstd.ZstdCompressor(level=3)
775 for chunk in cctx.read_from(source, read_size=1, write_size=1):
1131 for chunk in cctx.read_to_iter(source, read_size=1, write_size=1):
776 self.assertEqual(len(chunk), 1)
1132 self.assertEqual(len(chunk), 1)
777
1133
778 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
1134 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
@@ -786,17 +1142,22 b' class TestCompressor_read_from(unittest.'
786
1142
787 cctx = zstd.ZstdCompressor(threads=2)
1143 cctx = zstd.ZstdCompressor(threads=2)
788
1144
789 compressed = b''.join(cctx.read_from(source))
1145 compressed = b''.join(cctx.read_to_iter(source))
790 self.assertEqual(len(compressed), 295)
1146 self.assertEqual(len(compressed), 295)
791
1147
1148 def test_bad_size(self):
1149 cctx = zstd.ZstdCompressor()
1150
1151 source = io.BytesIO(b'a' * 42)
1152
1153 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
1154 b''.join(cctx.read_to_iter(source, size=2))
1155
1156 # Test another operation on errored compressor.
1157 b''.join(cctx.read_to_iter(source))
1158
792
1159
793 class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
1160 class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
794 def test_multithreaded_unsupported(self):
795 cctx = zstd.ZstdCompressor(threads=2)
796
797 with self.assertRaisesRegexp(zstd.ZstdError, 'function cannot be called on ZstdCompressor configured for multi-threaded compression'):
798 cctx.multi_compress_to_buffer([b'foo'])
799
800 def test_invalid_inputs(self):
1161 def test_invalid_inputs(self):
801 cctx = zstd.ZstdCompressor()
1162 cctx = zstd.ZstdCompressor()
802
1163
@@ -819,7 +1180,7 b' class TestCompressor_multi_compress_to_b'
819 cctx.multi_compress_to_buffer([b'', b'', b''])
1180 cctx.multi_compress_to_buffer([b'', b'', b''])
820
1181
821 def test_list_input(self):
1182 def test_list_input(self):
822 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
1183 cctx = zstd.ZstdCompressor(write_checksum=True)
823
1184
824 original = [b'foo' * 12, b'bar' * 6]
1185 original = [b'foo' * 12, b'bar' * 6]
825 frames = [cctx.compress(c) for c in original]
1186 frames = [cctx.compress(c) for c in original]
@@ -834,7 +1195,7 b' class TestCompressor_multi_compress_to_b'
834 self.assertEqual(b[1].tobytes(), frames[1])
1195 self.assertEqual(b[1].tobytes(), frames[1])
835
1196
836 def test_buffer_with_segments_input(self):
1197 def test_buffer_with_segments_input(self):
837 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
1198 cctx = zstd.ZstdCompressor(write_checksum=True)
838
1199
839 original = [b'foo' * 4, b'bar' * 6]
1200 original = [b'foo' * 4, b'bar' * 6]
840 frames = [cctx.compress(c) for c in original]
1201 frames = [cctx.compress(c) for c in original]
@@ -852,7 +1213,7 b' class TestCompressor_multi_compress_to_b'
852 self.assertEqual(result[1].tobytes(), frames[1])
1213 self.assertEqual(result[1].tobytes(), frames[1])
853
1214
854 def test_buffer_with_segments_collection_input(self):
1215 def test_buffer_with_segments_collection_input(self):
855 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
1216 cctx = zstd.ZstdCompressor(write_checksum=True)
856
1217
857 original = [
1218 original = [
858 b'foo1',
1219 b'foo1',
@@ -886,10 +1247,10 b' class TestCompressor_multi_compress_to_b'
886 def test_multiple_threads(self):
1247 def test_multiple_threads(self):
887 # threads argument will cause multi-threaded ZSTD APIs to be used, which will
1248 # threads argument will cause multi-threaded ZSTD APIs to be used, which will
888 # make output different.
1249 # make output different.
889 refcctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
1250 refcctx = zstd.ZstdCompressor(write_checksum=True)
890 reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)]
1251 reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)]
891
1252
892 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
1253 cctx = zstd.ZstdCompressor(write_checksum=True)
893
1254
894 frames = []
1255 frames = []
895 frames.extend(b'x' * 64 for i in range(256))
1256 frames.extend(b'x' * 64 for i in range(256))
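multi_compress_to_buffer() accepts a list of byte-like inputs (or the BufferWithSegments variants exercised above) and returns an indexable collection whose items expose tobytes(). A sketch of the list-input path, limited to what these tests demonstrate:

import zstandard as zstd

cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
originals = [b'foo' * 12, b'bar' * 6]

# Each input becomes its own zstd frame inside the returned collection.
result = cctx.multi_compress_to_buffer(originals)

dctx = zstd.ZstdDecompressor()
for i, original in enumerate(originals):
    assert dctx.decompress(result[i].tobytes()) == original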
@@ -1,10 +1,6 b''
1 import io
1 import io
2 import os
2 import os
3
3 import unittest
4 try:
5 import unittest2 as unittest
6 except ImportError:
7 import unittest
8
4
9 try:
5 try:
10 import hypothesis
6 import hypothesis
@@ -12,7 +8,7 b' try:'
12 except ImportError:
8 except ImportError:
13 raise unittest.SkipTest('hypothesis not available')
9 raise unittest.SkipTest('hypothesis not available')
14
10
15 import zstd
11 import zstandard as zstd
16
12
17 from . common import (
13 from . common import (
18 make_cffi,
14 make_cffi,
@@ -22,7 +18,57 b' from . common import ('
22
18
23 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
19 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
24 @make_cffi
20 @make_cffi
25 class TestCompressor_write_to_fuzzing(unittest.TestCase):
21 class TestCompressor_stream_reader_fuzzing(unittest.TestCase):
22 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
23 level=strategies.integers(min_value=1, max_value=5),
24 source_read_size=strategies.integers(1, 16384),
25 read_sizes=strategies.data())
26 def test_stream_source_read_variance(self, original, level, source_read_size,
27 read_sizes):
28 refctx = zstd.ZstdCompressor(level=level)
29 ref_frame = refctx.compress(original)
30
31 cctx = zstd.ZstdCompressor(level=level)
32 with cctx.stream_reader(io.BytesIO(original), size=len(original),
33 read_size=source_read_size) as reader:
34 chunks = []
35 while True:
36 read_size = read_sizes.draw(strategies.integers(1, 16384))
37 chunk = reader.read(read_size)
38
39 if not chunk:
40 break
41 chunks.append(chunk)
42
43 self.assertEqual(b''.join(chunks), ref_frame)
44
45 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
46 level=strategies.integers(min_value=1, max_value=5),
47 source_read_size=strategies.integers(1, 16384),
48 read_sizes=strategies.data())
49 def test_buffer_source_read_variance(self, original, level, source_read_size,
50 read_sizes):
51
52 refctx = zstd.ZstdCompressor(level=level)
53 ref_frame = refctx.compress(original)
54
55 cctx = zstd.ZstdCompressor(level=level)
56 with cctx.stream_reader(original, size=len(original),
57 read_size=source_read_size) as reader:
58 chunks = []
59 while True:
60 read_size = read_sizes.draw(strategies.integers(1, 16384))
61 chunk = reader.read(read_size)
62 if not chunk:
63 break
64 chunks.append(chunk)
65
66 self.assertEqual(b''.join(chunks), ref_frame)
67
68
69 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
70 @make_cffi
71 class TestCompressor_stream_writer_fuzzing(unittest.TestCase):
26 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
72 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
27 level=strategies.integers(min_value=1, max_value=5),
73 level=strategies.integers(min_value=1, max_value=5),
28 write_size=strategies.integers(min_value=1, max_value=1048576))
74 write_size=strategies.integers(min_value=1, max_value=1048576))
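ZstdCompressor.stream_reader() is the new pull-style API: it wraps a source and read() returns compressed bytes. The fuzz tests above assert that, with the source size declared, the streamed output is byte-identical to a one-shot compress(); a condensed sketch:

import io
import zstandard as zstd

source = b'foobar' * 4096
ref_frame = zstd.ZstdCompressor(level=3).compress(source)

cctx = zstd.ZstdCompressor(level=3)
chunks = []
with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
    while True:
        chunk = reader.read(16384)
        if not chunk:
            break
        chunks.append(chunk)

assert b''.join(chunks) == ref_frame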
@@ -32,7 +78,7 b' class TestCompressor_write_to_fuzzing(un'
32
78
33 cctx = zstd.ZstdCompressor(level=level)
79 cctx = zstd.ZstdCompressor(level=level)
34 b = io.BytesIO()
80 b = io.BytesIO()
35 with cctx.write_to(b, size=len(original), write_size=write_size) as compressor:
81 with cctx.stream_writer(b, size=len(original), write_size=write_size) as compressor:
36 compressor.write(original)
82 compressor.write(original)
37
83
38 self.assertEqual(b.getvalue(), ref_frame)
84 self.assertEqual(b.getvalue(), ref_frame)
@@ -62,13 +108,12 b' class TestCompressor_copy_stream_fuzzing'
62 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
108 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
63 @make_cffi
109 @make_cffi
64 class TestCompressor_compressobj_fuzzing(unittest.TestCase):
110 class TestCompressor_compressobj_fuzzing(unittest.TestCase):
111 @hypothesis.settings(
112 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
65 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
113 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
66 level=strategies.integers(min_value=1, max_value=5),
114 level=strategies.integers(min_value=1, max_value=5),
67 chunk_sizes=strategies.streaming(
115 chunk_sizes=strategies.data())
68 strategies.integers(min_value=1, max_value=4096)))
69 def test_random_input_sizes(self, original, level, chunk_sizes):
116 def test_random_input_sizes(self, original, level, chunk_sizes):
70 chunk_sizes = iter(chunk_sizes)
71
72 refctx = zstd.ZstdCompressor(level=level)
117 refctx = zstd.ZstdCompressor(level=level)
73 ref_frame = refctx.compress(original)
118 ref_frame = refctx.compress(original)
74
119
@@ -78,7 +123,7 b' class TestCompressor_compressobj_fuzzing'
78 chunks = []
123 chunks = []
79 i = 0
124 i = 0
80 while True:
125 while True:
81 chunk_size = next(chunk_sizes)
126 chunk_size = chunk_sizes.draw(strategies.integers(1, 4096))
82 source = original[i:i + chunk_size]
127 source = original[i:i + chunk_size]
83 if not source:
128 if not source:
84 break
129 break
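The compressobj fuzz test now draws its chunk sizes from a data() strategy inside the running test, replacing the strategies.streaming() helper it previously iterated over (which newer hypothesis releases appear to have dropped). A tiny standalone sketch of that pattern, independent of zstd:

import hypothesis
from hypothesis import strategies

@hypothesis.given(sizes=strategies.data())
def test_chunk_size_draws(sizes):
    # Each draw requests a fresh value while the test body is executing.
    n = sizes.draw(strategies.integers(1, 4096))
    assert 1 <= n <= 4096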
@@ -93,7 +138,7 b' class TestCompressor_compressobj_fuzzing'
93
138
94 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
139 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
95 @make_cffi
140 @make_cffi
96 class TestCompressor_read_from_fuzzing(unittest.TestCase):
141 class TestCompressor_read_to_iter_fuzzing(unittest.TestCase):
97 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
142 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
98 level=strategies.integers(min_value=1, max_value=5),
143 level=strategies.integers(min_value=1, max_value=5),
99 read_size=strategies.integers(min_value=1, max_value=4096),
144 read_size=strategies.integers(min_value=1, max_value=4096),
@@ -105,8 +150,9 b' class TestCompressor_read_from_fuzzing(u'
105 source = io.BytesIO(original)
150 source = io.BytesIO(original)
106
151
107 cctx = zstd.ZstdCompressor(level=level)
152 cctx = zstd.ZstdCompressor(level=level)
108 chunks = list(cctx.read_from(source, size=len(original), read_size=read_size,
153 chunks = list(cctx.read_to_iter(source, size=len(original),
109 write_size=write_size))
154 read_size=read_size,
155 write_size=write_size))
110
156
111 self.assertEqual(b''.join(chunks), ref_frame)
157 self.assertEqual(b''.join(chunks), ref_frame)
112
158
@@ -125,7 +171,6 b' class TestCompressor_multi_compress_to_b'
125 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
171 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
126
172
127 cctx = zstd.ZstdCompressor(level=1,
173 cctx = zstd.ZstdCompressor(level=1,
128 write_content_size=True,
129 write_checksum=True,
174 write_checksum=True,
130 **kwargs)
175 **kwargs)
131
176
@@ -1,9 +1,7 b''
1 try:
1 import sys
2 import unittest2 as unittest
2 import unittest
3 except ImportError:
4 import unittest
5
3
6 import zstd
4 import zstandard as zstd
7
5
8 from . common import (
6 from . common import (
9 make_cffi,
7 make_cffi,
@@ -12,52 +10,104 b' from . common import ('
12
10
13 @make_cffi
11 @make_cffi
14 class TestCompressionParameters(unittest.TestCase):
12 class TestCompressionParameters(unittest.TestCase):
15 def test_init_bad_arg_type(self):
13 def test_bounds(self):
16 with self.assertRaises(TypeError):
14 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MIN,
17 zstd.CompressionParameters()
15 chain_log=zstd.CHAINLOG_MIN,
18
16 hash_log=zstd.HASHLOG_MIN,
19 with self.assertRaises(TypeError):
17 search_log=zstd.SEARCHLOG_MIN,
20 zstd.CompressionParameters(0, 1)
18 min_match=zstd.SEARCHLENGTH_MIN + 1,
19 target_length=zstd.TARGETLENGTH_MIN,
20 compression_strategy=zstd.STRATEGY_FAST)
21
21
22 def test_bounds(self):
22 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MAX,
23 zstd.CompressionParameters(zstd.WINDOWLOG_MIN,
23 chain_log=zstd.CHAINLOG_MAX,
24 zstd.CHAINLOG_MIN,
24 hash_log=zstd.HASHLOG_MAX,
25 zstd.HASHLOG_MIN,
25 search_log=zstd.SEARCHLOG_MAX,
26 zstd.SEARCHLOG_MIN,
26 min_match=zstd.SEARCHLENGTH_MAX - 1,
27 zstd.SEARCHLENGTH_MIN + 1,
27 compression_strategy=zstd.STRATEGY_BTULTRA)
28 zstd.TARGETLENGTH_MIN,
29 zstd.STRATEGY_FAST)
30
28
31 zstd.CompressionParameters(zstd.WINDOWLOG_MAX,
29 def test_from_level(self):
32 zstd.CHAINLOG_MAX,
30 p = zstd.ZstdCompressionParameters.from_level(1)
33 zstd.HASHLOG_MAX,
34 zstd.SEARCHLOG_MAX,
35 zstd.SEARCHLENGTH_MAX - 1,
36 zstd.TARGETLENGTH_MAX,
37 zstd.STRATEGY_BTOPT)
38
39 def test_get_compression_parameters(self):
40 p = zstd.get_compression_parameters(1)
41 self.assertIsInstance(p, zstd.CompressionParameters)
31 self.assertIsInstance(p, zstd.CompressionParameters)
42
32
43 self.assertEqual(p.window_log, 19)
33 self.assertEqual(p.window_log, 19)
44
34
35 p = zstd.ZstdCompressionParameters.from_level(-4)
36 self.assertEqual(p.window_log, 19)
37 self.assertEqual(p.compress_literals, 0)
38
45 def test_members(self):
39 def test_members(self):
46 p = zstd.CompressionParameters(10, 6, 7, 4, 5, 8, 1)
40 p = zstd.ZstdCompressionParameters(window_log=10,
41 chain_log=6,
42 hash_log=7,
43 search_log=4,
44 min_match=5,
45 target_length=8,
46 compression_strategy=1)
47 self.assertEqual(p.window_log, 10)
47 self.assertEqual(p.window_log, 10)
48 self.assertEqual(p.chain_log, 6)
48 self.assertEqual(p.chain_log, 6)
49 self.assertEqual(p.hash_log, 7)
49 self.assertEqual(p.hash_log, 7)
50 self.assertEqual(p.search_log, 4)
50 self.assertEqual(p.search_log, 4)
51 self.assertEqual(p.search_length, 5)
51 self.assertEqual(p.min_match, 5)
52 self.assertEqual(p.target_length, 8)
52 self.assertEqual(p.target_length, 8)
53 self.assertEqual(p.strategy, 1)
53 self.assertEqual(p.compression_strategy, 1)
54
55 p = zstd.ZstdCompressionParameters(compression_level=2)
56 self.assertEqual(p.compression_level, 2)
57
58 p = zstd.ZstdCompressionParameters(threads=4)
59 self.assertEqual(p.threads, 4)
60
61 p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576,
62 overlap_size_log=6)
63 self.assertEqual(p.threads, 2)
64 self.assertEqual(p.job_size, 1048576)
65 self.assertEqual(p.overlap_size_log, 6)
66
67 p = zstd.ZstdCompressionParameters(compression_level=2)
68 self.assertEqual(p.compress_literals, 1)
69
70 p = zstd.ZstdCompressionParameters(compress_literals=False)
71 self.assertEqual(p.compress_literals, 0)
72
73 p = zstd.ZstdCompressionParameters(compression_level=-1)
74 self.assertEqual(p.compression_level, -1)
75 self.assertEqual(p.compress_literals, 0)
76
77 p = zstd.ZstdCompressionParameters(compression_level=-2, compress_literals=True)
78 self.assertEqual(p.compression_level, -2)
79 self.assertEqual(p.compress_literals, 1)
80
81 p = zstd.ZstdCompressionParameters(force_max_window=True)
82 self.assertEqual(p.force_max_window, 1)
83
84 p = zstd.ZstdCompressionParameters(enable_ldm=True)
85 self.assertEqual(p.enable_ldm, 1)
86
87 p = zstd.ZstdCompressionParameters(ldm_hash_log=7)
88 self.assertEqual(p.ldm_hash_log, 7)
89
90 p = zstd.ZstdCompressionParameters(ldm_min_match=6)
91 self.assertEqual(p.ldm_min_match, 6)
92
93 p = zstd.ZstdCompressionParameters(ldm_bucket_size_log=7)
94 self.assertEqual(p.ldm_bucket_size_log, 7)
95
96 p = zstd.ZstdCompressionParameters(ldm_hash_every_log=8)
97 self.assertEqual(p.ldm_hash_every_log, 8)
54
98
55 def test_estimated_compression_context_size(self):
99 def test_estimated_compression_context_size(self):
56 p = zstd.CompressionParameters(20, 16, 17, 1, 5, 16, zstd.STRATEGY_DFAST)
100 p = zstd.ZstdCompressionParameters(window_log=20,
101 chain_log=16,
102 hash_log=17,
103 search_log=1,
104 min_match=5,
105 target_length=16,
106 compression_strategy=zstd.STRATEGY_DFAST)
57
107
58 # 32-bit has slightly different values from 64-bit.
108 # 32-bit has slightly different values from 64-bit.
59 self.assertAlmostEqual(p.estimated_compression_context_size(), 1287076,
109 self.assertAlmostEqual(p.estimated_compression_context_size(), 1294072,
60 delta=110)
110 delta=250)
61
111
62
112
63 @make_cffi
113 @make_cffi
@@ -66,8 +116,18 b' class TestFrameParameters(unittest.TestC'
66 with self.assertRaises(TypeError):
116 with self.assertRaises(TypeError):
67 zstd.get_frame_parameters(None)
117 zstd.get_frame_parameters(None)
68
118
69 with self.assertRaises(TypeError):
119 # Python 3 doesn't appear to convert unicode to Py_buffer.
70 zstd.get_frame_parameters(u'foobarbaz')
120 if sys.version_info[0] >= 3:
121 with self.assertRaises(TypeError):
122 zstd.get_frame_parameters(u'foobarbaz')
123 else:
124 # CPython will convert unicode to Py_buffer. But CFFI won't.
125 if zstd.backend == 'cffi':
126 with self.assertRaises(TypeError):
127 zstd.get_frame_parameters(u'foobarbaz')
128 else:
129 with self.assertRaises(zstd.ZstdError):
130 zstd.get_frame_parameters(u'foobarbaz')
71
131
72 def test_invalid_input_sizes(self):
132 def test_invalid_input_sizes(self):
73 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
133 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
@@ -82,21 +142,21 b' class TestFrameParameters(unittest.TestC'
82
142
83 def test_attributes(self):
143 def test_attributes(self):
84 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
144 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
85 self.assertEqual(params.content_size, 0)
145 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
86 self.assertEqual(params.window_size, 1024)
146 self.assertEqual(params.window_size, 1024)
87 self.assertEqual(params.dict_id, 0)
147 self.assertEqual(params.dict_id, 0)
88 self.assertFalse(params.has_checksum)
148 self.assertFalse(params.has_checksum)
89
149
90 # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte.
150 # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte.
91 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
151 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
92 self.assertEqual(params.content_size, 0)
152 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
93 self.assertEqual(params.window_size, 1024)
153 self.assertEqual(params.window_size, 1024)
94 self.assertEqual(params.dict_id, 255)
154 self.assertEqual(params.dict_id, 255)
95 self.assertFalse(params.has_checksum)
155 self.assertFalse(params.has_checksum)
96
156
97 # Lowest 3rd bit indicates if checksum is present.
157 # Lowest 3rd bit indicates if checksum is present.
98 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
158 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
99 self.assertEqual(params.content_size, 0)
159 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
100 self.assertEqual(params.window_size, 1024)
160 self.assertEqual(params.window_size, 1024)
101 self.assertEqual(params.dict_id, 0)
161 self.assertEqual(params.dict_id, 0)
102 self.assertTrue(params.has_checksum)
162 self.assertTrue(params.has_checksum)
@@ -110,7 +170,7 b' class TestFrameParameters(unittest.TestC'
110
170
111 # Window descriptor is 2nd byte after frame header.
171 # Window descriptor is 2nd byte after frame header.
112 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
172 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
113 self.assertEqual(params.content_size, 0)
173 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
114 self.assertEqual(params.window_size, 262144)
174 self.assertEqual(params.window_size, 262144)
115 self.assertEqual(params.dict_id, 0)
175 self.assertEqual(params.dict_id, 0)
116 self.assertFalse(params.has_checksum)
176 self.assertFalse(params.has_checksum)
@@ -121,3 +181,22 b' class TestFrameParameters(unittest.TestC'
121 self.assertEqual(params.window_size, 262144)
181 self.assertEqual(params.window_size, 262144)
122 self.assertEqual(params.dict_id, 15)
182 self.assertEqual(params.dict_id, 15)
123 self.assertTrue(params.has_checksum)
183 self.assertTrue(params.has_checksum)
184
185 def test_input_types(self):
186 v = zstd.FRAME_HEADER + b'\x00\x00'
187
188 mutable_array = bytearray(len(v))
189 mutable_array[:] = v
190
191 sources = [
192 memoryview(v),
193 bytearray(v),
194 mutable_array,
195 ]
196
197 for source in sources:
198 params = zstd.get_frame_parameters(source)
199 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
200 self.assertEqual(params.window_size, 1024)
201 self.assertEqual(params.dict_id, 0)
202 self.assertFalse(params.has_checksum)
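The comments in these tests spell out the frame header layout that get_frame_parameters() decodes: the low two bits of the descriptor byte select the width of the dictionary-ID field, bit 2 flags a checksum, and the window descriptor byte follows. A consolidated worked example over the same hand-built headers:

import zstandard as zstd

# Descriptor 0x00, window descriptor 0x00: no dict ID, no checksum, 1 KiB window.
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
assert params.content_size == zstd.CONTENTSIZE_UNKNOWN
assert (params.window_size, params.dict_id, params.has_checksum) == (1024, 0, False)

# Descriptor 0x01: a 1-byte dictionary ID (here 0xff) follows the window descriptor.
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
assert params.dict_id == 255

# Descriptor 0x04: the checksum flag is set.
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
assert params.has_checksum

# Window descriptor 0x40 selects a 256 KiB window.
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
assert params.window_size == 262144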
@@ -1,10 +1,7 b''
1 import io
1 import io
2 import os
2 import os
3
3 import sys
4 try:
4 import unittest
5 import unittest2 as unittest
6 except ImportError:
7 import unittest
8
5
9 try:
6 try:
10 import hypothesis
7 import hypothesis
@@ -12,7 +9,7 b' try:'
12 except ImportError:
9 except ImportError:
13 raise unittest.SkipTest('hypothesis not available')
10 raise unittest.SkipTest('hypothesis not available')
14
11
15 import zstd
12 import zstandard as zstd
16
13
17 from .common import (
14 from .common import (
18 make_cffi,
15 make_cffi,
@@ -28,16 +25,17 b' s_hashlog = strategies.integers(min_valu'
28 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
25 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
29 max_value=zstd.SEARCHLOG_MAX)
26 max_value=zstd.SEARCHLOG_MAX)
30 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
27 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
31 max_value=zstd.SEARCHLENGTH_MAX)
28 max_value=zstd.SEARCHLENGTH_MAX)
32 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
29 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
33 max_value=zstd.TARGETLENGTH_MAX)
30 max_value=2**32)
34 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
31 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
35 zstd.STRATEGY_DFAST,
32 zstd.STRATEGY_DFAST,
36 zstd.STRATEGY_GREEDY,
33 zstd.STRATEGY_GREEDY,
37 zstd.STRATEGY_LAZY,
34 zstd.STRATEGY_LAZY,
38 zstd.STRATEGY_LAZY2,
35 zstd.STRATEGY_LAZY2,
39 zstd.STRATEGY_BTLAZY2,
36 zstd.STRATEGY_BTLAZY2,
40 zstd.STRATEGY_BTOPT))
37 zstd.STRATEGY_BTOPT,
38 zstd.STRATEGY_BTULTRA))
41
39
42
40
43 @make_cffi
41 @make_cffi
@@ -47,24 +45,17 b' class TestCompressionParametersHypothesi'
47 s_searchlength, s_targetlength, s_strategy)
45 s_searchlength, s_targetlength, s_strategy)
48 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
46 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
49 searchlength, targetlength, strategy):
47 searchlength, targetlength, strategy):
50 # ZSTD_checkCParams moves the goal posts on us from what's advertised
48 zstd.ZstdCompressionParameters(window_log=windowlog,
51 # in the constants. So move along with them.
49 chain_log=chainlog,
52 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
50 hash_log=hashlog,
53 searchlength += 1
51 search_log=searchlog,
54 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
52 min_match=searchlength,
55 searchlength -= 1
53 target_length=targetlength,
56
54 compression_strategy=strategy)
57 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
58 searchlog, searchlength,
59 targetlength, strategy)
60
61 cctx = zstd.ZstdCompressor(compression_params=p)
62 with cctx.write_to(io.BytesIO()):
63 pass
64
55
65 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
56 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
66 s_searchlength, s_targetlength, s_strategy)
57 s_searchlength, s_targetlength, s_strategy)
67 def test_estimate_compression_context_size(self, windowlog, chainlog,
58 def test_estimated_compression_context_size(self, windowlog, chainlog,
68 hashlog, searchlog,
59 hashlog, searchlog,
69 searchlength, targetlength,
60 searchlength, targetlength,
70 strategy):
61 strategy):
@@ -73,7 +64,12 b' class TestCompressionParametersHypothesi'
73 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
64 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
74 searchlength -= 1
65 searchlength -= 1
75
66
76 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
67 p = zstd.ZstdCompressionParameters(window_log=windowlog,
77 searchlog, searchlength,
68 chain_log=chainlog,
78 targetlength, strategy)
69 hash_log=hashlog,
79 size = zstd.estimate_compression_context_size(p)
70 search_log=searchlog,
71 min_match=searchlength,
72 target_length=targetlength,
73 compression_strategy=strategy)
74 size = p.estimated_compression_context_size()
75
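CompressionParameters is now ZstdCompressionParameters, built from keyword arguments; from_level() maps a compression level onto the low-level fields and the size estimate moved onto the object itself. A short sketch of the constructor and accessors these tests rely on:

import zstandard as zstd

# Derive tuning parameters from a level and inspect them.
by_level = zstd.ZstdCompressionParameters.from_level(1)
assert by_level.window_log == 19

# Or spell the fields out explicitly.
params = zstd.ZstdCompressionParameters(window_log=20,
                                        chain_log=16,
                                        hash_log=17,
                                        search_log=1,
                                        min_match=5,
                                        target_length=16,
                                        compression_strategy=zstd.STRATEGY_DFAST)

# Rough memory needed by a compression context built from these parameters.
print(params.estimated_compression_context_size())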
@@ -1,16 +1,14 b''
1 import io
1 import io
2 import os
2 import random
3 import random
3 import struct
4 import struct
4 import sys
5 import sys
6 import unittest
5
7
6 try:
8 import zstandard as zstd
7 import unittest2 as unittest
8 except ImportError:
9 import unittest
10
11 import zstd
12
9
13 from .common import (
10 from .common import (
11 generate_samples,
14 make_cffi,
12 make_cffi,
15 OpCountingBytesIO,
13 OpCountingBytesIO,
16 )
14 )
@@ -23,35 +21,124 b' else:'
23
21
24
22
25 @make_cffi
23 @make_cffi
24 class TestFrameHeaderSize(unittest.TestCase):
25 def test_empty(self):
26 with self.assertRaisesRegexp(
27 zstd.ZstdError, 'could not determine frame header size: Src size '
28 'is incorrect'):
29 zstd.frame_header_size(b'')
30
31 def test_too_small(self):
32 with self.assertRaisesRegexp(
33 zstd.ZstdError, 'could not determine frame header size: Src size '
34 'is incorrect'):
35 zstd.frame_header_size(b'foob')
36
37 def test_basic(self):
38 # It doesn't matter that it isn't a valid frame.
39 self.assertEqual(zstd.frame_header_size(b'long enough but no magic'), 6)
40
41
42 @make_cffi
43 class TestFrameContentSize(unittest.TestCase):
44 def test_empty(self):
45 with self.assertRaisesRegexp(zstd.ZstdError,
46 'error when determining content size'):
47 zstd.frame_content_size(b'')
48
49 def test_too_small(self):
50 with self.assertRaisesRegexp(zstd.ZstdError,
51 'error when determining content size'):
52 zstd.frame_content_size(b'foob')
53
54 def test_bad_frame(self):
55 with self.assertRaisesRegexp(zstd.ZstdError,
56 'error when determining content size'):
57 zstd.frame_content_size(b'invalid frame header')
58
59 def test_unknown(self):
60 cctx = zstd.ZstdCompressor(write_content_size=False)
61 frame = cctx.compress(b'foobar')
62
63 self.assertEqual(zstd.frame_content_size(frame), -1)
64
65 def test_empty_frame(self):
66 cctx = zstd.ZstdCompressor()
67 frame = cctx.compress(b'')
68
69 self.assertEqual(zstd.frame_content_size(frame), 0)
70
71 def test_basic(self):
72 cctx = zstd.ZstdCompressor()
73 frame = cctx.compress(b'foobar')
74
75 self.assertEqual(zstd.frame_content_size(frame), 6)
76
77
78 @make_cffi
79 class TestDecompressor(unittest.TestCase):
80 def test_memory_size(self):
81 dctx = zstd.ZstdDecompressor()
82
83 self.assertGreater(dctx.memory_size(), 100)
84
85
86 @make_cffi
26 class TestDecompressor_decompress(unittest.TestCase):
87 class TestDecompressor_decompress(unittest.TestCase):
27 def test_empty_input(self):
88 def test_empty_input(self):
28 dctx = zstd.ZstdDecompressor()
89 dctx = zstd.ZstdDecompressor()
29
90
30 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
91 with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'):
31 dctx.decompress(b'')
92 dctx.decompress(b'')
32
93
33 def test_invalid_input(self):
94 def test_invalid_input(self):
34 dctx = zstd.ZstdDecompressor()
95 dctx = zstd.ZstdDecompressor()
35
96
36 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
97 with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'):
37 dctx.decompress(b'foobar')
98 dctx.decompress(b'foobar')
38
99
100 def test_input_types(self):
101 cctx = zstd.ZstdCompressor(level=1)
102 compressed = cctx.compress(b'foo')
103
104 mutable_array = bytearray(len(compressed))
105 mutable_array[:] = compressed
106
107 sources = [
108 memoryview(compressed),
109 bytearray(compressed),
110 mutable_array,
111 ]
112
113 dctx = zstd.ZstdDecompressor()
114 for source in sources:
115 self.assertEqual(dctx.decompress(source), b'foo')
116
39 def test_no_content_size_in_frame(self):
117 def test_no_content_size_in_frame(self):
40 cctx = zstd.ZstdCompressor(write_content_size=False)
118 cctx = zstd.ZstdCompressor(write_content_size=False)
41 compressed = cctx.compress(b'foobar')
119 compressed = cctx.compress(b'foobar')
42
120
43 dctx = zstd.ZstdDecompressor()
121 dctx = zstd.ZstdDecompressor()
44 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
122 with self.assertRaisesRegexp(zstd.ZstdError, 'could not determine content size in frame header'):
45 dctx.decompress(compressed)
123 dctx.decompress(compressed)
46
124
47 def test_content_size_present(self):
125 def test_content_size_present(self):
48 cctx = zstd.ZstdCompressor(write_content_size=True)
126 cctx = zstd.ZstdCompressor()
49 compressed = cctx.compress(b'foobar')
127 compressed = cctx.compress(b'foobar')
50
128
51 dctx = zstd.ZstdDecompressor()
129 dctx = zstd.ZstdDecompressor()
52 decompressed = dctx.decompress(compressed)
130 decompressed = dctx.decompress(compressed)
53 self.assertEqual(decompressed, b'foobar')
131 self.assertEqual(decompressed, b'foobar')
54
132
133 def test_empty_roundtrip(self):
134 cctx = zstd.ZstdCompressor()
135 compressed = cctx.compress(b'')
136
137 dctx = zstd.ZstdDecompressor()
138 decompressed = dctx.decompress(compressed)
139
140 self.assertEqual(decompressed, b'')
141
55 def test_max_output_size(self):
142 def test_max_output_size(self):
56 cctx = zstd.ZstdCompressor(write_content_size=False)
143 cctx = zstd.ZstdCompressor(write_content_size=False)
57 source = b'foobar' * 256
144 source = b'foobar' * 256
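frame_header_size() and frame_content_size() are new module-level helpers: the former reports how many bytes the frame header occupies, the latter the decompressed size recorded in it, or -1 when none was written. One-shot decompress() needs that recorded size, otherwise the caller must bound the output; a sketch of the interplay:

import zstandard as zstd

dctx = zstd.ZstdDecompressor()

# Content size is written by default, so decompress() can size its buffer.
frame = zstd.ZstdCompressor().compress(b'foobar')
assert zstd.frame_content_size(frame) == 6
print(zstd.frame_header_size(frame))  # bytes occupied by the frame header
assert dctx.decompress(frame) == b'foobar'

# Without a recorded content size, an explicit output cap is required.
headerless = zstd.ZstdCompressor(write_content_size=False).compress(b'foobar')
assert zstd.frame_content_size(headerless) == -1
assert dctx.decompress(headerless, max_output_size=len(b'foobar')) == b'foobar'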
@@ -63,7 +150,8 b' class TestDecompressor_decompress(unitte'
63 self.assertEqual(decompressed, source)
150 self.assertEqual(decompressed, source)
64
151
65 # Input size - 1 fails
152 # Input size - 1 fails
66 with self.assertRaisesRegexp(zstd.ZstdError, 'Destination buffer is too small'):
153 with self.assertRaisesRegexp(zstd.ZstdError,
154 'decompression error: did not decompress full frame'):
67 dctx.decompress(compressed, max_output_size=len(source) - 1)
155 dctx.decompress(compressed, max_output_size=len(source) - 1)
68
156
69 # Input size + 1 works
157 # Input size + 1 works
@@ -94,7 +182,7 b' class TestDecompressor_decompress(unitte'
94 d = zstd.train_dictionary(8192, samples)
182 d = zstd.train_dictionary(8192, samples)
95
183
96 orig = b'foobar' * 16384
184 orig = b'foobar' * 16384
97 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True)
185 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
98 compressed = cctx.compress(orig)
186 compressed = cctx.compress(orig)
99
187
100 dctx = zstd.ZstdDecompressor(dict_data=d)
188 dctx = zstd.ZstdDecompressor(dict_data=d)
@@ -113,7 +201,7 b' class TestDecompressor_decompress(unitte'
113
201
114 sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192)
202 sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192)
115 compressed = []
203 compressed = []
116 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True)
204 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
117 for source in sources:
205 for source in sources:
118 compressed.append(cctx.compress(source))
206 compressed.append(cctx.compress(source))
119
207
@@ -122,6 +210,21 b' class TestDecompressor_decompress(unitte'
122 decompressed = dctx.decompress(compressed[i])
210 decompressed = dctx.decompress(compressed[i])
123 self.assertEqual(decompressed, sources[i])
211 self.assertEqual(decompressed, sources[i])
124
212
213 def test_max_window_size(self):
214 with open(__file__, 'rb') as fh:
215 source = fh.read()
216
217 # If we write a content size, the decompressor engages single pass
218 # mode and the window size doesn't come into play.
219 cctx = zstd.ZstdCompressor(write_content_size=False)
220 frame = cctx.compress(source)
221
222 dctx = zstd.ZstdDecompressor(max_window_size=1)
223
224 with self.assertRaisesRegexp(
225 zstd.ZstdError, 'decompression error: Frame requires too much memory'):
226 dctx.decompress(frame, max_output_size=len(source))
227
125
228
126 @make_cffi
229 @make_cffi
127 class TestDecompressor_copy_stream(unittest.TestCase):
230 class TestDecompressor_copy_stream(unittest.TestCase):
@@ -186,6 +289,211 b' class TestDecompressor_copy_stream(unitt'
186
289
187
290
188 @make_cffi
291 @make_cffi
292 class TestDecompressor_stream_reader(unittest.TestCase):
293 def test_context_manager(self):
294 dctx = zstd.ZstdDecompressor()
295
296 reader = dctx.stream_reader(b'foo')
297 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
298 reader.read(1)
299
300 with dctx.stream_reader(b'foo') as reader:
301 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
302 with reader as reader2:
303 pass
304
305 def test_not_implemented(self):
306 dctx = zstd.ZstdDecompressor()
307
308 with dctx.stream_reader(b'foo') as reader:
309 with self.assertRaises(NotImplementedError):
310 reader.readline()
311
312 with self.assertRaises(NotImplementedError):
313 reader.readlines()
314
315 with self.assertRaises(NotImplementedError):
316 reader.readall()
317
318 with self.assertRaises(NotImplementedError):
319 iter(reader)
320
321 with self.assertRaises(NotImplementedError):
322 next(reader)
323
324 with self.assertRaises(io.UnsupportedOperation):
325 reader.write(b'foo')
326
327 with self.assertRaises(io.UnsupportedOperation):
328 reader.writelines([])
329
330 def test_constant_methods(self):
331 dctx = zstd.ZstdDecompressor()
332
333 with dctx.stream_reader(b'foo') as reader:
334 self.assertTrue(reader.readable())
335 self.assertFalse(reader.writable())
336 self.assertTrue(reader.seekable())
337 self.assertFalse(reader.isatty())
338 self.assertIsNone(reader.flush())
339
340 def test_read_closed(self):
341 dctx = zstd.ZstdDecompressor()
342
343 with dctx.stream_reader(b'foo') as reader:
344 reader.close()
345 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
346 reader.read(1)
347
348 def test_bad_read_size(self):
349 dctx = zstd.ZstdDecompressor()
350
351 with dctx.stream_reader(b'foo') as reader:
352 with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'):
353 reader.read(-1)
354
355 with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'):
356 reader.read(0)
357
358 def test_read_buffer(self):
359 cctx = zstd.ZstdCompressor()
360
361 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
362 frame = cctx.compress(source)
363
364 dctx = zstd.ZstdDecompressor()
365
366 with dctx.stream_reader(frame) as reader:
367 self.assertEqual(reader.tell(), 0)
368
369 # We should get entire frame in one read.
370 result = reader.read(8192)
371 self.assertEqual(result, source)
372 self.assertEqual(reader.tell(), len(source))
373
374 # Read after EOF should return empty bytes.
375 self.assertEqual(reader.read(), b'')
376 self.assertEqual(reader.tell(), len(result))
377
378 self.assertTrue(reader.closed())
379
380 def test_read_buffer_small_chunks(self):
381 cctx = zstd.ZstdCompressor()
382 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
383 frame = cctx.compress(source)
384
385 dctx = zstd.ZstdDecompressor()
386 chunks = []
387
388 with dctx.stream_reader(frame, read_size=1) as reader:
389 while True:
390 chunk = reader.read(1)
391 if not chunk:
392 break
393
394 chunks.append(chunk)
395 self.assertEqual(reader.tell(), sum(map(len, chunks)))
396
397 self.assertEqual(b''.join(chunks), source)
398
399 def test_read_stream(self):
400 cctx = zstd.ZstdCompressor()
401 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
402 frame = cctx.compress(source)
403
404 dctx = zstd.ZstdDecompressor()
405 with dctx.stream_reader(io.BytesIO(frame)) as reader:
406 self.assertEqual(reader.tell(), 0)
407
408 chunk = reader.read(8192)
409 self.assertEqual(chunk, source)
410 self.assertEqual(reader.tell(), len(source))
411 self.assertEqual(reader.read(), b'')
412 self.assertEqual(reader.tell(), len(source))
413
414 def test_read_stream_small_chunks(self):
415 cctx = zstd.ZstdCompressor()
416 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
417 frame = cctx.compress(source)
418
419 dctx = zstd.ZstdDecompressor()
420 chunks = []
421
422 with dctx.stream_reader(io.BytesIO(frame), read_size=1) as reader:
423 while True:
424 chunk = reader.read(1)
425 if not chunk:
426 break
427
428 chunks.append(chunk)
429 self.assertEqual(reader.tell(), sum(map(len, chunks)))
430
431 self.assertEqual(b''.join(chunks), source)
432
433 def test_read_after_exit(self):
434 cctx = zstd.ZstdCompressor()
435 frame = cctx.compress(b'foo' * 60)
436
437 dctx = zstd.ZstdDecompressor()
438
439 with dctx.stream_reader(frame) as reader:
440 while reader.read(16):
441 pass
442
443 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
444 reader.read(10)
445
446 def test_illegal_seeks(self):
447 cctx = zstd.ZstdCompressor()
448 frame = cctx.compress(b'foo' * 60)
449
450 dctx = zstd.ZstdDecompressor()
451
452 with dctx.stream_reader(frame) as reader:
453 with self.assertRaisesRegexp(ValueError,
454 'cannot seek to negative position'):
455 reader.seek(-1, os.SEEK_SET)
456
457 reader.read(1)
458
459 with self.assertRaisesRegexp(
460 ValueError, 'cannot seek zstd decompression stream backwards'):
461 reader.seek(0, os.SEEK_SET)
462
463 with self.assertRaisesRegexp(
464 ValueError, 'cannot seek zstd decompression stream backwards'):
465 reader.seek(-1, os.SEEK_CUR)
466
467 with self.assertRaisesRegexp(
468 ValueError,
469 'zstd decompression streams cannot be seeked with SEEK_END'):
470 reader.seek(0, os.SEEK_END)
471
472 reader.close()
473
474 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
475 reader.seek(4, os.SEEK_SET)
476
477 with self.assertRaisesRegexp(
478 zstd.ZstdError, 'seek\(\) must be called from an active context'):
479 reader.seek(0)
480
481 def test_seek(self):
482 source = b'foobar' * 60
483 cctx = zstd.ZstdCompressor()
484 frame = cctx.compress(source)
485
486 dctx = zstd.ZstdDecompressor()
487
488 with dctx.stream_reader(frame) as reader:
489 reader.seek(3)
490 self.assertEqual(reader.read(3), b'bar')
491
492 reader.seek(4, os.SEEK_CUR)
493 self.assertEqual(reader.read(2), b'ar')
494
495
496 @make_cffi
189 class TestDecompressor_decompressobj(unittest.TestCase):
497 class TestDecompressor_decompressobj(unittest.TestCase):
190 def test_simple(self):
498 def test_simple(self):
191 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
499 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
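ZstdDecompressor.stream_reader(), added above, exposes decompressed data through a read-only, forward-only stream: read() and tell() behave as usual, while seek() may only move forward and does so by discarding decompressed bytes. A condensed sketch of the behaviour these tests pin down:

import io
import os
import zstandard as zstd

source = b'foobar' * 60
frame = zstd.ZstdCompressor().compress(source)

dctx = zstd.ZstdDecompressor()
with dctx.stream_reader(io.BytesIO(frame)) as reader:
    assert reader.read(6) == b'foobar'
    assert reader.tell() == 6

    # Forward-only seek: skip the next 6 decompressed bytes.
    reader.seek(6, os.SEEK_CUR)

    chunks = []
    while True:
        chunk = reader.read(4096)
        if not chunk:
            break
        chunks.append(chunk)

assert b''.join(chunks) == source[12:]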
@@ -194,6 +502,24 b' class TestDecompressor_decompressobj(uni'
194 dobj = dctx.decompressobj()
502 dobj = dctx.decompressobj()
195 self.assertEqual(dobj.decompress(data), b'foobar')
503 self.assertEqual(dobj.decompress(data), b'foobar')
196
504
505 def test_input_types(self):
506 compressed = zstd.ZstdCompressor(level=1).compress(b'foo')
507
508 dctx = zstd.ZstdDecompressor()
509
510 mutable_array = bytearray(len(compressed))
511 mutable_array[:] = compressed
512
513 sources = [
514 memoryview(compressed),
515 bytearray(compressed),
516 mutable_array,
517 ]
518
519 for source in sources:
520 dobj = dctx.decompressobj()
521 self.assertEqual(dobj.decompress(source), b'foo')
522
197 def test_reuse(self):
523 def test_reuse(self):
198 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
524 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
199
525
@@ -204,22 +530,58 b' class TestDecompressor_decompressobj(uni'
204 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'):
530 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'):
205 dobj.decompress(data)
531 dobj.decompress(data)
206
532
533 def test_bad_write_size(self):
534 dctx = zstd.ZstdDecompressor()
535
536 with self.assertRaisesRegexp(ValueError, 'write_size must be positive'):
537 dctx.decompressobj(write_size=0)
538
539 def test_write_size(self):
540 source = b'foo' * 64 + b'bar' * 128
541 data = zstd.ZstdCompressor(level=1).compress(source)
542
543 dctx = zstd.ZstdDecompressor()
544
545 for i in range(128):
546 dobj = dctx.decompressobj(write_size=i + 1)
547 self.assertEqual(dobj.decompress(data), source)
207
548
208 def decompress_via_writer(data):
549 def decompress_via_writer(data):
209 buffer = io.BytesIO()
550 buffer = io.BytesIO()
210 dctx = zstd.ZstdDecompressor()
551 dctx = zstd.ZstdDecompressor()
211 with dctx.write_to(buffer) as decompressor:
552 with dctx.stream_writer(buffer) as decompressor:
212 decompressor.write(data)
553 decompressor.write(data)
213 return buffer.getvalue()
554 return buffer.getvalue()
214
555
215
556
216 @make_cffi
557 @make_cffi
217 class TestDecompressor_write_to(unittest.TestCase):
558 class TestDecompressor_stream_writer(unittest.TestCase):
218 def test_empty_roundtrip(self):
559 def test_empty_roundtrip(self):
219 cctx = zstd.ZstdCompressor()
560 cctx = zstd.ZstdCompressor()
220 empty = cctx.compress(b'')
561 empty = cctx.compress(b'')
221 self.assertEqual(decompress_via_writer(empty), b'')
562 self.assertEqual(decompress_via_writer(empty), b'')
222
563
564 def test_input_types(self):
565 cctx = zstd.ZstdCompressor(level=1)
566 compressed = cctx.compress(b'foo')
567
568 mutable_array = bytearray(len(compressed))
569 mutable_array[:] = compressed
570
571 sources = [
572 memoryview(compressed),
573 bytearray(compressed),
574 mutable_array,
575 ]
576
577 dctx = zstd.ZstdDecompressor()
578 for source in sources:
579 buffer = io.BytesIO()
580 with dctx.stream_writer(buffer) as decompressor:
581 decompressor.write(source)
582
583 self.assertEqual(buffer.getvalue(), b'foo')
584
223 def test_large_roundtrip(self):
585 def test_large_roundtrip(self):
224 chunks = []
586 chunks = []
225 for i in range(255):
587 for i in range(255):
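decompressobj() gains a write_size argument and, as before, a given instance decodes only a single frame; stream_writer(), the renamed write_to(), is the push-style counterpart that writes decompressed bytes into a sink. A sketch of both paths:

import io
import zstandard as zstd

source = b'foo' * 64 + b'bar' * 128
frame = zstd.ZstdCompressor(level=1).compress(source)

dctx = zstd.ZstdDecompressor()

# Pull-style: one decompressobj per frame, buffered internally in write_size steps.
dobj = dctx.decompressobj(write_size=64)
assert dobj.decompress(frame) == source

# Push-style: feed compressed bytes in, decompressed bytes land in the sink.
sink = io.BytesIO()
with dctx.stream_writer(sink) as decompressor:
    decompressor.write(frame)
assert sink.getvalue() == source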
@@ -242,7 +604,7 b' class TestDecompressor_write_to(unittest'
242
604
243 buffer = io.BytesIO()
605 buffer = io.BytesIO()
244 dctx = zstd.ZstdDecompressor()
606 dctx = zstd.ZstdDecompressor()
245 with dctx.write_to(buffer) as decompressor:
607 with dctx.stream_writer(buffer) as decompressor:
246 pos = 0
608 pos = 0
247 while pos < len(compressed):
609 while pos < len(compressed):
248 pos2 = pos + 8192
610 pos2 = pos + 8192
@@ -262,14 +624,14 b' class TestDecompressor_write_to(unittest'
262 orig = b'foobar' * 16384
624 orig = b'foobar' * 16384
263 buffer = io.BytesIO()
625 buffer = io.BytesIO()
264 cctx = zstd.ZstdCompressor(dict_data=d)
626 cctx = zstd.ZstdCompressor(dict_data=d)
265 with cctx.write_to(buffer) as compressor:
627 with cctx.stream_writer(buffer) as compressor:
266 self.assertEqual(compressor.write(orig), 1544)
628 self.assertEqual(compressor.write(orig), 0)
267
629
268 compressed = buffer.getvalue()
630 compressed = buffer.getvalue()
269 buffer = io.BytesIO()
631 buffer = io.BytesIO()
270
632
271 dctx = zstd.ZstdDecompressor(dict_data=d)
633 dctx = zstd.ZstdDecompressor(dict_data=d)
272 with dctx.write_to(buffer) as decompressor:
634 with dctx.stream_writer(buffer) as decompressor:
273 self.assertEqual(decompressor.write(compressed), len(orig))
635 self.assertEqual(decompressor.write(compressed), len(orig))
274
636
275 self.assertEqual(buffer.getvalue(), orig)
637 self.assertEqual(buffer.getvalue(), orig)
@@ -277,7 +639,7 b' class TestDecompressor_write_to(unittest'
277 def test_memory_size(self):
639 def test_memory_size(self):
278 dctx = zstd.ZstdDecompressor()
640 dctx = zstd.ZstdDecompressor()
279 buffer = io.BytesIO()
641 buffer = io.BytesIO()
280 with dctx.write_to(buffer) as decompressor:
642 with dctx.stream_writer(buffer) as decompressor:
281 size = decompressor.memory_size()
643 size = decompressor.memory_size()
282
644
283 self.assertGreater(size, 100000)
645 self.assertGreater(size, 100000)
@@ -286,7 +648,7 b' class TestDecompressor_write_to(unittest'
286 source = zstd.ZstdCompressor().compress(b'foobarfoobar')
648 source = zstd.ZstdCompressor().compress(b'foobarfoobar')
287 dest = OpCountingBytesIO()
649 dest = OpCountingBytesIO()
288 dctx = zstd.ZstdDecompressor()
650 dctx = zstd.ZstdDecompressor()
289 with dctx.write_to(dest, write_size=1) as decompressor:
651 with dctx.stream_writer(dest, write_size=1) as decompressor:
290 s = struct.Struct('>B')
652 s = struct.Struct('>B')
291 for c in source:
653 for c in source:
292 if not isinstance(c, str):
654 if not isinstance(c, str):
@@ -298,29 +660,29 b' class TestDecompressor_write_to(unittest'
298
660
299
661
300 @make_cffi
662 @make_cffi
301 class TestDecompressor_read_from(unittest.TestCase):
663 class TestDecompressor_read_to_iter(unittest.TestCase):
302 def test_type_validation(self):
664 def test_type_validation(self):
303 dctx = zstd.ZstdDecompressor()
665 dctx = zstd.ZstdDecompressor()
304
666
305 # Object with read() works.
667 # Object with read() works.
306 dctx.read_from(io.BytesIO())
668 dctx.read_to_iter(io.BytesIO())
307
669
308 # Buffer protocol works.
670 # Buffer protocol works.
309 dctx.read_from(b'foobar')
671 dctx.read_to_iter(b'foobar')
310
672
311 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
673 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
312 b''.join(dctx.read_from(True))
674 b''.join(dctx.read_to_iter(True))
313
675
314 def test_empty_input(self):
676 def test_empty_input(self):
315 dctx = zstd.ZstdDecompressor()
677 dctx = zstd.ZstdDecompressor()
316
678
317 source = io.BytesIO()
679 source = io.BytesIO()
318 it = dctx.read_from(source)
680 it = dctx.read_to_iter(source)
319 # TODO this is arguably wrong. Should get an error about missing frame foo.
681 # TODO this is arguably wrong. Should get an error about missing frame foo.
320 with self.assertRaises(StopIteration):
682 with self.assertRaises(StopIteration):
321 next(it)
683 next(it)
322
684
323 it = dctx.read_from(b'')
685 it = dctx.read_to_iter(b'')
324 with self.assertRaises(StopIteration):
686 with self.assertRaises(StopIteration):
325 next(it)
687 next(it)
326
688
@@ -328,11 +690,11 b' class TestDecompressor_read_from(unittes'
328 dctx = zstd.ZstdDecompressor()
690 dctx = zstd.ZstdDecompressor()
329
691
330 source = io.BytesIO(b'foobar')
692 source = io.BytesIO(b'foobar')
331 it = dctx.read_from(source)
693 it = dctx.read_to_iter(source)
332 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
694 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
333 next(it)
695 next(it)
334
696
335 it = dctx.read_from(b'foobar')
697 it = dctx.read_to_iter(b'foobar')
336 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
698 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
337 next(it)
699 next(it)
338
700
@@ -344,7 +706,7 b' class TestDecompressor_read_from(unittes'
344 source.seek(0)
706 source.seek(0)
345
707
346 dctx = zstd.ZstdDecompressor()
708 dctx = zstd.ZstdDecompressor()
347 it = dctx.read_from(source)
709 it = dctx.read_to_iter(source)
348
710
349 # No chunks should be emitted since there is no data.
711 # No chunks should be emitted since there is no data.
350 with self.assertRaises(StopIteration):
712 with self.assertRaises(StopIteration):
@@ -358,17 +720,17 b' class TestDecompressor_read_from(unittes'
358 dctx = zstd.ZstdDecompressor()
720 dctx = zstd.ZstdDecompressor()
359
721
360 with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
722 with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
361 b''.join(dctx.read_from(b'', skip_bytes=1, read_size=1))
723 b''.join(dctx.read_to_iter(b'', skip_bytes=1, read_size=1))
362
724
363 with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
725 with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
364 b''.join(dctx.read_from(b'foobar', skip_bytes=10))
726 b''.join(dctx.read_to_iter(b'foobar', skip_bytes=10))
365
727
366 def test_skip_bytes(self):
728 def test_skip_bytes(self):
367 cctx = zstd.ZstdCompressor(write_content_size=False)
729 cctx = zstd.ZstdCompressor(write_content_size=False)
368 compressed = cctx.compress(b'foobar')
730 compressed = cctx.compress(b'foobar')
369
731
370 dctx = zstd.ZstdDecompressor()
732 dctx = zstd.ZstdDecompressor()
371 output = b''.join(dctx.read_from(b'hdr' + compressed, skip_bytes=3))
733 output = b''.join(dctx.read_to_iter(b'hdr' + compressed, skip_bytes=3))
372 self.assertEqual(output, b'foobar')
734 self.assertEqual(output, b'foobar')
373
735
374 def test_large_output(self):
736 def test_large_output(self):
@@ -382,7 +744,7 b' class TestDecompressor_read_from(unittes'
382 compressed.seek(0)
744 compressed.seek(0)
383
745
384 dctx = zstd.ZstdDecompressor()
746 dctx = zstd.ZstdDecompressor()
385 it = dctx.read_from(compressed)
747 it = dctx.read_to_iter(compressed)
386
748
387 chunks = []
749 chunks = []
388 chunks.append(next(it))
750 chunks.append(next(it))
@@ -395,7 +757,7 b' class TestDecompressor_read_from(unittes'
395 self.assertEqual(decompressed, source.getvalue())
757 self.assertEqual(decompressed, source.getvalue())
396
758
397 # And again with buffer protocol.
759 # And again with buffer protocol.
398 it = dctx.read_from(compressed.getvalue())
760 it = dctx.read_to_iter(compressed.getvalue())
399 chunks = []
761 chunks = []
400 chunks.append(next(it))
762 chunks.append(next(it))
401 chunks.append(next(it))
763 chunks.append(next(it))
@@ -406,12 +768,13 b' class TestDecompressor_read_from(unittes'
406 decompressed = b''.join(chunks)
768 decompressed = b''.join(chunks)
407 self.assertEqual(decompressed, source.getvalue())
769 self.assertEqual(decompressed, source.getvalue())
408
770
771 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
409 def test_large_input(self):
772 def test_large_input(self):
410 bytes = list(struct.Struct('>B').pack(i) for i in range(256))
773 bytes = list(struct.Struct('>B').pack(i) for i in range(256))
411 compressed = io.BytesIO()
774 compressed = io.BytesIO()
412 input_size = 0
775 input_size = 0
413 cctx = zstd.ZstdCompressor(level=1)
776 cctx = zstd.ZstdCompressor(level=1)
414 with cctx.write_to(compressed) as compressor:
777 with cctx.stream_writer(compressed) as compressor:
415 while True:
778 while True:
416 compressor.write(random.choice(bytes))
779 compressor.write(random.choice(bytes))
417 input_size += 1
780 input_size += 1
@@ -426,7 +789,7 b' class TestDecompressor_read_from(unittes'
426 zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE)
789 zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE)
427
790
428 dctx = zstd.ZstdDecompressor()
791 dctx = zstd.ZstdDecompressor()
429 it = dctx.read_from(compressed)
792 it = dctx.read_to_iter(compressed)
430
793
431 chunks = []
794 chunks = []
432 chunks.append(next(it))
795 chunks.append(next(it))
@@ -440,7 +803,7 b' class TestDecompressor_read_from(unittes'
440 self.assertEqual(len(decompressed), input_size)
803 self.assertEqual(len(decompressed), input_size)
441
804
442 # And again with buffer protocol.
805 # And again with buffer protocol.
443 it = dctx.read_from(compressed.getvalue())
806 it = dctx.read_to_iter(compressed.getvalue())
444
807
445 chunks = []
808 chunks = []
446 chunks.append(next(it))
809 chunks.append(next(it))
@@ -460,7 +823,7 b' class TestDecompressor_read_from(unittes'
460 source = io.BytesIO()
823 source = io.BytesIO()
461
824
462 compressed = io.BytesIO()
825 compressed = io.BytesIO()
463 with cctx.write_to(compressed) as compressor:
826 with cctx.stream_writer(compressed) as compressor:
464 for i in range(256):
827 for i in range(256):
465 chunk = b'\0' * 1024
828 chunk = b'\0' * 1024
466 compressor.write(chunk)
829 compressor.write(chunk)
@@ -473,17 +836,34 b' class TestDecompressor_read_from(unittes'
473 self.assertEqual(simple, source.getvalue())
836 self.assertEqual(simple, source.getvalue())
474
837
475 compressed.seek(0)
838 compressed.seek(0)
476 streamed = b''.join(dctx.read_from(compressed))
839 streamed = b''.join(dctx.read_to_iter(compressed))
477 self.assertEqual(streamed, source.getvalue())
840 self.assertEqual(streamed, source.getvalue())
478
841
479 def test_read_write_size(self):
842 def test_read_write_size(self):
480 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar'))
843 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar'))
481 dctx = zstd.ZstdDecompressor()
844 dctx = zstd.ZstdDecompressor()
482 for chunk in dctx.read_from(source, read_size=1, write_size=1):
845 for chunk in dctx.read_to_iter(source, read_size=1, write_size=1):
483 self.assertEqual(len(chunk), 1)
846 self.assertEqual(len(chunk), 1)
484
847
485 self.assertEqual(source._read_count, len(source.getvalue()))
848 self.assertEqual(source._read_count, len(source.getvalue()))
486
849
850 def test_magic_less(self):
851 params = zstd.CompressionParameters.from_level(
852 1, format=zstd.FORMAT_ZSTD1_MAGICLESS)
853 cctx = zstd.ZstdCompressor(compression_params=params)
854 frame = cctx.compress(b'foobar')
855
856 self.assertNotEqual(frame[0:4], b'\x28\xb5\x2f\xfd')
857
858 dctx = zstd.ZstdDecompressor()
859 with self.assertRaisesRegexp(
860 zstd.ZstdError, 'error determining content size from frame header'):
861 dctx.decompress(frame)
862
863 dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS)
864 res = b''.join(dctx.read_to_iter(frame))
865 self.assertEqual(res, b'foobar')
866
487
867
488 @make_cffi
868 @make_cffi
489 class TestDecompressor_content_dict_chain(unittest.TestCase):
869 class TestDecompressor_content_dict_chain(unittest.TestCase):
@@ -511,19 +891,20 b' class TestDecompressor_content_dict_chai'
511 with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'):
891 with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'):
512 dctx.decompress_content_dict_chain([b'foo' * 8])
892 dctx.decompress_content_dict_chain([b'foo' * 8])
513
893
514 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
894 no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64)
515
895
516 with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'):
896 with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'):
517 dctx.decompress_content_dict_chain([no_size])
897 dctx.decompress_content_dict_chain([no_size])
518
898
519 # Corrupt first frame.
899 # Corrupt first frame.
520 frame = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
900 frame = zstd.ZstdCompressor().compress(b'foo' * 64)
521 frame = frame[0:12] + frame[15:]
901 frame = frame[0:12] + frame[15:]
522 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 0'):
902 with self.assertRaisesRegexp(zstd.ZstdError,
903 'chunk 0 did not decompress full frame'):
523 dctx.decompress_content_dict_chain([frame])
904 dctx.decompress_content_dict_chain([frame])
524
905
525 def test_bad_subsequent_input(self):
906 def test_bad_subsequent_input(self):
526 initial = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
907 initial = zstd.ZstdCompressor().compress(b'foo' * 64)
527
908
528 dctx = zstd.ZstdDecompressor()
909 dctx = zstd.ZstdDecompressor()
529
910
@@ -539,17 +920,17 b' class TestDecompressor_content_dict_chai'
539 with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'):
920 with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'):
540 dctx.decompress_content_dict_chain([initial, b'foo' * 8])
921 dctx.decompress_content_dict_chain([initial, b'foo' * 8])
541
922
542 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
923 no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64)
543
924
544 with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'):
925 with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'):
545 dctx.decompress_content_dict_chain([initial, no_size])
926 dctx.decompress_content_dict_chain([initial, no_size])
546
927
547 # Corrupt second frame.
928 # Corrupt second frame.
548 cctx = zstd.ZstdCompressor(write_content_size=True, dict_data=zstd.ZstdCompressionDict(b'foo' * 64))
929 cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b'foo' * 64))
549 frame = cctx.compress(b'bar' * 64)
930 frame = cctx.compress(b'bar' * 64)
550 frame = frame[0:12] + frame[15:]
931 frame = frame[0:12] + frame[15:]
551
932
552 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 1'):
933 with self.assertRaisesRegexp(zstd.ZstdError, 'chunk 1 did not decompress full frame'):
553 dctx.decompress_content_dict_chain([initial, frame])
934 dctx.decompress_content_dict_chain([initial, frame])
554
935
555 def test_simple(self):
936 def test_simple(self):
@@ -562,10 +943,10 b' class TestDecompressor_content_dict_chai'
562 ]
943 ]
563
944
564 chunks = []
945 chunks = []
565 chunks.append(zstd.ZstdCompressor(write_content_size=True).compress(original[0]))
946 chunks.append(zstd.ZstdCompressor().compress(original[0]))
566 for i, chunk in enumerate(original[1:]):
947 for i, chunk in enumerate(original[1:]):
567 d = zstd.ZstdCompressionDict(original[i])
948 d = zstd.ZstdCompressionDict(original[i])
568 cctx = zstd.ZstdCompressor(dict_data=d, write_content_size=True)
949 cctx = zstd.ZstdCompressor(dict_data=d)
569 chunks.append(cctx.compress(chunk))
950 chunks.append(cctx.compress(chunk))
570
951
571 for i in range(1, len(original)):
952 for i in range(1, len(original)):
@@ -594,7 +975,7 b' class TestDecompressor_multi_decompress_'
594 dctx.multi_decompress_to_buffer([b'foobarbaz'])
975 dctx.multi_decompress_to_buffer([b'foobarbaz'])
595
976
596 def test_list_input(self):
977 def test_list_input(self):
597 cctx = zstd.ZstdCompressor(write_content_size=True)
978 cctx = zstd.ZstdCompressor()
598
979
599 original = [b'foo' * 4, b'bar' * 6]
980 original = [b'foo' * 4, b'bar' * 6]
600 frames = [cctx.compress(d) for d in original]
981 frames = [cctx.compress(d) for d in original]
@@ -614,7 +995,7 b' class TestDecompressor_multi_decompress_'
614 self.assertEqual(len(result[1]), 18)
995 self.assertEqual(len(result[1]), 18)
615
996
616 def test_list_input_frame_sizes(self):
997 def test_list_input_frame_sizes(self):
617 cctx = zstd.ZstdCompressor(write_content_size=False)
998 cctx = zstd.ZstdCompressor()
618
999
619 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
1000 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
620 frames = [cctx.compress(d) for d in original]
1001 frames = [cctx.compress(d) for d in original]
@@ -630,7 +1011,7 b' class TestDecompressor_multi_decompress_'
630 self.assertEqual(result[i].tobytes(), data)
1011 self.assertEqual(result[i].tobytes(), data)
631
1012
632 def test_buffer_with_segments_input(self):
1013 def test_buffer_with_segments_input(self):
633 cctx = zstd.ZstdCompressor(write_content_size=True)
1014 cctx = zstd.ZstdCompressor()
634
1015
635 original = [b'foo' * 4, b'bar' * 6]
1016 original = [b'foo' * 4, b'bar' * 6]
636 frames = [cctx.compress(d) for d in original]
1017 frames = [cctx.compress(d) for d in original]
@@ -669,7 +1050,7 b' class TestDecompressor_multi_decompress_'
669 self.assertEqual(result[i].tobytes(), data)
1050 self.assertEqual(result[i].tobytes(), data)
670
1051
671 def test_buffer_with_segments_collection_input(self):
1052 def test_buffer_with_segments_collection_input(self):
672 cctx = zstd.ZstdCompressor(write_content_size=True)
1053 cctx = zstd.ZstdCompressor()
673
1054
674 original = [
1055 original = [
675 b'foo0' * 2,
1056 b'foo0' * 2,
@@ -711,8 +1092,18 b' class TestDecompressor_multi_decompress_'
711 for i in range(5):
1092 for i in range(5):
712 self.assertEqual(decompressed[i].tobytes(), original[i])
1093 self.assertEqual(decompressed[i].tobytes(), original[i])
713
1094
1095 def test_dict(self):
1096 d = zstd.train_dictionary(16384, generate_samples(), k=64, d=16)
1097
1098 cctx = zstd.ZstdCompressor(dict_data=d, level=1)
1099 frames = [cctx.compress(s) for s in generate_samples()]
1100
1101 dctx = zstd.ZstdDecompressor(dict_data=d)
1102 result = dctx.multi_decompress_to_buffer(frames)
1103 self.assertEqual([o.tobytes() for o in result], generate_samples())
1104
714 def test_multiple_threads(self):
1105 def test_multiple_threads(self):
715 cctx = zstd.ZstdCompressor(write_content_size=True)
1106 cctx = zstd.ZstdCompressor()
716
1107
717 frames = []
1108 frames = []
718 frames.extend(cctx.compress(b'x' * 64) for i in range(256))
1109 frames.extend(cctx.compress(b'x' * 64) for i in range(256))
@@ -727,15 +1118,22 b' class TestDecompressor_multi_decompress_'
727 self.assertEqual(result[256].tobytes(), b'y' * 64)
1118 self.assertEqual(result[256].tobytes(), b'y' * 64)
728
1119
729 def test_item_failure(self):
1120 def test_item_failure(self):
730 cctx = zstd.ZstdCompressor(write_content_size=True)
1121 cctx = zstd.ZstdCompressor()
731 frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)]
1122 frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)]
732
1123
733 frames[1] = frames[1] + b'extra'
1124 frames[1] = frames[1][0:15] + b'extra' + frames[1][15:]
734
1125
735 dctx = zstd.ZstdDecompressor()
1126 dctx = zstd.ZstdDecompressor()
736
1127
737 with self.assertRaisesRegexp(zstd.ZstdError, 'error decompressing item 1: Src size incorrect'):
1128 with self.assertRaisesRegexp(zstd.ZstdError,
1129 'error decompressing item 1: ('
1130 'Corrupted block|'
1131 'Destination buffer is too small)'):
738 dctx.multi_decompress_to_buffer(frames)
1132 dctx.multi_decompress_to_buffer(frames)
739
1133
740 with self.assertRaisesRegexp(zstd.ZstdError, 'error decompressing item 1: Src size incorrect'):
1134 with self.assertRaisesRegexp(zstd.ZstdError,
1135 'error decompressing item 1: ('
1136 'Corrupted block|'
1137 'Destination buffer is too small)'):
741 dctx.multi_decompress_to_buffer(frames, threads=2)
1138 dctx.multi_decompress_to_buffer(frames, threads=2)
1139
@@ -1,10 +1,6 b''
1 import io
1 import io
2 import os
2 import os
3
3 import unittest
4 try:
5 import unittest2 as unittest
6 except ImportError:
7 import unittest
8
4
9 try:
5 try:
10 import hypothesis
6 import hypothesis
@@ -12,7 +8,7 b' try:'
12 except ImportError:
8 except ImportError:
13 raise unittest.SkipTest('hypothesis not available')
9 raise unittest.SkipTest('hypothesis not available')
14
10
15 import zstd
11 import zstandard as zstd
16
12
17 from . common import (
13 from . common import (
18 make_cffi,
14 make_cffi,
@@ -22,15 +18,96 b' from . common import ('
22
18
23 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
19 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
24 @make_cffi
20 @make_cffi
25 class TestDecompressor_write_to_fuzzing(unittest.TestCase):
21 class TestDecompressor_stream_reader_fuzzing(unittest.TestCase):
22 @hypothesis.settings(
23 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
24 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
25 level=strategies.integers(min_value=1, max_value=5),
26 source_read_size=strategies.integers(1, 16384),
27 read_sizes=strategies.data())
28 def test_stream_source_read_variance(self, original, level, source_read_size,
29 read_sizes):
30 cctx = zstd.ZstdCompressor(level=level)
31 frame = cctx.compress(original)
32
33 dctx = zstd.ZstdDecompressor()
34 source = io.BytesIO(frame)
35
36 chunks = []
37 with dctx.stream_reader(source, read_size=source_read_size) as reader:
38 while True:
39 read_size = read_sizes.draw(strategies.integers(1, 16384))
40 chunk = reader.read(read_size)
41 if not chunk:
42 break
43
44 chunks.append(chunk)
45
46 self.assertEqual(b''.join(chunks), original)
47
48 @hypothesis.settings(
49 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
50 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
51 level=strategies.integers(min_value=1, max_value=5),
52 source_read_size=strategies.integers(1, 16384),
53 read_sizes=strategies.data())
54 def test_buffer_source_read_variance(self, original, level, source_read_size,
55 read_sizes):
56 cctx = zstd.ZstdCompressor(level=level)
57 frame = cctx.compress(original)
58
59 dctx = zstd.ZstdDecompressor()
60 chunks = []
61
62 with dctx.stream_reader(frame, read_size=source_read_size) as reader:
63 while True:
64 read_size = read_sizes.draw(strategies.integers(1, 16384))
65 chunk = reader.read(read_size)
66 if not chunk:
67 break
68
69 chunks.append(chunk)
70
71 self.assertEqual(b''.join(chunks), original)
72
73 @hypothesis.settings(
74 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
75 @hypothesis.given(
76 original=strategies.sampled_from(random_input_data()),
77 level=strategies.integers(min_value=1, max_value=5),
78 source_read_size=strategies.integers(1, 16384),
79 seek_amounts=strategies.data(),
80 read_sizes=strategies.data())
81 def test_relative_seeks(self, original, level, source_read_size, seek_amounts,
82 read_sizes):
83 cctx = zstd.ZstdCompressor(level=level)
84 frame = cctx.compress(original)
85
86 dctx = zstd.ZstdDecompressor()
87
88 with dctx.stream_reader(frame, read_size=source_read_size) as reader:
89 while True:
90 amount = seek_amounts.draw(strategies.integers(0, 16384))
91 reader.seek(amount, os.SEEK_CUR)
92
93 offset = reader.tell()
94 read_amount = read_sizes.draw(strategies.integers(1, 16384))
95 chunk = reader.read(read_amount)
96
97 if not chunk:
98 break
99
100 self.assertEqual(original[offset:offset + len(chunk)], chunk)
101
102
103 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
104 @make_cffi
105 class TestDecompressor_stream_writer_fuzzing(unittest.TestCase):
26 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
106 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
27 level=strategies.integers(min_value=1, max_value=5),
107 level=strategies.integers(min_value=1, max_value=5),
28 write_size=strategies.integers(min_value=1, max_value=8192),
108 write_size=strategies.integers(min_value=1, max_value=8192),
29 input_sizes=strategies.streaming(
109 input_sizes=strategies.data())
30 strategies.integers(min_value=1, max_value=4096)))
31 def test_write_size_variance(self, original, level, write_size, input_sizes):
110 def test_write_size_variance(self, original, level, write_size, input_sizes):
32 input_sizes = iter(input_sizes)
33
34 cctx = zstd.ZstdCompressor(level=level)
111 cctx = zstd.ZstdCompressor(level=level)
35 frame = cctx.compress(original)
112 frame = cctx.compress(original)
36
113
@@ -38,9 +115,10 b' class TestDecompressor_write_to_fuzzing('
38 source = io.BytesIO(frame)
115 source = io.BytesIO(frame)
39 dest = io.BytesIO()
116 dest = io.BytesIO()
40
117
41 with dctx.write_to(dest, write_size=write_size) as decompressor:
118 with dctx.stream_writer(dest, write_size=write_size) as decompressor:
42 while True:
119 while True:
43 chunk = source.read(next(input_sizes))
120 input_size = input_sizes.draw(strategies.integers(1, 4096))
121 chunk = source.read(input_size)
44 if not chunk:
122 if not chunk:
45 break
123 break
46
124
@@ -74,11 +152,8 b' class TestDecompressor_copy_stream_fuzzi'
74 class TestDecompressor_decompressobj_fuzzing(unittest.TestCase):
152 class TestDecompressor_decompressobj_fuzzing(unittest.TestCase):
75 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
153 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
76 level=strategies.integers(min_value=1, max_value=5),
154 level=strategies.integers(min_value=1, max_value=5),
77 chunk_sizes=strategies.streaming(
155 chunk_sizes=strategies.data())
78 strategies.integers(min_value=1, max_value=4096)))
79 def test_random_input_sizes(self, original, level, chunk_sizes):
156 def test_random_input_sizes(self, original, level, chunk_sizes):
80 chunk_sizes = iter(chunk_sizes)
81
82 cctx = zstd.ZstdCompressor(level=level)
157 cctx = zstd.ZstdCompressor(level=level)
83 frame = cctx.compress(original)
158 frame = cctx.compress(original)
84
159
@@ -89,7 +164,33 b' class TestDecompressor_decompressobj_fuz'
89
164
90 chunks = []
165 chunks = []
91 while True:
166 while True:
92 chunk = source.read(next(chunk_sizes))
167 chunk_size = chunk_sizes.draw(strategies.integers(1, 4096))
168 chunk = source.read(chunk_size)
169 if not chunk:
170 break
171
172 chunks.append(dobj.decompress(chunk))
173
174 self.assertEqual(b''.join(chunks), original)
175
176 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
177 level=strategies.integers(min_value=1, max_value=5),
178 write_size=strategies.integers(min_value=1,
179 max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE),
180 chunk_sizes=strategies.data())
181 def test_random_output_sizes(self, original, level, write_size, chunk_sizes):
182 cctx = zstd.ZstdCompressor(level=level)
183 frame = cctx.compress(original)
184
185 source = io.BytesIO(frame)
186
187 dctx = zstd.ZstdDecompressor()
188 dobj = dctx.decompressobj(write_size=write_size)
189
190 chunks = []
191 while True:
192 chunk_size = chunk_sizes.draw(strategies.integers(1, 4096))
193 chunk = source.read(chunk_size)
93 if not chunk:
194 if not chunk:
94 break
195 break
95
196
@@ -100,7 +201,7 b' class TestDecompressor_decompressobj_fuz'
100
201
101 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
202 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
102 @make_cffi
203 @make_cffi
103 class TestDecompressor_read_from_fuzzing(unittest.TestCase):
204 class TestDecompressor_read_to_iter_fuzzing(unittest.TestCase):
104 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
205 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
105 level=strategies.integers(min_value=1, max_value=5),
206 level=strategies.integers(min_value=1, max_value=5),
106 read_size=strategies.integers(min_value=1, max_value=4096),
207 read_size=strategies.integers(min_value=1, max_value=4096),
@@ -112,7 +213,7 b' class TestDecompressor_read_from_fuzzing'
112 source = io.BytesIO(frame)
213 source = io.BytesIO(frame)
113
214
114 dctx = zstd.ZstdDecompressor()
215 dctx = zstd.ZstdDecompressor()
115 chunks = list(dctx.read_from(source, read_size=read_size, write_size=write_size))
216 chunks = list(dctx.read_to_iter(source, read_size=read_size, write_size=write_size))
116
217
117 self.assertEqual(b''.join(chunks), original)
218 self.assertEqual(b''.join(chunks), original)
118
219
@@ -1,9 +1,6 b''
1 try:
1 import unittest
2 import unittest2 as unittest
3 except ImportError:
4 import unittest
5
2
6 import zstd
3 import zstandard as zstd
7
4
8 from . common import (
5 from . common import (
9 make_cffi,
6 make_cffi,
@@ -16,7 +13,3 b' class TestSizes(unittest.TestCase):'
16 size = zstd.estimate_decompression_context_size()
13 size = zstd.estimate_decompression_context_size()
17 self.assertGreater(size, 100000)
14 self.assertGreater(size, 100000)
18
15
19 def test_compression_size(self):
20 params = zstd.get_compression_parameters(3)
21 size = zstd.estimate_compression_context_size(params)
22 self.assertGreater(size, 100000)
@@ -1,11 +1,8 b''
1 from __future__ import unicode_literals
1 from __future__ import unicode_literals
2
2
3 try:
3 import unittest
4 import unittest2 as unittest
5 except ImportError:
6 import unittest
7
4
8 import zstd
5 import zstandard as zstd
9
6
10 from . common import (
7 from . common import (
11 make_cffi,
8 make_cffi,
@@ -15,7 +12,7 b' from . common import ('
15 @make_cffi
12 @make_cffi
16 class TestModuleAttributes(unittest.TestCase):
13 class TestModuleAttributes(unittest.TestCase):
17 def test_version(self):
14 def test_version(self):
18 self.assertEqual(zstd.ZSTD_VERSION, (1, 1, 3))
15 self.assertEqual(zstd.ZSTD_VERSION, (1, 3, 4))
19
16
20 def test_constants(self):
17 def test_constants(self):
21 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
18 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
@@ -23,6 +20,8 b' class TestModuleAttributes(unittest.Test'
23
20
24 def test_hasattr(self):
21 def test_hasattr(self):
25 attrs = (
22 attrs = (
23 'CONTENTSIZE_UNKNOWN',
24 'CONTENTSIZE_ERROR',
26 'COMPRESSION_RECOMMENDED_INPUT_SIZE',
25 'COMPRESSION_RECOMMENDED_INPUT_SIZE',
27 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE',
26 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE',
28 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
27 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
@@ -40,7 +39,9 b' class TestModuleAttributes(unittest.Test'
40 'SEARCHLENGTH_MIN',
39 'SEARCHLENGTH_MIN',
41 'SEARCHLENGTH_MAX',
40 'SEARCHLENGTH_MAX',
42 'TARGETLENGTH_MIN',
41 'TARGETLENGTH_MIN',
43 'TARGETLENGTH_MAX',
42 'LDM_MINMATCH_MIN',
43 'LDM_MINMATCH_MAX',
44 'LDM_BUCKETSIZELOG_MAX',
44 'STRATEGY_FAST',
45 'STRATEGY_FAST',
45 'STRATEGY_DFAST',
46 'STRATEGY_DFAST',
46 'STRATEGY_GREEDY',
47 'STRATEGY_GREEDY',
@@ -48,6 +49,10 b' class TestModuleAttributes(unittest.Test'
48 'STRATEGY_LAZY2',
49 'STRATEGY_LAZY2',
49 'STRATEGY_BTLAZY2',
50 'STRATEGY_BTLAZY2',
50 'STRATEGY_BTOPT',
51 'STRATEGY_BTOPT',
52 'STRATEGY_BTULTRA',
53 'DICT_TYPE_AUTO',
54 'DICT_TYPE_RAWCONTENT',
55 'DICT_TYPE_FULLDICT',
51 )
56 )
52
57
53 for a in attrs:
58 for a in attrs:
@@ -1,13 +1,11 b''
1 import struct
1 import sys
2 import sys
3 import unittest
2
4
3 try:
5 import zstandard as zstd
4 import unittest2 as unittest
5 except ImportError:
6 import unittest
7
8 import zstd
9
6
10 from . common import (
7 from . common import (
8 generate_samples,
11 make_cffi,
9 make_cffi,
12 )
10 )
13
11
@@ -30,55 +28,18 b' class TestTrainDictionary(unittest.TestC'
30 with self.assertRaises(ValueError):
28 with self.assertRaises(ValueError):
31 zstd.train_dictionary(8192, [u'foo'])
29 zstd.train_dictionary(8192, [u'foo'])
32
30
33 def test_basic(self):
31 def test_no_params(self):
34 samples = []
32 d = zstd.train_dictionary(8192, generate_samples())
35 for i in range(128):
33 self.assertIsInstance(d.dict_id(), int_type)
36 samples.append(b'foo' * 64)
37 samples.append(b'bar' * 64)
38 samples.append(b'foobar' * 64)
39 samples.append(b'baz' * 64)
40 samples.append(b'foobaz' * 64)
41 samples.append(b'bazfoo' * 64)
42
34
43 d = zstd.train_dictionary(8192, samples)
35 # The dictionary ID may be different across platforms.
44 self.assertLessEqual(len(d), 8192)
36 expected = b'\x37\xa4\x30\xec' + struct.pack('<I', d.dict_id())
45
46 dict_id = d.dict_id()
47 self.assertIsInstance(dict_id, int_type)
48
37
49 data = d.as_bytes()
38 data = d.as_bytes()
50 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
39 self.assertEqual(data[0:8], expected)
51
52 def test_set_dict_id(self):
53 samples = []
54 for i in range(128):
55 samples.append(b'foo' * 64)
56 samples.append(b'foobar' * 64)
57
58 d = zstd.train_dictionary(8192, samples, dict_id=42)
59 self.assertEqual(d.dict_id(), 42)
60
61
62 @make_cffi
63 class TestTrainCoverDictionary(unittest.TestCase):
64 def test_no_args(self):
65 with self.assertRaises(TypeError):
66 zstd.train_cover_dictionary()
67
68 def test_bad_args(self):
69 with self.assertRaises(TypeError):
70 zstd.train_cover_dictionary(8192, u'foo')
71
72 with self.assertRaises(ValueError):
73 zstd.train_cover_dictionary(8192, [u'foo'])
74
40
75 def test_basic(self):
41 def test_basic(self):
76 samples = []
42 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
77 for i in range(128):
78 samples.append(b'foo' * 64)
79 samples.append(b'foobar' * 64)
80
81 d = zstd.train_cover_dictionary(8192, samples, k=64, d=16)
82 self.assertIsInstance(d.dict_id(), int_type)
43 self.assertIsInstance(d.dict_id(), int_type)
83
44
84 data = d.as_bytes()
45 data = d.as_bytes()
@@ -88,23 +49,39 b' class TestTrainCoverDictionary(unittest.'
88 self.assertEqual(d.d, 16)
49 self.assertEqual(d.d, 16)
89
50
90 def test_set_dict_id(self):
51 def test_set_dict_id(self):
91 samples = []
52 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16,
92 for i in range(128):
53 dict_id=42)
93 samples.append(b'foo' * 64)
94 samples.append(b'foobar' * 64)
95
96 d = zstd.train_cover_dictionary(8192, samples, k=64, d=16,
97 dict_id=42)
98 self.assertEqual(d.dict_id(), 42)
54 self.assertEqual(d.dict_id(), 42)
99
55
100 def test_optimize(self):
56 def test_optimize(self):
101 samples = []
57 d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1,
102 for i in range(128):
58 d=16)
103 samples.append(b'foo' * 64)
59
104 samples.append(b'foobar' * 64)
60 self.assertEqual(d.k, 50)
61 self.assertEqual(d.d, 16)
62
63 @make_cffi
64 class TestCompressionDict(unittest.TestCase):
65 def test_bad_mode(self):
66 with self.assertRaisesRegexp(ValueError, 'invalid dictionary load mode'):
67 zstd.ZstdCompressionDict(b'foo', dict_type=42)
68
69 def test_bad_precompute_compress(self):
70 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
105
71
106 d = zstd.train_cover_dictionary(8192, samples, optimize=True,
72 with self.assertRaisesRegexp(ValueError, 'must specify one of level or '):
107 threads=-1, steps=1, d=16)
73 d.precompute_compress()
74
75 with self.assertRaisesRegexp(ValueError, 'must only specify one of level or '):
76 d.precompute_compress(level=3,
77 compression_params=zstd.CompressionParameters())
108
78
109 self.assertEqual(d.k, 16)
79 def test_precompute_compress_rawcontent(self):
110 self.assertEqual(d.d, 16)
80 d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
81 dict_type=zstd.DICT_TYPE_RAWCONTENT)
82 d.precompute_compress(level=1)
83
84 d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
85 dict_type=zstd.DICT_TYPE_FULLDICT)
86 with self.assertRaisesRegexp(zstd.ZstdError, 'unable to precompute dictionary'):
87 d.precompute_compress(level=1)
@@ -20,12 +20,6 b''
20
20
21 PyObject *ZstdError;
21 PyObject *ZstdError;
22
22
23 PyDoc_STRVAR(estimate_compression_context_size__doc__,
24 "estimate_compression_context_size(compression_parameters)\n"
25 "\n"
26 "Give the amount of memory allocated for a compression context given a\n"
27 "CompressionParameters instance");
28
29 PyDoc_STRVAR(estimate_decompression_context_size__doc__,
23 PyDoc_STRVAR(estimate_decompression_context_size__doc__,
30 "estimate_decompression_context_size()\n"
24 "estimate_decompression_context_size()\n"
31 "\n"
25 "\n"
@@ -36,11 +30,101 b' static PyObject* estimate_decompression_'
36 return PyLong_FromSize_t(ZSTD_estimateDCtxSize());
30 return PyLong_FromSize_t(ZSTD_estimateDCtxSize());
37 }
31 }
38
32
39 PyDoc_STRVAR(get_compression_parameters__doc__,
33 PyDoc_STRVAR(frame_content_size__doc__,
40 "get_compression_parameters(compression_level[, source_size[, dict_size]])\n"
34 "frame_content_size(data)\n"
41 "\n"
35 "\n"
42 "Obtains a ``CompressionParameters`` instance from a compression level and\n"
36 "Obtain the decompressed size of a frame."
43 "optional input size and dictionary size");
37 );
38
39 static PyObject* frame_content_size(PyObject* self, PyObject* args, PyObject* kwargs) {
40 static char* kwlist[] = {
41 "source",
42 NULL
43 };
44
45 Py_buffer source;
46 PyObject* result = NULL;
47 unsigned long long size;
48
49 #if PY_MAJOR_VERSION >= 3
50 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_content_size",
51 #else
52 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_content_size",
53 #endif
54 kwlist, &source)) {
55 return NULL;
56 }
57
58 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
59 PyErr_SetString(PyExc_ValueError,
60 "data buffer should be contiguous and have at most one dimension");
61 goto finally;
62 }
63
64 size = ZSTD_getFrameContentSize(source.buf, source.len);
65
66 if (size == ZSTD_CONTENTSIZE_ERROR) {
67 PyErr_SetString(ZstdError, "error when determining content size");
68 }
69 else if (size == ZSTD_CONTENTSIZE_UNKNOWN) {
70 result = PyLong_FromLong(-1);
71 }
72 else {
73 result = PyLong_FromUnsignedLongLong(size);
74 }
75
76 finally:
77 PyBuffer_Release(&source);
78
79 return result;
80 }
81
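For reference, a minimal usage sketch of the frame_content_size() helper added above, mirroring the C code's behaviour (the recorded size is returned, -1 stands for ZSTD_CONTENTSIZE_UNKNOWN, and invalid input raises ZstdError); it assumes the module is imported as zstandard, as in the test suite:

    import zstandard as zstd

    data = b'data' * 64
    frame = zstd.ZstdCompressor().compress(data)
    # compress() records the content size in the frame header by default.
    assert zstd.frame_content_size(frame) == len(data)

    # A frame written without a content size reports -1 (ZSTD_CONTENTSIZE_UNKNOWN).
    no_size = zstd.ZstdCompressor(write_content_size=False).compress(data)
    assert zstd.frame_content_size(no_size) == -1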
82 PyDoc_STRVAR(frame_header_size__doc__,
83 "frame_header_size(data)\n"
84 "\n"
85 "Obtain the size of a frame header.\n"
86 );
87
88 static PyObject* frame_header_size(PyObject* self, PyObject* args, PyObject* kwargs) {
89 static char* kwlist[] = {
90 "source",
91 NULL
92 };
93
94 Py_buffer source;
95 PyObject* result = NULL;
96 size_t zresult;
97
98 #if PY_MAJOR_VERSION >= 3
99 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_header_size",
100 #else
101 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_header_size",
102 #endif
103 kwlist, &source)) {
104 return NULL;
105 }
106
107 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
108 PyErr_SetString(PyExc_ValueError,
109 "data buffer should be contiguous and have at most one dimension");
110 goto finally;
111 }
112
113 zresult = ZSTD_frameHeaderSize(source.buf, source.len);
114 if (ZSTD_isError(zresult)) {
115 PyErr_Format(ZstdError, "could not determine frame header size: %s",
116 ZSTD_getErrorName(zresult));
117 }
118 else {
119 result = PyLong_FromSize_t(zresult);
120 }
121
122 finally:
123
124 PyBuffer_Release(&source);
125
126 return result;
127 }
44
128
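A similar sketch for frame_header_size(), which wraps ZSTD_frameHeaderSize() above; the 6-18 byte range is a property of the zstd frame format (magic number plus variable-length header fields) and is stated here as an assumption about the current format:

    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b'foobar')
    header_len = zstd.frame_header_size(frame)
    assert 6 <= header_len <= 18   # zstd frame headers span 6-18 bytes

    # Input too short to hold a header surfaces as ZstdError, per the C code above.
    try:
        zstd.frame_header_size(b'')
    except zstd.ZstdError:
        pass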
45 PyDoc_STRVAR(get_frame_parameters__doc__,
129 PyDoc_STRVAR(get_frame_parameters__doc__,
46 "get_frame_parameters(data)\n"
130 "get_frame_parameters(data)\n"
@@ -48,43 +132,48 b' PyDoc_STRVAR(get_frame_parameters__doc__'
48 "Obtains a ``FrameParameters`` instance by parsing data.\n");
132 "Obtains a ``FrameParameters`` instance by parsing data.\n");
49
133
50 PyDoc_STRVAR(train_dictionary__doc__,
134 PyDoc_STRVAR(train_dictionary__doc__,
51 "train_dictionary(dict_size, samples)\n"
135 "train_dictionary(dict_size, samples, k=None, d=None, steps=None,\n"
52 "\n"
136 " threads=None,notifications=0, dict_id=0, level=0)\n"
53 "Train a dictionary from sample data.\n"
54 "\n"
55 "A compression dictionary of size ``dict_size`` will be created from the\n"
56 "iterable of samples provided by ``samples``.\n"
57 "\n"
58 "The raw dictionary content will be returned\n");
59
60 PyDoc_STRVAR(train_cover_dictionary__doc__,
61 "train_cover_dictionary(dict_size, samples, k=None, d=None, notifications=0, dict_id=0, level=0)\n"
62 "\n"
137 "\n"
63 "Train a dictionary from sample data using the COVER algorithm.\n"
138 "Train a dictionary from sample data using the COVER algorithm.\n"
64 "\n"
139 "\n"
65 "This behaves like ``train_dictionary()`` except a different algorithm is\n"
140 "A compression dictionary of size ``dict_size`` will be created from the\n"
66 "used to create the dictionary. The algorithm has 2 parameters: ``k`` and\n"
141 "iterable of ``samples``. The raw dictionary bytes will be returned.\n"
67 "``d``. These control the *segment size* and *dmer size*. A reasonable range\n"
142 "\n"
68 "for ``k`` is ``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n"
143 "The COVER algorithm has 2 parameters: ``k`` and ``d``. These control the\n"
144 "*segment size* and *dmer size*. A reasonable range for ``k`` is\n"
145 "``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n"
69 "``d`` must be less than or equal to ``k``.\n"
146 "``d`` must be less than or equal to ``k``.\n"
147 "\n"
148 "``steps`` can be specified to control the number of steps through potential\n"
149 "values of ``k`` and ``d`` to try. ``k`` and ``d`` will only be varied if\n"
150 "those arguments are not defined. i.e. if ``d`` is ``8``, then only ``k``\n"
151 "will be varied in this mode.\n"
152 "\n"
153 "``threads`` can specify how many threads to use to test various ``k`` and\n"
154 "``d`` values. ``-1`` will use as many threads as available CPUs. By default,\n"
155 "a single thread is used.\n"
156 "\n"
157 "When ``k`` and ``d`` are not defined, default values are used and the\n"
158 "algorithm will perform multiple iterations - or steps - to try to find\n"
159 "ideal parameters. If both ``k`` and ``d`` are specified, then those values\n"
160 "will be used. ``steps`` or ``threads`` triggers optimization mode to test\n"
161 "multiple ``k`` and ``d`` variations.\n"
70 );
162 );
71
163
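A short usage sketch of the consolidated train_dictionary() API described in the docstring above; the samples here are illustrative only, and a real corpus should be representative of the data to be compressed:

    import zstandard as zstd

    samples = [b'foo' * 64, b'bar' * 64, b'foobar' * 64] * 128

    # Fixing k and d skips the parameter search; passing steps or threads
    # instead triggers the optimization mode described above.
    d = zstd.train_dictionary(8192, samples, k=64, d=16, dict_id=42)
    assert d.dict_id() == 42

    cctx = zstd.ZstdCompressor(dict_data=d)
    dctx = zstd.ZstdDecompressor(dict_data=d)
    assert dctx.decompress(cctx.compress(samples[0])) == samples[0]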
72 static char zstd_doc[] = "Interface to zstandard";
164 static char zstd_doc[] = "Interface to zstandard";
73
165
74 static PyMethodDef zstd_methods[] = {
166 static PyMethodDef zstd_methods[] = {
75 /* TODO remove since it is a method on CompressionParameters. */
76 { "estimate_compression_context_size", (PyCFunction)estimate_compression_context_size,
77 METH_VARARGS, estimate_compression_context_size__doc__ },
78 { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
167 { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
79 METH_NOARGS, estimate_decompression_context_size__doc__ },
168 METH_NOARGS, estimate_decompression_context_size__doc__ },
80 { "get_compression_parameters", (PyCFunction)get_compression_parameters,
169 { "frame_content_size", (PyCFunction)frame_content_size,
81 METH_VARARGS, get_compression_parameters__doc__ },
170 METH_VARARGS | METH_KEYWORDS, frame_content_size__doc__ },
171 { "frame_header_size", (PyCFunction)frame_header_size,
172 METH_VARARGS | METH_KEYWORDS, frame_header_size__doc__ },
82 { "get_frame_parameters", (PyCFunction)get_frame_parameters,
173 { "get_frame_parameters", (PyCFunction)get_frame_parameters,
83 METH_VARARGS, get_frame_parameters__doc__ },
174 METH_VARARGS | METH_KEYWORDS, get_frame_parameters__doc__ },
84 { "train_dictionary", (PyCFunction)train_dictionary,
175 { "train_dictionary", (PyCFunction)train_dictionary,
85 METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
176 METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
86 { "train_cover_dictionary", (PyCFunction)train_cover_dictionary,
87 METH_VARARGS | METH_KEYWORDS, train_cover_dictionary__doc__ },
88 { NULL, NULL }
177 { NULL, NULL }
89 };
178 };
90
179
@@ -94,10 +183,12 b' void compressor_module_init(PyObject* mo'
94 void compressionparams_module_init(PyObject* mod);
183 void compressionparams_module_init(PyObject* mod);
95 void constants_module_init(PyObject* mod);
184 void constants_module_init(PyObject* mod);
96 void compressiondict_module_init(PyObject* mod);
185 void compressiondict_module_init(PyObject* mod);
186 void compressionreader_module_init(PyObject* mod);
97 void compressionwriter_module_init(PyObject* mod);
187 void compressionwriter_module_init(PyObject* mod);
98 void compressoriterator_module_init(PyObject* mod);
188 void compressoriterator_module_init(PyObject* mod);
99 void decompressor_module_init(PyObject* mod);
189 void decompressor_module_init(PyObject* mod);
100 void decompressobj_module_init(PyObject* mod);
190 void decompressobj_module_init(PyObject* mod);
191 void decompressionreader_module_init(PyObject *mod);
101 void decompressionwriter_module_init(PyObject* mod);
192 void decompressionwriter_module_init(PyObject* mod);
102 void decompressoriterator_module_init(PyObject* mod);
193 void decompressoriterator_module_init(PyObject* mod);
103 void frameparams_module_init(PyObject* mod);
194 void frameparams_module_init(PyObject* mod);
@@ -118,7 +209,7 b' void zstd_module_init(PyObject* m) {'
118 We detect this mismatch here and refuse to load the module if this
209 We detect this mismatch here and refuse to load the module if this
119 scenario is detected.
210 scenario is detected.
120 */
211 */
121 if (ZSTD_VERSION_NUMBER != 10103 || ZSTD_versionNumber() != 10103) {
212 if (ZSTD_VERSION_NUMBER != 10304 || ZSTD_versionNumber() != 10304) {
122 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
213 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
123 return;
214 return;
124 }
215 }
@@ -128,16 +219,24 b' void zstd_module_init(PyObject* m) {'
128 compressiondict_module_init(m);
219 compressiondict_module_init(m);
129 compressobj_module_init(m);
220 compressobj_module_init(m);
130 compressor_module_init(m);
221 compressor_module_init(m);
222 compressionreader_module_init(m);
131 compressionwriter_module_init(m);
223 compressionwriter_module_init(m);
132 compressoriterator_module_init(m);
224 compressoriterator_module_init(m);
133 constants_module_init(m);
225 constants_module_init(m);
134 decompressor_module_init(m);
226 decompressor_module_init(m);
135 decompressobj_module_init(m);
227 decompressobj_module_init(m);
228 decompressionreader_module_init(m);
136 decompressionwriter_module_init(m);
229 decompressionwriter_module_init(m);
137 decompressoriterator_module_init(m);
230 decompressoriterator_module_init(m);
138 frameparams_module_init(m);
231 frameparams_module_init(m);
139 }
232 }
140
233
234 #if defined(__GNUC__) && (__GNUC__ >= 4)
235 # define PYTHON_ZSTD_VISIBILITY __attribute__ ((visibility ("default")))
236 #else
237 # define PYTHON_ZSTD_VISIBILITY
238 #endif
239
141 #if PY_MAJOR_VERSION >= 3
240 #if PY_MAJOR_VERSION >= 3
142 static struct PyModuleDef zstd_module = {
241 static struct PyModuleDef zstd_module = {
143 PyModuleDef_HEAD_INIT,
242 PyModuleDef_HEAD_INIT,
@@ -147,7 +246,7 b' static struct PyModuleDef zstd_module = '
147 zstd_methods
246 zstd_methods
148 };
247 };
149
248
150 PyMODINIT_FUNC PyInit_zstd(void) {
249 PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC PyInit_zstd(void) {
151 PyObject *m = PyModule_Create(&zstd_module);
250 PyObject *m = PyModule_Create(&zstd_module);
152 if (m) {
251 if (m) {
153 zstd_module_init(m);
252 zstd_module_init(m);
@@ -159,7 +258,7 b' PyMODINIT_FUNC PyInit_zstd(void) {'
159 return m;
258 return m;
160 }
259 }
161 #else
260 #else
162 PyMODINIT_FUNC initzstd(void) {
261 PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC initzstd(void) {
163 PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc);
262 PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc);
164 if (m) {
263 if (m) {
165 zstd_module_init(m);
264 zstd_module_init(m);
@@ -211,3 +310,33 b' size_t roundpow2(size_t i) {'
211
310
212 return i;
311 return i;
213 }
312 }
313
314 /* Safer version of _PyBytes_Resize().
315 *
316 * _PyBytes_Resize() only works if the refcount is 1. In some scenarios,
317 * we can get an object with a refcount > 1, even if it was just created
318 * with PyBytes_FromStringAndSize()! That's because (at least) CPython
319 * pre-allocates PyBytes instances of size 1 for every possible byte value.
320 *
321 * If non-0 is returned, obj may or may not be NULL.
322 */
323 int safe_pybytes_resize(PyObject** obj, Py_ssize_t size) {
324 PyObject* tmp;
325
326 if ((*obj)->ob_refcnt == 1) {
327 return _PyBytes_Resize(obj, size);
328 }
329
330 tmp = PyBytes_FromStringAndSize(NULL, size);
331 if (!tmp) {
332 return -1;
333 }
334
335 memcpy(PyBytes_AS_STRING(tmp), PyBytes_AS_STRING(*obj),
336 PyBytes_GET_SIZE(*obj));
337
338 Py_DECREF(*obj);
339 *obj = tmp;
340
341 return 0;
342 }
\ No newline at end of file
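The CPython behaviour that motivates safe_pybytes_resize() can be observed from Python directly; a quick CPython-specific illustration:

    import sys

    a = b'spam'[0:1]
    b = b'eggs'[3:4]   # both slices are b's'

    # CPython keeps a cache of all 256 single-byte bytes objects, so a "new"
    # one-byte result can already be shared and carry a refcount > 1, which
    # is exactly the case _PyBytes_Resize() refuses to handle.
    print(a is b)              # True on CPython
    print(sys.getrefcount(a))  # comfortably greater than 2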
@@ -2,7 +2,7 b''
2 bitstream
2 bitstream
3 Part of FSE library
3 Part of FSE library
4 header file (to include)
4 header file (to include)
5 Copyright (C) 2013-2016, Yann Collet.
5 Copyright (C) 2013-2017, Yann Collet.
6
6
7 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
8
8
@@ -39,7 +39,6 b''
39 extern "C" {
39 extern "C" {
40 #endif
40 #endif
41
41
42
43 /*
42 /*
44 * This API consists of small unitary functions, which must be inlined for best performance.
43 * This API consists of small unitary functions, which must be inlined for best performance.
45 * Since link-time-optimization is not available for all compilers,
44 * Since link-time-optimization is not available for all compilers,
@@ -53,6 +52,18 b' extern "C" {'
53 #include "error_private.h" /* error codes and messages */
52 #include "error_private.h" /* error codes and messages */
54
53
55
54
55 /*-*************************************
56 * Debug
57 ***************************************/
58 #if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
59 # include <assert.h>
60 #else
61 # ifndef assert
62 # define assert(condition) ((void)0)
63 # endif
64 #endif
65
66
56 /*=========================================
67 /*=========================================
57 * Target specific
68 * Target specific
58 =========================================*/
69 =========================================*/
@@ -60,18 +71,22 b' extern "C" {'
60 # include <immintrin.h> /* support for bextr (experimental) */
71 # include <immintrin.h> /* support for bextr (experimental) */
61 #endif
72 #endif
62
73
74 #define STREAM_ACCUMULATOR_MIN_32 25
75 #define STREAM_ACCUMULATOR_MIN_64 57
76 #define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
77
63
78
64 /*-******************************************
79 /*-******************************************
65 * bitStream encoding API (write forward)
80 * bitStream encoding API (write forward)
66 ********************************************/
81 ********************************************/
67 /* bitStream can mix input from multiple sources.
82 /* bitStream can mix input from multiple sources.
68 * A critical property of these streams is that they encode and decode in **reverse** direction.
83 * A critical property of these streams is that they encode and decode in **reverse** direction.
69 * So the first bit sequence you add will be the last to be read, like a LIFO stack.
84 * So the first bit sequence you add will be the last to be read, like a LIFO stack.
70 */
85 */
71 typedef struct
86 typedef struct
72 {
87 {
73 size_t bitContainer;
88 size_t bitContainer;
74 int bitPos;
89 unsigned bitPos;
75 char* startPtr;
90 char* startPtr;
76 char* ptr;
91 char* ptr;
77 char* endPtr;
92 char* endPtr;
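The reverse read order called out in the comment above can be illustrated with plain Python integers; this is a sketch of the property only, not of the vendored implementation:

    def add_bits(container, bit_pos, value, nb_bits):
        # Append `value` at the current bit offset, like BIT_addBits().
        container |= (value & ((1 << nb_bits) - 1)) << bit_pos
        return container, bit_pos + nb_bits

    acc, pos = 0, 0
    acc, pos = add_bits(acc, pos, 0b101, 3)   # written first
    acc, pos = add_bits(acc, pos, 0b11, 2)    # written second

    # Decoding consumes from the top of the accumulator, so the value written
    # last is recovered first - the LIFO behaviour described above.
    assert (acc >> (pos - 2)) & 0b11 == 0b11  # second value, read first
    assert acc & 0b111 == 0b101               # first value, read last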
@@ -109,6 +124,7 b' typedef struct'
109 unsigned bitsConsumed;
124 unsigned bitsConsumed;
110 const char* ptr;
125 const char* ptr;
111 const char* start;
126 const char* start;
127 const char* limitPtr;
112 } BIT_DStream_t;
128 } BIT_DStream_t;
113
129
114 typedef enum { BIT_DStream_unfinished = 0,
130 typedef enum { BIT_DStream_unfinished = 0,
@@ -151,140 +167,178 b' MEM_STATIC size_t BIT_readBitsFast(BIT_D'
151 /*-**************************************************************
167 /*-**************************************************************
152 * Internal functions
168 * Internal functions
153 ****************************************************************/
169 ****************************************************************/
154 MEM_STATIC unsigned BIT_highbit32 (register U32 val)
170 MEM_STATIC unsigned BIT_highbit32 (U32 val)
155 {
171 {
172 assert(val != 0);
173 {
156 # if defined(_MSC_VER) /* Visual */
174 # if defined(_MSC_VER) /* Visual */
157 unsigned long r=0;
175 unsigned long r=0;
158 _BitScanReverse ( &r, val );
176 _BitScanReverse ( &r, val );
159 return (unsigned) r;
177 return (unsigned) r;
160 # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
178 # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
161 return 31 - __builtin_clz (val);
179 return 31 - __builtin_clz (val);
162 # else /* Software version */
180 # else /* Software version */
163 static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
181 static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
164 U32 v = val;
182 11, 14, 16, 18, 22, 25, 3, 30,
165 v |= v >> 1;
183 8, 12, 20, 28, 15, 17, 24, 7,
166 v |= v >> 2;
184 19, 27, 23, 6, 26, 5, 4, 31 };
167 v |= v >> 4;
185 U32 v = val;
168 v |= v >> 8;
186 v |= v >> 1;
169 v |= v >> 16;
187 v |= v >> 2;
170 return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
188 v |= v >> 4;
189 v |= v >> 8;
190 v |= v >> 16;
191 return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
171 # endif
192 # endif
193 }
172 }
194 }
173
195
174 /*===== Local Constants =====*/
196 /*===== Local Constants =====*/
175 static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */
197 static const unsigned BIT_mask[] = {
176
198 0, 1, 3, 7, 0xF, 0x1F,
199 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
200 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF,
201 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
202 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
203 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
204 #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
177
205
178 /*-**************************************************************
206 /*-**************************************************************
179 * bitStream encoding
207 * bitStream encoding
180 ****************************************************************/
208 ****************************************************************/
181 /*! BIT_initCStream() :
209 /*! BIT_initCStream() :
182 * `dstCapacity` must be > sizeof(void*)
210 * `dstCapacity` must be > sizeof(size_t)
183 * @return : 0 if success,
211 * @return : 0 if success,
184 otherwise an error code (can be tested using ERR_isError() ) */
212 * otherwise an error code (can be tested using ERR_isError()) */
185 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t dstCapacity)
213 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
214 void* startPtr, size_t dstCapacity)
186 {
215 {
187 bitC->bitContainer = 0;
216 bitC->bitContainer = 0;
188 bitC->bitPos = 0;
217 bitC->bitPos = 0;
189 bitC->startPtr = (char*)startPtr;
218 bitC->startPtr = (char*)startPtr;
190 bitC->ptr = bitC->startPtr;
219 bitC->ptr = bitC->startPtr;
191 bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr);
220 bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
192 if (dstCapacity <= sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall);
221 if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
193 return 0;
222 return 0;
194 }
223 }
195
224
196 /*! BIT_addBits() :
225 /*! BIT_addBits() :
197 can add up to 26 bits into `bitC`.
226 * can add up to 31 bits into `bitC`.
198 Does not check for register overflow ! */
227 * Note : does not check for register overflow ! */
199 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
228 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
229 size_t value, unsigned nbBits)
200 {
230 {
231 MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
232 assert(nbBits < BIT_MASK_SIZE);
233 assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
201 bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
234 bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
202 bitC->bitPos += nbBits;
235 bitC->bitPos += nbBits;
203 }
236 }
204
237
205 /*! BIT_addBitsFast() :
238 /*! BIT_addBitsFast() :
206 * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
239 * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
207 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
240 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
241 size_t value, unsigned nbBits)
208 {
242 {
243 assert((value>>nbBits) == 0);
244 assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
209 bitC->bitContainer |= value << bitC->bitPos;
245 bitC->bitContainer |= value << bitC->bitPos;
210 bitC->bitPos += nbBits;
246 bitC->bitPos += nbBits;
211 }
247 }
212
248
213 /*! BIT_flushBitsFast() :
249 /*! BIT_flushBitsFast() :
250 * assumption : bitContainer has not overflowed
214 * unsafe version; does not check buffer overflow */
251 * unsafe version; does not check buffer overflow */
215 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
252 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
216 {
253 {
217 size_t const nbBytes = bitC->bitPos >> 3;
254 size_t const nbBytes = bitC->bitPos >> 3;
255 assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
218 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
256 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
219 bitC->ptr += nbBytes;
257 bitC->ptr += nbBytes;
258 assert(bitC->ptr <= bitC->endPtr);
220 bitC->bitPos &= 7;
259 bitC->bitPos &= 7;
221 bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
260 bitC->bitContainer >>= nbBytes*8;
222 }
261 }
223
262
224 /*! BIT_flushBits() :
263 /*! BIT_flushBits() :
264 * assumption : bitContainer has not overflowed
225 * safe version; check for buffer overflow, and prevents it.
265 * safe version; check for buffer overflow, and prevents it.
226 * note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */
266 * note : does not signal buffer overflow.
267 * overflow will be revealed later on using BIT_closeCStream() */
227 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
268 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
228 {
269 {
229 size_t const nbBytes = bitC->bitPos >> 3;
270 size_t const nbBytes = bitC->bitPos >> 3;
271 assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
230 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
272 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
231 bitC->ptr += nbBytes;
273 bitC->ptr += nbBytes;
232 if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
274 if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
233 bitC->bitPos &= 7;
275 bitC->bitPos &= 7;
234 bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
276 bitC->bitContainer >>= nbBytes*8;
235 }
277 }
236
278
237 /*! BIT_closeCStream() :
279 /*! BIT_closeCStream() :
238 * @return : size of CStream, in bytes,
280 * @return : size of CStream, in bytes,
239 or 0 if it could not fit into dstBuffer */
281 * or 0 if it could not fit into dstBuffer */
240 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
282 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
241 {
283 {
242 BIT_addBitsFast(bitC, 1, 1); /* endMark */
284 BIT_addBitsFast(bitC, 1, 1); /* endMark */
243 BIT_flushBits(bitC);
285 BIT_flushBits(bitC);
244
286 if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
245 if (bitC->ptr >= bitC->endPtr) return 0; /* doesn't fit within authorized budget : cancel */
246
247 return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
287 return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
248 }
288 }
249
289
250
290
251 /*-********************************************************
291 /*-********************************************************
252 * bitStream decoding
292 * bitStream decoding
253 **********************************************************/
293 **********************************************************/
254 /*! BIT_initDStream() :
294 /*! BIT_initDStream() :
255 * Initialize a BIT_DStream_t.
295 * Initialize a BIT_DStream_t.
256 * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
296 * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
257 * `srcSize` must be the *exact* size of the bitStream, in bytes.
297 * `srcSize` must be the *exact* size of the bitStream, in bytes.
258 * @return : size of stream (== srcSize) or an errorCode if a problem is detected
298 * @return : size of stream (== srcSize), or an errorCode if a problem is detected
259 */
299 */
260 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
300 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
261 {
301 {
262 if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
302 if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
263
303
304 bitD->start = (const char*)srcBuffer;
305 bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
306
264 if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
307 if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
265 bitD->start = (const char*)srcBuffer;
266 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
308 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
267 bitD->bitContainer = MEM_readLEST(bitD->ptr);
309 bitD->bitContainer = MEM_readLEST(bitD->ptr);
268 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
310 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
269 bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
311 bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
270 if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
312 if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
271 } else {
313 } else {
272 bitD->start = (const char*)srcBuffer;
273 bitD->ptr = bitD->start;
314 bitD->ptr = bitD->start;
274 bitD->bitContainer = *(const BYTE*)(bitD->start);
315 bitD->bitContainer = *(const BYTE*)(bitD->start);
275 switch(srcSize)
316 switch(srcSize)
276 {
317 {
277 case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
318 case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
278 case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
319 /* fall-through */
279 case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
320
280 case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
321 case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
281 case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
322 /* fall-through */
282 case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
323
283 default:;
324 case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
325 /* fall-through */
326
327 case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
328 /* fall-through */
329
330 case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
331 /* fall-through */
332
333 case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
334 /* fall-through */
335
336 default: break;
284 }
337 }
285 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
338 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
286 bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
339 bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
287 if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
340 if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
341 }
288 bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
342 bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
289 }
343 }
290
344
@@ -306,12 +360,14 b' MEM_STATIC size_t BIT_getMiddleBits(size'
306 # endif
360 # endif
307 return _bextr_u32(bitContainer, start, nbBits);
361 return _bextr_u32(bitContainer, start, nbBits);
308 #else
362 #else
363 assert(nbBits < BIT_MASK_SIZE);
309 return (bitContainer >> start) & BIT_mask[nbBits];
364 return (bitContainer >> start) & BIT_mask[nbBits];
310 #endif
365 #endif
311 }
366 }
312
367
313 MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
368 MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
314 {
369 {
370 assert(nbBits < BIT_MASK_SIZE);
315 return bitContainer & BIT_mask[nbBits];
371 return bitContainer & BIT_mask[nbBits];
316 }
372 }
317
373
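For a concrete feel of this helper: BIT_getLowerBits() simply masks the low nbBits of the container, so the hypothetical checks below hold for any nbBits below BIT_MASK_SIZE, exactly what the new assertions require:

    #include <assert.h>
    #include "bitstream.h"   /* BIT_getLowerBits */

    static void lowerbits_example(void)
    {
        size_t const container = 0xABCD;
        assert(BIT_getLowerBits(container, 8) == 0xCD);  /* keep the low 8 bits */
        assert(BIT_getLowerBits(container, 4) == 0x0D);  /* keep the low 4 bits */
    }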
@@ -320,24 +376,24 b' MEM_STATIC size_t BIT_getLowerBits(size_'
320 * local register is not modified.
376 * local register is not modified.
321 * On 32-bits, maxNbBits==24.
377 * On 32-bits, maxNbBits==24.
322 * On 64-bits, maxNbBits==56.
378 * On 64-bits, maxNbBits==56.
323 * @return : value extracted
379 * @return : value extracted */
324 */
380 MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
325 MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
326 {
381 {
327 #if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
382 #if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
328 return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
383 return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
329 #else
384 #else
330 U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
385 U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
331 return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
386 return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
332 #endif
387 #endif
333 }
388 }
334
389
335 /*! BIT_lookBitsFast() :
390 /*! BIT_lookBitsFast() :
336 * unsafe version; only works only if nbBits >= 1 */
391 * unsafe version; only works if nbBits >= 1 */
337 MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
392 MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
338 {
393 {
339 U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
394 U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
340 return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
395 assert(nbBits >= 1);
396 return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
341 }
397 }
342
398
343 MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
399 MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
@@ -348,8 +404,7 b' MEM_STATIC void BIT_skipBits(BIT_DStream'
348 /*! BIT_readBits() :
404 /*! BIT_readBits() :
349 * Read (consume) next n bits from local register and update.
405 * Read (consume) next n bits from local register and update.
350 * Pay attention to not read more than nbBits contained into local register.
406 * Pay attention to not read more than nbBits contained into local register.
351 * @return : extracted value.
407 * @return : extracted value. */
352 */
353 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
408 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
354 {
409 {
355 size_t const value = BIT_lookBits(bitD, nbBits);
410 size_t const value = BIT_lookBits(bitD, nbBits);
@@ -358,25 +413,26 b' MEM_STATIC size_t BIT_readBits(BIT_DStre'
358 }
413 }
359
414
360 /*! BIT_readBitsFast() :
415 /*! BIT_readBitsFast() :
361 * unsafe version; only works if nbBits >= 1 */
416 * unsafe version; only works if nbBits >= 1 */
362 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
417 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
363 {
418 {
364 size_t const value = BIT_lookBitsFast(bitD, nbBits);
419 size_t const value = BIT_lookBitsFast(bitD, nbBits);
420 assert(nbBits >= 1);
365 BIT_skipBits(bitD, nbBits);
421 BIT_skipBits(bitD, nbBits);
366 return value;
422 return value;
367 }
423 }
368
424
369 /*! BIT_reloadDStream() :
425 /*! BIT_reloadDStream() :
370 * Refill `bitD` from buffer previously set in BIT_initDStream() .
426 * Refill `bitD` from buffer previously set in BIT_initDStream() .
371 * This function is safe, it guarantees it will not read beyond src buffer.
427 * This function is safe, it guarantees it will not read beyond src buffer.
372 * @return : status of `BIT_DStream_t` internal register.
428 * @return : status of `BIT_DStream_t` internal register.
373 if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
429 * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
374 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
430 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
375 {
431 {
376 if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should not happen => corruption detected */
432 if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
377 return BIT_DStream_overflow;
433 return BIT_DStream_overflow;
378
434
379 if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
435 if (bitD->ptr >= bitD->limitPtr) {
380 bitD->ptr -= bitD->bitsConsumed >> 3;
436 bitD->ptr -= bitD->bitsConsumed >> 3;
381 bitD->bitsConsumed &= 7;
437 bitD->bitsConsumed &= 7;
382 bitD->bitContainer = MEM_readLEST(bitD->ptr);
438 bitD->bitContainer = MEM_readLEST(bitD->ptr);
@@ -386,6 +442,7 b' MEM_STATIC BIT_DStream_status BIT_reload'
386 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
442 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
387 return BIT_DStream_completed;
443 return BIT_DStream_completed;
388 }
444 }
445 /* start < ptr < limitPtr */
389 { U32 nbBytes = bitD->bitsConsumed >> 3;
446 { U32 nbBytes = bitD->bitsConsumed >> 3;
390 BIT_DStream_status result = BIT_DStream_unfinished;
447 BIT_DStream_status result = BIT_DStream_unfinished;
391 if (bitD->ptr - nbBytes < bitD->start) {
448 if (bitD->ptr - nbBytes < bitD->start) {
@@ -394,14 +451,14 b' MEM_STATIC BIT_DStream_status BIT_reload'
394 }
451 }
395 bitD->ptr -= nbBytes;
452 bitD->ptr -= nbBytes;
396 bitD->bitsConsumed -= nbBytes*8;
453 bitD->bitsConsumed -= nbBytes*8;
397 bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
454 bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
398 return result;
455 return result;
399 }
456 }
400 }
457 }
401
458
402 /*! BIT_endOfDStream() :
459 /*! BIT_endOfDStream() :
403 * @return Tells if DStream has exactly reached its end (all bits consumed).
460 * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
404 */
461 */
405 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
462 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
406 {
463 {
407 return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
464 return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
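The decoder-side pieces above combine into a canonical read loop: BIT_initDStream() primes the register from the end of the buffer (it returns srcSize on success, per its comment), BIT_readBits() consumes bits, BIT_reloadDStream() refills as the pointer walks back toward the start, and BIT_endOfDStream() confirms every bit was consumed. A sketch that reads back the 5-bit values written by the encoder sketch earlier; the stream is read back-to-front, so values come out in reverse order of writing:

    #include <stddef.h>
    #include "bitstream.h"

    /* Illustrative only: recover `count` 5-bit values written by write_5bit_values().
     * Returns 0 on success, non-zero if the stream looks malformed. */
    static int read_5bit_values(const void* src, size_t srcSize,
                                unsigned* values, size_t count)
    {
        BIT_DStream_t bitD;
        size_t n;
        if (BIT_initDStream(&bitD, src, srcSize) != srcSize) return 1;  /* error code, not srcSize */
        for (n = 0; n < count; n++) {
            values[count - 1 - n] = (unsigned)BIT_readBits(&bitD, 5);   /* last written, first read */
            if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) return 1;  /* corruption */
        }
        return BIT_endOfDStream(&bitD) ? 0 : 1;  /* success only if every bit was consumed */
    }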
@@ -43,27 +43,21 b''
43 #include "huf.h"
43 #include "huf.h"
44
44
45
45
46 /*-****************************************
46 /*=== Version ===*/
47 * FSE Error Management
47 unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
48 ******************************************/
48
49
50 /*=== Error Management ===*/
49 unsigned FSE_isError(size_t code) { return ERR_isError(code); }
51 unsigned FSE_isError(size_t code) { return ERR_isError(code); }
50
51 const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
52 const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
52
53
53
54 /* **************************************************************
55 * HUF Error Management
56 ****************************************************************/
57 unsigned HUF_isError(size_t code) { return ERR_isError(code); }
54 unsigned HUF_isError(size_t code) { return ERR_isError(code); }
58
59 const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
55 const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
60
56
61
57
62 /*-**************************************************************
58 /*-**************************************************************
63 * FSE NCount encoding-decoding
59 * FSE NCount encoding-decoding
64 ****************************************************************/
60 ****************************************************************/
65 static short FSE_abs(short a) { return (short)(a<0 ? -a : a); }
66
67 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
61 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
68 const void* headerBuffer, size_t hbSize)
62 const void* headerBuffer, size_t hbSize)
69 {
63 {
@@ -117,21 +111,21 b' size_t FSE_readNCount (short* normalized'
117 } else {
111 } else {
118 bitStream >>= 2;
112 bitStream >>= 2;
119 } }
113 } }
120 { short const max = (short)((2*threshold-1)-remaining);
114 { int const max = (2*threshold-1) - remaining;
121 short count;
115 int count;
122
116
123 if ((bitStream & (threshold-1)) < (U32)max) {
117 if ((bitStream & (threshold-1)) < (U32)max) {
124 count = (short)(bitStream & (threshold-1));
118 count = bitStream & (threshold-1);
125 bitCount += nbBits-1;
119 bitCount += nbBits-1;
126 } else {
120 } else {
127 count = (short)(bitStream & (2*threshold-1));
121 count = bitStream & (2*threshold-1);
128 if (count >= threshold) count -= max;
122 if (count >= threshold) count -= max;
129 bitCount += nbBits;
123 bitCount += nbBits;
130 }
124 }
131
125
132 count--; /* extra accuracy */
126 count--; /* extra accuracy */
133 remaining -= FSE_abs(count);
127 remaining -= count < 0 ? -count : count; /* -1 means +1 */
134 normalizedCounter[charnum++] = count;
128 normalizedCounter[charnum++] = (short)count;
135 previous0 = !count;
129 previous0 = !count;
136 while (remaining < threshold) {
130 while (remaining < threshold) {
137 nbBits--;
131 nbBits--;
@@ -1,10 +1,11 b''
1 /**
1 /*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This source code is licensed under the BSD-style license found in the
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * of patent rights can be found in the PATENTS file in the same directory.
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
8 */
9 */
9
10
10 /* The purpose of this file is to have a single list of error strings embedded in binary */
11 /* The purpose of this file is to have a single list of error strings embedded in binary */
@@ -20,23 +21,27 b' const char* ERR_getErrorString(ERR_enum '
20 case PREFIX(GENERIC): return "Error (generic)";
21 case PREFIX(GENERIC): return "Error (generic)";
21 case PREFIX(prefix_unknown): return "Unknown frame descriptor";
22 case PREFIX(prefix_unknown): return "Unknown frame descriptor";
22 case PREFIX(version_unsupported): return "Version not supported";
23 case PREFIX(version_unsupported): return "Version not supported";
23 case PREFIX(parameter_unknown): return "Unknown parameter type";
24 case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
24 case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
25 case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
26 case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
25 case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
27 case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound";
26 case PREFIX(corruption_detected): return "Corrupted block detected";
27 case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
28 case PREFIX(parameter_unsupported): return "Unsupported parameter";
29 case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
28 case PREFIX(init_missing): return "Context should be init first";
30 case PREFIX(init_missing): return "Context should be init first";
29 case PREFIX(memory_allocation): return "Allocation error : not enough memory";
31 case PREFIX(memory_allocation): return "Allocation error : not enough memory";
32 case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough";
30 case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
33 case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
31 case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
32 case PREFIX(srcSize_wrong): return "Src size incorrect";
33 case PREFIX(corruption_detected): return "Corrupted block detected";
34 case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
35 case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
34 case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
36 case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
35 case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
37 case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
36 case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
38 case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
37 case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
39 case PREFIX(dictionary_wrong): return "Dictionary mismatch";
38 case PREFIX(dictionary_wrong): return "Dictionary mismatch";
39 case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
40 case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
41 case PREFIX(srcSize_wrong): return "Src size is incorrect";
42 /* following error codes are not stable and may be removed or changed in a future version */
43 case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
44 case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
40 case PREFIX(maxCode):
45 case PREFIX(maxCode):
41 default: return notErrorCode;
46 default: return notErrorCode;
42 }
47 }
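These strings are what the public wrappers ultimately surface; the usual calling pattern is to test the size_t result with FSE_isError()/HUF_isError() and only then map it to a message. A hypothetical helper, using only the wrappers declared in fse.h:

    #include <stdio.h>
    #include "fse.h"   /* FSE_decompress, FSE_isError, FSE_getErrorName */

    /* Illustrative only: surface one of the strings above on failure. */
    static size_t decompress_or_report(void* dst, size_t dstCapacity,
                                       const void* cSrc, size_t cSrcSize)
    {
        size_t const r = FSE_decompress(dst, dstCapacity, cSrc, cSrcSize);
        if (FSE_isError(r)) {
            /* on corrupted input this prints, e.g., "Corrupted block detected" */
            fprintf(stderr, "FSE_decompress: %s\n", FSE_getErrorName(r));
            return 0;
        }
        return r;   /* size of regenerated data */
    }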
@@ -1,10 +1,11 b''
1 /**
1 /*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This source code is licensed under the BSD-style license found in the
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * of patent rights can be found in the PATENTS file in the same directory.
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
8 */
9 */
9
10
10 /* Note : this module is expected to remain private, do not expose it */
11 /* Note : this module is expected to remain private, do not expose it */
@@ -48,10 +49,9 b' typedef ZSTD_ErrorCode ERR_enum;'
48 /*-****************************************
49 /*-****************************************
49 * Error codes handling
50 * Error codes handling
50 ******************************************/
51 ******************************************/
51 #ifdef ERROR
52 #undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */
52 # undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */
53 #define ERROR(name) ZSTD_ERROR(name)
53 #endif
54 #define ZSTD_ERROR(name) ((size_t)-PREFIX(name))
54 #define ERROR(name) ((size_t)-PREFIX(name))
55
55
56 ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
56 ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
57
57
@@ -31,13 +31,14 b''
31 You can contact the author at :
31 You can contact the author at :
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
33 ****************************************************************** */
34 #ifndef FSE_H
35 #define FSE_H
36
34
37 #if defined (__cplusplus)
35 #if defined (__cplusplus)
38 extern "C" {
36 extern "C" {
39 #endif
37 #endif
40
38
39 #ifndef FSE_H
40 #define FSE_H
41
41
42
42 /*-*****************************************
43 /*-*****************************************
43 * Dependencies
44 * Dependencies
@@ -45,6 +46,32 b' extern "C" {'
45 #include <stddef.h> /* size_t, ptrdiff_t */
46 #include <stddef.h> /* size_t, ptrdiff_t */
46
47
47
48
49 /*-*****************************************
50 * FSE_PUBLIC_API : control library symbols visibility
51 ******************************************/
52 #if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
53 # define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
54 #elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
55 # define FSE_PUBLIC_API __declspec(dllexport)
56 #elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
57 # define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
58 #else
59 # define FSE_PUBLIC_API
60 #endif
61
62 /*------ Version ------*/
63 #define FSE_VERSION_MAJOR 0
64 #define FSE_VERSION_MINOR 9
65 #define FSE_VERSION_RELEASE 0
66
67 #define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
68 #define FSE_QUOTE(str) #str
69 #define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
70 #define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
71
72 #define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
73 FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
74
48 /*-****************************************
75 /*-****************************************
49 * FSE simple functions
76 * FSE simple functions
50 ******************************************/
77 ******************************************/
@@ -56,8 +83,8 b' extern "C" {'
56 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
83 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
57 if FSE_isError(return), compression failed (more details using FSE_getErrorName())
84 if FSE_isError(return), compression failed (more details using FSE_getErrorName())
58 */
85 */
59 size_t FSE_compress(void* dst, size_t dstCapacity,
86 FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
60 const void* src, size_t srcSize);
87 const void* src, size_t srcSize);
61
88
62 /*! FSE_decompress():
89 /*! FSE_decompress():
63 Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
90 Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
@@ -69,18 +96,18 b' size_t FSE_compress(void* dst, size_t ds'
69 Why ? : making this distinction requires a header.
96 Why ? : making this distinction requires a header.
70 Header management is intentionally delegated to the user layer, which can better manage special cases.
97 Header management is intentionally delegated to the user layer, which can better manage special cases.
71 */
98 */
72 size_t FSE_decompress(void* dst, size_t dstCapacity,
99 FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
73 const void* cSrc, size_t cSrcSize);
100 const void* cSrc, size_t cSrcSize);
74
101
75
102
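A hedged roundtrip sketch for the two entry points above, sizing the scratch buffer with FSE_compressBound() and treating the special returns (0 = not compressible, 1 = single-symbol/RLE) as "store the block some other way":

    #include <stdlib.h>
    #include <string.h>
    #include "fse.h"

    /* Illustrative only: compress then decompress `src`; returns 0 on a verified roundtrip. */
    static int fse_roundtrip(const void* src, size_t srcSize)
    {
        size_t const cBound = FSE_compressBound(srcSize);
        void* const cBuf = malloc(cBound);
        void* const rBuf = malloc(srcSize);
        int ok = 0;
        if (cBuf && rBuf) {
            size_t const cSize = FSE_compress(cBuf, cBound, src, srcSize);
            if (!FSE_isError(cSize) && cSize > 1) {   /* 0 and 1 need store/RLE fallbacks */
                size_t const dSize = FSE_decompress(rBuf, srcSize, cBuf, cSize);
                ok = !FSE_isError(dSize) && dSize == srcSize && !memcmp(src, rBuf, srcSize);
            }
        }
        free(cBuf); free(rBuf);
        return ok ? 0 : 1;
    }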
76 /*-*****************************************
103 /*-*****************************************
77 * Tool functions
104 * Tool functions
78 ******************************************/
105 ******************************************/
79 size_t FSE_compressBound(size_t size); /* maximum compressed size */
106 FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */
80
107
81 /* Error Management */
108 /* Error Management */
82 unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
109 FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
83 const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
110 FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
84
111
85
112
86 /*-*****************************************
113 /*-*****************************************
@@ -94,7 +121,7 b' const char* FSE_getErrorName(size_t code'
94 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
121 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
95 if FSE_isError(return), it's an error code.
122 if FSE_isError(return), it's an error code.
96 */
123 */
97 size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
124 FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
98
125
99
126
100 /*-*****************************************
127 /*-*****************************************
@@ -127,50 +154,50 b' or to save and provide normalized distri'
127 @return : the count of the most frequent symbol (which is not identified).
154 @return : the count of the most frequent symbol (which is not identified).
128 if return == srcSize, there is only one symbol.
155 if return == srcSize, there is only one symbol.
129 Can also return an error code, which can be tested with FSE_isError(). */
156 Can also return an error code, which can be tested with FSE_isError(). */
130 size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
157 FSE_PUBLIC_API size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
131
158
132 /*! FSE_optimalTableLog():
159 /*! FSE_optimalTableLog():
133 dynamically downsize 'tableLog' when conditions are met.
160 dynamically downsize 'tableLog' when conditions are met.
134 It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
161 It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
135 @return : recommended tableLog (necessarily <= 'maxTableLog') */
162 @return : recommended tableLog (necessarily <= 'maxTableLog') */
136 unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
163 FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
137
164
138 /*! FSE_normalizeCount():
165 /*! FSE_normalizeCount():
139 normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
166 normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
140 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
167 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
141 @return : tableLog,
168 @return : tableLog,
142 or an errorCode, which can be tested using FSE_isError() */
169 or an errorCode, which can be tested using FSE_isError() */
143 size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
170 FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
144
171
145 /*! FSE_NCountWriteBound():
172 /*! FSE_NCountWriteBound():
146 Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
173 Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
147 Typically useful for allocation purpose. */
174 Typically useful for allocation purpose. */
148 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
175 FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
149
176
150 /*! FSE_writeNCount():
177 /*! FSE_writeNCount():
151 Compactly save 'normalizedCounter' into 'buffer'.
178 Compactly save 'normalizedCounter' into 'buffer'.
152 @return : size of the compressed table,
179 @return : size of the compressed table,
153 or an errorCode, which can be tested using FSE_isError(). */
180 or an errorCode, which can be tested using FSE_isError(). */
154 size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
181 FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
155
182
156
183
157 /*! Constructor and Destructor of FSE_CTable.
184 /*! Constructor and Destructor of FSE_CTable.
158 Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
185 Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
159 typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
186 typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
160 FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
187 FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
161 void FSE_freeCTable (FSE_CTable* ct);
188 FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
162
189
163 /*! FSE_buildCTable():
190 /*! FSE_buildCTable():
164 Builds `ct`, which must be already allocated, using FSE_createCTable().
191 Builds `ct`, which must be already allocated, using FSE_createCTable().
165 @return : 0, or an errorCode, which can be tested using FSE_isError() */
192 @return : 0, or an errorCode, which can be tested using FSE_isError() */
166 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
193 FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
167
194
168 /*! FSE_compress_usingCTable():
195 /*! FSE_compress_usingCTable():
169 Compress `src` using `ct` into `dst` which must be already allocated.
196 Compress `src` using `ct` into `dst` which must be already allocated.
170 @return : size of compressed data (<= `dstCapacity`),
197 @return : size of compressed data (<= `dstCapacity`),
171 or 0 if compressed data could not fit into `dst`,
198 or 0 if compressed data could not fit into `dst`,
172 or an errorCode, which can be tested using FSE_isError() */
199 or an errorCode, which can be tested using FSE_isError() */
173 size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
200 FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
174
201
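Before the tutorial comment below spells the sequence out in prose, here is a condensed sketch of it: count, pick a tableLog, normalize, write the NCount header, build the CTable (note the new (maxSymbolValue, tableLog) argument order of FSE_createCTable() in this update), then encode. The byte alphabet, the tableLog cap of 12, and the 0-means-fallback convention are choices of this sketch, not of the library:

    #include <stdlib.h>
    #include "fse.h"

    static size_t fse_compress_detailed(void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize)
    {
        unsigned count[256];
        short    norm[256];
        unsigned maxSymbolValue = 255;
        unsigned tableLog;
        FSE_CTable* ct;
        size_t hSize, cSize, result = 0;

        {   size_t const maxCount = FSE_count(count, &maxSymbolValue, src, srcSize);
            if (FSE_isError(maxCount) || maxCount == srcSize) return 0;   /* error, or single symbol: use RLE */
        }
        tableLog = FSE_optimalTableLog(12, srcSize, maxSymbolValue);
        if (FSE_isError(FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue))) return 0;

        hSize = FSE_writeNCount(dst, dstCapacity, norm, maxSymbolValue, tableLog);
        if (FSE_isError(hSize)) return 0;

        ct = FSE_createCTable(maxSymbolValue, tableLog);
        if (ct == NULL) return 0;
        if (!FSE_isError(FSE_buildCTable(ct, norm, maxSymbolValue, tableLog))) {
            cSize = FSE_compress_usingCTable((char*)dst + hSize, dstCapacity - hSize,
                                             src, srcSize, ct);
            if (!FSE_isError(cSize) && cSize != 0) result = hSize + cSize;  /* 0 : did not fit */
        }
        FSE_freeCTable(ct);
        return result;   /* 0 means "fall back to store/RLE" in this sketch */
    }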
175 /*!
202 /*!
176 Tutorial :
203 Tutorial :
@@ -223,25 +250,25 b' If there is an error, the function will '
223 @return : size read from 'rBuffer',
250 @return : size read from 'rBuffer',
224 or an errorCode, which can be tested using FSE_isError().
251 or an errorCode, which can be tested using FSE_isError().
225 maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
252 maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
226 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
253 FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
227
254
228 /*! Constructor and Destructor of FSE_DTable.
255 /*! Constructor and Destructor of FSE_DTable.
229 Note that its size depends on 'tableLog' */
256 Note that its size depends on 'tableLog' */
230 typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
257 typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
231 FSE_DTable* FSE_createDTable(unsigned tableLog);
258 FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
232 void FSE_freeDTable(FSE_DTable* dt);
259 FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
233
260
234 /*! FSE_buildDTable():
261 /*! FSE_buildDTable():
235 Builds 'dt', which must be already allocated, using FSE_createDTable().
262 Builds 'dt', which must be already allocated, using FSE_createDTable().
236 return : 0, or an errorCode, which can be tested using FSE_isError() */
263 return : 0, or an errorCode, which can be tested using FSE_isError() */
237 size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
264 FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
238
265
239 /*! FSE_decompress_usingDTable():
266 /*! FSE_decompress_usingDTable():
240 Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
267 Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
241 into `dst` which must be already allocated.
268 into `dst` which must be already allocated.
242 @return : size of regenerated data (necessarily <= `dstCapacity`),
269 @return : size of regenerated data (necessarily <= `dstCapacity`),
243 or an errorCode, which can be tested using FSE_isError() */
270 or an errorCode, which can be tested using FSE_isError() */
244 size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
271 FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
245
272
246 /*!
273 /*!
247 Tutorial :
274 Tutorial :
@@ -271,8 +298,10 b' FSE_decompress_usingDTable() result will'
271 If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
298 If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
272 */
299 */
273
300
301 #endif /* FSE_H */
274
302
275 #ifdef FSE_STATIC_LINKING_ONLY
303 #if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
304 #define FSE_H_FSE_STATIC_LINKING_ONLY
276
305
277 /* *** Dependency *** */
306 /* *** Dependency *** */
278 #include "bitstream.h"
307 #include "bitstream.h"
@@ -290,6 +319,10 b' If there is an error, the function will '
290 #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
319 #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
291 #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
320 #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
292
321
322 /* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
323 #define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
324 #define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))
325
293
326
294 /* *****************************************
327 /* *****************************************
295 * FSE advanced API
328 * FSE advanced API
@@ -312,7 +345,7 b' size_t FSE_countFast(unsigned* count, un'
312 */
345 */
313 size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);
346 size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);
314
347
315 /*! FSE_count_simple
348 /*! FSE_count_simple() :
316 * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
349 * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
317 * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
350 * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
318 */
351 */
@@ -327,7 +360,7 b' unsigned FSE_optimalTableLog_internal(un'
327 * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
360 * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
328 * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
361 * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
329 */
362 */
330 #define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + (1<<((maxTableLog>2)?(maxTableLog-2):0)) )
363 #define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
331 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
364 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
332
365
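The reworked FSE_WKSP_SIZE_U32() above sizes the caller-provided scratch area for FSE_compress_wksp(). A minimal sketch with a fixed worst case (tableLog 12, byte alphabet, roughly 14 KB on the stack), assuming FSE_STATIC_LINKING_ONLY is defined before including fse.h, as the vendored .c files do:

    #define FSE_STATIC_LINKING_ONLY
    #include "fse.h"

    static size_t fse_compress_on_stack(void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize)
    {
        unsigned wksp[FSE_WKSP_SIZE_U32(12, 255)];   /* scratch for tableLog<=12, maxSymbolValue 255 */
        size_t const r = FSE_compress_wksp(dst, dstCapacity, src, srcSize,
                                           255, 12, wksp, sizeof(wksp));
        return FSE_isError(r) ? 0 : r;   /* 0 and 1 keep their special meanings */
    }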
333 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
366 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
@@ -351,6 +384,11 b' size_t FSE_buildDTable_rle (FSE_DTable* '
351 size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
384 size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
352 /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
385 /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
353
386
387 typedef enum {
388 FSE_repeat_none, /**< Cannot use the previous table */
389 FSE_repeat_check, /**< Can use the previous table but it must be checked */
390 FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */
391 } FSE_repeat;
354
392
355 /* *****************************************
393 /* *****************************************
356 * FSE symbol compression API
394 * FSE symbol compression API
@@ -524,9 +562,9 b' MEM_STATIC void FSE_initCState2(FSE_CSta'
524
562
525 MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
563 MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
526 {
564 {
527 const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
565 FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
528 const U16* const stateTable = (const U16*)(statePtr->stateTable);
566 const U16* const stateTable = (const U16*)(statePtr->stateTable);
529 U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
567 U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
530 BIT_addBits(bitC, statePtr->value, nbBitsOut);
568 BIT_addBits(bitC, statePtr->value, nbBitsOut);
531 statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
569 statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
532 }
570 }
@@ -664,5 +702,3 b' MEM_STATIC unsigned FSE_endOfDState(cons'
664 #if defined (__cplusplus)
702 #if defined (__cplusplus)
665 }
703 }
666 #endif
704 #endif
667
668 #endif /* FSE_H */
@@ -34,35 +34,15 b''
34
34
35
35
36 /* **************************************************************
36 /* **************************************************************
37 * Compiler specifics
38 ****************************************************************/
39 #ifdef _MSC_VER /* Visual Studio */
40 # define FORCE_INLINE static __forceinline
41 # include <intrin.h> /* For Visual 2005 */
42 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
43 # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
44 #else
45 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
46 # ifdef __GNUC__
47 # define FORCE_INLINE static inline __attribute__((always_inline))
48 # else
49 # define FORCE_INLINE static inline
50 # endif
51 # else
52 # define FORCE_INLINE static
53 # endif /* __STDC_VERSION__ */
54 #endif
55
56
57 /* **************************************************************
58 * Includes
37 * Includes
59 ****************************************************************/
38 ****************************************************************/
60 #include <stdlib.h> /* malloc, free, qsort */
39 #include <stdlib.h> /* malloc, free, qsort */
61 #include <string.h> /* memcpy, memset */
40 #include <string.h> /* memcpy, memset */
62 #include <stdio.h> /* printf (debug) */
63 #include "bitstream.h"
41 #include "bitstream.h"
42 #include "compiler.h"
64 #define FSE_STATIC_LINKING_ONLY
43 #define FSE_STATIC_LINKING_ONLY
65 #include "fse.h"
44 #include "fse.h"
45 #include "error_private.h"
66
46
67
47
68 /* **************************************************************
48 /* **************************************************************
@@ -159,8 +139,8 b' size_t FSE_buildDTable(FSE_DTable* dt, c'
159 { U32 u;
139 { U32 u;
160 for (u=0; u<tableSize; u++) {
140 for (u=0; u<tableSize; u++) {
161 FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
141 FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
162 U16 nextState = symbolNext[symbol]++;
142 U32 const nextState = symbolNext[symbol]++;
163 tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
143 tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
164 tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
144 tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
165 } }
145 } }
166
146
@@ -217,7 +197,7 b' size_t FSE_buildDTable_raw (FSE_DTable* '
217 return 0;
197 return 0;
218 }
198 }
219
199
220 FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
200 FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
221 void* dst, size_t maxDstSize,
201 void* dst, size_t maxDstSize,
222 const void* cSrc, size_t cSrcSize,
202 const void* cSrc, size_t cSrcSize,
223 const FSE_DTable* dt, const unsigned fast)
203 const FSE_DTable* dt, const unsigned fast)
@@ -31,81 +31,114 b''
31 You can contact the author at :
31 You can contact the author at :
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
33 ****************************************************************** */
34 #ifndef HUF_H_298734234
35 #define HUF_H_298734234
36
34
37 #if defined (__cplusplus)
35 #if defined (__cplusplus)
38 extern "C" {
36 extern "C" {
39 #endif
37 #endif
40
38
39 #ifndef HUF_H_298734234
40 #define HUF_H_298734234
41
41
42 /* *** Dependencies *** */
42 /* *** Dependencies *** */
43 #include <stddef.h> /* size_t */
43 #include <stddef.h> /* size_t */
44
44
45
45
46 /* *** simple functions *** */
46 /* *** library symbols visibility *** */
47 /**
47 /* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
48 HUF_compress() :
48 * HUF symbols remain "private" (internal symbols for library only).
49 Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
49 * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
50 'dst' buffer must be already allocated.
50 #if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
51 Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
51 # define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
52 `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
52 #elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
53 @return : size of compressed data (<= `dstCapacity`).
53 # define HUF_PUBLIC_API __declspec(dllexport)
54 Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
54 #elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
55 if return == 1, srcData is a single repeated byte symbol (RLE compression).
55 # define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
56 if HUF_isError(return), compression failed (more details using HUF_getErrorName())
56 #else
57 */
57 # define HUF_PUBLIC_API
58 size_t HUF_compress(void* dst, size_t dstCapacity,
58 #endif
59 const void* src, size_t srcSize);
59
60
61 /* ========================== */
62 /* *** simple functions *** */
63 /* ========================== */
60
64
61 /**
65 /** HUF_compress() :
62 HUF_decompress() :
66 * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
63 Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
67 * 'dst' buffer must be already allocated.
64 into already allocated buffer 'dst', of minimum size 'dstSize'.
68 * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
65 `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
69 * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
66 Note : in contrast with FSE, HUF_decompress can regenerate
70 * @return : size of compressed data (<= `dstCapacity`).
67 RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
71 * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
68 because it knows size to regenerate.
72 * if HUF_isError(return), compression failed (more details using HUF_getErrorName())
69 @return : size of regenerated data (== originalSize),
73 */
70 or an error code, which can be tested using HUF_isError()
74 HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
71 */
75 const void* src, size_t srcSize);
72 size_t HUF_decompress(void* dst, size_t originalSize,
76
73 const void* cSrc, size_t cSrcSize);
77 /** HUF_decompress() :
78 * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
79 * into already allocated buffer 'dst', of minimum size 'dstSize'.
80 * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
81 * Note : in contrast with FSE, HUF_decompress can regenerate
82 * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
83 * because it knows size to regenerate (originalSize).
84 * @return : size of regenerated data (== originalSize),
85 * or an error code, which can be tested using HUF_isError()
86 */
87 HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize,
88 const void* cSrc, size_t cSrcSize);
74
89
75
90
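A hedged roundtrip sketch for the two simple entry points above; the exact originalSize has to travel out of band, since HUF_decompress() needs it to regenerate the data:

    #include <stdlib.h>
    #include <string.h>
    #include "huf.h"

    /* Illustrative only: one block (srcSize <= HUF_BLOCKSIZE_MAX), verified roundtrip on return 0.
     * Returns of 0 (not compressible) and 1 (RLE) are treated as "store the input instead". */
    static int huf_roundtrip(const void* src, size_t srcSize)
    {
        size_t const cBound = HUF_compressBound(srcSize);
        void* const cBuf = malloc(cBound);
        void* const rBuf = malloc(srcSize);
        int ok = 0;
        if (cBuf && rBuf && srcSize <= HUF_BLOCKSIZE_MAX) {
            size_t const cSize = HUF_compress(cBuf, cBound, src, srcSize);
            if (!HUF_isError(cSize) && cSize > 1) {
                size_t const dSize = HUF_decompress(rBuf, srcSize, cBuf, cSize);
                ok = !HUF_isError(dSize) && dSize == srcSize && !memcmp(src, rBuf, srcSize);
            }
        }
        free(cBuf); free(rBuf);
        return ok ? 0 : 1;
    }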
76 /* *** Tool functions *** */
91 /* *** Tool functions *** */
77 #define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
92 #define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
78 size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
93 HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
79
94
80 /* Error Management */
95 /* Error Management */
81 unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
96 HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
82 const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
97 HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
83
98
84
99
85 /* *** Advanced function *** */
100 /* *** Advanced function *** */
86
101
87 /** HUF_compress2() :
102 /** HUF_compress2() :
88 * Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` .
103 * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
89 * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
104 * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
90 size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
105 * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
106 HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
107 const void* src, size_t srcSize,
108 unsigned maxSymbolValue, unsigned tableLog);
91
109
92 /** HUF_compress4X_wksp() :
110 /** HUF_compress4X_wksp() :
93 * Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of >= 1024 unsigned */
111 * Same as HUF_compress2(), but uses externally allocated `workSpace`.
94 size_t HUF_compress4X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least 1024 unsigned */
112 * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
113 #define HUF_WORKSPACE_SIZE (6 << 10)
114 #define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
115 HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
116 const void* src, size_t srcSize,
117 unsigned maxSymbolValue, unsigned tableLog,
118 void* workSpace, size_t wkspSize);
95
119
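HUF_WORKSPACE_SIZE turns the old "table of 1024 unsigned" rule into an explicit byte count. A minimal sketch that satisfies the 4-byte alignment requirement with an unsigned stack array; 255 and 11 are example choices for maxSymbolValue and tableLog (11 being the default noted in the static section further down):

    #include "huf.h"

    static size_t huf_compress_on_stack(void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize)
    {
        unsigned workSpace[HUF_WORKSPACE_SIZE / sizeof(unsigned)];   /* 6 KB, 4-byte aligned */
        size_t const r = HUF_compress4X_wksp(dst, dstCapacity, src, srcSize,
                                             255, 11, workSpace, sizeof(workSpace));
        return HUF_isError(r) ? 0 : r;   /* 0 and 1 keep their special meanings */
    }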
96
120 #endif /* HUF_H_298734234 */
97
121
98 #ifdef HUF_STATIC_LINKING_ONLY
122 /* ******************************************************************
123 * WARNING !!
124 * The following section contains advanced and experimental definitions
125 * which shall never be used in the context of a dynamic library,
126 * because they are not guaranteed to remain stable in the future.
127 * Only consider them in association with static linking.
128 * *****************************************************************/
129 #if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
130 #define HUF_H_HUF_STATIC_LINKING_ONLY
99
131
100 /* *** Dependencies *** */
132 /* *** Dependencies *** */
101 #include "mem.h" /* U32 */
133 #include "mem.h" /* U32 */
102
134
103
135
104 /* *** Constants *** */
136 /* *** Constants *** */
105 #define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
137 #define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
137 #define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
138 #define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */
107 #define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */
139 #define HUF_SYMBOLVALUE_MAX 255
108 #define HUF_SYMBOLVALUE_MAX 255
140
141 #define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
109 #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
142 #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
110 # error "HUF_TABLELOG_MAX is too large !"
143 # error "HUF_TABLELOG_MAX is too large !"
111 #endif
144 #endif
@@ -116,12 +149,14 b' size_t HUF_compress4X_wksp (void* dst, s'
116 ******************************************/
149 ******************************************/
117 /* HUF buffer bounds */
150 /* HUF buffer bounds */
118 #define HUF_CTABLEBOUND 129
151 #define HUF_CTABLEBOUND 129
119 #define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true if incompressible pre-filtered with fast heuristic */
152 #define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */
120 #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
153 #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
121
154
122 /* static allocation of HUF's Compression Table */
155 /* static allocation of HUF's Compression Table */
156 #define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */
157 #define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
123 #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
158 #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
124 U32 name##hb[maxSymbolValue+1]; \
159 U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
125 void* name##hv = &(name##hb); \
160 void* name##hv = &(name##hb); \
126 HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */
161 HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */
127
162
@@ -142,97 +177,151 b' size_t HUF_decompress4X4 (void* dst, siz'
142
177
143 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
178 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
144 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
179 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
180 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
145 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
181 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
182 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
146 size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
183 size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
184 size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
147
185
148
186
149 /* ****************************************
187 /* ****************************************
150 * HUF detailed API
188 * HUF detailed API
151 ******************************************/
189 * ****************************************/
152 /*!
153 HUF_compress() does the following:
154 1. count symbol occurrence from source[] into table count[] using FSE_count()
155 2. (optional) refine tableLog using HUF_optimalTableLog()
156 3. build Huffman table from count using HUF_buildCTable()
157 4. save Huffman table to memory buffer using HUF_writeCTable()
158 5. encode the data stream using HUF_compress4X_usingCTable()
159
190
160 The following API allows targeting specific sub-functions for advanced tasks.
191 /*! HUF_compress() does the following:
161 For example, it's possible to compress several blocks using the same 'CTable',
192 * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
162 or to save and regenerate 'CTable' using external methods.
193 * 2. (optional) refine tableLog using HUF_optimalTableLog()
163 */
194 * 3. build Huffman table from count using HUF_buildCTable()
164 /* FSE_count() : find it within "fse.h" */
195 * 4. save Huffman table to memory buffer using HUF_writeCTable()
196 * 5. encode the data stream using HUF_compress4X_usingCTable()
197 *
198 * The following API allows targeting specific sub-functions for advanced tasks.
199 * For example, it's possible to compress several blocks using the same 'CTable',
200 * or to save and regenerate 'CTable' using external methods.
201 */
165 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
202 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
166 typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
203 typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
167 size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);
204 size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
168 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
205 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
169 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
206 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
170
207
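The five numbered steps above map directly onto the declarations that follow. As a rough, hypothetical sketch (not part of the vendored sources), a caller-managed compression path could look like the helper below; the 255-symbol byte alphabet, the buffer handling and the helper name are assumptions, and FSE_count()/FSE_isError() come from "fse.h" as noted above (their exact prototypes are assumed here).

#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#include "fse.h"    /* FSE_count(), FSE_isError() - assumed prototypes from fse.h */

/* Hypothetical helper: compress one block with an explicitly built CTable. */
static size_t compress_block_with_ctable(void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize)
{
    unsigned count[256];                    /* symbol histogram */
    unsigned maxSymbolValue = 255;          /* assumed byte alphabet */
    unsigned tableLog;
    size_t hSize, cSize, maxNbBits;
    HUF_CREATE_STATIC_CTABLE(cTable, 255);  /* macro defined earlier in this header */

    /* 1. count symbol occurrences */
    if (FSE_isError(FSE_count(count, &maxSymbolValue, src, srcSize))) return 0;

    /* 2. refine the table log for this block */
    tableLog = HUF_optimalTableLog(HUF_TABLELOG_MAX, srcSize, maxSymbolValue);

    /* 3. build the Huffman table; on success the return value is the actual max code length */
    maxNbBits = HUF_buildCTable(cTable, count, maxSymbolValue, tableLog);
    if (HUF_isError(maxNbBits)) return 0;

    /* 4. serialize the table, then 5. encode the payload right after it */
    hSize = HUF_writeCTable(dst, dstCapacity, cTable, maxSymbolValue, (unsigned)maxNbBits);
    if (HUF_isError(hSize)) return 0;
    cSize = HUF_compress4X_usingCTable((char*)dst + hSize, dstCapacity - hSize,
                                       src, srcSize, cTable);
    if (HUF_isError(cSize) || cSize == 0) return 0;  /* treat "not compressible" as unhandled here */
    return hSize + cSize;
}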
208 typedef enum {
209 HUF_repeat_none, /**< Cannot use the previous table */
210 HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
211 HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */
212 } HUF_repeat;
213 /** HUF_compress4X_repeat() :
214 * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
215 * If it uses hufTable it does not modify hufTable or repeat.
216 * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
217 * If preferRepeat then the old table will always be used if valid. */
218 size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
219 const void* src, size_t srcSize,
220 unsigned maxSymbolValue, unsigned tableLog,
221 void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
222 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
171
223
172 /** HUF_buildCTable_wksp() :
224 /** HUF_buildCTable_wksp() :
173 * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
225 * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
174 * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
226 * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
175 */
227 */
228 #define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
229 #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
176 size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);
230 size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);
177
231
178 /*! HUF_readStats() :
232 /*! HUF_readStats() :
179 Read compact Huffman tree, saved by HUF_writeCTable().
233 * Read compact Huffman tree, saved by HUF_writeCTable().
180 `huffWeight` is destination buffer.
234 * `huffWeight` is destination buffer.
181 @return : size read from `src` , or an error Code .
235 * @return : size read from `src` , or an error Code .
182 Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
236 * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
183 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
237 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
184 U32* nbSymbolsPtr, U32* tableLogPtr,
238 U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
185 const void* src, size_t srcSize);
239 const void* src, size_t srcSize);
186
240
187 /** HUF_readCTable() :
241 /** HUF_readCTable() :
188 * Loading a CTable saved with HUF_writeCTable() */
242 * Loading a CTable saved with HUF_writeCTable() */
189 size_t HUF_readCTable (HUF_CElt* CTable, unsigned maxSymbolValue, const void* src, size_t srcSize);
243 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
190
244
191
245
192 /*
246 /*
193 HUF_decompress() does the following:
247 * HUF_decompress() does the following:
194 1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
248 * 1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
195 2. build Huffman table from save, using HUF_readDTableXn()
249 * 2. build Huffman table from save, using HUF_readDTableX?()
196 3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
250 * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
197 */
251 */
198
252
199 /** HUF_selectDecoder() :
253 /** HUF_selectDecoder() :
200 * Tells which decoder is likely to decode faster,
254 * Tells which decoder is likely to decode faster,
201 * based on a set of pre-determined metrics.
255 * based on a set of pre-computed metrics.
202 * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
256 * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
203 * Assumption : 0 < cSrcSize < dstSize <= 128 KB */
257 * Assumption : 0 < dstSize <= 128 KB */
204 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
258 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
205
259
260 /**
261 * The minimum workspace size for the `workSpace` used in
262 * HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp().
263 *
264 * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
265 * HUF_TABLELOG_MAX=12 to ~1850 bytes when HUF_TABLELOG_MAX=15.
266 * Buffer overflow errors may potentially occur if code modifications result in
267 * a required workspace size greater than that specified in the following
268 * macro.
269 */
270 #define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
271 #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
272
206 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
273 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
274 size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
207 size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
275 size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
276 size_t HUF_readDTableX4_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
208
277
209 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
278 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
210 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
279 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
211 size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
280 size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
212
281
213
282
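For the decompression side, here is a minimal hypothetical sketch of how these pieces can be combined. It assumes the HUF_CREATE_STATIC_DTABLEX4() helper and HUF_TABLELOG_MAX declared earlier in this header, and it is an illustration rather than the library's own dispatch code (HUF_decompress4X_DCtx() provides a similar selection internally).

/* Hypothetical dispatch helper: pick the decoder suggested by the heuristic
 * and decode with a caller-provided workspace (no heap allocation). */
static size_t decompress_block(void* dst, size_t dstSize,
                               const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    HUF_CREATE_STATIC_DTABLEX4(dTable, HUF_TABLELOG_MAX);  /* assumed macro; sized for either decoder */

    if (HUF_selectDecoder(dstSize, cSrcSize) == 0)   /* 0 => single-symbol (X2) */
        return HUF_decompress4X2_DCtx_wksp(dTable, dst, dstSize, cSrc, cSrcSize,
                                           workSpace, sizeof(workSpace));
    /* 1 => double-symbol (X4) */
    return HUF_decompress4X4_DCtx_wksp(dTable, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}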
283 /* ====================== */
214 /* single stream variants */
284 /* single stream variants */
285 /* ====================== */
215
286
216 size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
287 size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
217 size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least 1024 unsigned */
288 size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
218 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
289 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
290 /** HUF_compress1X_repeat() :
291 * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
292 * If it uses hufTable it does not modify hufTable or repeat.
293 * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
294 * If preferRepeat then the old table will always be used if valid. */
295 size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
296 const void* src, size_t srcSize,
297 unsigned maxSymbolValue, unsigned tableLog,
298 void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
299 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
219
300
220 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
301 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
221 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
302 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
222
303
223 size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
304 size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
305 size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
224 size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
306 size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
307 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
225 size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
308 size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
309 size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
226
310
227 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of single or double symbol decoder, based on DTable */
311 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of single or double symbol decoder, based on DTable */
228 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
312 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
229 size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
313 size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
230
314
315 /* BMI2 variants.
316 * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
317 */
318 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
319 size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
320 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
321 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
322
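These _bmi2 entry points mirror the functions above with one extra flag. A trivially safe (if suboptimal) way to call them is to pass 0 unconditionally; a real integration would substitute a CPUID-based BMI2 check, which this hypothetical sketch leaves as an assumption.

/* Hypothetical wrapper: bmi2=0 is valid on every target, so it is the
 * conservative default when no feature detection is available. */
static size_t decompress1X_usingDTable_portable(void* dst, size_t dstCapacity,
                                                const void* cSrc, size_t cSrcSize,
                                                const HUF_DTable* DTable)
{
    int const bmi2 = 0;  /* replace with a CPUID-based check where available */
    return HUF_decompress1X_usingDTable_bmi2(dst, dstCapacity, cSrc, cSrcSize, DTable, bmi2);
}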
231 #endif /* HUF_STATIC_LINKING_ONLY */
323 #endif /* HUF_STATIC_LINKING_ONLY */
232
324
233
234 #if defined (__cplusplus)
325 #if defined (__cplusplus)
235 }
326 }
236 #endif
327 #endif
237
238 #endif /* HUF_H_298734234 */
@@ -1,10 +1,11 b''
1 /**
1 /*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This source code is licensed under the BSD-style license found in the
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * of patent rights can be found in the PATENTS file in the same directory.
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
8 */
9 */
9
10
10 #ifndef MEM_H_MODULE
11 #ifndef MEM_H_MODULE
@@ -48,14 +49,13 b' MEM_STATIC void MEM_check(void) { MEM_ST'
48 *****************************************************************/
49 *****************************************************************/
49 #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
50 #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
50 # include <stdint.h>
51 # include <stdint.h>
51 typedef uint8_t BYTE;
52 typedef uint8_t BYTE;
52 typedef uint16_t U16;
53 typedef uint16_t U16;
53 typedef int16_t S16;
54 typedef int16_t S16;
54 typedef uint32_t U32;
55 typedef uint32_t U32;
55 typedef int32_t S32;
56 typedef int32_t S32;
56 typedef uint64_t U64;
57 typedef uint64_t U64;
57 typedef int64_t S64;
58 typedef int64_t S64;
58 typedef intptr_t iPtrDiff;
59 #else
59 #else
60 typedef unsigned char BYTE;
60 typedef unsigned char BYTE;
61 typedef unsigned short U16;
61 typedef unsigned short U16;
@@ -64,7 +64,6 b' MEM_STATIC void MEM_check(void) { MEM_ST'
64 typedef signed int S32;
64 typedef signed int S32;
65 typedef unsigned long long U64;
65 typedef unsigned long long U64;
66 typedef signed long long S64;
66 typedef signed long long S64;
67 typedef ptrdiff_t iPtrDiff;
68 #endif
67 #endif
69
68
70
69
@@ -76,19 +75,18 b' MEM_STATIC void MEM_check(void) { MEM_ST'
76 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
75 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
77 * The switch below allows selecting a different access method for improved performance.
76 * The switch below allows selecting a different access method for improved performance.
78 * Method 0 (default) : use `memcpy()`. Safe and portable.
77 * Method 0 (default) : use `memcpy()`. Safe and portable.
79 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
78 * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
80 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
79 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
81 * Method 2 : direct access. This method is portable but violates the C standard.
80 * Method 2 : direct access. This method is portable but violates the C standard.
82 * It can generate buggy code on targets depending on alignment.
81 * It can generate buggy code on targets depending on alignment.
83 * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
82 * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
84 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
83 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
85 * Prefer these methods in priority order (0 > 1 > 2)
84 * Prefer these methods in priority order (0 > 1 > 2)
86 */
85 */
87 #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
86 #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
88 # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
87 # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
89 # define MEM_FORCE_MEMORY_ACCESS 2
88 # define MEM_FORCE_MEMORY_ACCESS 2
90 # elif defined(__INTEL_COMPILER) /*|| defined(_MSC_VER)*/ || \
89 # elif defined(__INTEL_COMPILER) || defined(__GNUC__)
91 (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
92 # define MEM_FORCE_MEMORY_ACCESS 1
90 # define MEM_FORCE_MEMORY_ACCESS 1
93 # endif
91 # endif
94 #endif
92 #endif
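As a point of reference for Method 0, the safe default boils down to a memcpy-based read like the standalone sketch below (not taken from this file); most optimizing compilers collapse the memcpy into a single load on targets that tolerate unaligned access.

#include <stdint.h>
#include <string.h>   /* memcpy */

/* Portable unaligned 32-bit read (Method 0): never invokes undefined behaviour,
 * and compilers typically emit one plain load for it where the target allows. */
static uint32_t read32_any_alignment(const void* ptr)
{
    uint32_t value;
    memcpy(&value, ptr, sizeof(value));
    return value;
}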
@@ -109,7 +107,7 b' Only use if no other choice to achieve b'
109 MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
107 MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
110 MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
108 MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
111 MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
109 MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
112 MEM_STATIC U64 MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
110 MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
113
111
114 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
112 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
115 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
113 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
@@ -120,21 +118,27 b' MEM_STATIC void MEM_write64(void* memPtr'
120 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
118 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
121 /* currently only defined for gcc and icc */
119 /* currently only defined for gcc and icc */
122 #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
120 #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
123 __pragma( pack(push, 1) )
121 __pragma( pack(push, 1) )
124 typedef union { U16 u16; U32 u32; U64 u64; size_t st; } unalign;
122 typedef struct { U16 v; } unalign16;
123 typedef struct { U32 v; } unalign32;
124 typedef struct { U64 v; } unalign64;
125 typedef struct { size_t v; } unalignArch;
125 __pragma( pack(pop) )
126 __pragma( pack(pop) )
126 #else
127 #else
127 typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
128 typedef struct { U16 v; } __attribute__((packed)) unalign16;
129 typedef struct { U32 v; } __attribute__((packed)) unalign32;
130 typedef struct { U64 v; } __attribute__((packed)) unalign64;
131 typedef struct { size_t v; } __attribute__((packed)) unalignArch;
128 #endif
132 #endif
129
133
130 MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
134 MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; }
131 MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
135 MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; }
132 MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
136 MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; }
133 MEM_STATIC U64 MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; }
137 MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; }
134
138
135 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
139 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; }
136 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
140 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; }
137 MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
141 MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; }
138
142
139 #else
143 #else
140
144
@@ -182,7 +186,7 b' MEM_STATIC U32 MEM_swap32(U32 in)'
182 {
186 {
183 #if defined(_MSC_VER) /* Visual Studio */
187 #if defined(_MSC_VER) /* Visual Studio */
184 return _byteswap_ulong(in);
188 return _byteswap_ulong(in);
185 #elif defined (__GNUC__)
189 #elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
186 return __builtin_bswap32(in);
190 return __builtin_bswap32(in);
187 #else
191 #else
188 return ((in << 24) & 0xff000000 ) |
192 return ((in << 24) & 0xff000000 ) |
@@ -196,7 +200,7 b' MEM_STATIC U64 MEM_swap64(U64 in)'
196 {
200 {
197 #if defined(_MSC_VER) /* Visual Studio */
201 #if defined(_MSC_VER) /* Visual Studio */
198 return _byteswap_uint64(in);
202 return _byteswap_uint64(in);
199 #elif defined (__GNUC__)
203 #elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
200 return __builtin_bswap64(in);
204 return __builtin_bswap64(in);
201 #else
205 #else
202 return ((in << 56) & 0xff00000000000000ULL) |
206 return ((in << 56) & 0xff00000000000000ULL) |
@@ -351,20 +355,6 b' MEM_STATIC void MEM_writeBEST(void* memP'
351 }
355 }
352
356
353
357
354 /* function safe only for comparisons */
355 MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length)
356 {
357 switch (length)
358 {
359 default :
360 case 4 : return MEM_read32(memPtr);
361 case 3 : if (MEM_isLittleEndian())
362 return MEM_read32(memPtr)<<8;
363 else
364 return MEM_read32(memPtr)>>8;
365 }
366 }
367
368 #if defined (__cplusplus)
358 #if defined (__cplusplus)
369 }
359 }
370 #endif
360 #endif
@@ -1,17 +1,18 b''
1 /**
1 /*
2 * Copyright (c) 2016-present, Facebook, Inc.
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This source code is licensed under the BSD-style license found in the
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * of patent rights can be found in the PATENTS file in the same directory.
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
8 */
9 */
9
10
10
11
11 /* ====== Dependencies ======= */
12 /* ====== Dependencies ======= */
12 #include <stddef.h> /* size_t */
13 #include <stddef.h> /* size_t */
13 #include <stdlib.h> /* malloc, calloc, free */
14 #include "pool.h"
14 #include "pool.h"
15 #include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */
15
16
16 /* ====== Compiler specifics ====== */
17 /* ====== Compiler specifics ====== */
17 #if defined(_MSC_VER)
18 #if defined(_MSC_VER)
@@ -25,13 +26,14 b''
25
26
26 /* A job is a function and an opaque argument */
27 /* A job is a function and an opaque argument */
27 typedef struct POOL_job_s {
28 typedef struct POOL_job_s {
28 POOL_function function;
29 POOL_function function;
29 void *opaque;
30 void *opaque;
30 } POOL_job;
31 } POOL_job;
31
32
32 struct POOL_ctx_s {
33 struct POOL_ctx_s {
34 ZSTD_customMem customMem;
33 /* Keep track of the threads */
35 /* Keep track of the threads */
34 pthread_t *threads;
36 ZSTD_pthread_t *threads;
35 size_t numThreads;
37 size_t numThreads;
36
38
37 /* The queue is a circular buffer */
39 /* The queue is a circular buffer */
@@ -39,12 +41,18 b' struct POOL_ctx_s {'
39 size_t queueHead;
41 size_t queueHead;
40 size_t queueTail;
42 size_t queueTail;
41 size_t queueSize;
43 size_t queueSize;
44
45 /* The number of threads working on jobs */
46 size_t numThreadsBusy;
47 /* Indicates if the queue is empty */
48 int queueEmpty;
49
42 /* The mutex protects the queue */
50 /* The mutex protects the queue */
43 pthread_mutex_t queueMutex;
51 ZSTD_pthread_mutex_t queueMutex;
44 /* Condition variable for pushers to wait on when the queue is full */
52 /* Condition variable for pushers to wait on when the queue is full */
45 pthread_cond_t queuePushCond;
53 ZSTD_pthread_cond_t queuePushCond;
46 /* Condition variables for poppers to wait on when the queue is empty */
54 /* Condition variables for poppers to wait on when the queue is empty */
47 pthread_cond_t queuePopCond;
55 ZSTD_pthread_cond_t queuePopCond;
48 /* Indicates if the queue is shutting down */
56 /* Indicates if the queue is shutting down */
49 int shutdown;
57 int shutdown;
50 };
58 };
@@ -59,55 +67,73 b' static void* POOL_thread(void* opaque) {'
59 if (!ctx) { return NULL; }
67 if (!ctx) { return NULL; }
60 for (;;) {
68 for (;;) {
61 /* Lock the mutex and wait for a non-empty queue or until shutdown */
69 /* Lock the mutex and wait for a non-empty queue or until shutdown */
62 pthread_mutex_lock(&ctx->queueMutex);
70 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
63 while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) {
71
64 pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
72 while (ctx->queueEmpty && !ctx->shutdown) {
73 ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
65 }
74 }
66 /* empty => shutting down: so stop */
75 /* empty => shutting down: so stop */
67 if (ctx->queueHead == ctx->queueTail) {
76 if (ctx->queueEmpty) {
68 pthread_mutex_unlock(&ctx->queueMutex);
77 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
69 return opaque;
78 return opaque;
70 }
79 }
71 /* Pop a job off the queue */
80 /* Pop a job off the queue */
72 { POOL_job const job = ctx->queue[ctx->queueHead];
81 { POOL_job const job = ctx->queue[ctx->queueHead];
73 ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
82 ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
83 ctx->numThreadsBusy++;
84 ctx->queueEmpty = ctx->queueHead == ctx->queueTail;
74 /* Unlock the mutex, signal a pusher, and run the job */
85 /* Unlock the mutex, signal a pusher, and run the job */
75 pthread_mutex_unlock(&ctx->queueMutex);
86 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
76 pthread_cond_signal(&ctx->queuePushCond);
87 ZSTD_pthread_cond_signal(&ctx->queuePushCond);
88
77 job.function(job.opaque);
89 job.function(job.opaque);
78 }
90
79 }
91 /* If the intended queue size was 0, signal after finishing job */
92 if (ctx->queueSize == 1) {
93 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
94 ctx->numThreadsBusy--;
95 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
96 ZSTD_pthread_cond_signal(&ctx->queuePushCond);
97 } }
98 } /* for (;;) */
80 /* Unreachable */
99 /* Unreachable */
81 }
100 }
82
101
83 POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
102 POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
84 POOL_ctx *ctx;
103 return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
104 }
105
106 POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
107 POOL_ctx* ctx;
85 /* Check the parameters */
108 /* Check the parameters */
86 if (!numThreads || !queueSize) { return NULL; }
109 if (!numThreads) { return NULL; }
87 /* Allocate the context and zero initialize */
110 /* Allocate the context and zero initialize */
88 ctx = (POOL_ctx *)calloc(1, sizeof(POOL_ctx));
111 ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem);
89 if (!ctx) { return NULL; }
112 if (!ctx) { return NULL; }
90 /* Initialize the job queue.
113 /* Initialize the job queue.
91 * It needs one extra space since one space is wasted to differentiate empty
114 * It needs one extra space since one space is wasted to differentiate empty
92 * and full queues.
115 * and full queues.
93 */
116 */
94 ctx->queueSize = queueSize + 1;
117 ctx->queueSize = queueSize + 1;
95 ctx->queue = (POOL_job *)malloc(ctx->queueSize * sizeof(POOL_job));
118 ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem);
96 ctx->queueHead = 0;
119 ctx->queueHead = 0;
97 ctx->queueTail = 0;
120 ctx->queueTail = 0;
98 pthread_mutex_init(&ctx->queueMutex, NULL);
121 ctx->numThreadsBusy = 0;
99 pthread_cond_init(&ctx->queuePushCond, NULL);
122 ctx->queueEmpty = 1;
100 pthread_cond_init(&ctx->queuePopCond, NULL);
123 (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
124 (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
125 (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
101 ctx->shutdown = 0;
126 ctx->shutdown = 0;
102 /* Allocate space for the thread handles */
127 /* Allocate space for the thread handles */
103 ctx->threads = (pthread_t *)malloc(numThreads * sizeof(pthread_t));
128 ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
104 ctx->numThreads = 0;
129 ctx->numThreads = 0;
130 ctx->customMem = customMem;
105 /* Check for errors */
131 /* Check for errors */
106 if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
132 if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
107 /* Initialize the threads */
133 /* Initialize the threads */
108 { size_t i;
134 { size_t i;
109 for (i = 0; i < numThreads; ++i) {
135 for (i = 0; i < numThreads; ++i) {
110 if (pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
136 if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
111 ctx->numThreads = i;
137 ctx->numThreads = i;
112 POOL_free(ctx);
138 POOL_free(ctx);
113 return NULL;
139 return NULL;
@@ -120,75 +146,138 b' POOL_ctx *POOL_create(size_t numThreads,'
120 /*! POOL_join() :
146 /*! POOL_join() :
121 Shutdown the queue, wake any sleeping threads, and join all of the threads.
147 Shutdown the queue, wake any sleeping threads, and join all of the threads.
122 */
148 */
123 static void POOL_join(POOL_ctx *ctx) {
149 static void POOL_join(POOL_ctx* ctx) {
124 /* Shut down the queue */
150 /* Shut down the queue */
125 pthread_mutex_lock(&ctx->queueMutex);
151 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
126 ctx->shutdown = 1;
152 ctx->shutdown = 1;
127 pthread_mutex_unlock(&ctx->queueMutex);
153 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
128 /* Wake up sleeping threads */
154 /* Wake up sleeping threads */
129 pthread_cond_broadcast(&ctx->queuePushCond);
155 ZSTD_pthread_cond_broadcast(&ctx->queuePushCond);
130 pthread_cond_broadcast(&ctx->queuePopCond);
156 ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
131 /* Join all of the threads */
157 /* Join all of the threads */
132 { size_t i;
158 { size_t i;
133 for (i = 0; i < ctx->numThreads; ++i) {
159 for (i = 0; i < ctx->numThreads; ++i) {
134 pthread_join(ctx->threads[i], NULL);
160 ZSTD_pthread_join(ctx->threads[i], NULL);
135 } }
161 } }
136 }
162 }
137
163
138 void POOL_free(POOL_ctx *ctx) {
164 void POOL_free(POOL_ctx *ctx) {
139 if (!ctx) { return; }
165 if (!ctx) { return; }
140 POOL_join(ctx);
166 POOL_join(ctx);
141 pthread_mutex_destroy(&ctx->queueMutex);
167 ZSTD_pthread_mutex_destroy(&ctx->queueMutex);
142 pthread_cond_destroy(&ctx->queuePushCond);
168 ZSTD_pthread_cond_destroy(&ctx->queuePushCond);
143 pthread_cond_destroy(&ctx->queuePopCond);
169 ZSTD_pthread_cond_destroy(&ctx->queuePopCond);
144 if (ctx->queue) free(ctx->queue);
170 ZSTD_free(ctx->queue, ctx->customMem);
145 if (ctx->threads) free(ctx->threads);
171 ZSTD_free(ctx->threads, ctx->customMem);
146 free(ctx);
172 ZSTD_free(ctx, ctx->customMem);
173 }
174
175 size_t POOL_sizeof(POOL_ctx *ctx) {
176 if (ctx==NULL) return 0; /* supports sizeof NULL */
177 return sizeof(*ctx)
178 + ctx->queueSize * sizeof(POOL_job)
179 + ctx->numThreads * sizeof(ZSTD_pthread_t);
180 }
181
182 /**
183 * Returns 1 if the queue is full and 0 otherwise.
184 *
185 * If the queueSize is 1 (the pool was created with an intended queueSize of 0),
186 * then the queue only counts as non-full when a thread is free and no job is waiting.
187 */
188 static int isQueueFull(POOL_ctx const* ctx) {
189 if (ctx->queueSize > 1) {
190 return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize);
191 } else {
192 return ctx->numThreadsBusy == ctx->numThreads ||
193 !ctx->queueEmpty;
194 }
195 }
196
197
198 static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
199 {
200 POOL_job const job = {function, opaque};
201 assert(ctx != NULL);
202 if (ctx->shutdown) return;
203
204 ctx->queueEmpty = 0;
205 ctx->queue[ctx->queueTail] = job;
206 ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize;
207 ZSTD_pthread_cond_signal(&ctx->queuePopCond);
208 }
209
210 void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque)
211 {
212 assert(ctx != NULL);
213 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
214 /* Wait until there is space in the queue for the new job */
215 while (isQueueFull(ctx) && (!ctx->shutdown)) {
216 ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
217 }
218 POOL_add_internal(ctx, function, opaque);
219 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
147 }
220 }
148
221
149 void POOL_add(void *ctxVoid, POOL_function function, void *opaque) {
150 POOL_ctx *ctx = (POOL_ctx *)ctxVoid;
151 if (!ctx) { return; }
152
222
153 pthread_mutex_lock(&ctx->queueMutex);
223 int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque)
154 { POOL_job const job = {function, opaque};
224 {
155 /* Wait until there is space in the queue for the new job */
225 assert(ctx != NULL);
156 size_t newTail = (ctx->queueTail + 1) % ctx->queueSize;
226 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
157 while (ctx->queueHead == newTail && !ctx->shutdown) {
227 if (isQueueFull(ctx)) {
158 pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
228 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
159 newTail = (ctx->queueTail + 1) % ctx->queueSize;
229 return 0;
160 }
161 /* The queue is still going => there is space */
162 if (!ctx->shutdown) {
163 ctx->queue[ctx->queueTail] = job;
164 ctx->queueTail = newTail;
165 }
166 }
230 }
167 pthread_mutex_unlock(&ctx->queueMutex);
231 POOL_add_internal(ctx, function, opaque);
168 pthread_cond_signal(&ctx->queuePopCond);
232 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
233 return 1;
169 }
234 }
170
235
236
171 #else /* ZSTD_MULTITHREAD not defined */
237 #else /* ZSTD_MULTITHREAD not defined */
238
239 /* ========================== */
172 /* No multi-threading support */
240 /* No multi-threading support */
241 /* ========================== */
173
242
174 /* We don't need any data, but if it is empty malloc() might return NULL. */
243
244 /* We don't need any data, but if it is empty, malloc() might return NULL. */
175 struct POOL_ctx_s {
245 struct POOL_ctx_s {
176 int data;
246 int dummy;
177 };
247 };
248 static POOL_ctx g_ctx;
178
249
179 POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
250 POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
180 (void)numThreads;
251 return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
181 (void)queueSize;
182 return (POOL_ctx *)malloc(sizeof(POOL_ctx));
183 }
252 }
184
253
185 void POOL_free(POOL_ctx *ctx) {
254 POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
186 if (ctx) free(ctx);
255 (void)numThreads;
256 (void)queueSize;
257 (void)customMem;
258 return &g_ctx;
259 }
260
261 void POOL_free(POOL_ctx* ctx) {
262 assert(!ctx || ctx == &g_ctx);
263 (void)ctx;
187 }
264 }
188
265
189 void POOL_add(void *ctx, POOL_function function, void *opaque) {
266 void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) {
190 (void)ctx;
267 (void)ctx;
191 function(opaque);
268 function(opaque);
269 }
270
271 int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) {
272 (void)ctx;
273 function(opaque);
274 return 1;
275 }
276
277 size_t POOL_sizeof(POOL_ctx* ctx) {
278 if (ctx==NULL) return 0; /* supports sizeof NULL */
279 assert(ctx == &g_ctx);
280 return sizeof(*ctx);
192 }
281 }
193
282
194 #endif /* ZSTD_MULTITHREAD */
283 #endif /* ZSTD_MULTITHREAD */
@@ -1,11 +1,13 b''
1 /**
1 /*
2 * Copyright (c) 2016-present, Facebook, Inc.
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This source code is licensed under the BSD-style license found in the
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * of patent rights can be found in the PATENTS file in the same directory.
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
8 */
9 */
10
9 #ifndef POOL_H
11 #ifndef POOL_H
10 #define POOL_H
12 #define POOL_H
11
13
@@ -15,38 +17,54 b' extern "C" {'
15
17
16
18
17 #include <stddef.h> /* size_t */
19 #include <stddef.h> /* size_t */
20 #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */
21 #include "zstd.h"
18
22
19 typedef struct POOL_ctx_s POOL_ctx;
23 typedef struct POOL_ctx_s POOL_ctx;
20
24
21 /*! POOL_create() :
25 /*! POOL_create() :
22 Create a thread pool with at most `numThreads` threads.
26 * Create a thread pool with at most `numThreads` threads.
23 `numThreads` must be at least 1.
27 * `numThreads` must be at least 1.
24 The maximum number of queued jobs before blocking is `queueSize`.
28 * The maximum number of queued jobs before blocking is `queueSize`.
25 `queueSize` must be at least 1.
29 * @return : POOL_ctx pointer on success, else NULL.
26 @return : The POOL_ctx pointer on success else NULL.
27 */
30 */
28 POOL_ctx *POOL_create(size_t numThreads, size_t queueSize);
31 POOL_ctx* POOL_create(size_t numThreads, size_t queueSize);
32
33 POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem);
29
34
30 /*! POOL_free() :
35 /*! POOL_free() :
31 Free a thread pool returned by POOL_create().
36 Free a thread pool returned by POOL_create().
32 */
37 */
33 void POOL_free(POOL_ctx *ctx);
38 void POOL_free(POOL_ctx* ctx);
39
40 /*! POOL_sizeof() :
41 return memory usage of pool returned by POOL_create().
42 */
43 size_t POOL_sizeof(POOL_ctx* ctx);
34
44
35 /*! POOL_function :
45 /*! POOL_function :
36 The function type that can be added to a thread pool.
46 The function type that can be added to a thread pool.
37 */
47 */
38 typedef void (*POOL_function)(void *);
48 typedef void (*POOL_function)(void*);
39 /*! POOL_add_function :
49 /*! POOL_add_function :
40 The function type for a generic thread pool add function.
50 The function type for a generic thread pool add function.
41 */
51 */
42 typedef void (*POOL_add_function)(void *, POOL_function, void *);
52 typedef void (*POOL_add_function)(void*, POOL_function, void*);
43
53
44 /*! POOL_add() :
54 /*! POOL_add() :
45 Add the job `function(opaque)` to the thread pool.
55 Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
46 Possibly blocks until there is room in the queue.
56 Possibly blocks until there is room in the queue.
47 Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
57 Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
48 */
58 */
49 void POOL_add(void *ctx, POOL_function function, void *opaque);
59 void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
60
61
62 /*! POOL_tryAdd() :
63 Add the job `function(opaque)` to the thread pool if a worker is available.
64 return immediately otherwise.
65 @return : 1 if successful, 0 if not.
66 */
67 int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);
50
68
51
69
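A small, hypothetical usage sketch of this API follows; the job payload, thread count and queue size are made up, and it assumes the documented blocking behaviour of POOL_add().

#include <stdio.h>
#include "pool.h"

static void print_job(void* opaque)       /* matches POOL_function */
{
    printf("running job %d\n", *(int*)opaque);
}

static int run_pool_demo(void)
{
    int ids[8];
    size_t i;
    POOL_ctx* const pool = POOL_create(4 /* threads */, 8 /* queued jobs */);
    if (pool == NULL) return 1;

    for (i = 0; i < 8; i++) {
        ids[i] = (int)i;
        /* may block until there is room in the queue; `opaque` must stay alive
         * until the job has run, which the joining POOL_free() below guarantees */
        POOL_add(pool, print_job, &ids[i]);
    }

    POOL_free(pool);   /* shuts down the queue and joins all workers */
    return 0;
}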
52 #if defined (__cplusplus)
70 #if defined (__cplusplus)
@@ -1,11 +1,10 b''
1
2 /**
1 /**
3 * Copyright (c) 2016 Tino Reichardt
2 * Copyright (c) 2016 Tino Reichardt
4 * All rights reserved.
3 * All rights reserved.
5 *
4 *
6 * This source code is licensed under the BSD-style license found in the
5 * This source code is licensed under both the BSD-style license (found in the
7 * LICENSE file in the root directory of this source tree. An additional grant
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8 * of patent rights can be found in the PATENTS file in the same directory.
7 * in the COPYING file in the root directory of this source tree).
9 *
8 *
10 * You can contact the author at:
9 * You can contact the author at:
11 * - zstdmt source repository: https://github.com/mcmilk/zstdmt
10 * - zstdmt source repository: https://github.com/mcmilk/zstdmt
@@ -15,11 +14,8 b''
15 * This file will hold wrapper for systems, which do not support pthreads
14 * This file will hold wrapper for systems, which do not support pthreads
16 */
15 */
17
16
18 /* ====== Compiler specifics ====== */
17 /* create fake symbol to avoid empty translation unit warning */
19 #if defined(_MSC_VER)
18 int g_ZSTD_threading_useles_symbol;
20 # pragma warning(disable : 4206) /* disable: C4206: translation unit is empty (when ZSTD_MULTITHREAD is not defined) */
21 #endif
22
23
19
24 #if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
20 #if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
25
21
@@ -39,12 +35,12 b''
39
35
40 static unsigned __stdcall worker(void *arg)
36 static unsigned __stdcall worker(void *arg)
41 {
37 {
42 pthread_t* const thread = (pthread_t*) arg;
38 ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg;
43 thread->arg = thread->start_routine(thread->arg);
39 thread->arg = thread->start_routine(thread->arg);
44 return 0;
40 return 0;
45 }
41 }
46
42
47 int pthread_create(pthread_t* thread, const void* unused,
43 int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
48 void* (*start_routine) (void*), void* arg)
44 void* (*start_routine) (void*), void* arg)
49 {
45 {
50 (void)unused;
46 (void)unused;
@@ -58,16 +54,16 b' int pthread_create(pthread_t* thread, co'
58 return 0;
54 return 0;
59 }
55 }
60
56
61 int _pthread_join(pthread_t * thread, void **value_ptr)
57 int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
62 {
58 {
63 DWORD result;
59 DWORD result;
64
60
65 if (!thread->handle) return 0;
61 if (!thread.handle) return 0;
66
62
67 result = WaitForSingleObject(thread->handle, INFINITE);
63 result = WaitForSingleObject(thread.handle, INFINITE);
68 switch (result) {
64 switch (result) {
69 case WAIT_OBJECT_0:
65 case WAIT_OBJECT_0:
70 if (value_ptr) *value_ptr = thread->arg;
66 if (value_ptr) *value_ptr = thread.arg;
71 return 0;
67 return 0;
72 case WAIT_ABANDONED:
68 case WAIT_ABANDONED:
73 return EINVAL;
69 return EINVAL;
NO CONTENT: the remaining modified and removed files were truncated because the requested commit is too big.