zstd: vendor python-zstandard 0.8.0...
Gregory Szorc
r31796:e0dc4053 default
@@ -0,0 +1,770
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8
9 #include "python-zstandard.h"
10
11 extern PyObject* ZstdError;
12
13 PyDoc_STRVAR(BufferWithSegments__doc__,
14 "BufferWithSegments - A memory buffer holding known sub-segments.\n"
15 "\n"
16 "This type represents a contiguous chunk of memory containing N discrete\n"
17 "items within sub-segments of that memory.\n"
18 "\n"
19 "Segments within the buffer are stored as an array of\n"
20 "``(offset, length)`` pairs, where each element is an unsigned 64-bit\n"
21 "integer using the host/native bit order representation.\n"
22 "\n"
23 "The type exists to facilitate operations against N>1 items without the\n"
24 "overhead of Python object creation and management.\n"
25 );
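/*
 * A minimal usage sketch from Python, mirroring the unit tests added later in
 * this changeset (assumes the standard ``struct`` module is used to pack the
 * ``(offset, length)`` pairs described above):
 *
 *   import struct
 *   import zstd
 *
 *   ss = struct.Struct('=QQ')  # native byte order, unsigned 64-bit pairs
 *   b = zstd.BufferWithSegments(b'foofoox', ss.pack(0, 3) + ss.pack(3, 4))
 *   assert len(b) == 2
 *   assert b[1].tobytes() == b'foox'
 */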
26
27 static void BufferWithSegments_dealloc(ZstdBufferWithSegments* self) {
28 /* Backing memory is either canonically owned by a Py_buffer or by us. */
29 if (self->parent.buf) {
30 PyBuffer_Release(&self->parent);
31 }
32 else if (self->useFree) {
33 free(self->data);
34 }
35 else {
36 PyMem_Free(self->data);
37 }
38
39 self->data = NULL;
40
41 if (self->useFree) {
42 free(self->segments);
43 }
44 else {
45 PyMem_Free(self->segments);
46 }
47
48 self->segments = NULL;
49
50 PyObject_Del(self);
51 }
52
53 static int BufferWithSegments_init(ZstdBufferWithSegments* self, PyObject* args, PyObject* kwargs) {
54 static char* kwlist[] = {
55 "data",
56 "segments",
57 NULL
58 };
59
60 Py_buffer segments;
61 Py_ssize_t segmentCount;
62 Py_ssize_t i;
63
64 memset(&self->parent, 0, sizeof(self->parent));
65
66 #if PY_MAJOR_VERSION >= 3
67 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*y*:BufferWithSegments",
68 #else
69 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*s*:BufferWithSegments",
70 #endif
71 kwlist, &self->parent, &segments)) {
72 return -1;
73 }
74
75 if (!PyBuffer_IsContiguous(&self->parent, 'C') || self->parent.ndim > 1) {
76 PyErr_SetString(PyExc_ValueError, "data buffer should be contiguous and have a single dimension");
77 goto except;
78 }
79
80 if (!PyBuffer_IsContiguous(&segments, 'C') || segments.ndim > 1) {
81 PyErr_SetString(PyExc_ValueError, "segments buffer should be contiguous and have a single dimension");
82 goto except;
83 }
84
85 if (segments.len % sizeof(BufferSegment)) {
86 PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of %lu",
87 sizeof(BufferSegment));
88 goto except;
89 }
90
91 segmentCount = segments.len / sizeof(BufferSegment);
92
93 /* Validate segments data, as blindly trusting it could lead to arbitrary
94 memory access. */
95 for (i = 0; i < segmentCount; i++) {
96 BufferSegment* segment = &((BufferSegment*)(segments.buf))[i];
97
98 if (segment->offset + segment->length > (unsigned long long)self->parent.len) {
99 PyErr_SetString(PyExc_ValueError, "offset within segments array references memory outside buffer");
100 goto except;
102 }
103 }
104
105 /* Make a copy of the segments data. It is cheap to do so and is a guard
106 against caller changing offsets, which has security implications. */
107 self->segments = PyMem_Malloc(segments.len);
108 if (!self->segments) {
109 PyErr_NoMemory();
110 goto except;
111 }
112
113 memcpy(self->segments, segments.buf, segments.len);
114 PyBuffer_Release(&segments);
115
116 self->data = self->parent.buf;
117 self->dataSize = self->parent.len;
118 self->segmentCount = segmentCount;
119
120 return 0;
121
122 except:
123 PyBuffer_Release(&self->parent);
124 PyBuffer_Release(&segments);
125 return -1;
126 }
127
128 /**
129 * Construct a BufferWithSegments from existing memory and offsets.
130 *
131 * Ownership of the backing memory and BufferSegments will be transferred to
132 * the created object and freed when the BufferWithSegments is destroyed.
133 */
134 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize,
135 BufferSegment* segments, Py_ssize_t segmentsSize) {
136 ZstdBufferWithSegments* result = NULL;
137 Py_ssize_t i;
138
139 if (NULL == data) {
140 PyErr_SetString(PyExc_ValueError, "data is NULL");
141 return NULL;
142 }
143
144 if (NULL == segments) {
145 PyErr_SetString(PyExc_ValueError, "segments is NULL");
146 return NULL;
147 }
148
149 for (i = 0; i < segmentsSize; i++) {
150 BufferSegment* segment = &segments[i];
151
152 if (segment->offset + segment->length > dataSize) {
153 PyErr_SetString(PyExc_ValueError, "offset in segments overflows buffer size");
154 return NULL;
155 }
156 }
157
158 result = PyObject_New(ZstdBufferWithSegments, &ZstdBufferWithSegmentsType);
159 if (NULL == result) {
160 return NULL;
161 }
162
163 result->useFree = 0;
164
165 memset(&result->parent, 0, sizeof(result->parent));
166 result->data = data;
167 result->dataSize = dataSize;
168 result->segments = segments;
169 result->segmentCount = segmentsSize;
170
171 return result;
172 }
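/*
 * A minimal calling sketch (an assumption based on the ownership comment
 * above and on the dealloc path, which releases this memory with
 * PyMem_Free() when ``useFree`` is 0; error handling omitted):
 *
 *   char* data = PyMem_Malloc(7);
 *   BufferSegment* segs = PyMem_Malloc(2 * sizeof(BufferSegment));
 *   memcpy(data, "foofoox", 7);
 *   segs[0].offset = 0; segs[0].length = 3;
 *   segs[1].offset = 3; segs[1].length = 4;
 *   ZstdBufferWithSegments* b = BufferWithSegments_FromMemory(data, 7, segs, 2);
 *   // On success, b owns data and segs and frees them when deallocated.
 */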
173
174 static Py_ssize_t BufferWithSegments_length(ZstdBufferWithSegments* self) {
175 return self->segmentCount;
176 }
177
178 static ZstdBufferSegment* BufferWithSegments_item(ZstdBufferWithSegments* self, Py_ssize_t i) {
179 ZstdBufferSegment* result = NULL;
180
181 if (i < 0) {
182 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
183 return NULL;
184 }
185
186 if (i >= self->segmentCount) {
187 PyErr_Format(PyExc_IndexError, "offset must be less than %zd", self->segmentCount);
188 return NULL;
189 }
190
191 result = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL);
192 if (NULL == result) {
193 return NULL;
194 }
195
196 result->parent = (PyObject*)self;
197 Py_INCREF(self);
198
199 result->data = (char*)self->data + self->segments[i].offset;
200 result->dataSize = self->segments[i].length;
201 result->offset = self->segments[i].offset;
202
203 return result;
204 }
205
206 #if PY_MAJOR_VERSION >= 3
207 static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) {
208 return PyBuffer_FillInfo(view, (PyObject*)self, self->data, self->dataSize, 1, flags);
209 }
210 #else
211 static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) {
212 if (segment != 0) {
213 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
214 return -1;
215 }
216
217 *ptrptr = self->data;
218 return self->dataSize;
219 }
220
221 static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) {
222 if (len) {
223 *len = 1;
224 }
225
226 return 1;
227 }
228 #endif
229
230 PyDoc_STRVAR(BufferWithSegments_tobytes__doc__,
231 "Obtain a bytes instance for this buffer.\n"
232 );
233
234 static PyObject* BufferWithSegments_tobytes(ZstdBufferWithSegments* self) {
235 return PyBytes_FromStringAndSize(self->data, self->dataSize);
236 }
237
238 PyDoc_STRVAR(BufferWithSegments_segments__doc__,
239 "Obtain a BufferSegments describing segments in this sintance.\n"
240 );
241
242 static ZstdBufferSegments* BufferWithSegments_segments(ZstdBufferWithSegments* self) {
243 ZstdBufferSegments* result = (ZstdBufferSegments*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentsType, NULL);
244 if (NULL == result) {
245 return NULL;
246 }
247
248 result->parent = (PyObject*)self;
249 Py_INCREF(self);
250 result->segments = self->segments;
251 result->segmentCount = self->segmentCount;
252
253 return result;
254 }
255
256 static PySequenceMethods BufferWithSegments_sq = {
257 (lenfunc)BufferWithSegments_length, /* sq_length */
258 0, /* sq_concat */
259 0, /* sq_repeat */
260 (ssizeargfunc)BufferWithSegments_item, /* sq_item */
261 0, /* sq_ass_item */
262 0, /* sq_contains */
263 0, /* sq_inplace_concat */
264 0 /* sq_inplace_repeat */
265 };
266
267 static PyBufferProcs BufferWithSegments_as_buffer = {
268 #if PY_MAJOR_VERSION >= 3
269 (getbufferproc)BufferWithSegments_getbuffer, /* bf_getbuffer */
270 0 /* bf_releasebuffer */
271 #else
272 (readbufferproc)BufferWithSegments_getreadbuffer, /* bf_getreadbuffer */
273 0, /* bf_getwritebuffer */
274 (segcountproc)BufferWithSegments_getsegcount, /* bf_getsegcount */
275 0 /* bf_getcharbuffer */
276 #endif
277 };
278
279 static PyMethodDef BufferWithSegments_methods[] = {
280 { "segments", (PyCFunction)BufferWithSegments_segments,
281 METH_NOARGS, BufferWithSegments_segments__doc__ },
282 { "tobytes", (PyCFunction)BufferWithSegments_tobytes,
283 METH_NOARGS, BufferWithSegments_tobytes__doc__ },
284 { NULL, NULL }
285 };
286
287 static PyMemberDef BufferWithSegments_members[] = {
288 { "size", T_ULONGLONG, offsetof(ZstdBufferWithSegments, dataSize),
289 READONLY, "total size of the buffer in bytes" },
290 { NULL }
291 };
292
293 PyTypeObject ZstdBufferWithSegmentsType = {
294 PyVarObject_HEAD_INIT(NULL, 0)
295 "zstd.BufferWithSegments", /* tp_name */
296 sizeof(ZstdBufferWithSegments),/* tp_basicsize */
297 0, /* tp_itemsize */
298 (destructor)BufferWithSegments_dealloc, /* tp_dealloc */
299 0, /* tp_print */
300 0, /* tp_getattr */
301 0, /* tp_setattr */
302 0, /* tp_compare */
303 0, /* tp_repr */
304 0, /* tp_as_number */
305 &BufferWithSegments_sq, /* tp_as_sequence */
306 0, /* tp_as_mapping */
307 0, /* tp_hash */
308 0, /* tp_call */
309 0, /* tp_str */
310 0, /* tp_getattro */
311 0, /* tp_setattro */
312 &BufferWithSegments_as_buffer, /* tp_as_buffer */
313 Py_TPFLAGS_DEFAULT, /* tp_flags */
314 BufferWithSegments__doc__, /* tp_doc */
315 0, /* tp_traverse */
316 0, /* tp_clear */
317 0, /* tp_richcompare */
318 0, /* tp_weaklistoffset */
319 0, /* tp_iter */
320 0, /* tp_iternext */
321 BufferWithSegments_methods, /* tp_methods */
322 BufferWithSegments_members, /* tp_members */
323 0, /* tp_getset */
324 0, /* tp_base */
325 0, /* tp_dict */
326 0, /* tp_descr_get */
327 0, /* tp_descr_set */
328 0, /* tp_dictoffset */
329 (initproc)BufferWithSegments_init, /* tp_init */
330 0, /* tp_alloc */
331 PyType_GenericNew, /* tp_new */
332 };
333
334 PyDoc_STRVAR(BufferSegments__doc__,
335 "BufferSegments - Represents segments/offsets within a BufferWithSegments\n"
336 );
337
338 static void BufferSegments_dealloc(ZstdBufferSegments* self) {
339 Py_CLEAR(self->parent);
340 PyObject_Del(self);
341 }
342
343 #if PY_MAJOR_VERSION >= 3
344 static int BufferSegments_getbuffer(ZstdBufferSegments* self, Py_buffer* view, int flags) {
345 return PyBuffer_FillInfo(view, (PyObject*)self,
346 (void*)self->segments, self->segmentCount * sizeof(BufferSegment),
347 1, flags);
348 }
349 #else
350 static Py_ssize_t BufferSegments_getreadbuffer(ZstdBufferSegments* self, Py_ssize_t segment, void **ptrptr) {
351 if (segment != 0) {
352 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
353 return -1;
354 }
355
356 *ptrptr = (void*)self->segments;
357 return self->segmentCount * sizeof(BufferSegment);
358 }
359
360 static Py_ssize_t BufferSegments_getsegcount(ZstdBufferSegments* self, Py_ssize_t* len) {
361 if (len) {
362 *len = 1;
363 }
364
365 return 1;
366 }
367 #endif
368
369 static PyBufferProcs BufferSegments_as_buffer = {
370 #if PY_MAJOR_VERSION >= 3
371 (getbufferproc)BufferSegments_getbuffer,
372 0
373 #else
374 (readbufferproc)BufferSegments_getreadbuffer,
375 0,
376 (segcountproc)BufferSegments_getsegcount,
377 0
378 #endif
379 };
380
381 PyTypeObject ZstdBufferSegmentsType = {
382 PyVarObject_HEAD_INIT(NULL, 0)
383 "zstd.BufferSegments", /* tp_name */
384 sizeof(ZstdBufferSegments),/* tp_basicsize */
385 0, /* tp_itemsize */
386 (destructor)BufferSegments_dealloc, /* tp_dealloc */
387 0, /* tp_print */
388 0, /* tp_getattr */
389 0, /* tp_setattr */
390 0, /* tp_compare */
391 0, /* tp_repr */
392 0, /* tp_as_number */
393 0, /* tp_as_sequence */
394 0, /* tp_as_mapping */
395 0, /* tp_hash */
396 0, /* tp_call */
397 0, /* tp_str */
398 0, /* tp_getattro */
399 0, /* tp_setattro */
400 &BufferSegments_as_buffer, /* tp_as_buffer */
401 Py_TPFLAGS_DEFAULT, /* tp_flags */
402 BufferSegments__doc__, /* tp_doc */
403 0, /* tp_traverse */
404 0, /* tp_clear */
405 0, /* tp_richcompare */
406 0, /* tp_weaklistoffset */
407 0, /* tp_iter */
408 0, /* tp_iternext */
409 0, /* tp_methods */
410 0, /* tp_members */
411 0, /* tp_getset */
412 0, /* tp_base */
413 0, /* tp_dict */
414 0, /* tp_descr_get */
415 0, /* tp_descr_set */
416 0, /* tp_dictoffset */
417 0, /* tp_init */
418 0, /* tp_alloc */
419 PyType_GenericNew, /* tp_new */
420 };
421
422 PyDoc_STRVAR(BufferSegment__doc__,
423 "BufferSegment - Represents a segment within a BufferWithSegments\n"
424 );
425
426 static void BufferSegment_dealloc(ZstdBufferSegment* self) {
427 Py_CLEAR(self->parent);
428 PyObject_Del(self);
429 }
430
431 static Py_ssize_t BufferSegment_length(ZstdBufferSegment* self) {
432 return self->dataSize;
433 }
434
435 #if PY_MAJOR_VERSION >= 3
436 static int BufferSegment_getbuffer(ZstdBufferSegment* self, Py_buffer* view, int flags) {
437 return PyBuffer_FillInfo(view, (PyObject*)self,
438 self->data, self->dataSize, 1, flags);
439 }
440 #else
441 static Py_ssize_t BufferSegment_getreadbuffer(ZstdBufferSegment* self, Py_ssize_t segment, void **ptrptr) {
442 if (segment != 0) {
443 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
444 return -1;
445 }
446
447 *ptrptr = self->data;
448 return self->dataSize;
449 }
450
451 static Py_ssize_t BufferSegment_getsegcount(ZstdBufferSegment* self, Py_ssize_t* len) {
452 if (len) {
453 *len = 1;
454 }
455
456 return 1;
457 }
458 #endif
459
460 PyDoc_STRVAR(BufferSegment_tobytes__doc__,
461 "Obtain a bytes instance for this segment.\n"
462 );
463
464 static PyObject* BufferSegment_tobytes(ZstdBufferSegment* self) {
465 return PyBytes_FromStringAndSize(self->data, self->dataSize);
466 }
467
468 static PySequenceMethods BufferSegment_sq = {
469 (lenfunc)BufferSegment_length, /* sq_length */
470 0, /* sq_concat */
471 0, /* sq_repeat */
472 0, /* sq_item */
473 0, /* sq_ass_item */
474 0, /* sq_contains */
475 0, /* sq_inplace_concat */
476 0 /* sq_inplace_repeat */
477 };
478
479 static PyBufferProcs BufferSegment_as_buffer = {
480 #if PY_MAJOR_VERSION >= 3
481 (getbufferproc)BufferSegment_getbuffer,
482 0
483 #else
484 (readbufferproc)BufferSegment_getreadbuffer,
485 0,
486 (segcountproc)BufferSegment_getsegcount,
487 0
488 #endif
489 };
490
491 static PyMethodDef BufferSegment_methods[] = {
492 { "tobytes", (PyCFunction)BufferSegment_tobytes,
493 METH_NOARGS, BufferSegment_tobytes__doc__ },
494 { NULL, NULL }
495 };
496
497 static PyMemberDef BufferSegment_members[] = {
498 { "offset", T_ULONGLONG, offsetof(ZstdBufferSegment, offset), READONLY,
499 "offset of segment within parent buffer" },
500 { NULL }
501 };
502
503 PyTypeObject ZstdBufferSegmentType = {
504 PyVarObject_HEAD_INIT(NULL, 0)
505 "zstd.BufferSegment", /* tp_name */
506 sizeof(ZstdBufferSegment),/* tp_basicsize */
507 0, /* tp_itemsize */
508 (destructor)BufferSegment_dealloc, /* tp_dealloc */
509 0, /* tp_print */
510 0, /* tp_getattr */
511 0, /* tp_setattr */
512 0, /* tp_compare */
513 0, /* tp_repr */
514 0, /* tp_as_number */
515 &BufferSegment_sq, /* tp_as_sequence */
516 0, /* tp_as_mapping */
517 0, /* tp_hash */
518 0, /* tp_call */
519 0, /* tp_str */
520 0, /* tp_getattro */
521 0, /* tp_setattro */
522 &BufferSegment_as_buffer, /* tp_as_buffer */
523 Py_TPFLAGS_DEFAULT, /* tp_flags */
524 BufferSegment__doc__, /* tp_doc */
525 0, /* tp_traverse */
526 0, /* tp_clear */
527 0, /* tp_richcompare */
528 0, /* tp_weaklistoffset */
529 0, /* tp_iter */
530 0, /* tp_iternext */
531 BufferSegment_methods, /* tp_methods */
532 BufferSegment_members, /* tp_members */
533 0, /* tp_getset */
534 0, /* tp_base */
535 0, /* tp_dict */
536 0, /* tp_descr_get */
537 0, /* tp_descr_set */
538 0, /* tp_dictoffset */
539 0, /* tp_init */
540 0, /* tp_alloc */
541 PyType_GenericNew, /* tp_new */
542 };
543
544 PyDoc_STRVAR(BufferWithSegmentsCollection__doc__,
545 "Represents a collection of BufferWithSegments.\n"
546 );
547
548 static void BufferWithSegmentsCollection_dealloc(ZstdBufferWithSegmentsCollection* self) {
549 Py_ssize_t i;
550
551 if (self->firstElements) {
552 PyMem_Free(self->firstElements);
553 self->firstElements = NULL;
554 }
555
556 if (self->buffers) {
557 for (i = 0; i < self->bufferCount; i++) {
558 Py_CLEAR(self->buffers[i]);
559 }
560
561 PyMem_Free(self->buffers);
562 self->buffers = NULL;
563 }
564
565 PyObject_Del(self);
566 }
567
568 static int BufferWithSegmentsCollection_init(ZstdBufferWithSegmentsCollection* self, PyObject* args) {
569 Py_ssize_t size;
570 Py_ssize_t i;
571 Py_ssize_t offset = 0;
572
573 size = PyTuple_Size(args);
574 if (-1 == size) {
575 return -1;
576 }
577
578 if (0 == size) {
579 PyErr_SetString(PyExc_ValueError, "must pass at least 1 argument");
580 return -1;
581 }
582
583 for (i = 0; i < size; i++) {
584 PyObject* item = PyTuple_GET_ITEM(args, i);
585 if (!PyObject_TypeCheck(item, &ZstdBufferWithSegmentsType)) {
586 PyErr_SetString(PyExc_TypeError, "arguments must be BufferWithSegments instances");
587 return -1;
588 }
589
590 if (0 == ((ZstdBufferWithSegments*)item)->segmentCount ||
591 0 == ((ZstdBufferWithSegments*)item)->dataSize) {
592 PyErr_SetString(PyExc_ValueError, "ZstdBufferWithSegments cannot be empty");
593 return -1;
594 }
595 }
596
597 self->buffers = PyMem_Malloc(size * sizeof(ZstdBufferWithSegments*));
598 if (NULL == self->buffers) {
599 PyErr_NoMemory();
600 return -1;
601 }
602
603 self->firstElements = PyMem_Malloc(size * sizeof(Py_ssize_t));
604 if (NULL == self->firstElements) {
605 PyMem_Free(self->buffers);
606 self->buffers = NULL;
607 PyErr_NoMemory();
608 return -1;
609 }
610
611 self->bufferCount = size;
612
613 for (i = 0; i < size; i++) {
614 ZstdBufferWithSegments* item = (ZstdBufferWithSegments*)PyTuple_GET_ITEM(args, i);
615
616 self->buffers[i] = item;
617 Py_INCREF(item);
618
619 if (i > 0) {
620 self->firstElements[i - 1] = offset;
621 }
622
623 offset += item->segmentCount;
624 }
625
626 self->firstElements[size - 1] = offset;
627
628 return 0;
629 }
630
631 static PyObject* BufferWithSegmentsCollection_size(ZstdBufferWithSegmentsCollection* self) {
632 Py_ssize_t i;
633 Py_ssize_t j;
634 unsigned long long size = 0;
635
636 for (i = 0; i < self->bufferCount; i++) {
637 for (j = 0; j < self->buffers[i]->segmentCount; j++) {
638 size += self->buffers[i]->segments[j].length;
639 }
640 }
641
642 return PyLong_FromUnsignedLongLong(size);
643 }
644
645 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection* self) {
646 return self->firstElements[self->bufferCount - 1];
647 }
648
649 static ZstdBufferSegment* BufferWithSegmentsCollection_item(ZstdBufferWithSegmentsCollection* self, Py_ssize_t i) {
650 Py_ssize_t bufferOffset;
651
652 if (i < 0) {
653 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
654 return NULL;
655 }
656
657 if (i >= BufferWithSegmentsCollection_length(self)) {
658 PyErr_Format(PyExc_IndexError, "offset must be less than %zd",
659 BufferWithSegmentsCollection_length(self));
660 return NULL;
661 }
662
663 for (bufferOffset = 0; bufferOffset < self->bufferCount; bufferOffset++) {
664 Py_ssize_t offset = 0;
665
666 if (i < self->firstElements[bufferOffset]) {
667 if (bufferOffset > 0) {
668 offset = self->firstElements[bufferOffset - 1];
669 }
670
671 return BufferWithSegments_item(self->buffers[bufferOffset], i - offset);
672 }
673 }
674
675 PyErr_SetString(ZstdError, "error resolving segment; this should not happen");
676 return NULL;
677 }
678
679 static PySequenceMethods BufferWithSegmentsCollection_sq = {
680 (lenfunc)BufferWithSegmentsCollection_length, /* sq_length */
681 0, /* sq_concat */
682 0, /* sq_repeat */
683 (ssizeargfunc)BufferWithSegmentsCollection_item, /* sq_item */
684 0, /* sq_ass_item */
685 0, /* sq_contains */
686 0, /* sq_inplace_concat */
687 0 /* sq_inplace_repeat */
688 };
689
690 static PyMethodDef BufferWithSegmentsCollection_methods[] = {
691 { "size", (PyCFunction)BufferWithSegmentsCollection_size,
692 METH_NOARGS, PyDoc_STR("total size in bytes of all segments") },
693 { NULL, NULL }
694 };
695
696 PyTypeObject ZstdBufferWithSegmentsCollectionType = {
697 PyVarObject_HEAD_INIT(NULL, 0)
698 "zstd.BufferWithSegmentsCollection", /* tp_name */
699 sizeof(ZstdBufferWithSegmentsCollection),/* tp_basicsize */
700 0, /* tp_itemsize */
701 (destructor)BufferWithSegmentsCollection_dealloc, /* tp_dealloc */
702 0, /* tp_print */
703 0, /* tp_getattr */
704 0, /* tp_setattr */
705 0, /* tp_compare */
706 0, /* tp_repr */
707 0, /* tp_as_number */
708 &BufferWithSegmentsCollection_sq, /* tp_as_sequence */
709 0, /* tp_as_mapping */
710 0, /* tp_hash */
711 0, /* tp_call */
712 0, /* tp_str */
713 0, /* tp_getattro */
714 0, /* tp_setattro */
715 0, /* tp_as_buffer */
716 Py_TPFLAGS_DEFAULT, /* tp_flags */
717 BufferWithSegmentsCollection__doc__, /* tp_doc */
718 0, /* tp_traverse */
719 0, /* tp_clear */
720 0, /* tp_richcompare */
721 0, /* tp_weaklistoffset */
722 /* TODO implement iterator for performance. */
723 0, /* tp_iter */
724 0, /* tp_iternext */
725 BufferWithSegmentsCollection_methods, /* tp_methods */
726 0, /* tp_members */
727 0, /* tp_getset */
728 0, /* tp_base */
729 0, /* tp_dict */
730 0, /* tp_descr_get */
731 0, /* tp_descr_set */
732 0, /* tp_dictoffset */
733 (initproc)BufferWithSegmentsCollection_init, /* tp_init */
734 0, /* tp_alloc */
735 PyType_GenericNew, /* tp_new */
736 };
737
738 void bufferutil_module_init(PyObject* mod) {
739 Py_TYPE(&ZstdBufferWithSegmentsType) = &PyType_Type;
740 if (PyType_Ready(&ZstdBufferWithSegmentsType) < 0) {
741 return;
742 }
743
744 Py_INCREF(&ZstdBufferWithSegmentsType);
745 PyModule_AddObject(mod, "BufferWithSegments", (PyObject*)&ZstdBufferWithSegmentsType);
746
747 Py_TYPE(&ZstdBufferSegmentsType) = &PyType_Type;
748 if (PyType_Ready(&ZstdBufferSegmentsType) < 0) {
749 return;
750 }
751
752 Py_INCREF(&ZstdBufferSegmentsType);
753 PyModule_AddObject(mod, "BufferSegments", (PyObject*)&ZstdBufferSegmentsType);
754
755 Py_TYPE(&ZstdBufferSegmentType) = &PyType_Type;
756 if (PyType_Ready(&ZstdBufferSegmentType) < 0) {
757 return;
758 }
759
760 Py_INCREF(&ZstdBufferSegmentType);
761 PyModule_AddObject(mod, "BufferSegment", (PyObject*)&ZstdBufferSegmentType);
762
763 Py_TYPE(&ZstdBufferWithSegmentsCollectionType) = &PyType_Type;
764 if (PyType_Ready(&ZstdBufferWithSegmentsCollectionType) < 0) {
765 return;
766 }
767
768 Py_INCREF(&ZstdBufferWithSegmentsCollectionType);
769 PyModule_AddObject(mod, "BufferWithSegmentsCollection", (PyObject*)&ZstdBufferWithSegmentsCollectionType);
770 }
@@ -0,0 +1,112
1 import struct
2
3 try:
4 import unittest2 as unittest
5 except ImportError:
6 import unittest
7
8 import zstd
9
10 ss = struct.Struct('=QQ')
11
12
13 class TestBufferWithSegments(unittest.TestCase):
14 def test_arguments(self):
15 with self.assertRaises(TypeError):
16 zstd.BufferWithSegments()
17
18 with self.assertRaises(TypeError):
19 zstd.BufferWithSegments(b'foo')
20
21 # Segments data should be a multiple of 16.
22 with self.assertRaisesRegexp(ValueError, 'segments array size is not a multiple of 16'):
23 zstd.BufferWithSegments(b'foo', b'\x00\x00')
24
25 def test_invalid_offset(self):
26 with self.assertRaisesRegexp(ValueError, 'offset within segments array references memory'):
27 zstd.BufferWithSegments(b'foo', ss.pack(0, 4))
28
29 def test_invalid_getitem(self):
30 b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
31
32 with self.assertRaisesRegexp(IndexError, 'offset must be non-negative'):
33 test = b[-10]
34
35 with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'):
36 test = b[1]
37
38 with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'):
39 test = b[2]
40
41 def test_single(self):
42 b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
43 self.assertEqual(len(b), 1)
44 self.assertEqual(b.size, 3)
45 self.assertEqual(b.tobytes(), b'foo')
46
47 self.assertEqual(len(b[0]), 3)
48 self.assertEqual(b[0].offset, 0)
49 self.assertEqual(b[0].tobytes(), b'foo')
50
51 def test_multiple(self):
52 b = zstd.BufferWithSegments(b'foofooxfooxy', b''.join([ss.pack(0, 3),
53 ss.pack(3, 4),
54 ss.pack(7, 5)]))
55 self.assertEqual(len(b), 3)
56 self.assertEqual(b.size, 12)
57 self.assertEqual(b.tobytes(), b'foofooxfooxy')
58
59 self.assertEqual(b[0].tobytes(), b'foo')
60 self.assertEqual(b[1].tobytes(), b'foox')
61 self.assertEqual(b[2].tobytes(), b'fooxy')
62
63
64 class TestBufferWithSegmentsCollection(unittest.TestCase):
65 def test_empty_constructor(self):
66 with self.assertRaisesRegexp(ValueError, 'must pass at least 1 argument'):
67 zstd.BufferWithSegmentsCollection()
68
69 def test_argument_validation(self):
70 with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
71 zstd.BufferWithSegmentsCollection(None)
72
73 with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
74 zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'foo', ss.pack(0, 3)),
75 None)
76
77 with self.assertRaisesRegexp(ValueError, 'ZstdBufferWithSegments cannot be empty'):
78 zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b''))
79
80 def test_length(self):
81 b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
82 b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
83 ss.pack(3, 3)]))
84
85 c = zstd.BufferWithSegmentsCollection(b1)
86 self.assertEqual(len(c), 1)
87 self.assertEqual(c.size(), 3)
88
89 c = zstd.BufferWithSegmentsCollection(b2)
90 self.assertEqual(len(c), 2)
91 self.assertEqual(c.size(), 6)
92
93 c = zstd.BufferWithSegmentsCollection(b1, b2)
94 self.assertEqual(len(c), 3)
95 self.assertEqual(c.size(), 9)
96
97 def test_getitem(self):
98 b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
99 b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
100 ss.pack(3, 3)]))
101
102 c = zstd.BufferWithSegmentsCollection(b1, b2)
103
104 with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'):
105 c[3]
106
107 with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'):
108 c[4]
109
110 self.assertEqual(c[0].tobytes(), b'foo')
111 self.assertEqual(c[1].tobytes(), b'bar')
112 self.assertEqual(c[2].tobytes(), b'baz')
@@ -0,0 +1,143
1 import io
2 import os
3
4 try:
5 import unittest2 as unittest
6 except ImportError:
7 import unittest
8
9 try:
10 import hypothesis
11 import hypothesis.strategies as strategies
12 except ImportError:
13 raise unittest.SkipTest('hypothesis not available')
14
15 import zstd
16
17 from . common import (
18 make_cffi,
19 random_input_data,
20 )
21
22
23 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
24 @make_cffi
25 class TestCompressor_write_to_fuzzing(unittest.TestCase):
26 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
27 level=strategies.integers(min_value=1, max_value=5),
28 write_size=strategies.integers(min_value=1, max_value=1048576))
29 def test_write_size_variance(self, original, level, write_size):
30 refctx = zstd.ZstdCompressor(level=level)
31 ref_frame = refctx.compress(original)
32
33 cctx = zstd.ZstdCompressor(level=level)
34 b = io.BytesIO()
35 with cctx.write_to(b, size=len(original), write_size=write_size) as compressor:
36 compressor.write(original)
37
38 self.assertEqual(b.getvalue(), ref_frame)
39
40
41 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
42 @make_cffi
43 class TestCompressor_copy_stream_fuzzing(unittest.TestCase):
44 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
45 level=strategies.integers(min_value=1, max_value=5),
46 read_size=strategies.integers(min_value=1, max_value=1048576),
47 write_size=strategies.integers(min_value=1, max_value=1048576))
48 def test_read_write_size_variance(self, original, level, read_size, write_size):
49 refctx = zstd.ZstdCompressor(level=level)
50 ref_frame = refctx.compress(original)
51
52 cctx = zstd.ZstdCompressor(level=level)
53 source = io.BytesIO(original)
54 dest = io.BytesIO()
55
56 cctx.copy_stream(source, dest, size=len(original), read_size=read_size,
57 write_size=write_size)
58
59 self.assertEqual(dest.getvalue(), ref_frame)
60
61
62 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
63 @make_cffi
64 class TestCompressor_compressobj_fuzzing(unittest.TestCase):
65 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
66 level=strategies.integers(min_value=1, max_value=5),
67 chunk_sizes=strategies.streaming(
68 strategies.integers(min_value=1, max_value=4096)))
69 def test_random_input_sizes(self, original, level, chunk_sizes):
70 chunk_sizes = iter(chunk_sizes)
71
72 refctx = zstd.ZstdCompressor(level=level)
73 ref_frame = refctx.compress(original)
74
75 cctx = zstd.ZstdCompressor(level=level)
76 cobj = cctx.compressobj(size=len(original))
77
78 chunks = []
79 i = 0
80 while True:
81 chunk_size = next(chunk_sizes)
82 source = original[i:i + chunk_size]
83 if not source:
84 break
85
86 chunks.append(cobj.compress(source))
87 i += chunk_size
88
89 chunks.append(cobj.flush())
90
91 self.assertEqual(b''.join(chunks), ref_frame)
92
93
94 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
95 @make_cffi
96 class TestCompressor_read_from_fuzzing(unittest.TestCase):
97 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
98 level=strategies.integers(min_value=1, max_value=5),
99 read_size=strategies.integers(min_value=1, max_value=4096),
100 write_size=strategies.integers(min_value=1, max_value=4096))
101 def test_read_write_size_variance(self, original, level, read_size, write_size):
102 refcctx = zstd.ZstdCompressor(level=level)
103 ref_frame = refcctx.compress(original)
104
105 source = io.BytesIO(original)
106
107 cctx = zstd.ZstdCompressor(level=level)
108 chunks = list(cctx.read_from(source, size=len(original), read_size=read_size,
109 write_size=write_size))
110
111 self.assertEqual(b''.join(chunks), ref_frame)
112
113
114 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
115 class TestCompressor_multi_compress_to_buffer_fuzzing(unittest.TestCase):
116 @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()),
117 min_size=1, max_size=1024),
118 threads=strategies.integers(min_value=1, max_value=8),
119 use_dict=strategies.booleans())
120 def test_data_equivalence(self, original, threads, use_dict):
121 kwargs = {}
122
123 # Use a content dictionary because it is cheap to create.
124 if use_dict:
125 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
126
127 cctx = zstd.ZstdCompressor(level=1,
128 write_content_size=True,
129 write_checksum=True,
130 **kwargs)
131
132 result = cctx.multi_compress_to_buffer(original, threads=-1)
133
134 self.assertEqual(len(result), len(original))
135
136 # The frame produced via the batch APIs may not be bit identical to that
137 # produced by compress() because compression parameters are adjusted
138 # from the first input in batch mode. So the only thing we can do is
139 # verify the decompressed data matches the input.
140 dctx = zstd.ZstdDecompressor(**kwargs)
141
142 for i, frame in enumerate(result):
143 self.assertEqual(dctx.decompress(frame), original[i])
@@ -0,0 +1,79
1 import io
2 import os
3
4 try:
5 import unittest2 as unittest
6 except ImportError:
7 import unittest
8
9 try:
10 import hypothesis
11 import hypothesis.strategies as strategies
12 except ImportError:
13 raise unittest.SkipTest('hypothesis not available')
14
15 import zstd
16
17 from .common import (
18 make_cffi,
19 )
20
21
22 s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
23 max_value=zstd.WINDOWLOG_MAX)
24 s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN,
25 max_value=zstd.CHAINLOG_MAX)
26 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN,
27 max_value=zstd.HASHLOG_MAX)
28 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
29 max_value=zstd.SEARCHLOG_MAX)
30 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
31 max_value=zstd.SEARCHLENGTH_MAX)
32 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
33 max_value=zstd.TARGETLENGTH_MAX)
34 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
35 zstd.STRATEGY_DFAST,
36 zstd.STRATEGY_GREEDY,
37 zstd.STRATEGY_LAZY,
38 zstd.STRATEGY_LAZY2,
39 zstd.STRATEGY_BTLAZY2,
40 zstd.STRATEGY_BTOPT))
41
42
43 @make_cffi
44 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
45 class TestCompressionParametersHypothesis(unittest.TestCase):
46 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
47 s_searchlength, s_targetlength, s_strategy)
48 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
49 searchlength, targetlength, strategy):
50 # ZSTD_checkCParams moves the goal posts on us from what's advertised
51 # in the constants. So move along with them.
52 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
53 searchlength += 1
54 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
55 searchlength -= 1
56
57 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
58 searchlog, searchlength,
59 targetlength, strategy)
60
61 cctx = zstd.ZstdCompressor(compression_params=p)
62 with cctx.write_to(io.BytesIO()):
63 pass
64
65 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
66 s_searchlength, s_targetlength, s_strategy)
67 def test_estimate_compression_context_size(self, windowlog, chainlog,
68 hashlog, searchlog,
69 searchlength, targetlength,
70 strategy):
71 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
72 searchlength += 1
73 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
74 searchlength -= 1
75
76 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
77 searchlog, searchlength,
78 targetlength, strategy)
79 size = zstd.estimate_compression_context_size(p)
@@ -0,0 +1,151
1 import io
2 import os
3
4 try:
5 import unittest2 as unittest
6 except ImportError:
7 import unittest
8
9 try:
10 import hypothesis
11 import hypothesis.strategies as strategies
12 except ImportError:
13 raise unittest.SkipTest('hypothesis not available')
14
15 import zstd
16
17 from . common import (
18 make_cffi,
19 random_input_data,
20 )
21
22
23 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
24 @make_cffi
25 class TestDecompressor_write_to_fuzzing(unittest.TestCase):
26 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
27 level=strategies.integers(min_value=1, max_value=5),
28 write_size=strategies.integers(min_value=1, max_value=8192),
29 input_sizes=strategies.streaming(
30 strategies.integers(min_value=1, max_value=4096)))
31 def test_write_size_variance(self, original, level, write_size, input_sizes):
32 input_sizes = iter(input_sizes)
33
34 cctx = zstd.ZstdCompressor(level=level)
35 frame = cctx.compress(original)
36
37 dctx = zstd.ZstdDecompressor()
38 source = io.BytesIO(frame)
39 dest = io.BytesIO()
40
41 with dctx.write_to(dest, write_size=write_size) as decompressor:
42 while True:
43 chunk = source.read(next(input_sizes))
44 if not chunk:
45 break
46
47 decompressor.write(chunk)
48
49 self.assertEqual(dest.getvalue(), original)
50
51
52 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
53 @make_cffi
54 class TestDecompressor_copy_stream_fuzzing(unittest.TestCase):
55 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
56 level=strategies.integers(min_value=1, max_value=5),
57 read_size=strategies.integers(min_value=1, max_value=8192),
58 write_size=strategies.integers(min_value=1, max_value=8192))
59 def test_read_write_size_variance(self, original, level, read_size, write_size):
60 cctx = zstd.ZstdCompressor(level=level)
61 frame = cctx.compress(original)
62
63 source = io.BytesIO(frame)
64 dest = io.BytesIO()
65
66 dctx = zstd.ZstdDecompressor()
67 dctx.copy_stream(source, dest, read_size=read_size, write_size=write_size)
68
69 self.assertEqual(dest.getvalue(), original)
70
71
72 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
73 @make_cffi
74 class TestDecompressor_decompressobj_fuzzing(unittest.TestCase):
75 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
76 level=strategies.integers(min_value=1, max_value=5),
77 chunk_sizes=strategies.streaming(
78 strategies.integers(min_value=1, max_value=4096)))
79 def test_random_input_sizes(self, original, level, chunk_sizes):
80 chunk_sizes = iter(chunk_sizes)
81
82 cctx = zstd.ZstdCompressor(level=level)
83 frame = cctx.compress(original)
84
85 source = io.BytesIO(frame)
86
87 dctx = zstd.ZstdDecompressor()
88 dobj = dctx.decompressobj()
89
90 chunks = []
91 while True:
92 chunk = source.read(next(chunk_sizes))
93 if not chunk:
94 break
95
96 chunks.append(dobj.decompress(chunk))
97
98 self.assertEqual(b''.join(chunks), original)
99
100
101 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
102 @make_cffi
103 class TestDecompressor_read_from_fuzzing(unittest.TestCase):
104 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
105 level=strategies.integers(min_value=1, max_value=5),
106 read_size=strategies.integers(min_value=1, max_value=4096),
107 write_size=strategies.integers(min_value=1, max_value=4096))
108 def test_read_write_size_variance(self, original, level, read_size, write_size):
109 cctx = zstd.ZstdCompressor(level=level)
110 frame = cctx.compress(original)
111
112 source = io.BytesIO(frame)
113
114 dctx = zstd.ZstdDecompressor()
115 chunks = list(dctx.read_from(source, read_size=read_size, write_size=write_size))
116
117 self.assertEqual(b''.join(chunks), original)
118
119
120 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
121 class TestDecompressor_multi_decompress_to_buffer_fuzzing(unittest.TestCase):
122 @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()),
123 min_size=1, max_size=1024),
124 threads=strategies.integers(min_value=1, max_value=8),
125 use_dict=strategies.booleans())
126 def test_data_equivalence(self, original, threads, use_dict):
127 kwargs = {}
128 if use_dict:
129 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
130
131 cctx = zstd.ZstdCompressor(level=1,
132 write_content_size=True,
133 write_checksum=True,
134 **kwargs)
135
136 frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1)
137
138 dctx = zstd.ZstdDecompressor(**kwargs)
139
140 result = dctx.multi_decompress_to_buffer(frames_buffer)
141
142 self.assertEqual(len(result), len(original))
143 for i, frame in enumerate(result):
144 self.assertEqual(frame.tobytes(), original[i])
145
146 frames_list = [f.tobytes() for f in frames_buffer]
147 result = dctx.multi_decompress_to_buffer(frames_list)
148
149 self.assertEqual(len(result), len(original))
150 for i, frame in enumerate(result):
151 self.assertEqual(frame.tobytes(), original[i])
@@ -1,117 +1,145
1 Version History
1 Version History
2 ===============
2 ===============
3
3
4 0.8.0 (released 2017-03-08)
5 ---------------------------
6
7 * CompressionParameters now has an estimated_compression_context_size() method.
8 zstd.estimate_compression_context_size() is now deprecated and slated for
9 removal.
10 * Implemented a lot of fuzzing tests.
11 * CompressionParameters instances now perform extra validation by calling
12 ZSTD_checkCParams() at construction time.
13 * multi_compress_to_buffer() API for compressing multiple inputs as a
14 single operation, as efficiently as possible (example after this list).
15 * ZSTD_CStream instances are now used across multiple operations on
16 ZstdCompressor instances, resulting in much better performance for
17 APIs that do streaming.
18 * ZSTD_DStream instances are now used across multiple operations on
19 ZstdDecompressor instances, resulting in much better performance for
20 APIs that do streaming.
21 * train_dictionary() now releases the GIL.
22 * Support for training dictionaries using the COVER algorithm.
23 * multi_decompress_to_buffer() API for decompressing multiple frames as a
24 single operation, as efficiently as possible.
25 * Support for multi-threaded compression.
26 * Disable deprecation warnings when compiling CFFI module.
27 * Fixed memory leak in train_dictionary().
28 * Removed DictParameters type.
29 * train_dictionary() now accepts keyword arguments instead of a
30 DictParameters instance to control dictionary generation.
31
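For illustration, a sketch combining the two batch APIs above, adapted from
the fuzzing tests added in this changeset (argument names follow those
tests)::

    import zstd

    inputs = [b'foo' * 100, b'bar' * 100]

    cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
    frames = cctx.multi_compress_to_buffer(inputs, threads=-1)

    dctx = zstd.ZstdDecompressor()
    result = dctx.multi_decompress_to_buffer(frames)

    assert [r.tobytes() for r in result] == inputs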
4 0.7.0 (released 2017-02-07)
32 0.7.0 (released 2017-02-07)
5 ---------------------------
33 ---------------------------
6
34
7 * Added zstd.get_frame_parameters() to obtain info about a zstd frame.
35 * Added zstd.get_frame_parameters() to obtain info about a zstd frame.
8 * Added ZstdDecompressor.decompress_content_dict_chain() for efficient
36 * Added ZstdDecompressor.decompress_content_dict_chain() for efficient
9 decompression of *content-only dictionary chains*.
37 decompression of *content-only dictionary chains*.
10 * CFFI module fully implemented; all tests run against both C extension and
38 * CFFI module fully implemented; all tests run against both C extension and
11 CFFI implementation.
39 CFFI implementation.
12 * Vendored version of zstd updated to 1.1.3.
40 * Vendored version of zstd updated to 1.1.3.
13 * ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference()
41 * ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference()
14 to avoid extra memory allocation of dict data.
42 to avoid extra memory allocation of dict data.
15 * Add function names to error messages (by using ":name" in PyArg_Parse*
43 * Add function names to error messages (by using ":name" in PyArg_Parse*
16 functions).
44 functions).
17 * Reuse decompression context across operations. Previously, we created a
45 * Reuse decompression context across operations. Previously, we created a
18 new ZSTD_DCtx for each decompress(). This was measured to slow down
46 new ZSTD_DCtx for each decompress(). This was measured to slow down
19 decompression by 40-200MB/s. The API guarantees say ZstdDecompressor
47 decompression by 40-200MB/s. The API guarantees say ZstdDecompressor
20 is not thread safe. So we reuse the ZSTD_DCtx across operations and make
48 is not thread safe. So we reuse the ZSTD_DCtx across operations and make
21 things faster in the process.
49 things faster in the process.
22 * ZstdCompressor.write_to()'s compress() and flush() methods now return number
50 * ZstdCompressor.write_to()'s compress() and flush() methods now return number
23 of bytes written.
51 of bytes written.
24 * ZstdDecompressor.write_to()'s write() method now returns the number of bytes
52 * ZstdDecompressor.write_to()'s write() method now returns the number of bytes
25 written to the underlying output object.
53 written to the underlying output object.
26 * CompressionParameters instances now expose their values as attributes.
54 * CompressionParameters instances now expose their values as attributes.
27 * CompressionParameters instances no longer are subscriptable nor behave
55 * CompressionParameters instances no longer are subscriptable nor behave
28 as tuples (backwards incompatible). Use attributes to obtain values.
56 as tuples (backwards incompatible). Use attributes to obtain values.
29 * DictParameters instances now expose their values as attributes.
57 * DictParameters instances now expose their values as attributes.
30
58
31 0.6.0 (released 2017-01-14)
59 0.6.0 (released 2017-01-14)
32 ---------------------------
60 ---------------------------
33
61
34 * Support for legacy zstd protocols (build time opt in feature).
62 * Support for legacy zstd protocols (build time opt in feature).
35 * Automation improvements to test against Python 3.6, latest versions
63 * Automation improvements to test against Python 3.6, latest versions
36 of Tox, more deterministic AppVeyor behavior.
64 of Tox, more deterministic AppVeyor behavior.
37 * CFFI "parser" improved to use a compiler preprocessor instead of rewriting
65 * CFFI "parser" improved to use a compiler preprocessor instead of rewriting
38 source code manually.
66 source code manually.
39 * Vendored version of zstd updated to 1.1.2.
67 * Vendored version of zstd updated to 1.1.2.
40 * Documentation improvements.
68 * Documentation improvements.
41 * Introduce a bench.py script for performing (crude) benchmarks.
69 * Introduce a bench.py script for performing (crude) benchmarks.
42 * ZSTD_CCtx instances are now reused across multiple compress() operations.
70 * ZSTD_CCtx instances are now reused across multiple compress() operations.
43 * ZstdCompressor.write_to() now has a flush() method.
71 * ZstdCompressor.write_to() now has a flush() method.
44 * ZstdCompressor.compressobj()'s flush() method now accepts an argument to
72 * ZstdCompressor.compressobj()'s flush() method now accepts an argument to
45 flush a block (as opposed to ending the stream).
73 flush a block (as opposed to ending the stream).
46 * Disallow compress(b'') when writing content sizes by default (issue #11).
74 * Disallow compress(b'') when writing content sizes by default (issue #11).
47
75
48 0.5.2 (released 2016-11-12)
76 0.5.2 (released 2016-11-12)
49 ---------------------------
77 ---------------------------
50
78
51 * more packaging fixes for source distribution
79 * more packaging fixes for source distribution
52
80
53 0.5.1 (released 2016-11-12)
81 0.5.1 (released 2016-11-12)
54 ---------------------------
82 ---------------------------
55
83
56 * setup_zstd.py is included in the source distribution
84 * setup_zstd.py is included in the source distribution
57
85
58 0.5.0 (released 2016-11-10)
86 0.5.0 (released 2016-11-10)
59 ---------------------------
87 ---------------------------
60
88
61 * Vendored version of zstd updated to 1.1.1.
89 * Vendored version of zstd updated to 1.1.1.
62 * Continuous integration for Python 3.6 and 3.7
90 * Continuous integration for Python 3.6 and 3.7
63 * Continuous integration for Conda
91 * Continuous integration for Conda
64 * Added compression and decompression APIs providing similar interfaces
92 * Added compression and decompression APIs providing similar interfaces
65 to the standard library ``zlib`` and ``bz2`` modules. This allows
93 to the standard library ``zlib`` and ``bz2`` modules. This allows
66 coding to a common interface.
94 coding to a common interface.
67 * ``zstd.__version__`` is now defined.
95 * ``zstd.__version__`` is now defined.
68 * ``read_from()`` on various APIs now accepts objects implementing the buffer
96 * ``read_from()`` on various APIs now accepts objects implementing the buffer
69 protocol.
97 protocol.
70 * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers
98 * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers
71 to pass in an existing buffer with a header without having to create a
99 to pass in an existing buffer with a header without having to create a
72 slice or a new object.
100 slice or a new object.
73 * Implemented ``ZstdCompressionDict.as_bytes()``.
101 * Implemented ``ZstdCompressionDict.as_bytes()``.
74 * Python's memory allocator is now used instead of ``malloc()``.
102 * Python's memory allocator is now used instead of ``malloc()``.
75 * Low-level zstd data structures are reused in more instances, cutting down
103 * Low-level zstd data structures are reused in more instances, cutting down
76 on overhead for certain operations.
104 on overhead for certain operations.
77 * ``distutils`` boilerplate for obtaining an ``Extension`` instance
105 * ``distutils`` boilerplate for obtaining an ``Extension`` instance
78 has now been refactored into a standalone ``setup_zstd.py`` file. This
106 has now been refactored into a standalone ``setup_zstd.py`` file. This
79 allows other projects with ``setup.py`` files to reuse the
107 allows other projects with ``setup.py`` files to reuse the
80 ``distutils`` code for this project without copying code.
108 ``distutils`` code for this project without copying code.
81 * The monolithic ``zstd.c`` file has been split into a header file defining
109 * The monolithic ``zstd.c`` file has been split into a header file defining
82 types and separate ``.c`` source files for the implementation.
110 types and separate ``.c`` source files for the implementation.
83
111
84 History of the Project
112 History of the Project
85 ======================
113 ======================
86
114
87 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a
115 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a
88 Python extension for use by the Mercurial project. A very hacky prototype
116 Python extension for use by the Mercurial project. A very hacky prototype
89 is sent to the mercurial-devel list for RFC.
117 is sent to the mercurial-devel list for RFC.
90
118
91 2016-09-03 - Most functionality from Zstandard C API implemented. Source
119 2016-09-03 - Most functionality from Zstandard C API implemented. Source
92 code published on https://github.com/indygreg/python-zstandard. Travis-CI
120 code published on https://github.com/indygreg/python-zstandard. Travis-CI
93 automation configured. 0.0.1 release on PyPI.
121 automation configured. 0.0.1 release on PyPI.
94
122
95 2016-09-05 - After the API was rounded out a bit and support for Python
123 2016-09-05 - After the API was rounded out a bit and support for Python
96 2.6 and 2.7 was added, version 0.1 was released to PyPI.
124 2.6 and 2.7 was added, version 0.1 was released to PyPI.
97
125
98 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2
126 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2
99 was released to PyPI.
127 was released to PyPI.
100
128
101 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor
129 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor
102 now accepts arguments controlling frame parameters. The source size can now
130 now accepts arguments controlling frame parameters. The source size can now
103 be declared when performing streaming compression. ZstdDecompressor.decompress()
131 be declared when performing streaming compression. ZstdDecompressor.decompress()
104 is implemented. Compression dictionaries are now cached when using the simple
132 is implemented. Compression dictionaries are now cached when using the simple
105 compression and decompression APIs. Memory size APIs added.
133 compression and decompression APIs. Memory size APIs added.
106 ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been
134 ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been
107 implemented. This rounds out the major compression/decompression APIs planned
135 implemented. This rounds out the major compression/decompression APIs planned
108 by the author.
136 by the author.
109
137
110 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully
138 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully
111 decoding a zstd frame (issue #2).
139 decoding a zstd frame (issue #2).
112
140
113 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and
141 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and
114 write buffer sizes, and a few bug fixes involving failure to read/write
142 write buffer sizes, and a few bug fixes involving failure to read/write
115 all data when buffer sizes were too small to hold remaining data.
143 all data when buffer sizes were too small to hold remaining data.
116
144
117 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements.
145 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements.
@@ -1,943 +1,1393
1 ================
1 ================
2 python-zstandard
2 python-zstandard
3 ================
3 ================
4
4
5 This project provides Python bindings for interfacing with the
5 This project provides Python bindings for interfacing with the
6 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
6 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
7 and CFFI interface are provided.
7 and CFFI interface are provided.
8
8
9 The primary goal of the project is to provide a rich interface to the
9 The primary goal of the project is to provide a rich interface to the
10 underlying C API through a Pythonic interface while not sacrificing
10 underlying C API through a Pythonic interface while not sacrificing
11 performance. This means exposing most of the features and flexibility
11 performance. This means exposing most of the features and flexibility
12 of the C API while not sacrificing usability or safety that Python provides.
12 of the C API while not sacrificing usability or safety that Python provides.
13
13
14 The canonical home for this project is
14 The canonical home for this project is
15 https://github.com/indygreg/python-zstandard.
15 https://github.com/indygreg/python-zstandard.
16
16
17 | |ci-status| |win-ci-status|
17 | |ci-status| |win-ci-status|
18
18
19 State of Project
19 State of Project
20 ================
20 ================
21
21
22 The project is officially in beta state. The author is reasonably satisfied
22 The project is officially in beta state. The author is reasonably satisfied
23 with the current API and that functionality works as advertised. There
23 that functionality works as advertised. **There will be some backwards
24 may be some backwards incompatible changes before 1.0. Though the author
24 incompatible changes before 1.0, probably in the 0.9 release.** This may
25 does not intend to make any major changes to the Python API.
25 involve renaming the main module from *zstd* to *zstandard* and renaming
26 various types and methods. Pin the package version to prevent unwanted
27 breakage when this change occurs!
26
28
27 This project is vendored and distributed with Mercurial 4.1, where it is
29 This project is vendored and distributed with Mercurial 4.1, where it is
28 used in a production capacity.
30 used in a production capacity.
29
31
30 There is continuous integration for Python versions 2.6, 2.7, and 3.3+
32 There is continuous integration for Python versions 2.6, 2.7, and 3.3+
31 on Linux x86_64 and Windows x86 and x86_64. The author is reasonably
33 on Linux x86_64 and Windows x86 and x86_64. The author is reasonably
32 confident the extension is stable and works as advertised on these
34 confident the extension is stable and works as advertised on these
33 platforms.
35 platforms.
34
36
37 The CFFI bindings are mostly feature complete. Where a feature is implemented
38 in CFFI, unit tests run against both C extension and CFFI implementation to
39 ensure behavior parity.
40
35 Expected Changes
41 Expected Changes
36 ----------------
42 ----------------
37
43
38 The author is reasonably confident in the current state of what's
44 The author is reasonably confident in the current state of what's
39 implemented on the ``ZstdCompressor`` and ``ZstdDecompressor`` types.
45 implemented on the ``ZstdCompressor`` and ``ZstdDecompressor`` types.
40 Those APIs likely won't change significantly. Some low-level behavior
46 Those APIs likely won't change significantly. Some low-level behavior
41 (such as naming and types expected by arguments) may change.
47 (such as naming and types expected by arguments) may change.
42
48
43 There will likely be arguments added to control the input and output
49 There will likely be arguments added to control the input and output
44 buffer sizes (currently, certain operations read and write in chunk
50 buffer sizes (currently, certain operations read and write in chunk
45 sizes using zstd's preferred defaults).
51 sizes using zstd's preferred defaults).
46
52
47 There should be an API that accepts an object that conforms to the buffer
53 There should be an API that accepts an object that conforms to the buffer
48 interface and returns an iterator over compressed or decompressed output.
54 interface and returns an iterator over compressed or decompressed output.
49
55
56 There should be an API that exposes an ``io.RawIOBase`` interface to
57 compressor and decompressor streams, like how ``gzip.GzipFile`` from
58 the standard library works (issue 13).
59
50 The author is on the fence as to whether to support the extremely
60 The author is on the fence as to whether to support the extremely
51 low level compression and decompression APIs. It could be useful to
61 low level compression and decompression APIs. It could be useful to
52 support compression without the framing headers. But the author doesn't
62 support compression without the framing headers. But the author doesn't
53 believe it a high priority at this time.
63 believe it a high priority at this time.
54
64
55 The CFFI bindings are feature complete and all tests run against both
65 There will likely be a refactoring of the module names. Currently,
56 the C extension and CFFI bindings to ensure behavior parity.
66 ``zstd`` is a C extension and ``zstd_cffi`` is the CFFI interface.
67 This means that all code for the C extension must be implemented in
68 C. ``zstd`` may be converted to a Python module so code can be reused
69 between CFFI and C and so not all code in the C extension has to be C.
57
70
58 Requirements
71 Requirements
59 ============
72 ============
60
73
61 This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, 3.5, and
74 This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, 3.5, and
62 3.6 on common platforms (Linux, Windows, and OS X). Only x86_64 is
75 3.6 on common platforms (Linux, Windows, and OS X). Only x86_64 is
63 currently well-tested as an architecture.
76 currently well-tested as an architecture.
64
77
65 Installing
78 Installing
66 ==========
79 ==========
67
80
68 This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard.
81 This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard.
69 So, to install this package::
82 So, to install this package::
70
83
71 $ pip install zstandard
84 $ pip install zstandard
72
85
73 Binary wheels are made available for some platforms. If you need to
86 Binary wheels are made available for some platforms. If you need to
74 install from a source distribution, all you should need is a working C
87 install from a source distribution, all you should need is a working C
75 compiler and the Python development headers/libraries. On many Linux
88 compiler and the Python development headers/libraries. On many Linux
76 distributions, you can install a ``python-dev`` or ``python-devel``
89 distributions, you can install a ``python-dev`` or ``python-devel``
77 package to provide these dependencies.
90 package to provide these dependencies.
78
91
79 Packages are also uploaded to Anaconda Cloud at
92 Packages are also uploaded to Anaconda Cloud at
80 https://anaconda.org/indygreg/zstandard. See that URL for how to install
93 https://anaconda.org/indygreg/zstandard. See that URL for how to install
81 this package with ``conda``.
94 this package with ``conda``.
82
95
83 Performance
96 Performance
84 ===========
97 ===========
85
98
86 Very crude and non-scientific benchmarking (most benchmarks fall in this
99 Very crude and non-scientific benchmarking (most benchmarks fall in this
87 category because proper benchmarking is hard) show that the Python bindings
100 category because proper benchmarking is hard) show that the Python bindings
88 perform within 10% of the native C implementation.
101 perform within 10% of the native C implementation.
89
102
90 The following table compares the performance of compressing and decompressing
103 The following table compares the performance of compressing and decompressing
91 a 1.1 GB tar file comprised of the files in a Firefox source checkout. Values
104 a 1.1 GB tar file comprised of the files in a Firefox source checkout. Values
92 obtained with the ``zstd`` program are on the left. The remaining columns detail
105 obtained with the ``zstd`` program are on the left. The remaining columns detail
93 performance of various compression APIs in the Python bindings.
106 performance of various compression APIs in the Python bindings.
94
107
95 +-------+-----------------+-----------------+-----------------+---------------+
108 +-------+-----------------+-----------------+-----------------+---------------+
96 | Level | Native | Simple | Stream In | Stream Out |
109 | Level | Native | Simple | Stream In | Stream Out |
97 | | Comp / Decomp | Comp / Decomp | Comp / Decomp | Comp |
110 | | Comp / Decomp | Comp / Decomp | Comp / Decomp | Comp |
98 +=======+=================+=================+=================+===============+
111 +=======+=================+=================+=================+===============+
99 | 1 | 490 / 1338 MB/s | 458 / 1266 MB/s | 407 / 1156 MB/s | 405 MB/s |
112 | 1 | 490 / 1338 MB/s | 458 / 1266 MB/s | 407 / 1156 MB/s | 405 MB/s |
100 +-------+-----------------+-----------------+-----------------+---------------+
113 +-------+-----------------+-----------------+-----------------+---------------+
101 | 2 | 412 / 1288 MB/s | 381 / 1203 MB/s | 345 / 1128 MB/s | 349 MB/s |
114 | 2 | 412 / 1288 MB/s | 381 / 1203 MB/s | 345 / 1128 MB/s | 349 MB/s |
102 +-------+-----------------+-----------------+-----------------+---------------+
115 +-------+-----------------+-----------------+-----------------+---------------+
103 | 3 | 342 / 1312 MB/s | 319 / 1182 MB/s | 285 / 1165 MB/s | 287 MB/s |
116 | 3 | 342 / 1312 MB/s | 319 / 1182 MB/s | 285 / 1165 MB/s | 287 MB/s |
104 +-------+-----------------+-----------------+-----------------+---------------+
117 +-------+-----------------+-----------------+-----------------+---------------+
105 | 11 | 64 / 1506 MB/s | 66 / 1436 MB/s | 56 / 1342 MB/s | 57 MB/s |
118 | 11 | 64 / 1506 MB/s | 66 / 1436 MB/s | 56 / 1342 MB/s | 57 MB/s |
106 +-------+-----------------+-----------------+-----------------+---------------+
119 +-------+-----------------+-----------------+-----------------+---------------+
107
120
108 Again, these are very unscientific. But it shows that Python is capable of
121 Again, these are very unscientific. But it shows that Python is capable of
109 compressing at several hundred MB/s and decompressing at over 1 GB/s.
122 compressing at several hundred MB/s and decompressing at over 1 GB/s.
110
123
111 Comparison to Other Python Bindings
124 Comparison to Other Python Bindings
112 ===================================
125 ===================================
113
126
114 https://pypi.python.org/pypi/zstd is an alternate Python binding to
127 https://pypi.python.org/pypi/zstd is an alternate Python binding to
115 Zstandard. At the time this was written, the latest release of that
128 Zstandard. At the time this was written, the latest release of that
116 package (1.1.2) only exposed the simple APIs for compression and decompression.
129 package (1.1.2) only exposed the simple APIs for compression and decompression.
117 This package exposes much more of the zstd API, including streaming and
130 This package exposes much more of the zstd API, including streaming and
118 dictionary compression. This package also has CFFI support.
131 dictionary compression. This package also has CFFI support.
119
132
120 Bundling of Zstandard Source Code
133 Bundling of Zstandard Source Code
121 =================================
134 =================================
122
135
123 The source repository for this project contains a vendored copy of the
136 The source repository for this project contains a vendored copy of the
124 Zstandard source code. This is done for a few reasons.
137 Zstandard source code. This is done for a few reasons.
125
138
126 First, Zstandard is relatively new and not yet widely available as a system
139 First, Zstandard is relatively new and not yet widely available as a system
127 package. Providing a copy of the source code enables the Python C extension
140 package. Providing a copy of the source code enables the Python C extension
128 to be compiled without requiring the user to obtain the Zstandard source code
141 to be compiled without requiring the user to obtain the Zstandard source code
129 separately.
142 separately.
130
143
131 Second, Zstandard has both a stable *public* API and an *experimental* API.
144 Second, Zstandard has both a stable *public* API and an *experimental* API.
132 The *experimental* API is actually quite useful (contains functionality for
145 The *experimental* API is actually quite useful (contains functionality for
133 training dictionaries for example), so it is something we wish to expose to
146 training dictionaries for example), so it is something we wish to expose to
134 Python. However, the *experimental* API is only available via static linking.
147 Python. However, the *experimental* API is only available via static linking.
135 Furthermore, the *experimental* API can change at any time. So, control over
148 Furthermore, the *experimental* API can change at any time. So, control over
136 the exact version of the Zstandard library linked against is important to
149 the exact version of the Zstandard library linked against is important to
137 ensure known behavior.
150 ensure known behavior.
138
151
139 Instructions for Building and Testing
152 Instructions for Building and Testing
140 =====================================
153 =====================================
141
154
142 Once you have the source code, the extension can be built via setup.py::
155 Once you have the source code, the extension can be built via setup.py::
143
156
144 $ python setup.py build_ext
157 $ python setup.py build_ext
145
158
146 We recommend testing with ``nose``::
159 We recommend testing with ``nose``::
147
160
148 $ nosetests
161 $ nosetests
149
162
150 A Tox configuration is present to test against multiple Python versions::
163 A Tox configuration is present to test against multiple Python versions::
151
164
152 $ tox
165 $ tox
153
166
154 Tests use the ``hypothesis`` Python package to perform fuzzing. If you
167 Tests use the ``hypothesis`` Python package to perform fuzzing. If you
155 don't have it, those tests won't run.
168 don't have it, those tests won't run. Since the fuzzing tests take longer
169 to execute than normal tests, you'll need to opt in to running them by
170 setting the ``ZSTD_SLOW_TESTS`` environment variable. This is set
171 automatically when using ``tox``.
156
172
157 There is also an experimental CFFI module. You need the ``cffi`` Python
173 The ``cffi`` Python package needs to be installed in order to build the CFFI
158 package installed to build and test that.
174 bindings. If it isn't present, the CFFI bindings won't be built.
159
175
160 To create a virtualenv with all development dependencies, do something
176 To create a virtualenv with all development dependencies, do something
161 like the following::
177 like the following::
162
178
163 # Python 2
179 # Python 2
164 $ virtualenv venv
180 $ virtualenv venv
165
181
166 # Python 3
182 # Python 3
167 $ python3 -m venv venv
183 $ python3 -m venv venv
168
184
169 $ source venv/bin/activate
185 $ source venv/bin/activate
170 $ pip install cffi hypothesis nose tox
186 $ pip install cffi hypothesis nose tox
171
187
172 API
188 API
173 ===
189 ===
174
190
175 The compiled C extension provides a ``zstd`` Python module. This module
191 The compiled C extension provides a ``zstd`` Python module. The CFFI
176 exposes the following interfaces.
192 bindings provide a ``zstd_cffi`` module. Both provide an identical API
193 interface. The types, functions, and attributes exposed by these modules
194 are documented in the sections below.
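
Because the two modules expose the same interface, a program can use whichever
backend imports successfully. The following is a minimal sketch (not an
official helper; only the ``zstd`` and ``zstd_cffi`` module names come from
this document)::

   # Prefer the C extension; fall back to the CFFI bindings if the
   # extension is not available. The rest of the program does not need
   # to care which module was imported.
   try:
       import zstd
   except ImportError:
       import zstd_cffi as zstd

   cctx = zstd.ZstdCompressor()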

.. note::

   The documentation in this section makes references to various zstd
   concepts and functionality. The ``Concepts`` section below explains
   these concepts in more detail.

ZstdCompressor
--------------

The ``ZstdCompressor`` class provides an interface for performing
compression operations.

Each instance is associated with parameters that control compression
behavior. These come from the following named arguments (all optional):

level
   Integer compression level. Valid values are between 1 and 22.
dict_data
   Compression dictionary to use.

   Note: When using dictionary data and ``compress()`` is called multiple
   times, the ``CompressionParameters`` derived from an integer compression
   ``level`` and the first compressed data's size will be reused for all
   subsequent operations. This may not be desirable if source data size
   varies significantly.
compression_params
   A ``CompressionParameters`` instance (overrides the ``level`` value).
write_checksum
   Whether a 4 byte checksum should be written with the compressed data.
   Defaults to False. If True, the decompressor can verify that decompressed
   data matches the original input data.
write_content_size
   Whether the size of the uncompressed data will be written into the
   header of compressed data. Defaults to False. The data will only be
   written if the compressor knows the size of the input data. This is
   likely not true for streaming compression.
write_dict_id
   Whether to write the dictionary ID into the compressed data.
   Defaults to True. The dictionary ID is only written if a dictionary
   is being used.
threads
   Enables and sets the number of threads to use for multi-threaded compression
   operations. Defaults to 0, which means to use single-threaded compression.
   Negative values will resolve to the number of logical CPUs in the system.
   Read below for more info on multi-threaded compression. This argument only
   controls thread count for operations that operate on individual pieces of
   data. APIs that spawn multiple threads for working on multiple pieces of
   data have their own ``threads`` argument.
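
For example, a compressor that embeds checksums and content sizes and uses
multi-threaded compression could be constructed as follows (the specific
values are arbitrary illustrations, not recommendations)::

   cctx = zstd.ZstdCompressor(
       level=10,                 # compression level between 1 and 22
       write_checksum=True,      # emit a 4 byte checksum with the data
       write_content_size=True,  # record the input size when it is known
       threads=-1,               # negative resolves to logical CPU count
   )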

Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
instances can be called from multiple Python threads simultaneously. In other
words, assume instances are not thread safe unless stated otherwise.

Simple API
^^^^^^^^^^

``compress(data)`` compresses and returns data as a one-shot operation.::

   cctx = zstd.ZstdCompressor()
   compressed = cctx.compress(b'data to compress')

The ``data`` argument can be any object that implements the *buffer protocol*.

Unless ``compression_params`` or ``dict_data`` are passed to the
``ZstdCompressor``, each invocation of ``compress()`` will calculate the
optimal compression parameters for the configured compression ``level`` and
input data size (some parameters are fine-tuned for small input sizes).

If a compression dictionary is being used, the compression parameters
determined from the first input's size will be reused for subsequent
operations.

There is currently a deficiency in zstd's C APIs that makes it difficult
to round trip empty inputs when ``write_content_size=True``. Attempting
this will raise a ``ValueError`` unless ``allow_empty=True`` is passed
to ``compress()``.
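
To make the ``allow_empty`` behavior concrete, here is a short sketch of the
behavior described above::

   cctx = zstd.ZstdCompressor(write_content_size=True)

   # cctx.compress(b'') would raise ValueError because an empty input
   # cannot be round tripped when the content size is being written.

   # Opting in explicitly allows the empty input to be compressed.
   compressed = cctx.compress(b'', allow_empty=True)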

Streaming Input API
^^^^^^^^^^^^^^^^^^^

``write_to(fh)`` (which behaves as a context manager) allows you to *stream*
data into a compressor.::

   cctx = zstd.ZstdCompressor(level=10)
   with cctx.write_to(fh) as compressor:
       compressor.write(b'chunk 0')
       compressor.write(b'chunk 1')
       ...

The argument to ``write_to()`` must have a ``write(data)`` method. As
compressed data is available, ``write()`` will be called with the compressed
data as its argument. Many common Python types implement ``write()``, including
open file handles and ``io.BytesIO``.

``write_to()`` returns an object representing a streaming compressor instance.
It **must** be used as a context manager. That object's ``write(data)`` method
is used to feed data into the compressor.

A ``flush()`` method can be called to evict whatever data remains within the
compressor's internal state into the output object. This may result in 0 or
more ``write()`` calls to the output object.

Both ``write()`` and ``flush()`` return the number of bytes written to the
object's ``write()``. In many cases, small inputs do not accumulate enough
data to cause a write and ``write()`` will return ``0``.

If the size of the data being fed to this streaming compressor is known,
you can declare it before compression begins::

   cctx = zstd.ZstdCompressor()
   with cctx.write_to(fh, size=data_len) as compressor:
       compressor.write(chunk0)
       compressor.write(chunk1)
       ...

Declaring the size of the source data allows compression parameters to
be tuned. And if ``write_content_size`` is used, it also results in the
content size being written into the frame header of the output data.

The size of chunks being ``write()`` to the destination can be specified::

   cctx = zstd.ZstdCompressor()
   with cctx.write_to(fh, write_size=32768) as compressor:
       ...

To see how much memory is being used by the streaming compressor::

   cctx = zstd.ZstdCompressor()
   with cctx.write_to(fh) as compressor:
       ...
       byte_size = compressor.memory_size()

Streaming Output API
^^^^^^^^^^^^^^^^^^^^

``read_from(reader)`` provides a mechanism to stream data out of a compressor
as an iterator of data chunks.::

   cctx = zstd.ZstdCompressor()
   for chunk in cctx.read_from(fh):
       # Do something with emitted data.

``read_from()`` accepts an object that has a ``read(size)`` method or conforms
to the buffer protocol. (``bytes`` and ``memoryview`` are 2 common types that
provide the buffer protocol.)

Uncompressed data is fetched from the source either by calling ``read(size)``
or by fetching a slice of data from the object directly (in the case where
the buffer protocol is being used). The returned iterator consists of chunks
of compressed data.

If reading from the source via ``read()``, ``read()`` will be called until
it raises or returns an empty bytes (``b''``). It is perfectly valid for
the source to deliver fewer bytes than were requested by ``read(size)``.

Like ``write_to()``, ``read_from()`` also accepts a ``size`` argument
declaring the size of the input stream::

   cctx = zstd.ZstdCompressor()
   for chunk in cctx.read_from(fh, size=some_int):
       pass

You can also control the size that data is ``read()`` from the source and
the ideal size of output chunks::

   cctx = zstd.ZstdCompressor()
   for chunk in cctx.read_from(fh, read_size=16384, write_size=8192):
       pass

Unlike ``write_to()``, ``read_from()`` does not give direct control over the
sizes of chunks fed into the compressor. Instead, chunk sizes will be whatever
the object being read from delivers. These will often be of a uniform size.

Stream Copying API
^^^^^^^^^^^^^^^^^^

``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while
compressing it.::

   cctx = zstd.ZstdCompressor()
   cctx.copy_stream(ifh, ofh)

For example, say you wish to compress a file::

   cctx = zstd.ZstdCompressor()
   with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
       cctx.copy_stream(ifh, ofh)

It is also possible to declare the size of the source stream::

   cctx = zstd.ZstdCompressor()
   cctx.copy_stream(ifh, ofh, size=len_of_input)

You can also specify how large the chunks that are ``read()`` and ``write()``
from and to the streams should be::

   cctx = zstd.ZstdCompressor()
   cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384)

The stream copier returns a 2-tuple of bytes read and written::

   cctx = zstd.ZstdCompressor()
   read_count, write_count = cctx.copy_stream(ifh, ofh)

Compressor API
^^^^^^^^^^^^^^

``compressobj()`` returns an object that exposes ``compress(data)`` and
``flush()`` methods. Each returns compressed data or an empty bytes.

The purpose of ``compressobj()`` is to provide an API-compatible interface
with ``zlib.compressobj`` and ``bz2.BZ2Compressor``. This allows callers to
swap in different compressor objects while using the same API.

``flush()`` accepts an optional argument indicating how to end the stream.
``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream.
Once this type of flush is performed, ``compress()`` and ``flush()`` can
no longer be called. This type of flush **must** be called to end the
compression context. If not called, returned data may be incomplete.

A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a
zstd block. Flushes of this type can be performed multiple times. The next
call to ``compress()`` will begin a new zstd block.

Here is how this API should be used::

   cctx = zstd.ZstdCompressor()
   cobj = cctx.compressobj()
   data = cobj.compress(b'raw input 0')
   data = cobj.compress(b'raw input 1')
   data = cobj.flush()

Or to flush blocks::

   cctx = zstd.ZstdCompressor()
   cobj = cctx.compressobj()
   data = cobj.compress(b'chunk in first block')
   data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
   data = cobj.compress(b'chunk in second block')
   data = cobj.flush()

For best performance results, keep input chunks under 256KB. This avoids
extra allocations for a large output object.

It is possible to declare the input size of the data that will be fed into
the compressor::

   cctx = zstd.ZstdCompressor()
   cobj = cctx.compressobj(size=6)
   data = cobj.compress(b'foobar')
   data = cobj.flush()

Batch Compression API
^^^^^^^^^^^^^^^^^^^^^

(Experimental. Not yet supported in CFFI bindings.)

``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple
inputs as a single operation.

Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a
``BufferWithSegments``, or a list containing byte like objects. Each element of
the container will be compressed individually using the configured parameters
on the ``ZstdCompressor`` instance.

The ``threads`` argument controls how many threads to use for compression. The
default is ``0`` which means to use a single thread. Negative values use the
number of logical CPUs in the machine.

The function returns a ``BufferWithSegmentsCollection``. This type represents
N discrete memory allocations, each holding 1 or more compressed frames.
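
As an illustrative sketch (the inputs are arbitrary, and the indexing and
``tobytes()`` usage assume ``BufferWithSegmentsCollection`` behavior that is
not documented in this section)::

   cctx = zstd.ZstdCompressor(level=3)

   # Arbitrary example inputs; any list of byte like objects works.
   inputs = [b'foo' * 100, b'bar' * 100, b'baz' * 100]

   # Compress every input in a single call using 4 worker threads.
   collection = cctx.multi_compress_to_buffer(inputs, threads=4)

   # Assumption: the collection can be indexed and each segment copied
   # out with tobytes(); segment i holds the compressed frame for
   # inputs[i].
   first_frame = collection[0].tobytes()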

Output data is written to shared memory buffers. This means that unlike
regular Python objects, a reference to *any* object within the collection
keeps the shared buffer and therefore memory backing it alive. This can have
undesirable effects on process memory usage.

The API and behavior of this function are experimental and will likely change.
Known deficiencies include:

* If asked to use multiple threads, it will always spawn that many threads,
  even if the input is too small to use them. It should automatically lower
  the thread count when the extra threads would just add overhead.
* The buffer allocation strategy is fixed. There is room to make it dynamic,
  perhaps even to allow one output buffer per input, facilitating a variation
  of the API to return a list without the adverse effects of shared memory
  buffers.

ZstdDecompressor
----------------

The ``ZstdDecompressor`` class provides an interface for performing
decompression.

Each instance is associated with parameters that control decompression. These
come from the following named arguments (all optional):

dict_data
   Compression dictionary to use.

The interface of this class is very similar to ``ZstdCompressor`` (by design).

Unless specified otherwise, assume that no two methods of ``ZstdDecompressor``
instances can be called from multiple Python threads simultaneously. In other
words, assume instances are not thread safe unless stated otherwise.

Simple API
^^^^^^^^^^

``decompress(data)`` can be used to decompress an entire compressed zstd
frame in a single operation.::

   dctx = zstd.ZstdDecompressor()
   decompressed = dctx.decompress(data)

By default, ``decompress(data)`` will only work on data written with the content
size encoded in its header. This can be achieved by creating a
``ZstdCompressor`` with ``write_content_size=True``. If compressed data without
an embedded content size is seen, ``zstd.ZstdError`` will be raised.

If the compressed data doesn't have its content size embedded within it,
decompression can be attempted by specifying the ``max_output_size``
argument.::

   dctx = zstd.ZstdDecompressor()
   uncompressed = dctx.decompress(data, max_output_size=1048576)

Ideally, ``max_output_size`` will be identical to the decompressed output
size.

If ``max_output_size`` is too small to hold the decompressed data,
``zstd.ZstdError`` will be raised.

If ``max_output_size`` is larger than the decompressed data, the allocated
output buffer will be resized to only use the space required.

Please note that an allocation of the requested ``max_output_size`` will be
performed every time the method is called. Setting to a very large value could
result in a lot of work for the memory allocator and may result in
``MemoryError`` being raised if the allocation fails.

If the exact size of decompressed data is unknown, it is **strongly**
recommended to use a streaming API.

Streaming Input API
^^^^^^^^^^^^^^^^^^^

``write_to(fh)`` can be used to incrementally send compressed data to a
decompressor.::

   dctx = zstd.ZstdDecompressor()
   with dctx.write_to(fh) as decompressor:
       decompressor.write(compressed_data)

This behaves similarly to ``zstd.ZstdCompressor``: compressed data is written to
the decompressor by calling ``write(data)`` and decompressed output is written
to the output object by calling its ``write(data)`` method.

Calls to ``write()`` will return the number of bytes written to the output
object. Not all inputs will result in bytes being written, so return values
of ``0`` are possible.

The size of chunks being ``write()`` to the destination can be specified::

   dctx = zstd.ZstdDecompressor()
   with dctx.write_to(fh, write_size=16384) as decompressor:
       pass

You can see how much memory is being used by the decompressor::

   dctx = zstd.ZstdDecompressor()
   with dctx.write_to(fh) as decompressor:
       byte_size = decompressor.memory_size()

Streaming Output API
^^^^^^^^^^^^^^^^^^^^

``read_from(fh)`` provides a mechanism to stream decompressed data out of a
compressed source as an iterator of data chunks.::

   dctx = zstd.ZstdDecompressor()
   for chunk in dctx.read_from(fh):
       # Do something with original data.

``read_from()`` accepts a) an object with a ``read(size)`` method that will
return compressed bytes or b) an object conforming to the buffer protocol that
can expose its data as a contiguous range of bytes. The ``bytes`` and
``memoryview`` types expose this buffer protocol.

``read_from()`` returns an iterator whose elements are chunks of the
decompressed data.

The size of requested ``read()`` from the source can be specified::

   dctx = zstd.ZstdDecompressor()
   for chunk in dctx.read_from(fh, read_size=16384):
       pass

It is also possible to skip leading bytes in the input data::

   dctx = zstd.ZstdDecompressor()
   for chunk in dctx.read_from(fh, skip_bytes=1):
       pass

Skipping leading bytes is useful if the source data contains extra
*header* data but you want to avoid the overhead of making a buffer copy
or allocating a new ``memoryview`` object in order to decompress the data.

Similarly to ``ZstdCompressor.read_from()``, the consumer of the iterator
controls when data is decompressed. If the iterator isn't consumed,
decompression is put on hold.

When ``read_from()`` is passed an object conforming to the buffer protocol,
the behavior may seem similar to what occurs when the simple decompression
API is used. However, this API works when the decompressed size is unknown.
Furthermore, if feeding large inputs, the decompressor will work in chunks
instead of performing a single operation.

Stream Copying API
^^^^^^^^^^^^^^^^^^

``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while
performing decompression.::

   dctx = zstd.ZstdDecompressor()
   dctx.copy_stream(ifh, ofh)

e.g. to decompress a file to another file::

   dctx = zstd.ZstdDecompressor()
   with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
       dctx.copy_stream(ifh, ofh)

The size of chunks being ``read()`` and ``write()`` from and to the streams
can be specified::

   dctx = zstd.ZstdDecompressor()
   dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384)

Decompressor API
^^^^^^^^^^^^^^^^

``decompressobj()`` returns an object that exposes a ``decompress(data)``
method. Compressed data chunks are fed into ``decompress(data)`` and
uncompressed output (or an empty bytes) is returned. Output from subsequent
calls needs to be concatenated to reassemble the full decompressed byte
sequence.

The purpose of ``decompressobj()`` is to provide an API-compatible interface
with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers
to swap in different decompressor objects while using the same API.

Each object is single use: once an input frame is decoded, ``decompress()``
can no longer be called.

Here is how this API should be used::

   dctx = zstd.ZstdDecompressor()
   dobj = dctx.decompressobj()
   data = dobj.decompress(compressed_chunk_0)
   data = dobj.decompress(compressed_chunk_1)

Batch Decompression API
^^^^^^^^^^^^^^^^^^^^^^^

(Experimental. Not yet supported in CFFI bindings.)

``multi_decompress_to_buffer()`` performs decompression of multiple
frames as a single operation and returns a ``BufferWithSegmentsCollection``
containing decompressed data for all inputs.

Compressed frames can be passed to the function as a ``BufferWithSegments``,
a ``BufferWithSegmentsCollection``, or as a list containing objects that
conform to the buffer protocol. For best performance, pass a
``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as
minimal input validation will be done for those types. If calling from
Python (as opposed to C), constructing one of these instances may add
overhead that cancels out the performance benefit of the reduced validation
relative to list inputs.

The decompressed size of each frame must be discoverable. It can either be
embedded within the zstd frame (``write_content_size=True`` argument to
``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument.

The ``decompressed_sizes`` argument is an object conforming to the buffer
protocol which holds an array of 64-bit unsigned integers in the machine's
native format defining the decompressed sizes of each frame. If this argument
is passed, it avoids having to scan each frame for its decompressed size.
This frame scanning can add noticeable overhead in some scenarios.
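
For illustration, here is one possible way to supply ``decompressed_sizes``
(the frame and size variables are placeholders; packing with ``struct`` is
just one way to produce the required native-format ``uint64`` array)::

   import struct

   # Placeholders: compressed zstd frames and their known original sizes.
   frames = [frame0, frame1, frame2]
   sizes = [size0, size1, size2]

   # Pack the sizes as native-format unsigned 64-bit integers.
   decompressed_sizes = struct.pack('=%dQ' % len(sizes), *sizes)

   dctx = zstd.ZstdDecompressor()
   result = dctx.multi_decompress_to_buffer(
       frames, decompressed_sizes=decompressed_sizes)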

The ``threads`` argument controls the number of threads to use to perform
decompression operations. The default (``0``) or the value ``1`` means to
use a single thread. Negative values use the number of logical CPUs in the
machine.

.. note::

   It is possible to pass a ``mmap.mmap()`` instance into this function by
   wrapping it with a ``BufferWithSegments`` instance (which will define the
   offsets of frames within the memory mapped region).

This function is logically equivalent to performing ``dctx.decompress()``
on each input frame and returning the result.

This function exists to perform decompression on multiple frames as fast
as possible by having as little overhead as possible. Since decompression is
performed as a single operation and since the decompressed output is stored in
a single buffer, extra memory allocations, Python objects, and Python function
calls are avoided. This is ideal for scenarios where callers need to access
decompressed data for multiple frames.

Currently, the implementation always spawns multiple threads when requested,
even if the amount of work to do is small. In the future, it will be smarter
about avoiding threads and their associated overhead when the amount of
work to do is small.

Content-Only Dictionary Chain Decompression
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

``decompress_content_dict_chain(frames)`` performs decompression of a list of
zstd frames produced using chained *content-only* dictionary compression. Such
a list of frames is produced by compressing discrete inputs where each
non-initial input is compressed with a *content-only* dictionary consisting
of the content of the previous input.

For example, say you have the following inputs::

   inputs = [b'input 1', b'input 2', b'input 3']

The zstd frame chain consists of:

1. ``b'input 1'`` compressed in standalone/discrete mode
2. ``b'input 2'`` compressed using ``b'input 1'`` as a *content-only* dictionary
3. ``b'input 3'`` compressed using ``b'input 2'`` as a *content-only* dictionary

Each zstd frame **must** have the content size written.

The following Python code can be used to produce a *content-only dictionary
chain*::

   def make_chain(inputs):
       frames = []

       # First frame is compressed in standalone/discrete mode.
       zctx = zstd.ZstdCompressor(write_content_size=True)
       frames.append(zctx.compress(inputs[0]))

       # Subsequent frames use the previous fulltext as a content-only dictionary
       for i, raw in enumerate(inputs[1:]):
           dict_data = zstd.ZstdCompressionDict(inputs[i])
           zctx = zstd.ZstdCompressor(write_content_size=True, dict_data=dict_data)
           frames.append(zctx.compress(raw))

       return frames

``decompress_content_dict_chain()`` returns the uncompressed data of the last
element in the input chain.
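
Continuing the sketch above, decompressing the chain could look like this
(``make_chain`` and ``inputs`` come from the preceding example)::

   frames = make_chain(inputs)

   dctx = zstd.ZstdDecompressor()
   # Returns the fulltext of the final input, here b'input 3'.
   last = dctx.decompress_content_dict_chain(frames)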
629
753
630 It is possible to implement *content-only dictionary chain* decompression
754 It is possible to implement *content-only dictionary chain* decompression
631 on top of other Python APIs. However, this function will likely be significantly
755 on top of other Python APIs. However, this function will likely be significantly
632 faster, especially for long input chains, as it avoids the overhead of
756 faster, especially for long input chains, as it avoids the overhead of
633 instantiating and passing around intermediate objects between C and Python.
757 instantiating and passing around intermediate objects between C and Python.
634
758
635 Choosing an API
759 Multi-Threaded Compression
636 ---------------
760 --------------------------
637
638 Various forms of compression and decompression APIs are provided because each
639 are suitable for different use cases.
640
761
762 ``ZstdCompressor`` accepts a ``threads`` argument that controls the number
763 of threads to use for compression. The way this works is that input is split
764 into segments and each segment is fed into a worker pool for compression. Once
765 a segment is compressed, it is flushed/appended to the output.
766
767 The segment size for multi-threaded compression is chosen from the window size
768 of the compressor. This is derived from the ``window_log`` attribute of a
769 ``CompressionParameters`` instance. By default, segment sizes are in the 1+MB
770 range.
771
772 If multi-threaded compression is requested and the input is smaller than the
773 configured segment size, only a single compression thread will be used. If the
774 input is smaller than the segment size multiplied by the thread pool size or
775 if data cannot be delivered to the compressor fast enough, not all requested
776 compressor threads may be active simultaneously.
777
778 Compared to non-multi-threaded compression, multi-threaded compression has
779 higher per-operation overhead. This includes extra memory operations,
780 thread creation, lock acquisition, etc.
781
782 Due to the nature of multi-threaded compression using *N* compression
783 *states*, the output from multi-threaded compression will likely be larger
784 than non-multi-threaded compression. The difference is usually small. But
785 there is a CPU/wall time versus size trade off that may warrant investigation.
786
787 Output from multi-threaded compression does not require any special handling
788 on the decompression side. In other words, any zstd decompressor should be able
789 to consume data produced with multi-threaded compression.
790
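As a rough illustration of the above, a multi-threaded compressor is obtained by
passing ``threads`` to ``ZstdCompressor`` (a minimal sketch; the thread count and
input are arbitrary, and which compression APIs honor ``threads`` can vary by
version)::

   import zstd

   # Compress a large payload using 4 worker threads.
   cctx = zstd.ZstdCompressor(level=3, threads=4)
   compressed = cctx.compress(b'x' * (64 * 1048576))
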
678 Dictionary Creation and Management
791 Dictionary Creation and Management
679 ----------------------------------
792 ----------------------------------
680
793
794 Compression dictionaries are represented as the ``ZstdCompressionDict`` type.
795
689 Instances can be constructed from bytes::
796 Instances can be constructed from bytes::
690
797
691 dict_data = zstd.ZstdCompressionDict(data)
798 dict_data = zstd.ZstdCompressionDict(data)
692
799
693 It is possible to construct a dictionary from *any* data. Unless the
800 It is possible to construct a dictionary from *any* data. Unless the
694 data begins with a magic header, the dictionary will be treated as
801 data begins with a magic header, the dictionary will be treated as
695 *content-only*. *Content-only* dictionaries allow compression operations
802 *content-only*. *Content-only* dictionaries allow compression operations
696 that follow to reference raw data within the content. For one use of
803 that follow to reference raw data within the content. For one use of
697 *content-only* dictionaries, see
804 *content-only* dictionaries, see
698 ``ZstdDecompressor.decompress_content_dict_chain()``.
805 ``ZstdDecompressor.decompress_content_dict_chain()``.
699
806
700 More interestingly, instances can be created by *training* on sample data::
807 More interestingly, instances can be created by *training* on sample data::
701
808
702 dict_data = zstd.train_dictionary(size, samples)
809 dict_data = zstd.train_dictionary(size, samples)
703
810
704 This takes a list of bytes instances and creates and returns a
811 This takes a list of bytes instances and creates and returns a
705 ``ZstdCompressionDict``.
812 ``ZstdCompressionDict``.
706
813
707 You can see how many bytes are in the dictionary by calling ``len()``::
814 You can see how many bytes are in the dictionary by calling ``len()``::
708
815
709 dict_data = zstd.train_dictionary(size, samples)
816 dict_data = zstd.train_dictionary(size, samples)
710 dict_size = len(dict_data) # will not be larger than ``size``
817 dict_size = len(dict_data) # will not be larger than ``size``
711
818
712 Once you have a dictionary, you can pass it to the objects performing
819 Once you have a dictionary, you can pass it to the objects performing
713 compression and decompression::
820 compression and decompression::
714
821
715 dict_data = zstd.train_dictionary(16384, samples)
822 dict_data = zstd.train_dictionary(16384, samples)
716
823
717 cctx = zstd.ZstdCompressor(dict_data=dict_data)
824 cctx = zstd.ZstdCompressor(dict_data=dict_data)
718 for source_data in input_data:
825 for source_data in input_data:
719 compressed = cctx.compress(source_data)
826 compressed = cctx.compress(source_data)
720 # Do something with compressed data.
827 # Do something with compressed data.
721
828
722 dctx = zstd.ZstdDecompressor(dict_data=dict_data)
829 dctx = zstd.ZstdDecompressor(dict_data=dict_data)
723 for compressed_data in input_data:
830 for compressed_data in input_data:
724 buffer = io.BytesIO()
831 buffer = io.BytesIO()
725 with dctx.write_to(buffer) as decompressor:
832 with dctx.write_to(buffer) as decompressor:
726 decompressor.write(compressed_data)
833 decompressor.write(compressed_data)
727 # Do something with raw data in ``buffer``.
834 # Do something with raw data in ``buffer``.
728
835
729 Dictionaries have unique integer IDs. You can retrieve this ID via::
836 Dictionaries have unique integer IDs. You can retrieve this ID via::
730
837
731 dict_id = zstd.dictionary_id(dict_data)
838 dict_id = zstd.dictionary_id(dict_data)
732
839
733 You can obtain the raw data in the dict (useful for persisting and constructing
840 You can obtain the raw data in the dict (useful for persisting and constructing
734 a ``ZstdCompressionDict`` later) via ``as_bytes()``::
841 a ``ZstdCompressionDict`` later) via ``as_bytes()``::
735
842
736 dict_data = zstd.train_dictionary(size, samples)
843 dict_data = zstd.train_dictionary(size, samples)
737 raw_data = dict_data.as_bytes()
844 raw_data = dict_data.as_bytes()
738
845
846 The following named arguments to ``train_dictionary`` can also be used
847 to further control dictionary generation (an example follows the list).
848
849 selectivity
850 Integer selectivity level. Default is 9. Larger values yield more data in
851 the dictionary.
852 level
853 Integer compression level. Default is 6.
854 dict_id
855 Integer dictionary ID for the produced dictionary. Default is 0, which
856 means to use a random value.
857 notifications
858 Controls writing of informational messages to ``stderr``. ``0`` (the
859 default) means to write nothing. ``1`` writes errors. ``2`` writes
860 progression info. ``3`` writes more details. And ``4`` writes all info.
861
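For instance, a training call that sets these arguments explicitly might look
like the following (a sketch; the values shown are illustrative only)::

   dict_data = zstd.train_dictionary(16384, samples,
                                     selectivity=9,
                                     level=6,
                                     dict_id=0,
                                     notifications=1)
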
862 Cover Dictionaries
863 ^^^^^^^^^^^^^^^^^^
864
865 An alternate dictionary training mechanism named *cover* is also available.
866 More details about this training mechanism are available in the paper
867 *Effective Construction of Relative Lempel-Ziv Dictionaries* (authors:
868 Liao, Petri, Moffat, Wirth).
869
870 To use this mechanism, use ``zstd.train_cover_dictionary()`` instead of
871 ``zstd.train_dictionary()``. The function behaves nearly the same except
872 its arguments are different and the returned dictionary will contain ``k``
873 and ``d`` attributes reflecting the parameters to the cover algorithm.
874
875 .. note::
876
877 The ``k`` and ``d`` attributes are only populated on dictionary
878 instances created by this function. If a ``ZstdCompressionDict`` is
879 constructed from raw bytes data, the ``k`` and ``d`` attributes will
880 be ``0``.
881
882 The segment and dmer size parameters to the cover algorithm can either be
883 specified manually or you can ask ``train_cover_dictionary()`` to try
884 multiple values and pick the best one, where *best* means the smallest
885 compressed data size.
886
887 In manual mode, the ``k`` and ``d`` arguments must be specified or a
888 ``ZstdError`` will be raised.
889
890 In automatic mode (triggered by specifying ``optimize=True``), ``k``
891 and ``d`` are optional. If a value isn't specified, then default values for
892 both are tested. The ``steps`` argument can control the number of steps
893 through ``k`` values. The ``level`` argument defines the compression level
894 that will be used when testing the compressed size. And ``threads`` can
895 specify the number of threads to use for concurrent operation.
896
897 This function takes the following arguments (an example follows the list):
898
899 dict_size
900 Target size in bytes of the dictionary to generate.
901 samples
902 A list of bytes holding samples the dictionary will be trained from.
903 k
904 Parameter to cover algorithm defining the segment size. A reasonable range
905 is [16, 2048+].
906 d
907 Parameter to cover algorithm defining the dmer size. A reasonable range is
908 [6, 16]. ``d`` must be less than or equal to ``k``.
909 dict_id
910 Integer dictionary ID for the produced dictionary. Default is 0, which uses
911 a random value.
912 optimize
913 When true, test dictionary generation with multiple parameters.
914 level
915 Integer target compression level when testing compression with
916 ``optimize=True``. Default is 1.
917 steps
918 Number of steps through ``k`` values to perform when ``optimize=True``.
919 Default is 32.
920 threads
921 Number of threads to use when ``optimize=True``. Default is 0, which means
922 to use a single thread. A negative value can be specified to use as many
923 threads as there are detected logical CPUs.
924 notifications
925 Controls writing of informational messages to ``stderr``. See the
926 documentation for ``train_dictionary()`` for more.
927
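Putting this together, cover training might look like the following (a sketch;
the parameter values are illustrative only)::

   # Manual mode: k and d must be specified.
   dict_data = zstd.train_cover_dictionary(16384, samples, k=64, d=8)

   # Automatic mode: search for good k/d values.
   dict_data = zstd.train_cover_dictionary(16384, samples, optimize=True,
                                           level=1, steps=32, threads=-1)
   print(dict_data.k, dict_data.d)
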
739 Explicit Compression Parameters
928 Explicit Compression Parameters
740 -------------------------------
929 -------------------------------
741
930
742 Zstandard's integer compression levels along with the input size and dictionary
931 Zstandard's integer compression levels along with the input size and dictionary
743 size are converted into a data structure defining multiple parameters to tune
932 size are converted into a data structure defining multiple parameters to tune
744 behavior of the compression algorithm. It is possible to define this
933 behavior of the compression algorithm. It is possible to define this
745 data structure explicitly to have lower-level control over compression behavior.
934 data structure explicitly to have lower-level control over compression behavior.
746
935
747 The ``zstd.CompressionParameters`` type represents this data structure.
936 The ``zstd.CompressionParameters`` type represents this data structure.
748 You can see how Zstandard converts compression levels to this data structure
937 You can see how Zstandard converts compression levels to this data structure
749 by calling ``zstd.get_compression_parameters()``. e.g.::
938 by calling ``zstd.get_compression_parameters()``. e.g.::
750
939
751 params = zstd.get_compression_parameters(5)
940 params = zstd.get_compression_parameters(5)
752
941
753 This function also accepts the uncompressed data size and dictionary size
942 This function also accepts the uncompressed data size and dictionary size
754 to adjust parameters::
943 to adjust parameters::
755
944
756 params = zstd.get_compression_parameters(3, source_size=len(data), dict_size=len(dict_data))
945 params = zstd.get_compression_parameters(3, source_size=len(data), dict_size=len(dict_data))
757
946
758 You can also construct compression parameters from their low-level components::
947 You can also construct compression parameters from their low-level components::
759
948
760 params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST)
949 params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST)
761
950
762 You can then configure a compressor to use the custom parameters::
951 You can then configure a compressor to use the custom parameters::
763
952
764 cctx = zstd.ZstdCompressor(compression_params=params)
953 cctx = zstd.ZstdCompressor(compression_params=params)
765
954
766 The members/attributes of ``CompressionParameters`` instances are as follows::
955 The members/attributes of ``CompressionParameters`` instances are as follows::
767
956
768 * window_log
957 * window_log
769 * chain_log
958 * chain_log
770 * hash_log
959 * hash_log
771 * search_log
960 * search_log
772 * search_length
961 * search_length
773 * target_length
962 * target_length
774 * strategy
963 * strategy
775
964
776 This is the order the arguments are passed to the constructor if not using
965 This is the order the arguments are passed to the constructor if not using
777 named arguments.
966 named arguments.
778
967
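For example, the same parameters as above can be spelled with named arguments
(a sketch using the attribute names listed above)::

   params = zstd.CompressionParameters(window_log=20,
                                       chain_log=6,
                                       hash_log=12,
                                       search_log=5,
                                       search_length=4,
                                       target_length=10,
                                       strategy=zstd.STRATEGY_FAST)
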
779 You'll need to read the Zstandard documentation for what these parameters
968 You'll need to read the Zstandard documentation for what these parameters
780 do.
969 do.
781
970
782 Frame Inspection
971 Frame Inspection
783 ----------------
972 ----------------
784
973
785 Data emitted from zstd compression is encapsulated in a *frame*. This frame
974 Data emitted from zstd compression is encapsulated in a *frame*. This frame
786 begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
975 begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
787 the frame in more detail. For more info, see
976 the frame in more detail. For more info, see
788 https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
977 https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
789
978
790 ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
979 ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
791 instance and returns a ``FrameParameters`` object describing the frame.
980 instance and returns a ``FrameParameters`` object describing the frame.
792
981
793 Depending on which fields are present in the frame and their values, the
982 Depending on which fields are present in the frame and their values, the
794 length of the frame parameters varies. If insufficient bytes are passed
983 length of the frame parameters varies. If insufficient bytes are passed
795 in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
984 in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
796 frame parameters can be parsed, pass in at least 18 bytes.
985 frame parameters can be parsed, pass in at least 18 bytes.
797
986
798 ``FrameParameters`` instances have the following attributes:
987 ``FrameParameters`` instances have the following attributes:
799
988
800 content_size
989 content_size
801 Integer size of original, uncompressed content. This will be ``0`` if the
990 Integer size of original, uncompressed content. This will be ``0`` if the
802 original content size isn't written to the frame (controlled with the
991 original content size isn't written to the frame (controlled with the
803 ``write_content_size`` argument to ``ZstdCompressor``) or if the input
992 ``write_content_size`` argument to ``ZstdCompressor``) or if the input
804 content size was ``0``.
993 content size was ``0``.
805
994
806 window_size
995 window_size
807 Integer size of maximum back-reference distance in compressed data.
996 Integer size of maximum back-reference distance in compressed data.
808
997
809 dict_id
998 dict_id
810 Integer of dictionary ID used for compression. ``0`` if no dictionary
999 Integer of dictionary ID used for compression. ``0`` if no dictionary
811 ID was used or if the dictionary ID was ``0``.
1000 ID was used or if the dictionary ID was ``0``.
812
1001
813 has_checksum
1002 has_checksum
814 Bool indicating whether a 4 byte content checksum is stored at the end
1003 Bool indicating whether a 4 byte content checksum is stored at the end
815 of the frame.
1004 of the frame.
816
1005
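For example, inspecting a frame produced elsewhere might look like this (a
sketch; ``frame_data`` is assumed to hold at least the first 18 bytes of a
zstd frame)::

   params = zstd.get_frame_parameters(frame_data[:18])

   print(params.content_size)   # 0 if not recorded in the frame
   print(params.window_size)
   print(params.dict_id)
   print(params.has_checksum)
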
817 Misc Functionality
1006 Misc Functionality
818 ------------------
1007 ------------------
819
1008
820 estimate_compression_context_size(CompressionParameters)
1009 estimate_compression_context_size(CompressionParameters)
821 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1010 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
822
1011
823 Given a ``CompressionParameters`` struct, estimate the memory size required
1012 Given a ``CompressionParameters`` struct, estimate the memory size required
824 to perform compression.
1013 to perform compression.
825
1014
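A sketch of how this might be used with parameters derived from a compression
level::

   params = zstd.get_compression_parameters(3)
   size = zstd.estimate_compression_context_size(params)
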
826 estimate_decompression_context_size()
1015 estimate_decompression_context_size()
827 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1016 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
828
1017
829 Estimate the memory size requirements for a decompressor instance.
1018 Estimate the memory size requirements for a decompressor instance.
830
1019
831 Constants
1020 Constants
832 ---------
1021 ---------
833
1022
834 The following module constants/attributes are exposed:
1023 The following module constants/attributes are exposed:
835
1024
836 ZSTD_VERSION
1025 ZSTD_VERSION
837 This module attribute exposes a 3-tuple of the Zstandard version. e.g.
1026 This module attribute exposes a 3-tuple of the Zstandard version. e.g.
838 ``(1, 0, 0)``
1027 ``(1, 0, 0)``
839 MAX_COMPRESSION_LEVEL
1028 MAX_COMPRESSION_LEVEL
840 Integer max compression level accepted by compression functions
1029 Integer max compression level accepted by compression functions
841 COMPRESSION_RECOMMENDED_INPUT_SIZE
1030 COMPRESSION_RECOMMENDED_INPUT_SIZE
842 Recommended chunk size to feed to compressor functions
1031 Recommended chunk size to feed to compressor functions
843 COMPRESSION_RECOMMENDED_OUTPUT_SIZE
1032 COMPRESSION_RECOMMENDED_OUTPUT_SIZE
844 Recommended chunk size for compression output
1033 Recommended chunk size for compression output
845 DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1034 DECOMPRESSION_RECOMMENDED_INPUT_SIZE
846 Recommended chunk size to feed into decompressor functions
1035 Recommended chunk size to feed into decompressor functions
847 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
1036 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
848 Recommended chunk size for decompression output
1037 Recommended chunk size for decompression output
849
1038
850 FRAME_HEADER
1039 FRAME_HEADER
851 bytes containing header of the Zstandard frame
1040 bytes containing header of the Zstandard frame
852 MAGIC_NUMBER
1041 MAGIC_NUMBER
853 Frame header as an integer
1042 Frame header as an integer
854
1043
855 WINDOWLOG_MIN
1044 WINDOWLOG_MIN
856 Minimum value for compression parameter
1045 Minimum value for compression parameter
857 WINDOWLOG_MAX
1046 WINDOWLOG_MAX
858 Maximum value for compression parameter
1047 Maximum value for compression parameter
859 CHAINLOG_MIN
1048 CHAINLOG_MIN
860 Minimum value for compression parameter
1049 Minimum value for compression parameter
861 CHAINLOG_MAX
1050 CHAINLOG_MAX
862 Maximum value for compression parameter
1051 Maximum value for compression parameter
863 HASHLOG_MIN
1052 HASHLOG_MIN
864 Minimum value for compression parameter
1053 Minimum value for compression parameter
865 HASHLOG_MAX
1054 HASHLOG_MAX
866 Maximum value for compression parameter
1055 Maximum value for compression parameter
867 SEARCHLOG_MIN
1056 SEARCHLOG_MIN
868 Minimum value for compression parameter
1057 Minimum value for compression parameter
869 SEARCHLOG_MAX
1058 SEARCHLOG_MAX
870 Maximum value for compression parameter
1059 Maximum value for compression parameter
871 SEARCHLENGTH_MIN
1060 SEARCHLENGTH_MIN
872 Minimum value for compression parameter
1061 Minimum value for compression parameter
873 SEARCHLENGTH_MAX
1062 SEARCHLENGTH_MAX
874 Maximum value for compression parameter
1063 Maximum value for compression parameter
875 TARGETLENGTH_MIN
1064 TARGETLENGTH_MIN
876 Minimum value for compression parameter
1065 Minimum value for compression parameter
877 TARGETLENGTH_MAX
1066 TARGETLENGTH_MAX
878 Maximum value for compression parameter
1067 Maximum value for compression parameter
879 STRATEGY_FAST
1068 STRATEGY_FAST
880 Compression strategy
1069 Compression strategy
881 STRATEGY_DFAST
1070 STRATEGY_DFAST
882 Compression strategy
1071 Compression strategy
883 STRATEGY_GREEDY
1072 STRATEGY_GREEDY
884 Compression strategy
1073 Compression strategy
885 STRATEGY_LAZY
1074 STRATEGY_LAZY
886 Compression strategy
1075 Compression strategy
887 STRATEGY_LAZY2
1076 STRATEGY_LAZY2
888 Compression strategy
1077 Compression strategy
889 STRATEGY_BTLAZY2
1078 STRATEGY_BTLAZY2
890 Compression strategy
1079 Compression strategy
891 STRATEGY_BTOPT
1080 STRATEGY_BTOPT
892 Compression strategy
1081 Compression strategy
893
1082
894 Performance Considerations
1083 Performance Considerations
895 --------------------------
1084 --------------------------
896
1085
897 The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
1086 The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
898 persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
1087 persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
899 or ``ZstdDecompressor`` instance for multiple operations is faster than
1088 or ``ZstdDecompressor`` instance for multiple operations is faster than
900 instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
1089 instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
901 operation. The differences are magnified as the size of data decreases. For
1090 operation. The differences are magnified as the size of data decreases. For
902 example, the difference between *context* reuse and non-reuse for 100,000
1091 example, the difference between *context* reuse and non-reuse for 100,000
903 100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
1092 100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
904 whereas 10 1,000,000 byte inputs will be more similar in speed (because the
1093 whereas 10 1,000,000 byte inputs will be more similar in speed (because the
905 time spent doing compression dwarfs time spent creating new *contexts*).
1094 time spent doing compression dwarfs time spent creating new *contexts*).
906
1095
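A sketch of the reuse pattern described above (``chunks`` is assumed to be an
iterable of ``bytes``)::

   cctx = zstd.ZstdCompressor(level=3)

   # One context, many operations: avoids paying context setup per chunk.
   compressed_chunks = [cctx.compress(chunk) for chunk in chunks]
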
1096 Buffer Types
1097 ------------
1098
1099 The API exposes a handful of custom types for interfacing with memory buffers.
1100 The primary goal of these types is to facilitate efficient multi-object
1101 operations.
1102
1103 The essential idea is to have a single memory allocation provide backing
1104 storage for multiple logical objects. This has 2 main advantages: fewer
1105 allocations and optimal memory access patterns. This avoids having to allocate
1106 a Python object for each logical object and furthermore ensures that access of
1107 data for objects can be sequential (read: fast) in memory.
1108
1109 BufferWithSegments
1110 ^^^^^^^^^^^^^^^^^^
1111
1112 The ``BufferWithSegments`` type represents a memory buffer containing N
1113 discrete items of known lengths (segments). It is essentially a fixed size
1114 memory address and an array of 2-tuples of ``(offset, length)`` 64-bit
1115 unsigned native endian integers defining the byte offset and length of each
1116 segment within the buffer.
1117
1118 Instances behave like containers.
1119
1120 ``len()`` returns the number of segments within the instance.
1121
1122 ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an
1123 individual segment within the backing buffer. That returned object references
1124 (not copies) memory. This means that iterating all objects doesn't copy
1125 data within the buffer.
1126
1127 The ``.size`` attribute contains the total size in bytes of the backing
1128 buffer.
1129
1130 Instances conform to the buffer protocol. So a reference to the backing bytes
1131 can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also
1132 be obtained via ``.tobytes()``.
1133
1134 The ``.segments`` attribute exposes the array of ``(offset, length)`` for
1135 segments within the buffer. It is a ``BufferSegments`` type.
1136
1137 BufferSegment
1138 ^^^^^^^^^^^^^
1139
1140 The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``.
1141 It is essentially a reference to N bytes within a ``BufferWithSegments``.
1142
1143 ``len()`` returns the length of the segment in bytes.
1144
1145 ``.offset`` contains the byte offset of this segment within its parent
1146 ``BufferWithSegments`` instance.
1147
1148 The object conforms to the buffer protocol. ``.tobytes()`` can be called to
1149 obtain a ``bytes`` instance with a copy of the backing bytes.
1150
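A sketch of interacting with these two types (``buf`` is assumed to be a
``BufferWithSegments`` obtained from an API that returns one)::

   print(len(buf))          # number of segments
   print(buf.size)          # total bytes in the backing buffer

   segment = buf[0]         # BufferSegment; references, does not copy
   print(segment.offset, len(segment))

   raw = segment.tobytes()  # copy of just this segment's bytes
   view = memoryview(buf)   # zero-copy view of the whole backing buffer
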
1151 BufferSegments
1152 ^^^^^^^^^^^^^^
1153
1154 This type represents an array of ``(offset, length)`` integers defining segments
1155 within a ``BufferWithSegments``.
1156
1157 The array members are 64-bit unsigned integers using host/native bit order.
1158
1159 Instances conform to the buffer protocol.
1160
1161 BufferWithSegmentsCollection
1162 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1163
1164 The ``BufferWithSegmentsCollection`` type represents a virtual spanning view
1165 of multiple ``BufferWithSegments`` instances.
1166
1167 Instances are constructed from 1 or more ``BufferWithSegments`` instances. The
1168 resulting object behaves like an ordered sequence whose members are the
1169 segments within each ``BufferWithSegments``.
1170
1171 ``len()`` returns the number of segments within all ``BufferWithSegments``
1172 instances.
1173
1174 ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at
1175 that offset as if all ``BufferWithSegments`` instances were a single
1176 entity.
1177
1178 If the object is composed of 2 ``BufferWithSegments`` instances with the
1179 first having 2 segments and the second having 3 segments, then ``b[0]``
1180 and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``,
1181 and ``b[4]`` access segments from the second.
1182
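A sketch, assuming the constructor takes the ``BufferWithSegments`` instances
directly and that ``buf1`` and ``buf2`` hold 2 and 3 segments respectively::

   collection = zstd.BufferWithSegmentsCollection(buf1, buf2)

   print(len(collection))   # 5: indexes 0-1 come from buf1, 2-4 from buf2
   segment = collection[3]  # a BufferSegment from buf2
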
1183 Choosing an API
1184 ===============
1185
1186 There are multiple APIs for performing compression and decompression. This is
1187 because different applications have different needs and the library wants to
1188 facilitate optimal use in as many use cases as possible.
1189
1190 At a high level, APIs are divided into *one-shot* and *streaming*. See
1191 the ``Concepts`` section for a description of how these are different at
1192 the C layer.
1193
1194 The *one-shot* APIs are useful for small data, where the input or output
1195 size is known. (The size can come from a buffer length, file size, or
1196 stored in the zstd frame header.) A limitation of the *one-shot* APIs is that
1197 input and output must fit in memory simultaneously. For, say, a 4 GB input,
1198 this is often not feasible.
1199
1200 The *one-shot* APIs also perform all work as a single operation. So, if you
1201 feed it large input, it could take a long time for the function to return.
1202
1203 The streaming APIs do not have the limitations of the *one-shot* APIs. But the
1204 price you pay for this flexibility is that they are more complex than a
1205 single function call.
1206
1207 The streaming APIs put the caller in control of compression and decompression
1208 behavior by allowing them to directly control either the input or output side
1209 of the operation.
1210
1211 With the *streaming input*, *compressor*, and *decompressor* APIs, the caller
1212 has full control over the input to the compression or decompression stream.
1213 They can directly choose when new data is operated on.
1214
1215 With the *streaming output* APIs, the caller has full control over the output
1216 of the compression or decompression stream. It can choose when to receive
1217 new data.
1218
1219 When using the *streaming* APIs that operate on file-like or stream objects,
1220 it is important to consider what happens in that object when I/O is requested.
1221 There is potential for long pauses as data is read or written from the
1222 underlying stream (say from interacting with a filesystem or network). This
1223 could add considerable overhead.
1224
1225 Concepts
1226 ========
1227
1228 It is important to have a basic understanding of how Zstandard works in order
1229 to optimally use this library. In addition, there are some low-level Python
1230 concepts that are worth explaining to aid understanding. This section aims to
1231 provide that knowledge.
1232
1233 Zstandard Frames and Compression Format
1234 ---------------------------------------
1235
1236 Compressed zstandard data almost always exists within a container called a
1237 *frame*. (For the technically curious, see the
1238 `specification <https://github.com/facebook/zstd/blob/3bee41a70eaf343fbcae3637b3f6edbe52f35ed8/doc/zstd_compression_format.md>`_.)
1239
1240 The frame contains a header and optional trailer. The header contains a
1241 magic number to self-identify as a zstd frame and a description of the
1242 compressed data that follows.
1243
1244 Among other things, the frame *optionally* contains the size of the
1245 decompressed data the frame represents, a 32-bit checksum of the
1246 decompressed data (to facilitate verification during decompression),
1247 and the ID of the dictionary used to compress the data.
1248
1249 Storing the original content size in the frame (``write_content_size=True``
1250 to ``ZstdCompressor``) is important for performance in some scenarios. Having
1251 the decompressed size stored there (or storing it elsewhere) allows
1252 decompression to perform a single memory allocation that is exactly sized to
1253 the output. This is faster than continuously growing a memory buffer to hold
1254 output.
1255
1256 Compression and Decompression Contexts
1257 --------------------------------------
1258
1259 In order to perform a compression or decompression operation with the zstd
1260 C API, you need what's called a *context*. A context essentially holds
1261 configuration and state for a compression or decompression operation. For
1262 example, a compression context holds the configured compression level.
1263
1264 Contexts can be reused for multiple operations. Since creating and
1265 destroying contexts is not free, there are performance advantages to
1266 reusing contexts.
1267
1268 The ``ZstdCompressor`` and ``ZstdDecompressor`` types are essentially
1269 wrappers around these contexts in the zstd C API.
1270
1271 One-shot And Streaming Operations
1272 ---------------------------------
1273
1274 A compression or decompression operation can either be performed as a
1275 single *one-shot* operation or as a continuous *streaming* operation.
1276
1277 In one-shot mode (the *simple* APIs provided by the Python interface),
1278 **all** input is handed to the compressor or decompressor as a single buffer
1279 and **all** output is returned as a single buffer.
1280
1281 In streaming mode, input is delivered to the compressor or decompressor as
1282 a series of chunks via multiple function calls. Likewise, output is
1283 obtained in chunks as well.
1284
1285 Streaming operations require an additional *stream* object to be created
1286 to track the operation. These are logical extensions of *context*
1287 instances.
1288
1289 There are advantages and disadvantages to each mode of operation. There
1290 are scenarios where certain modes can't be used. See the
1291 ``Choosing an API`` section for more.
1292
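A sketch contrasting the two modes with this package's APIs (``data`` and
``chunks`` stand in for your input; the compressor ``write_to()`` stream is
assumed to be available, as it is used for the decompressor elsewhere in these
docs)::

   import io
   import zstd

   cctx = zstd.ZstdCompressor()

   # One-shot: all input in, all output back, in a single call.
   compressed = cctx.compress(data)

   # Streaming: feed chunks as they become available.
   out = io.BytesIO()
   with cctx.write_to(out) as compressor:
       for chunk in chunks:
           compressor.write(chunk)
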
1293 Dictionaries
1294 ------------
1295
1296 A compression *dictionary* is essentially data used to seed the compressor
1297 state so it can achieve better compression. The idea is that if you are
1298 compressing a lot of similar pieces of data (e.g. JSON documents or anything
1299 sharing similar structure), then you can find common patterns across multiple
1300 objects and then leverage those common patterns during compression and
1301 decompression operations to achieve better compression ratios.
1302
1303 Dictionary compression is generally only useful for small inputs - data no
1304 larger than a few kilobytes. The upper bound on this range is highly dependent
1305 on the input data and the dictionary.
1306
1307 Python Buffer Protocol
1308 ----------------------
1309
1310 Many functions in the library operate on objects that implement Python's
1311 `buffer protocol <https://docs.python.org/3.6/c-api/buffer.html>`_.
1312
1313 The *buffer protocol* is an internal implementation detail of a Python
1314 type that allows instances of that type (objects) to be exposed as a raw
1315 pointer (or buffer) in the C API. In other words, it allows objects to be
1316 exposed as an array of bytes.
1317
1318 From the perspective of the C API, objects implementing the *buffer protocol*
1319 all look the same: they are just a pointer to a memory address of a defined
1320 length. This allows the C API to be largely type agnostic when accessing their
1321 data. As a result, custom types can be passed in without first converting them
1322 to a specific type.
1323
1324 Many Python types implement the buffer protocol. These include ``bytes``
1325 (``str`` on Python 2), ``bytearray``, ``array.array``, ``io.BytesIO``,
1326 ``mmap.mmap``, and ``memoryview``.
1327
1328 ``python-zstandard`` APIs that accept objects conforming to the buffer
1329 protocol require that the buffer is *C contiguous* and has a single
1330 dimension (``ndim==1``). This is usually the case. An example of where it
1331 is not is a Numpy matrix type.
1332
1333 Requiring Output Sizes for Non-Streaming Decompression APIs
1334 -----------------------------------------------------------
1335
1336 Non-streaming decompression APIs require that either the output size is
1337 explicitly defined (either in the zstd frame header or passed into the
1338 function) or that a max output size is specified. This restriction is for
1339 your safety.
1340
1341 The *one-shot* decompression APIs store the decompressed result in a
1342 single buffer. This means that a buffer needs to be pre-allocated to hold
1343 the result. If the decompressed size is not known, then there is no universal
1344 good default size to use. Any default will fail or will be highly sub-optimal
1345 in some scenarios (it will either be too small or will put stress on the
1346 memory allocator to allocate a too large block).
1347
1348 A *helpful* API may retry decompression with buffers of increasing size.
1349 While useful, there are obvious performance disadvantages, namely redoing
1350 decompression N times until it works. In addition, there is a security
1351 concern. Say the input came from highly compressible data, like 1 GB of the
1352 same byte value. The output size could be several orders of magnitude larger than the
1353 input size. An input of <100KB could decompress to >1GB. Without a bounds
1354 restriction on the decompressed size, certain inputs could exhaust all system
1355 memory. That's not good and is why the maximum output size is limited.
1356
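A sketch of the two ways to satisfy this requirement (``compressed`` stands in
for a zstd frame; treat the exact ``max_output_size`` spelling as an assumption
here)::

   dctx = zstd.ZstdDecompressor()

   # Works when the frame recorded its content size (write_content_size=True).
   decompressed = dctx.decompress(compressed)

   # Otherwise, an explicit upper bound on the output size must be provided.
   decompressed = dctx.decompress(compressed, max_output_size=1048576)
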
907 Note on Zstandard's *Experimental* API
1357 Note on Zstandard's *Experimental* API
908 ======================================
1358 ======================================
909
1359
910 Many of the Zstandard APIs used by this module are marked as *experimental*
1360 Many of the Zstandard APIs used by this module are marked as *experimental*
911 within the Zstandard project. This includes a large number of useful
1361 within the Zstandard project. This includes a large number of useful
912 features, such as compression and frame parameters and parts of dictionary
1362 features, such as compression and frame parameters and parts of dictionary
913 compression.
1363 compression.
914
1364
915 It is unclear how Zstandard's C API will evolve over time, especially with
1365 It is unclear how Zstandard's C API will evolve over time, especially with
916 regards to this *experimental* functionality. We will try to maintain
1366 regards to this *experimental* functionality. We will try to maintain
917 backwards compatibility at the Python API level. However, we cannot
1367 backwards compatibility at the Python API level. However, we cannot
918 guarantee this for things not under our control.
1368 guarantee this for things not under our control.
919
1369
920 Since a copy of the Zstandard source code is distributed with this
1370 Since a copy of the Zstandard source code is distributed with this
921 module and since we compile against it, the behavior of a specific
1371 module and since we compile against it, the behavior of a specific
922 version of this module should be constant for all of time. So if you
1372 version of this module should be constant for all of time. So if you
923 pin the version of this module used in your projects (which is a Python
1373 pin the version of this module used in your projects (which is a Python
924 best practice), you should be insulated from unwanted future changes.
1374 best practice), you should be insulated from unwanted future changes.
925
1375
926 Donate
1376 Donate
927 ======
1377 ======
928
1378
929 A lot of time has been invested into this project by the author.
1379 A lot of time has been invested into this project by the author.
930
1380
931 If you find this project useful and would like to thank the author for
1381 If you find this project useful and would like to thank the author for
932 their work, consider donating some money. Any amount is appreciated.
1382 their work, consider donating some money. Any amount is appreciated.
933
1383
934 .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif
1384 .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif
935 :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted
1385 :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted
936 :alt: Donate via PayPal
1386 :alt: Donate via PayPal
937
1387
938 .. |ci-status| image:: https://travis-ci.org/indygreg/python-zstandard.svg?branch=master
1388 .. |ci-status| image:: https://travis-ci.org/indygreg/python-zstandard.svg?branch=master
939 :target: https://travis-ci.org/indygreg/python-zstandard
1389 :target: https://travis-ci.org/indygreg/python-zstandard
940
1390
941 .. |win-ci-status| image:: https://ci.appveyor.com/api/projects/status/github/indygreg/python-zstandard?svg=true
1391 .. |win-ci-status| image:: https://ci.appveyor.com/api/projects/status/github/indygreg/python-zstandard?svg=true
942 :target: https://ci.appveyor.com/project/indygreg/python-zstandard
1392 :target: https://ci.appveyor.com/project/indygreg/python-zstandard
943 :alt: Windows build status
1393 :alt: Windows build status
@@ -1,248 +1,392
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
13 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
14 static char *kwlist[] = { "dict_size", "samples", "parameters", NULL };
14 static char* kwlist[] = {
15 "dict_size",
16 "samples",
17 "selectivity",
18 "level",
19 "notifications",
20 "dict_id",
21 NULL
22 };
15 size_t capacity;
23 size_t capacity;
16 PyObject* samples;
24 PyObject* samples;
17 Py_ssize_t samplesLen;
25 Py_ssize_t samplesLen;
18 PyObject* parameters = NULL;
26 unsigned selectivity = 0;
27 int level = 0;
28 unsigned notifications = 0;
29 unsigned dictID = 0;
19 ZDICT_params_t zparams;
30 ZDICT_params_t zparams;
20 Py_ssize_t sampleIndex;
31 Py_ssize_t sampleIndex;
21 Py_ssize_t sampleSize;
32 Py_ssize_t sampleSize;
22 PyObject* sampleItem;
33 PyObject* sampleItem;
23 size_t zresult;
34 size_t zresult;
24 void* sampleBuffer;
35 void* sampleBuffer = NULL;
25 void* sampleOffset;
36 void* sampleOffset;
26 size_t samplesSize = 0;
37 size_t samplesSize = 0;
27 size_t* sampleSizes;
38 size_t* sampleSizes = NULL;
28 void* dict;
39 void* dict = NULL;
29 ZstdCompressionDict* result;
40 ZstdCompressionDict* result = NULL;
30
41
31 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!:train_dictionary",
42 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IiII:train_dictionary",
32 kwlist,
43 kwlist,
33 &capacity,
44 &capacity,
34 &PyList_Type, &samples,
45 &PyList_Type, &samples,
35 (PyObject*)&DictParametersType, &parameters)) {
46 &selectivity, &level, &notifications, &dictID)) {
36 return NULL;
47 return NULL;
37 }
48 }
38
49
39 /* Validate parameters first since it is easiest. */
50 memset(&zparams, 0, sizeof(zparams));
40 zparams.selectivityLevel = 0;
41 zparams.compressionLevel = 0;
42 zparams.notificationLevel = 0;
43 zparams.dictID = 0;
44 zparams.reserved[0] = 0;
45 zparams.reserved[1] = 0;
46
51
47 if (parameters) {
52 zparams.selectivityLevel = selectivity;
48 /* TODO validate data ranges */
53 zparams.compressionLevel = level;
49 zparams.selectivityLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 0));
54 zparams.notificationLevel = notifications;
50 zparams.compressionLevel = PyLong_AsLong(PyTuple_GetItem(parameters, 1));
55 zparams.dictID = dictID;
51 zparams.notificationLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 2));
52 zparams.dictID = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 3));
53 }
54
56
55 /* Figure out the size of the raw samples */
57 /* Figure out the size of the raw samples */
56 samplesLen = PyList_Size(samples);
58 samplesLen = PyList_Size(samples);
57 for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
59 for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
58 sampleItem = PyList_GetItem(samples, sampleIndex);
60 sampleItem = PyList_GetItem(samples, sampleIndex);
59 if (!PyBytes_Check(sampleItem)) {
61 if (!PyBytes_Check(sampleItem)) {
60 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
62 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
61 return NULL;
63 return NULL;
62 }
64 }
63 samplesSize += PyBytes_GET_SIZE(sampleItem);
65 samplesSize += PyBytes_GET_SIZE(sampleItem);
64 }
66 }
65
67
66 /* Now that we know the total size of the raw samples, we can allocate
67 a buffer for the raw data */
68 /* Now that we know the total size of the raw samples, we can allocate
69 a buffer for the raw data */
68 sampleBuffer = PyMem_Malloc(samplesSize);
70 sampleBuffer = PyMem_Malloc(samplesSize);
69 if (!sampleBuffer) {
71 if (!sampleBuffer) {
70 PyErr_NoMemory();
72 PyErr_NoMemory();
71 return NULL;
73 goto finally;
72 }
74 }
73 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
75 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
74 if (!sampleSizes) {
76 if (!sampleSizes) {
75 PyMem_Free(sampleBuffer);
76 PyErr_NoMemory();
77 PyErr_NoMemory();
77 return NULL;
78 goto finally;
78 }
79 }
79
80
80 sampleOffset = sampleBuffer;
81 sampleOffset = sampleBuffer;
81 /* Now iterate again and assemble the samples in the buffer */
82 /* Now iterate again and assemble the samples in the buffer */
82 for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
83 for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
83 sampleItem = PyList_GetItem(samples, sampleIndex);
84 sampleItem = PyList_GetItem(samples, sampleIndex);
84 sampleSize = PyBytes_GET_SIZE(sampleItem);
85 sampleSize = PyBytes_GET_SIZE(sampleItem);
85 sampleSizes[sampleIndex] = sampleSize;
86 sampleSizes[sampleIndex] = sampleSize;
86 memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
87 memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
87 sampleOffset = (char*)sampleOffset + sampleSize;
88 sampleOffset = (char*)sampleOffset + sampleSize;
88 }
89 }
89
90
90 dict = PyMem_Malloc(capacity);
91 dict = PyMem_Malloc(capacity);
91 if (!dict) {
92 if (!dict) {
92 PyMem_Free(sampleSizes);
93 PyMem_Free(sampleBuffer);
94 PyErr_NoMemory();
93 PyErr_NoMemory();
95 return NULL;
94 goto finally;
96 }
95 }
97
96
97 /* TODO consider using dup2() to redirect zstd's stderr writing to a buffer */
98 Py_BEGIN_ALLOW_THREADS
98 zresult = ZDICT_trainFromBuffer_advanced(dict, capacity,
99 zresult = ZDICT_trainFromBuffer_advanced(dict, capacity,
99 sampleBuffer, sampleSizes, (unsigned int)samplesLen,
100 sampleBuffer, sampleSizes, (unsigned int)samplesLen,
100 zparams);
101 zparams);
102 Py_END_ALLOW_THREADS
101 if (ZDICT_isError(zresult)) {
103 if (ZDICT_isError(zresult)) {
102 PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult));
104 PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult));
103 PyMem_Free(dict);
105 PyMem_Free(dict);
104 PyMem_Free(sampleSizes);
106 goto finally;
105 PyMem_Free(sampleBuffer);
106 return NULL;
107 }
107 }
108
108
109 result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
109 result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
110 if (!result) {
110 if (!result) {
111 return NULL;
111 goto finally;
112 }
112 }
113
113
114 result->dictData = dict;
114 result->dictData = dict;
115 result->dictSize = zresult;
115 result->dictSize = zresult;
116 result->d = 0;
117 result->k = 0;
118
119 finally:
120 PyMem_Free(sampleBuffer);
121 PyMem_Free(sampleSizes);
122
116 return result;
123 return result;
117 }
124 }
118
125
126 ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
127 static char* kwlist[] = {
128 "dict_size",
129 "samples",
130 "k",
131 "d",
132 "notifications",
133 "dict_id",
134 "level",
135 "optimize",
136 "steps",
137 "threads",
138 NULL
139 };
140
141 size_t capacity;
142 PyObject* samples;
143 unsigned k = 0;
144 unsigned d = 0;
145 unsigned notifications = 0;
146 unsigned dictID = 0;
147 int level = 0;
148 PyObject* optimize = NULL;
149 unsigned steps = 0;
150 int threads = 0;
151 COVER_params_t params;
152 Py_ssize_t samplesLen;
153 Py_ssize_t i;
154 size_t samplesSize = 0;
155 void* sampleBuffer = NULL;
156 size_t* sampleSizes = NULL;
157 void* sampleOffset;
158 Py_ssize_t sampleSize;
159 void* dict = NULL;
160 size_t zresult;
161 ZstdCompressionDict* result = NULL;
162
163 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIiOIi:train_cover_dictionary",
164 kwlist, &capacity, &PyList_Type, &samples,
165 &k, &d, &notifications, &dictID, &level, &optimize, &steps, &threads)) {
166 return NULL;
167 }
168
169 if (threads < 0) {
170 threads = cpu_count();
171 }
172
173 memset(&params, 0, sizeof(params));
174 params.k = k;
175 params.d = d;
176 params.steps = steps;
177 params.nbThreads = threads;
178 params.notificationLevel = notifications;
179 params.dictID = dictID;
180 params.compressionLevel = level;
181
182 /* Figure out total size of input samples. */
183 samplesLen = PyList_Size(samples);
184 for (i = 0; i < samplesLen; i++) {
185 PyObject* sampleItem = PyList_GET_ITEM(samples, i);
186
187 if (!PyBytes_Check(sampleItem)) {
188 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
189 return NULL;
190 }
191 samplesSize += PyBytes_GET_SIZE(sampleItem);
192 }
193
194 sampleBuffer = PyMem_Malloc(samplesSize);
195 if (!sampleBuffer) {
196 PyErr_NoMemory();
197 goto finally;
198 }
199
200 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
201 if (!sampleSizes) {
202 PyErr_NoMemory();
203 goto finally;
204 }
205
206 sampleOffset = sampleBuffer;
207 for (i = 0; i < samplesLen; i++) {
208 PyObject* sampleItem = PyList_GET_ITEM(samples, i);
209 sampleSize = PyBytes_GET_SIZE(sampleItem);
210 sampleSizes[i] = sampleSize;
211 memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
212 sampleOffset = (char*)sampleOffset + sampleSize;
213 }
214
215 dict = PyMem_Malloc(capacity);
216 if (!dict) {
217 PyErr_NoMemory();
218 goto finally;
219 }
220
221 Py_BEGIN_ALLOW_THREADS
222 if (optimize && PyObject_IsTrue(optimize)) {
223 zresult = COVER_optimizeTrainFromBuffer(dict, capacity,
224 sampleBuffer, sampleSizes, (unsigned)samplesLen, &params);
225 }
226 else {
227 zresult = COVER_trainFromBuffer(dict, capacity,
228 sampleBuffer, sampleSizes, (unsigned)samplesLen, params);
229 }
230 Py_END_ALLOW_THREADS
231
232 if (ZDICT_isError(zresult)) {
233 PyMem_Free(dict);
234 PyErr_Format(ZstdError, "cannot train dict: %s", ZDICT_getErrorName(zresult));
235 goto finally;
236 }
237
238 result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
239 if (!result) {
240 PyMem_Free(dict);
241 goto finally;
242 }
243
244 result->dictData = dict;
245 result->dictSize = zresult;
246 result->d = params.d;
247 result->k = params.k;
248
249 finally:
250 PyMem_Free(sampleBuffer);
251 PyMem_Free(sampleSizes);
252
253 return result;
254 }
119
255
120 PyDoc_STRVAR(ZstdCompressionDict__doc__,
256 PyDoc_STRVAR(ZstdCompressionDict__doc__,
121 "ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
257 "ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
122 "\n"
258 "\n"
123 "This type holds the results of a computed Zstandard compression dictionary.\n"
259 "This type holds the results of a computed Zstandard compression dictionary.\n"
124 "Instances are obtained by calling ``train_dictionary()`` or by passing bytes\n"
260 "Instances are obtained by calling ``train_dictionary()`` or by passing bytes\n"
125 "obtained from another source into the constructor.\n"
261 "obtained from another source into the constructor.\n"
126 );
262 );
127
263
128 static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args) {
264 static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args) {
129 const char* source;
265 const char* source;
130 Py_ssize_t sourceSize;
266 Py_ssize_t sourceSize;
131
267
132 self->dictData = NULL;
268 self->dictData = NULL;
133 self->dictSize = 0;
269 self->dictSize = 0;
134
270
135 #if PY_MAJOR_VERSION >= 3
271 #if PY_MAJOR_VERSION >= 3
136 if (!PyArg_ParseTuple(args, "y#:ZstdCompressionDict",
272 if (!PyArg_ParseTuple(args, "y#:ZstdCompressionDict",
137 #else
273 #else
138 if (!PyArg_ParseTuple(args, "s#:ZstdCompressionDict",
274 if (!PyArg_ParseTuple(args, "s#:ZstdCompressionDict",
139 #endif
275 #endif
140 &source, &sourceSize)) {
276 &source, &sourceSize)) {
141 return -1;
277 return -1;
142 }
278 }
143
279
144 self->dictData = PyMem_Malloc(sourceSize);
280 self->dictData = PyMem_Malloc(sourceSize);
145 if (!self->dictData) {
281 if (!self->dictData) {
146 PyErr_NoMemory();
282 PyErr_NoMemory();
147 return -1;
283 return -1;
148 }
284 }
149
285
150 memcpy(self->dictData, source, sourceSize);
286 memcpy(self->dictData, source, sourceSize);
151 self->dictSize = sourceSize;
287 self->dictSize = sourceSize;
152
288
153 return 0;
289 return 0;
154 }
290 }
155
291
156 static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
292 static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
157 if (self->dictData) {
293 if (self->dictData) {
158 PyMem_Free(self->dictData);
294 PyMem_Free(self->dictData);
159 self->dictData = NULL;
295 self->dictData = NULL;
160 }
296 }
161
297
162 PyObject_Del(self);
298 PyObject_Del(self);
163 }
299 }
164
300
165 static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
301 static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
166 unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);
302 unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);
167
303
168 return PyLong_FromLong(dictID);
304 return PyLong_FromLong(dictID);
169 }
305 }
170
306
171 static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) {
307 static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) {
172 return PyBytes_FromStringAndSize(self->dictData, self->dictSize);
308 return PyBytes_FromStringAndSize(self->dictData, self->dictSize);
173 }
309 }
174
310
175 static PyMethodDef ZstdCompressionDict_methods[] = {
311 static PyMethodDef ZstdCompressionDict_methods[] = {
176 { "dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS,
312 { "dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS,
177 PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") },
313 PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") },
178 { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS,
314 { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS,
179 PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") },
315 PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") },
180 { NULL, NULL }
316 { NULL, NULL }
181 };
317 };
182
318
319 static PyMemberDef ZstdCompressionDict_members[] = {
320 { "k", T_UINT, offsetof(ZstdCompressionDict, k), READONLY,
321 "segment size" },
322 { "d", T_UINT, offsetof(ZstdCompressionDict, d), READONLY,
323 "dmer size" },
324 { NULL }
325 };
326
183 static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) {
327 static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) {
184 return self->dictSize;
328 return self->dictSize;
185 }
329 }
186
330
187 static PySequenceMethods ZstdCompressionDict_sq = {
331 static PySequenceMethods ZstdCompressionDict_sq = {
188 (lenfunc)ZstdCompressionDict_length, /* sq_length */
332 (lenfunc)ZstdCompressionDict_length, /* sq_length */
189 0, /* sq_concat */
333 0, /* sq_concat */
190 0, /* sq_repeat */
334 0, /* sq_repeat */
191 0, /* sq_item */
335 0, /* sq_item */
192 0, /* sq_ass_item */
336 0, /* sq_ass_item */
193 0, /* sq_contains */
337 0, /* sq_contains */
194 0, /* sq_inplace_concat */
338 0, /* sq_inplace_concat */
195 0 /* sq_inplace_repeat */
339 0 /* sq_inplace_repeat */
196 };
340 };
197
341
198 PyTypeObject ZstdCompressionDictType = {
342 PyTypeObject ZstdCompressionDictType = {
199 PyVarObject_HEAD_INIT(NULL, 0)
343 PyVarObject_HEAD_INIT(NULL, 0)
200 "zstd.ZstdCompressionDict", /* tp_name */
344 "zstd.ZstdCompressionDict", /* tp_name */
201 sizeof(ZstdCompressionDict), /* tp_basicsize */
345 sizeof(ZstdCompressionDict), /* tp_basicsize */
202 0, /* tp_itemsize */
346 0, /* tp_itemsize */
203 (destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */
347 (destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */
204 0, /* tp_print */
348 0, /* tp_print */
205 0, /* tp_getattr */
349 0, /* tp_getattr */
206 0, /* tp_setattr */
350 0, /* tp_setattr */
207 0, /* tp_compare */
351 0, /* tp_compare */
208 0, /* tp_repr */
352 0, /* tp_repr */
209 0, /* tp_as_number */
353 0, /* tp_as_number */
210 &ZstdCompressionDict_sq, /* tp_as_sequence */
354 &ZstdCompressionDict_sq, /* tp_as_sequence */
211 0, /* tp_as_mapping */
355 0, /* tp_as_mapping */
212 0, /* tp_hash */
356 0, /* tp_hash */
213 0, /* tp_call */
357 0, /* tp_call */
214 0, /* tp_str */
358 0, /* tp_str */
215 0, /* tp_getattro */
359 0, /* tp_getattro */
216 0, /* tp_setattro */
360 0, /* tp_setattro */
217 0, /* tp_as_buffer */
361 0, /* tp_as_buffer */
218 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
362 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
219 ZstdCompressionDict__doc__, /* tp_doc */
363 ZstdCompressionDict__doc__, /* tp_doc */
220 0, /* tp_traverse */
364 0, /* tp_traverse */
221 0, /* tp_clear */
365 0, /* tp_clear */
222 0, /* tp_richcompare */
366 0, /* tp_richcompare */
223 0, /* tp_weaklistoffset */
367 0, /* tp_weaklistoffset */
224 0, /* tp_iter */
368 0, /* tp_iter */
225 0, /* tp_iternext */
369 0, /* tp_iternext */
226 ZstdCompressionDict_methods, /* tp_methods */
370 ZstdCompressionDict_methods, /* tp_methods */
227 0, /* tp_members */
371 ZstdCompressionDict_members, /* tp_members */
228 0, /* tp_getset */
372 0, /* tp_getset */
229 0, /* tp_base */
373 0, /* tp_base */
230 0, /* tp_dict */
374 0, /* tp_dict */
231 0, /* tp_descr_get */
375 0, /* tp_descr_get */
232 0, /* tp_descr_set */
376 0, /* tp_descr_set */
233 0, /* tp_dictoffset */
377 0, /* tp_dictoffset */
234 (initproc)ZstdCompressionDict_init, /* tp_init */
378 (initproc)ZstdCompressionDict_init, /* tp_init */
235 0, /* tp_alloc */
379 0, /* tp_alloc */
236 PyType_GenericNew, /* tp_new */
380 PyType_GenericNew, /* tp_new */
237 };
381 };
238
382
239 void compressiondict_module_init(PyObject* mod) {
383 void compressiondict_module_init(PyObject* mod) {
240 Py_TYPE(&ZstdCompressionDictType) = &PyType_Type;
384 Py_TYPE(&ZstdCompressionDictType) = &PyType_Type;
241 if (PyType_Ready(&ZstdCompressionDictType) < 0) {
385 if (PyType_Ready(&ZstdCompressionDictType) < 0) {
242 return;
386 return;
243 }
387 }
244
388
245 Py_INCREF((PyObject*)&ZstdCompressionDictType);
389 Py_INCREF((PyObject*)&ZstdCompressionDictType);
246 PyModule_AddObject(mod, "ZstdCompressionDict",
390 PyModule_AddObject(mod, "ZstdCompressionDict",
247 (PyObject*)&ZstdCompressionDictType);
391 (PyObject*)&ZstdCompressionDictType);
248 }
392 }
@@ -1,220 +1,253
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams) {
11 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams) {
12 zparams->windowLog = params->windowLog;
12 zparams->windowLog = params->windowLog;
13 zparams->chainLog = params->chainLog;
13 zparams->chainLog = params->chainLog;
14 zparams->hashLog = params->hashLog;
14 zparams->hashLog = params->hashLog;
15 zparams->searchLog = params->searchLog;
15 zparams->searchLog = params->searchLog;
16 zparams->searchLength = params->searchLength;
16 zparams->searchLength = params->searchLength;
17 zparams->targetLength = params->targetLength;
17 zparams->targetLength = params->targetLength;
18 zparams->strategy = params->strategy;
18 zparams->strategy = params->strategy;
19 }
19 }
20
20
21 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args) {
21 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args) {
22 int compressionLevel;
22 int compressionLevel;
23 unsigned PY_LONG_LONG sourceSize = 0;
23 unsigned PY_LONG_LONG sourceSize = 0;
24 Py_ssize_t dictSize = 0;
24 Py_ssize_t dictSize = 0;
25 ZSTD_compressionParameters params;
25 ZSTD_compressionParameters params;
26 CompressionParametersObject* result;
26 CompressionParametersObject* result;
27
27
28 if (!PyArg_ParseTuple(args, "i|Kn:get_compression_parameters",
28 if (!PyArg_ParseTuple(args, "i|Kn:get_compression_parameters",
29 &compressionLevel, &sourceSize, &dictSize)) {
29 &compressionLevel, &sourceSize, &dictSize)) {
30 return NULL;
30 return NULL;
31 }
31 }
32
32
33 params = ZSTD_getCParams(compressionLevel, sourceSize, dictSize);
33 params = ZSTD_getCParams(compressionLevel, sourceSize, dictSize);
34
34
35 result = PyObject_New(CompressionParametersObject, &CompressionParametersType);
35 result = PyObject_New(CompressionParametersObject, &CompressionParametersType);
36 if (!result) {
36 if (!result) {
37 return NULL;
37 return NULL;
38 }
38 }
39
39
40 result->windowLog = params.windowLog;
40 result->windowLog = params.windowLog;
41 result->chainLog = params.chainLog;
41 result->chainLog = params.chainLog;
42 result->hashLog = params.hashLog;
42 result->hashLog = params.hashLog;
43 result->searchLog = params.searchLog;
43 result->searchLog = params.searchLog;
44 result->searchLength = params.searchLength;
44 result->searchLength = params.searchLength;
45 result->targetLength = params.targetLength;
45 result->targetLength = params.targetLength;
46 result->strategy = params.strategy;
46 result->strategy = params.strategy;
47
47
48 return result;
48 return result;
49 }
49 }
50
50
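
For context, get_compression_parameters() above is a thin wrapper that copies the struct returned by ZSTD_getCParams() onto the Python object field by field. A minimal C sketch of the underlying call, assuming the zstd ~1.1.x advanced API vendored with this extension (ZSTD_getCParams() sits behind ZSTD_STATIC_LINKING_ONLY there); this is illustrative and not part of the extension itself:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdio.h>

    int main(void) {
        /* Level 3, unknown source size (0), no dictionary (0): the same
           defaults get_compression_parameters() uses for omitted arguments. */
        ZSTD_compressionParameters cparams = ZSTD_getCParams(3, 0, 0);

        printf("windowLog=%u chainLog=%u hashLog=%u searchLog=%u\n",
               cparams.windowLog, cparams.chainLog, cparams.hashLog,
               cparams.searchLog);
        printf("searchLength=%u targetLength=%u strategy=%d\n",
               cparams.searchLength, cparams.targetLength, (int)cparams.strategy);
        return 0;
    }
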
51 static int CompressionParameters_init(CompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
51 static int CompressionParameters_init(CompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
52 static char* kwlist[] = {
52 static char* kwlist[] = {
53 "window_log",
53 "window_log",
54 "chain_log",
54 "chain_log",
55 "hash_log",
55 "hash_log",
56 "search_log",
56 "search_log",
57 "search_length",
57 "search_length",
58 "target_length",
58 "target_length",
59 "strategy",
59 "strategy",
60 NULL
60 NULL
61 };
61 };
62
62
63 unsigned windowLog;
63 unsigned windowLog;
64 unsigned chainLog;
64 unsigned chainLog;
65 unsigned hashLog;
65 unsigned hashLog;
66 unsigned searchLog;
66 unsigned searchLog;
67 unsigned searchLength;
67 unsigned searchLength;
68 unsigned targetLength;
68 unsigned targetLength;
69 unsigned strategy;
69 unsigned strategy;
70 ZSTD_compressionParameters params;
71 size_t zresult;
70
72
71 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "IIIIIII:CompressionParameters",
73 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "IIIIIII:CompressionParameters",
72 kwlist, &windowLog, &chainLog, &hashLog, &searchLog, &searchLength,
74 kwlist, &windowLog, &chainLog, &hashLog, &searchLog, &searchLength,
73 &targetLength, &strategy)) {
75 &targetLength, &strategy)) {
74 return -1;
76 return -1;
75 }
77 }
76
78
77 if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) {
79 if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) {
78 PyErr_SetString(PyExc_ValueError, "invalid window log value");
80 PyErr_SetString(PyExc_ValueError, "invalid window log value");
79 return -1;
81 return -1;
80 }
82 }
81
83
82 if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) {
84 if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) {
83 PyErr_SetString(PyExc_ValueError, "invalid chain log value");
85 PyErr_SetString(PyExc_ValueError, "invalid chain log value");
84 return -1;
86 return -1;
85 }
87 }
86
88
87 if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) {
89 if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) {
88 PyErr_SetString(PyExc_ValueError, "invalid hash log value");
90 PyErr_SetString(PyExc_ValueError, "invalid hash log value");
89 return -1;
91 return -1;
90 }
92 }
91
93
92 if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) {
94 if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) {
93 PyErr_SetString(PyExc_ValueError, "invalid search log value");
95 PyErr_SetString(PyExc_ValueError, "invalid search log value");
94 return -1;
96 return -1;
95 }
97 }
96
98
97 if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) {
99 if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) {
98 PyErr_SetString(PyExc_ValueError, "invalid search length value");
100 PyErr_SetString(PyExc_ValueError, "invalid search length value");
99 return -1;
101 return -1;
100 }
102 }
101
103
102 if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) {
104 if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) {
103 PyErr_SetString(PyExc_ValueError, "invalid target length value");
105 PyErr_SetString(PyExc_ValueError, "invalid target length value");
104 return -1;
106 return -1;
105 }
107 }
106
108
107 if (strategy < ZSTD_fast || strategy > ZSTD_btopt) {
109 if (strategy < ZSTD_fast || strategy > ZSTD_btopt) {
108 PyErr_SetString(PyExc_ValueError, "invalid strategy value");
110 PyErr_SetString(PyExc_ValueError, "invalid strategy value");
109 return -1;
111 return -1;
110 }
112 }
111
113
112 self->windowLog = windowLog;
114 self->windowLog = windowLog;
113 self->chainLog = chainLog;
115 self->chainLog = chainLog;
114 self->hashLog = hashLog;
116 self->hashLog = hashLog;
115 self->searchLog = searchLog;
117 self->searchLog = searchLog;
116 self->searchLength = searchLength;
118 self->searchLength = searchLength;
117 self->targetLength = targetLength;
119 self->targetLength = targetLength;
118 self->strategy = strategy;
120 self->strategy = strategy;
119
121
122 ztopy_compression_parameters(self, &params);
123 zresult = ZSTD_checkCParams(params);
124
125 if (ZSTD_isError(zresult)) {
126 PyErr_Format(PyExc_ValueError, "invalid compression parameters: %s",
127 ZSTD_getErrorName(zresult));
128 return -1;
129 }
130
120 return 0;
131 return 0;
121 }
132 }
122
133
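
The ZSTD_checkCParams() call added on the new side is what catches combinations that pass the individual range checks above but are still rejected by the library. A hedged stand-alone sketch of that validation step (same zstd version assumption as above; validate_cparams is an illustrative name, not an extension function):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdio.h>

    /* Returns 0 when the hand-assembled parameters are coherent. */
    static int validate_cparams(ZSTD_compressionParameters cparams) {
        size_t zresult = ZSTD_checkCParams(cparams);
        if (ZSTD_isError(zresult)) {
            fprintf(stderr, "invalid compression parameters: %s\n",
                    ZSTD_getErrorName(zresult));
            return -1;
        }
        return 0;
    }
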
134 PyDoc_STRVAR(CompressionParameters_estimated_compression_context_size__doc__,
135 "Estimate the size in bytes of a compression context for compression parameters\n"
136 );
137
138 PyObject* CompressionParameters_estimated_compression_context_size(CompressionParametersObject* self) {
139 ZSTD_compressionParameters params;
140
141 ztopy_compression_parameters(self, &params);
142
143 return PyLong_FromSize_t(ZSTD_estimateCCtxSize(params));
144 }
145
123 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) {
146 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) {
124 CompressionParametersObject* params;
147 CompressionParametersObject* params;
125 ZSTD_compressionParameters zparams;
148 ZSTD_compressionParameters zparams;
126 PyObject* result;
149 PyObject* result;
127
150
128 if (!PyArg_ParseTuple(args, "O!:estimate_compression_context_size",
151 if (!PyArg_ParseTuple(args, "O!:estimate_compression_context_size",
129 &CompressionParametersType, &params)) {
152 &CompressionParametersType, &params)) {
130 return NULL;
153 return NULL;
131 }
154 }
132
155
133 ztopy_compression_parameters(params, &zparams);
156 ztopy_compression_parameters(params, &zparams);
134 result = PyLong_FromSize_t(ZSTD_estimateCCtxSize(zparams));
157 result = PyLong_FromSize_t(ZSTD_estimateCCtxSize(zparams));
135 return result;
158 return result;
136 }
159 }
137
160
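
Both of the entry points above end in ZSTD_estimateCCtxSize(), which in the zstd release vendored here takes a ZSTD_compressionParameters value (later zstd releases changed that signature, so this sketch assumes the bundled version):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdio.h>

    int main(void) {
        ZSTD_compressionParameters cparams = ZSTD_getCParams(3, 0, 0);
        /* Rough upper bound on the memory one compression context will need. */
        size_t ctxSize = ZSTD_estimateCCtxSize(cparams);
        printf("estimated CCtx size: %zu bytes\n", ctxSize);
        return 0;
    }
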
138 PyDoc_STRVAR(CompressionParameters__doc__,
161 PyDoc_STRVAR(CompressionParameters__doc__,
139 "CompressionParameters: low-level control over zstd compression");
162 "CompressionParameters: low-level control over zstd compression");
140
163
141 static void CompressionParameters_dealloc(PyObject* self) {
164 static void CompressionParameters_dealloc(PyObject* self) {
142 PyObject_Del(self);
165 PyObject_Del(self);
143 }
166 }
144
167
168 static PyMethodDef CompressionParameters_methods[] = {
169 {
170 "estimated_compression_context_size",
171 (PyCFunction)CompressionParameters_estimated_compression_context_size,
172 METH_NOARGS,
173 CompressionParameters_estimated_compression_context_size__doc__
174 },
175 { NULL, NULL }
176 };
177
145 static PyMemberDef CompressionParameters_members[] = {
178 static PyMemberDef CompressionParameters_members[] = {
146 { "window_log", T_UINT,
179 { "window_log", T_UINT,
147 offsetof(CompressionParametersObject, windowLog), READONLY,
180 offsetof(CompressionParametersObject, windowLog), READONLY,
148 "window log" },
181 "window log" },
149 { "chain_log", T_UINT,
182 { "chain_log", T_UINT,
150 offsetof(CompressionParametersObject, chainLog), READONLY,
183 offsetof(CompressionParametersObject, chainLog), READONLY,
151 "chain log" },
184 "chain log" },
152 { "hash_log", T_UINT,
185 { "hash_log", T_UINT,
153 offsetof(CompressionParametersObject, hashLog), READONLY,
186 offsetof(CompressionParametersObject, hashLog), READONLY,
154 "hash log" },
187 "hash log" },
155 { "search_log", T_UINT,
188 { "search_log", T_UINT,
156 offsetof(CompressionParametersObject, searchLog), READONLY,
189 offsetof(CompressionParametersObject, searchLog), READONLY,
157 "search log" },
190 "search log" },
158 { "search_length", T_UINT,
191 { "search_length", T_UINT,
159 offsetof(CompressionParametersObject, searchLength), READONLY,
192 offsetof(CompressionParametersObject, searchLength), READONLY,
160 "search length" },
193 "search length" },
161 { "target_length", T_UINT,
194 { "target_length", T_UINT,
162 offsetof(CompressionParametersObject, targetLength), READONLY,
195 offsetof(CompressionParametersObject, targetLength), READONLY,
163 "target length" },
196 "target length" },
164 { "strategy", T_INT,
197 { "strategy", T_INT,
165 offsetof(CompressionParametersObject, strategy), READONLY,
198 offsetof(CompressionParametersObject, strategy), READONLY,
166 "strategy" },
199 "strategy" },
167 { NULL }
200 { NULL }
168 };
201 };
169
202
170 PyTypeObject CompressionParametersType = {
203 PyTypeObject CompressionParametersType = {
171 PyVarObject_HEAD_INIT(NULL, 0)
204 PyVarObject_HEAD_INIT(NULL, 0)
172 "CompressionParameters", /* tp_name */
205 "CompressionParameters", /* tp_name */
173 sizeof(CompressionParametersObject), /* tp_basicsize */
206 sizeof(CompressionParametersObject), /* tp_basicsize */
174 0, /* tp_itemsize */
207 0, /* tp_itemsize */
175 (destructor)CompressionParameters_dealloc, /* tp_dealloc */
208 (destructor)CompressionParameters_dealloc, /* tp_dealloc */
176 0, /* tp_print */
209 0, /* tp_print */
177 0, /* tp_getattr */
210 0, /* tp_getattr */
178 0, /* tp_setattr */
211 0, /* tp_setattr */
179 0, /* tp_compare */
212 0, /* tp_compare */
180 0, /* tp_repr */
213 0, /* tp_repr */
181 0, /* tp_as_number */
214 0, /* tp_as_number */
182 0, /* tp_as_sequence */
215 0, /* tp_as_sequence */
183 0, /* tp_as_mapping */
216 0, /* tp_as_mapping */
184 0, /* tp_hash */
217 0, /* tp_hash */
185 0, /* tp_call */
218 0, /* tp_call */
186 0, /* tp_str */
219 0, /* tp_str */
187 0, /* tp_getattro */
220 0, /* tp_getattro */
188 0, /* tp_setattro */
221 0, /* tp_setattro */
189 0, /* tp_as_buffer */
222 0, /* tp_as_buffer */
190 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
223 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
191 CompressionParameters__doc__, /* tp_doc */
224 CompressionParameters__doc__, /* tp_doc */
192 0, /* tp_traverse */
225 0, /* tp_traverse */
193 0, /* tp_clear */
226 0, /* tp_clear */
194 0, /* tp_richcompare */
227 0, /* tp_richcompare */
195 0, /* tp_weaklistoffset */
228 0, /* tp_weaklistoffset */
196 0, /* tp_iter */
229 0, /* tp_iter */
197 0, /* tp_iternext */
230 0, /* tp_iternext */
198 0, /* tp_methods */
231 CompressionParameters_methods, /* tp_methods */
199 CompressionParameters_members, /* tp_members */
232 CompressionParameters_members, /* tp_members */
200 0, /* tp_getset */
233 0, /* tp_getset */
201 0, /* tp_base */
234 0, /* tp_base */
202 0, /* tp_dict */
235 0, /* tp_dict */
203 0, /* tp_descr_get */
236 0, /* tp_descr_get */
204 0, /* tp_descr_set */
237 0, /* tp_descr_set */
205 0, /* tp_dictoffset */
238 0, /* tp_dictoffset */
206 (initproc)CompressionParameters_init, /* tp_init */
239 (initproc)CompressionParameters_init, /* tp_init */
207 0, /* tp_alloc */
240 0, /* tp_alloc */
208 PyType_GenericNew, /* tp_new */
241 PyType_GenericNew, /* tp_new */
209 };
242 };
210
243
211 void compressionparams_module_init(PyObject* mod) {
244 void compressionparams_module_init(PyObject* mod) {
212 Py_TYPE(&CompressionParametersType) = &PyType_Type;
245 Py_TYPE(&CompressionParametersType) = &PyType_Type;
213 if (PyType_Ready(&CompressionParametersType) < 0) {
246 if (PyType_Ready(&CompressionParametersType) < 0) {
214 return;
247 return;
215 }
248 }
216
249
217 Py_IncRef((PyObject*)&CompressionParametersType);
250 Py_INCREF(&CompressionParametersType);
218 PyModule_AddObject(mod, "CompressionParameters",
251 PyModule_AddObject(mod, "CompressionParameters",
219 (PyObject*)&CompressionParametersType);
252 (PyObject*)&CompressionParametersType);
220 }
253 }
@@ -1,290 +1,305
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(ZstdCompresssionWriter__doc__,
13 PyDoc_STRVAR(ZstdCompresssionWriter__doc__,
14 """A context manager used for writing compressed output to a writer.\n"
14 """A context manager used for writing compressed output to a writer.\n"
15 );
15 );
16
16
17 static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* self) {
17 static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* self) {
18 Py_XDECREF(self->compressor);
18 Py_XDECREF(self->compressor);
19 Py_XDECREF(self->writer);
19 Py_XDECREF(self->writer);
20
20
21 if (self->cstream) {
22 ZSTD_freeCStream(self->cstream);
23 self->cstream = NULL;
24 }
25
26 PyObject_Del(self);
21 PyObject_Del(self);
27 }
22 }
28
23
29 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
24 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
30 if (self->entered) {
25 if (self->entered) {
31 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
26 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
32 return NULL;
27 return NULL;
33 }
28 }
34
29
35 self->cstream = CStream_from_ZstdCompressor(self->compressor, self->sourceSize);
30 if (self->compressor->mtcctx) {
36 if (!self->cstream) {
31 if (init_mtcstream(self->compressor, self->sourceSize)) {
37 return NULL;
32 return NULL;
33 }
34 }
35 else {
36 if (0 != init_cstream(self->compressor, self->sourceSize)) {
37 return NULL;
38 }
38 }
39 }
39
40
40 self->entered = 1;
41 self->entered = 1;
41
42
42 Py_INCREF(self);
43 Py_INCREF(self);
43 return (PyObject*)self;
44 return (PyObject*)self;
44 }
45 }
45
46
46 static PyObject* ZstdCompressionWriter_exit(ZstdCompressionWriter* self, PyObject* args) {
47 static PyObject* ZstdCompressionWriter_exit(ZstdCompressionWriter* self, PyObject* args) {
47 PyObject* exc_type;
48 PyObject* exc_type;
48 PyObject* exc_value;
49 PyObject* exc_value;
49 PyObject* exc_tb;
50 PyObject* exc_tb;
50 size_t zresult;
51 size_t zresult;
51
52
52 ZSTD_outBuffer output;
53 ZSTD_outBuffer output;
53 PyObject* res;
54 PyObject* res;
54
55
55 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
56 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
56 return NULL;
57 return NULL;
57 }
58 }
58
59
59 self->entered = 0;
60 self->entered = 0;
60
61
61 if (self->cstream && exc_type == Py_None && exc_value == Py_None &&
62 if ((self->compressor->cstream || self->compressor->mtcctx) && exc_type == Py_None
62 exc_tb == Py_None) {
63 && exc_value == Py_None && exc_tb == Py_None) {
63
64
64 output.dst = PyMem_Malloc(self->outSize);
65 output.dst = PyMem_Malloc(self->outSize);
65 if (!output.dst) {
66 if (!output.dst) {
66 return PyErr_NoMemory();
67 return PyErr_NoMemory();
67 }
68 }
68 output.size = self->outSize;
69 output.size = self->outSize;
69 output.pos = 0;
70 output.pos = 0;
70
71
71 while (1) {
72 while (1) {
72 zresult = ZSTD_endStream(self->cstream, &output);
73 if (self->compressor->mtcctx) {
74 zresult = ZSTDMT_endStream(self->compressor->mtcctx, &output);
75 }
76 else {
77 zresult = ZSTD_endStream(self->compressor->cstream, &output);
78 }
73 if (ZSTD_isError(zresult)) {
79 if (ZSTD_isError(zresult)) {
74 PyErr_Format(ZstdError, "error ending compression stream: %s",
80 PyErr_Format(ZstdError, "error ending compression stream: %s",
75 ZSTD_getErrorName(zresult));
81 ZSTD_getErrorName(zresult));
76 PyMem_Free(output.dst);
82 PyMem_Free(output.dst);
77 return NULL;
83 return NULL;
78 }
84 }
79
85
80 if (output.pos) {
86 if (output.pos) {
81 #if PY_MAJOR_VERSION >= 3
87 #if PY_MAJOR_VERSION >= 3
82 res = PyObject_CallMethod(self->writer, "write", "y#",
88 res = PyObject_CallMethod(self->writer, "write", "y#",
83 #else
89 #else
84 res = PyObject_CallMethod(self->writer, "write", "s#",
90 res = PyObject_CallMethod(self->writer, "write", "s#",
85 #endif
91 #endif
86 output.dst, output.pos);
92 output.dst, output.pos);
87 Py_XDECREF(res);
93 Py_XDECREF(res);
88 }
94 }
89
95
90 if (!zresult) {
96 if (!zresult) {
91 break;
97 break;
92 }
98 }
93
99
94 output.pos = 0;
100 output.pos = 0;
95 }
101 }
96
102
97 PyMem_Free(output.dst);
103 PyMem_Free(output.dst);
98 ZSTD_freeCStream(self->cstream);
99 self->cstream = NULL;
100 }
104 }
101
105
102 Py_RETURN_FALSE;
106 Py_RETURN_FALSE;
103 }
107 }
104
108
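
__exit__() above finishes the zstd frame by looping on endStream: each pass refills a fixed-size output buffer, the buffer is handed to the writer, and the loop stops once the call returns 0, meaning the frame epilogue has been fully emitted. A stripped-down C sketch of that drain pattern against the single-threaded streaming API (end_frame is an illustrative helper; error reporting is reduced to a return code):

    #include <stdio.h>
    #include <zstd.h>

    static int end_frame(ZSTD_CStream* cstream, FILE* out) {
        char buf[4096];   /* stand-in for ZSTD_CStreamOutSize() */
        size_t remaining;

        do {
            ZSTD_outBuffer output = { buf, sizeof(buf), 0 };
            remaining = ZSTD_endStream(cstream, &output);
            if (ZSTD_isError(remaining)) {
                return -1;   /* caller can report ZSTD_getErrorName(remaining) */
            }
            if (output.pos) {
                fwrite(output.dst, 1, output.pos, out);
            }
        } while (remaining != 0);   /* 0: nothing left to flush */

        return 0;
    }
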
105 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
109 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
106 if (!self->cstream) {
110 if (!self->compressor->cstream) {
107 PyErr_SetString(ZstdError, "cannot determine size of an inactive compressor; "
111 PyErr_SetString(ZstdError, "cannot determine size of an inactive compressor; "
108 "call when a context manager is active");
112 "call when a context manager is active");
109 return NULL;
113 return NULL;
110 }
114 }
111
115
112 return PyLong_FromSize_t(ZSTD_sizeof_CStream(self->cstream));
116 return PyLong_FromSize_t(ZSTD_sizeof_CStream(self->compressor->cstream));
113 }
117 }
114
118
115 static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args) {
119 static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args) {
116 const char* source;
120 const char* source;
117 Py_ssize_t sourceSize;
121 Py_ssize_t sourceSize;
118 size_t zresult;
122 size_t zresult;
119 ZSTD_inBuffer input;
123 ZSTD_inBuffer input;
120 ZSTD_outBuffer output;
124 ZSTD_outBuffer output;
121 PyObject* res;
125 PyObject* res;
122 Py_ssize_t totalWrite = 0;
126 Py_ssize_t totalWrite = 0;
123
127
124 #if PY_MAJOR_VERSION >= 3
128 #if PY_MAJOR_VERSION >= 3
125 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
129 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
126 #else
130 #else
127 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
131 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
128 #endif
132 #endif
129 return NULL;
133 return NULL;
130 }
134 }
131
135
132 if (!self->entered) {
136 if (!self->entered) {
133 PyErr_SetString(ZstdError, "compress must be called from an active context manager");
137 PyErr_SetString(ZstdError, "compress must be called from an active context manager");
134 return NULL;
138 return NULL;
135 }
139 }
136
140
137 output.dst = PyMem_Malloc(self->outSize);
141 output.dst = PyMem_Malloc(self->outSize);
138 if (!output.dst) {
142 if (!output.dst) {
139 return PyErr_NoMemory();
143 return PyErr_NoMemory();
140 }
144 }
141 output.size = self->outSize;
145 output.size = self->outSize;
142 output.pos = 0;
146 output.pos = 0;
143
147
144 input.src = source;
148 input.src = source;
145 input.size = sourceSize;
149 input.size = sourceSize;
146 input.pos = 0;
150 input.pos = 0;
147
151
148 while ((ssize_t)input.pos < sourceSize) {
152 while ((ssize_t)input.pos < sourceSize) {
149 Py_BEGIN_ALLOW_THREADS
153 Py_BEGIN_ALLOW_THREADS
150 zresult = ZSTD_compressStream(self->cstream, &output, &input);
154 if (self->compressor->mtcctx) {
155 zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
156 &output, &input);
157 }
158 else {
159 zresult = ZSTD_compressStream(self->compressor->cstream, &output, &input);
160 }
151 Py_END_ALLOW_THREADS
161 Py_END_ALLOW_THREADS
152
162
153 if (ZSTD_isError(zresult)) {
163 if (ZSTD_isError(zresult)) {
154 PyMem_Free(output.dst);
164 PyMem_Free(output.dst);
155 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
165 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
156 return NULL;
166 return NULL;
157 }
167 }
158
168
159 /* Copy data from output buffer to writer. */
169 /* Copy data from output buffer to writer. */
160 if (output.pos) {
170 if (output.pos) {
161 #if PY_MAJOR_VERSION >= 3
171 #if PY_MAJOR_VERSION >= 3
162 res = PyObject_CallMethod(self->writer, "write", "y#",
172 res = PyObject_CallMethod(self->writer, "write", "y#",
163 #else
173 #else
164 res = PyObject_CallMethod(self->writer, "write", "s#",
174 res = PyObject_CallMethod(self->writer, "write", "s#",
165 #endif
175 #endif
166 output.dst, output.pos);
176 output.dst, output.pos);
167 Py_XDECREF(res);
177 Py_XDECREF(res);
168 totalWrite += output.pos;
178 totalWrite += output.pos;
169 }
179 }
170 output.pos = 0;
180 output.pos = 0;
171 }
181 }
172
182
173 PyMem_Free(output.dst);
183 PyMem_Free(output.dst);
174
184
175 return PyLong_FromSsize_t(totalWrite);
185 return PyLong_FromSsize_t(totalWrite);
176 }
186 }
177
187
178 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
188 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
179 size_t zresult;
189 size_t zresult;
180 ZSTD_outBuffer output;
190 ZSTD_outBuffer output;
181 PyObject* res;
191 PyObject* res;
182 Py_ssize_t totalWrite = 0;
192 Py_ssize_t totalWrite = 0;
183
193
184 if (!self->entered) {
194 if (!self->entered) {
185 PyErr_SetString(ZstdError, "flush must be called from an active context manager");
195 PyErr_SetString(ZstdError, "flush must be called from an active context manager");
186 return NULL;
196 return NULL;
187 }
197 }
188
198
189 output.dst = PyMem_Malloc(self->outSize);
199 output.dst = PyMem_Malloc(self->outSize);
190 if (!output.dst) {
200 if (!output.dst) {
191 return PyErr_NoMemory();
201 return PyErr_NoMemory();
192 }
202 }
193 output.size = self->outSize;
203 output.size = self->outSize;
194 output.pos = 0;
204 output.pos = 0;
195
205
196 while (1) {
206 while (1) {
197 Py_BEGIN_ALLOW_THREADS
207 Py_BEGIN_ALLOW_THREADS
198 zresult = ZSTD_flushStream(self->cstream, &output);
208 if (self->compressor->mtcctx) {
209 zresult = ZSTDMT_flushStream(self->compressor->mtcctx, &output);
210 }
211 else {
212 zresult = ZSTD_flushStream(self->compressor->cstream, &output);
213 }
199 Py_END_ALLOW_THREADS
214 Py_END_ALLOW_THREADS
200
215
201 if (ZSTD_isError(zresult)) {
216 if (ZSTD_isError(zresult)) {
202 PyMem_Free(output.dst);
217 PyMem_Free(output.dst);
203 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
218 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
204 return NULL;
219 return NULL;
205 }
220 }
206
221
207 if (!output.pos) {
222 if (!output.pos) {
208 break;
223 break;
209 }
224 }
210
225
211 /* Copy data from output buffer to writer. */
226 /* Copy data from output buffer to writer. */
212 if (output.pos) {
227 if (output.pos) {
213 #if PY_MAJOR_VERSION >= 3
228 #if PY_MAJOR_VERSION >= 3
214 res = PyObject_CallMethod(self->writer, "write", "y#",
229 res = PyObject_CallMethod(self->writer, "write", "y#",
215 #else
230 #else
216 res = PyObject_CallMethod(self->writer, "write", "s#",
231 res = PyObject_CallMethod(self->writer, "write", "s#",
217 #endif
232 #endif
218 output.dst, output.pos);
233 output.dst, output.pos);
219 Py_XDECREF(res);
234 Py_XDECREF(res);
220 totalWrite += output.pos;
235 totalWrite += output.pos;
221 }
236 }
222 output.pos = 0;
237 output.pos = 0;
223 }
238 }
224
239
225 PyMem_Free(output.dst);
240 PyMem_Free(output.dst);
226
241
227 return PyLong_FromSsize_t(totalWrite);
242 return PyLong_FromSsize_t(totalWrite);
228 }
243 }
229
244
230 static PyMethodDef ZstdCompressionWriter_methods[] = {
245 static PyMethodDef ZstdCompressionWriter_methods[] = {
231 { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
246 { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
232 PyDoc_STR("Enter a compression context.") },
247 PyDoc_STR("Enter a compression context.") },
233 { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS,
248 { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS,
234 PyDoc_STR("Exit a compression context.") },
249 PyDoc_STR("Exit a compression context.") },
235 { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
250 { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
236 PyDoc_STR("Obtain the memory size of the underlying compressor") },
251 PyDoc_STR("Obtain the memory size of the underlying compressor") },
237 { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS,
252 { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS,
238 PyDoc_STR("Compress data") },
253 PyDoc_STR("Compress data") },
239 { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_NOARGS,
254 { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_NOARGS,
240 PyDoc_STR("Flush data and finish a zstd frame") },
255 PyDoc_STR("Flush data and finish a zstd frame") },
241 { NULL, NULL }
256 { NULL, NULL }
242 };
257 };
243
258
244 PyTypeObject ZstdCompressionWriterType = {
259 PyTypeObject ZstdCompressionWriterType = {
245 PyVarObject_HEAD_INIT(NULL, 0)
260 PyVarObject_HEAD_INIT(NULL, 0)
246 "zstd.ZstdCompressionWriter", /* tp_name */
261 "zstd.ZstdCompressionWriter", /* tp_name */
247 sizeof(ZstdCompressionWriter), /* tp_basicsize */
262 sizeof(ZstdCompressionWriter), /* tp_basicsize */
248 0, /* tp_itemsize */
263 0, /* tp_itemsize */
249 (destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */
264 (destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */
250 0, /* tp_print */
265 0, /* tp_print */
251 0, /* tp_getattr */
266 0, /* tp_getattr */
252 0, /* tp_setattr */
267 0, /* tp_setattr */
253 0, /* tp_compare */
268 0, /* tp_compare */
254 0, /* tp_repr */
269 0, /* tp_repr */
255 0, /* tp_as_number */
270 0, /* tp_as_number */
256 0, /* tp_as_sequence */
271 0, /* tp_as_sequence */
257 0, /* tp_as_mapping */
272 0, /* tp_as_mapping */
258 0, /* tp_hash */
273 0, /* tp_hash */
259 0, /* tp_call */
274 0, /* tp_call */
260 0, /* tp_str */
275 0, /* tp_str */
261 0, /* tp_getattro */
276 0, /* tp_getattro */
262 0, /* tp_setattro */
277 0, /* tp_setattro */
263 0, /* tp_as_buffer */
278 0, /* tp_as_buffer */
264 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
279 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
265 ZstdCompresssionWriter__doc__, /* tp_doc */
280 ZstdCompresssionWriter__doc__, /* tp_doc */
266 0, /* tp_traverse */
281 0, /* tp_traverse */
267 0, /* tp_clear */
282 0, /* tp_clear */
268 0, /* tp_richcompare */
283 0, /* tp_richcompare */
269 0, /* tp_weaklistoffset */
284 0, /* tp_weaklistoffset */
270 0, /* tp_iter */
285 0, /* tp_iter */
271 0, /* tp_iternext */
286 0, /* tp_iternext */
272 ZstdCompressionWriter_methods, /* tp_methods */
287 ZstdCompressionWriter_methods, /* tp_methods */
273 0, /* tp_members */
288 0, /* tp_members */
274 0, /* tp_getset */
289 0, /* tp_getset */
275 0, /* tp_base */
290 0, /* tp_base */
276 0, /* tp_dict */
291 0, /* tp_dict */
277 0, /* tp_descr_get */
292 0, /* tp_descr_get */
278 0, /* tp_descr_set */
293 0, /* tp_descr_set */
279 0, /* tp_dictoffset */
294 0, /* tp_dictoffset */
280 0, /* tp_init */
295 0, /* tp_init */
281 0, /* tp_alloc */
296 0, /* tp_alloc */
282 PyType_GenericNew, /* tp_new */
297 PyType_GenericNew, /* tp_new */
283 };
298 };
284
299
285 void compressionwriter_module_init(PyObject* mod) {
300 void compressionwriter_module_init(PyObject* mod) {
286 Py_TYPE(&ZstdCompressionWriterType) = &PyType_Type;
301 Py_TYPE(&ZstdCompressionWriterType) = &PyType_Type;
287 if (PyType_Ready(&ZstdCompressionWriterType) < 0) {
302 if (PyType_Ready(&ZstdCompressionWriterType) < 0) {
288 return;
303 return;
289 }
304 }
290 }
305 }
@@ -1,250 +1,258
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(ZstdCompressionObj__doc__,
13 PyDoc_STRVAR(ZstdCompressionObj__doc__,
14 "Perform compression using a standard library compatible API.\n"
14 "Perform compression using a standard library compatible API.\n"
15 );
15 );
16
16
17 static void ZstdCompressionObj_dealloc(ZstdCompressionObj* self) {
17 static void ZstdCompressionObj_dealloc(ZstdCompressionObj* self) {
18 PyMem_Free(self->output.dst);
18 PyMem_Free(self->output.dst);
19 self->output.dst = NULL;
19 self->output.dst = NULL;
20
20
21 if (self->cstream) {
22 ZSTD_freeCStream(self->cstream);
23 self->cstream = NULL;
24 }
25
26 Py_XDECREF(self->compressor);
21 Py_XDECREF(self->compressor);
27
22
28 PyObject_Del(self);
23 PyObject_Del(self);
29 }
24 }
30
25
31 static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args) {
26 static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args) {
32 const char* source;
27 const char* source;
33 Py_ssize_t sourceSize;
28 Py_ssize_t sourceSize;
34 ZSTD_inBuffer input;
29 ZSTD_inBuffer input;
35 size_t zresult;
30 size_t zresult;
36 PyObject* result = NULL;
31 PyObject* result = NULL;
37 Py_ssize_t resultSize = 0;
32 Py_ssize_t resultSize = 0;
38
33
39 if (self->finished) {
34 if (self->finished) {
40 PyErr_SetString(ZstdError, "cannot call compress() after compressor finished");
35 PyErr_SetString(ZstdError, "cannot call compress() after compressor finished");
41 return NULL;
36 return NULL;
42 }
37 }
43
38
44 #if PY_MAJOR_VERSION >= 3
39 #if PY_MAJOR_VERSION >= 3
45 if (!PyArg_ParseTuple(args, "y#:compress", &source, &sourceSize)) {
40 if (!PyArg_ParseTuple(args, "y#:compress", &source, &sourceSize)) {
46 #else
41 #else
47 if (!PyArg_ParseTuple(args, "s#:compress", &source, &sourceSize)) {
42 if (!PyArg_ParseTuple(args, "s#:compress", &source, &sourceSize)) {
48 #endif
43 #endif
49 return NULL;
44 return NULL;
50 }
45 }
51
46
52 input.src = source;
47 input.src = source;
53 input.size = sourceSize;
48 input.size = sourceSize;
54 input.pos = 0;
49 input.pos = 0;
55
50
56 while ((ssize_t)input.pos < sourceSize) {
51 while ((ssize_t)input.pos < sourceSize) {
57 Py_BEGIN_ALLOW_THREADS
52 Py_BEGIN_ALLOW_THREADS
58 zresult = ZSTD_compressStream(self->cstream, &self->output, &input);
53 if (self->compressor->mtcctx) {
54 zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
55 &self->output, &input);
56 }
57 else {
58 zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, &input);
59 }
59 Py_END_ALLOW_THREADS
60 Py_END_ALLOW_THREADS
60
61
61 if (ZSTD_isError(zresult)) {
62 if (ZSTD_isError(zresult)) {
62 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
63 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
63 return NULL;
64 return NULL;
64 }
65 }
65
66
66 if (self->output.pos) {
67 if (self->output.pos) {
67 if (result) {
68 if (result) {
68 resultSize = PyBytes_GET_SIZE(result);
69 resultSize = PyBytes_GET_SIZE(result);
69 if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
70 if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
70 return NULL;
71 return NULL;
71 }
72 }
72
73
73 memcpy(PyBytes_AS_STRING(result) + resultSize,
74 memcpy(PyBytes_AS_STRING(result) + resultSize,
74 self->output.dst, self->output.pos);
75 self->output.dst, self->output.pos);
75 }
76 }
76 else {
77 else {
77 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
78 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
78 if (!result) {
79 if (!result) {
79 return NULL;
80 return NULL;
80 }
81 }
81 }
82 }
82
83
83 self->output.pos = 0;
84 self->output.pos = 0;
84 }
85 }
85 }
86 }
86
87
87 if (result) {
88 if (result) {
88 return result;
89 return result;
89 }
90 }
90 else {
91 else {
91 return PyBytes_FromString("");
92 return PyBytes_FromString("");
92 }
93 }
93 }
94 }
94
95
95 static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args) {
96 static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args) {
96 int flushMode = compressorobj_flush_finish;
97 int flushMode = compressorobj_flush_finish;
97 size_t zresult;
98 size_t zresult;
98 PyObject* result = NULL;
99 PyObject* result = NULL;
99 Py_ssize_t resultSize = 0;
100 Py_ssize_t resultSize = 0;
100
101
101 if (!PyArg_ParseTuple(args, "|i:flush", &flushMode)) {
102 if (!PyArg_ParseTuple(args, "|i:flush", &flushMode)) {
102 return NULL;
103 return NULL;
103 }
104 }
104
105
105 if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) {
106 if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) {
106 PyErr_SetString(PyExc_ValueError, "flush mode not recognized");
107 PyErr_SetString(PyExc_ValueError, "flush mode not recognized");
107 return NULL;
108 return NULL;
108 }
109 }
109
110
110 if (self->finished) {
111 if (self->finished) {
111 PyErr_SetString(ZstdError, "compressor object already finished");
112 PyErr_SetString(ZstdError, "compressor object already finished");
112 return NULL;
113 return NULL;
113 }
114 }
114
115
115 assert(self->output.pos == 0);
116 assert(self->output.pos == 0);
116
117
117 if (flushMode == compressorobj_flush_block) {
118 if (flushMode == compressorobj_flush_block) {
118 /* The output buffer is of size ZSTD_CStreamOutSize(), which is
119 /* The output buffer is of size ZSTD_CStreamOutSize(), which is
119 guaranteed to hold a full block. */
120 guaranteed to hold a full block. */
120 Py_BEGIN_ALLOW_THREADS
121 Py_BEGIN_ALLOW_THREADS
121 zresult = ZSTD_flushStream(self->cstream, &self->output);
122 if (self->compressor->mtcctx) {
123 zresult = ZSTDMT_flushStream(self->compressor->mtcctx, &self->output);
124 }
125 else {
126 zresult = ZSTD_flushStream(self->compressor->cstream, &self->output);
127 }
122 Py_END_ALLOW_THREADS
128 Py_END_ALLOW_THREADS
123
129
124 if (ZSTD_isError(zresult)) {
130 if (ZSTD_isError(zresult)) {
125 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
131 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
126 return NULL;
132 return NULL;
127 }
133 }
128
134
129 /* Output buffer is guaranteed to hold full block. */
135 /* Output buffer is guaranteed to hold full block. */
130 assert(zresult == 0);
136 assert(zresult == 0);
131
137
132 if (self->output.pos) {
138 if (self->output.pos) {
133 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
139 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
134 if (!result) {
140 if (!result) {
135 return NULL;
141 return NULL;
136 }
142 }
137 }
143 }
138
144
139 self->output.pos = 0;
145 self->output.pos = 0;
140
146
141 if (result) {
147 if (result) {
142 return result;
148 return result;
143 }
149 }
144 else {
150 else {
145 return PyBytes_FromString("");
151 return PyBytes_FromString("");
146 }
152 }
147 }
153 }
148
154
149 assert(flushMode == compressorobj_flush_finish);
155 assert(flushMode == compressorobj_flush_finish);
150 self->finished = 1;
156 self->finished = 1;
151
157
152 while (1) {
158 while (1) {
153 zresult = ZSTD_endStream(self->cstream, &self->output);
159 if (self->compressor->mtcctx) {
160 zresult = ZSTDMT_endStream(self->compressor->mtcctx, &self->output);
161 }
162 else {
163 zresult = ZSTD_endStream(self->compressor->cstream, &self->output);
164 }
154 if (ZSTD_isError(zresult)) {
165 if (ZSTD_isError(zresult)) {
155 PyErr_Format(ZstdError, "error ending compression stream: %s",
166 PyErr_Format(ZstdError, "error ending compression stream: %s",
156 ZSTD_getErrorName(zresult));
167 ZSTD_getErrorName(zresult));
157 return NULL;
168 return NULL;
158 }
169 }
159
170
160 if (self->output.pos) {
171 if (self->output.pos) {
161 if (result) {
172 if (result) {
162 resultSize = PyBytes_GET_SIZE(result);
173 resultSize = PyBytes_GET_SIZE(result);
163 if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
174 if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
164 return NULL;
175 return NULL;
165 }
176 }
166
177
167 memcpy(PyBytes_AS_STRING(result) + resultSize,
178 memcpy(PyBytes_AS_STRING(result) + resultSize,
168 self->output.dst, self->output.pos);
179 self->output.dst, self->output.pos);
169 }
180 }
170 else {
181 else {
171 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
182 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
172 if (!result) {
183 if (!result) {
173 return NULL;
184 return NULL;
174 }
185 }
175 }
186 }
176
187
177 self->output.pos = 0;
188 self->output.pos = 0;
178 }
189 }
179
190
180 if (!zresult) {
191 if (!zresult) {
181 break;
192 break;
182 }
193 }
183 }
194 }
184
195
185 ZSTD_freeCStream(self->cstream);
186 self->cstream = NULL;
187
188 if (result) {
196 if (result) {
189 return result;
197 return result;
190 }
198 }
191 else {
199 else {
192 return PyBytes_FromString("");
200 return PyBytes_FromString("");
193 }
201 }
194 }
202 }
195
203
196 static PyMethodDef ZstdCompressionObj_methods[] = {
204 static PyMethodDef ZstdCompressionObj_methods[] = {
197 { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS,
205 { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS,
198 PyDoc_STR("compress data") },
206 PyDoc_STR("compress data") },
199 { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS,
207 { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS,
200 PyDoc_STR("finish compression operation") },
208 PyDoc_STR("finish compression operation") },
201 { NULL, NULL }
209 { NULL, NULL }
202 };
210 };
203
211
204 PyTypeObject ZstdCompressionObjType = {
212 PyTypeObject ZstdCompressionObjType = {
205 PyVarObject_HEAD_INIT(NULL, 0)
213 PyVarObject_HEAD_INIT(NULL, 0)
206 "zstd.ZstdCompressionObj", /* tp_name */
214 "zstd.ZstdCompressionObj", /* tp_name */
207 sizeof(ZstdCompressionObj), /* tp_basicsize */
215 sizeof(ZstdCompressionObj), /* tp_basicsize */
208 0, /* tp_itemsize */
216 0, /* tp_itemsize */
209 (destructor)ZstdCompressionObj_dealloc, /* tp_dealloc */
217 (destructor)ZstdCompressionObj_dealloc, /* tp_dealloc */
210 0, /* tp_print */
218 0, /* tp_print */
211 0, /* tp_getattr */
219 0, /* tp_getattr */
212 0, /* tp_setattr */
220 0, /* tp_setattr */
213 0, /* tp_compare */
221 0, /* tp_compare */
214 0, /* tp_repr */
222 0, /* tp_repr */
215 0, /* tp_as_number */
223 0, /* tp_as_number */
216 0, /* tp_as_sequence */
224 0, /* tp_as_sequence */
217 0, /* tp_as_mapping */
225 0, /* tp_as_mapping */
218 0, /* tp_hash */
226 0, /* tp_hash */
219 0, /* tp_call */
227 0, /* tp_call */
220 0, /* tp_str */
228 0, /* tp_str */
221 0, /* tp_getattro */
229 0, /* tp_getattro */
222 0, /* tp_setattro */
230 0, /* tp_setattro */
223 0, /* tp_as_buffer */
231 0, /* tp_as_buffer */
224 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
232 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
225 ZstdCompressionObj__doc__, /* tp_doc */
233 ZstdCompressionObj__doc__, /* tp_doc */
226 0, /* tp_traverse */
234 0, /* tp_traverse */
227 0, /* tp_clear */
235 0, /* tp_clear */
228 0, /* tp_richcompare */
236 0, /* tp_richcompare */
229 0, /* tp_weaklistoffset */
237 0, /* tp_weaklistoffset */
230 0, /* tp_iter */
238 0, /* tp_iter */
231 0, /* tp_iternext */
239 0, /* tp_iternext */
232 ZstdCompressionObj_methods, /* tp_methods */
240 ZstdCompressionObj_methods, /* tp_methods */
233 0, /* tp_members */
241 0, /* tp_members */
234 0, /* tp_getset */
242 0, /* tp_getset */
235 0, /* tp_base */
243 0, /* tp_base */
236 0, /* tp_dict */
244 0, /* tp_dict */
237 0, /* tp_descr_get */
245 0, /* tp_descr_get */
238 0, /* tp_descr_set */
246 0, /* tp_descr_set */
239 0, /* tp_dictoffset */
247 0, /* tp_dictoffset */
240 0, /* tp_init */
248 0, /* tp_init */
241 0, /* tp_alloc */
249 0, /* tp_alloc */
242 PyType_GenericNew, /* tp_new */
250 PyType_GenericNew, /* tp_new */
243 };
251 };
244
252
245 void compressobj_module_init(PyObject* module) {
253 void compressobj_module_init(PyObject* module) {
246 Py_TYPE(&ZstdCompressionObjType) = &PyType_Type;
254 Py_TYPE(&ZstdCompressionObjType) = &PyType_Type;
247 if (PyType_Ready(&ZstdCompressionObjType) < 0) {
255 if (PyType_Ready(&ZstdCompressionObjType) < 0) {
248 return;
256 return;
249 }
257 }
250 }
258 }
This diff has been collapsed as it changes many lines (957 lines changed).
@@ -1,791 +1,1544
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10 #include "pool.h"
10
11
11 extern PyObject* ZstdError;
12 extern PyObject* ZstdError;
12
13
13 int populate_cdict(ZstdCompressor* compressor, void* dictData, size_t dictSize, ZSTD_parameters* zparams) {
14 int populate_cdict(ZstdCompressor* compressor, ZSTD_parameters* zparams) {
14 ZSTD_customMem zmem;
15 ZSTD_customMem zmem;
15 assert(!compressor->cdict);
16
17 if (compressor->cdict || !compressor->dict || !compressor->dict->dictData) {
18 return 0;
19 }
20
16 Py_BEGIN_ALLOW_THREADS
21 Py_BEGIN_ALLOW_THREADS
17 memset(&zmem, 0, sizeof(zmem));
22 memset(&zmem, 0, sizeof(zmem));
18 compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData,
23 compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData,
19 compressor->dict->dictSize, 1, *zparams, zmem);
24 compressor->dict->dictSize, 1, *zparams, zmem);
20 Py_END_ALLOW_THREADS
25 Py_END_ALLOW_THREADS
21
26
22 if (!compressor->cdict) {
27 if (!compressor->cdict) {
23 PyErr_SetString(ZstdError, "could not create compression dictionary");
28 PyErr_SetString(ZstdError, "could not create compression dictionary");
24 return 1;
29 return 1;
25 }
30 }
26
31
27 return 0;
32 return 0;
28 }
33 }
29
34
30 /**
35 /**
31 * Initialize a zstd CStream from a ZstdCompressor instance.
36 * Ensure the ZSTD_CStream on a ZstdCompressor instance is initialized.
32 *
37 *
33 * Returns a ZSTD_CStream on success or NULL on failure. If NULL, a Python
38 * Returns 0 on success. Other value on failure. Will set a Python exception
34 * exception will be set.
39 * on failure.
35 */
40 */
36 ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize) {
41 int init_cstream(ZstdCompressor* compressor, unsigned long long sourceSize) {
37 ZSTD_CStream* cstream;
38 ZSTD_parameters zparams;
42 ZSTD_parameters zparams;
39 void* dictData = NULL;
43 void* dictData = NULL;
40 size_t dictSize = 0;
44 size_t dictSize = 0;
41 size_t zresult;
45 size_t zresult;
42
46
43 cstream = ZSTD_createCStream();
47 if (compressor->cstream) {
44 if (!cstream) {
48 zresult = ZSTD_resetCStream(compressor->cstream, sourceSize);
45 PyErr_SetString(ZstdError, "cannot create CStream");
49 if (ZSTD_isError(zresult)) {
46 return NULL;
50 PyErr_Format(ZstdError, "could not reset CStream: %s",
51 ZSTD_getErrorName(zresult));
52 return -1;
53 }
54
55 return 0;
56 }
57
58 compressor->cstream = ZSTD_createCStream();
59 if (!compressor->cstream) {
60 PyErr_SetString(ZstdError, "could not create CStream");
61 return -1;
47 }
62 }
48
63
49 if (compressor->dict) {
64 if (compressor->dict) {
50 dictData = compressor->dict->dictData;
65 dictData = compressor->dict->dictData;
51 dictSize = compressor->dict->dictSize;
66 dictSize = compressor->dict->dictSize;
52 }
67 }
53
68
54 memset(&zparams, 0, sizeof(zparams));
69 memset(&zparams, 0, sizeof(zparams));
55 if (compressor->cparams) {
70 if (compressor->cparams) {
56 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
71 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
57 /* Do NOT call ZSTD_adjustCParams() here because the compression params
72 /* Do NOT call ZSTD_adjustCParams() here because the compression params
58 come from the user. */
73 come from the user. */
59 }
74 }
60 else {
75 else {
61 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize);
76 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize);
62 }
77 }
63
78
64 zparams.fParams = compressor->fparams;
79 zparams.fParams = compressor->fparams;
65
80
66 zresult = ZSTD_initCStream_advanced(cstream, dictData, dictSize, zparams, sourceSize);
81 zresult = ZSTD_initCStream_advanced(compressor->cstream, dictData, dictSize,
82 zparams, sourceSize);
67
83
68 if (ZSTD_isError(zresult)) {
84 if (ZSTD_isError(zresult)) {
69 ZSTD_freeCStream(cstream);
85 ZSTD_freeCStream(compressor->cstream);
86 compressor->cstream = NULL;
70 PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
87 PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
71 return NULL;
88 return -1;
72 }
89 }
73
90
74 return cstream;
91 	return 0;
92 }
93
94 int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize) {
95 size_t zresult;
96 void* dictData = NULL;
97 size_t dictSize = 0;
98 ZSTD_parameters zparams;
99
100 assert(compressor->mtcctx);
101
102 if (compressor->dict) {
103 dictData = compressor->dict->dictData;
104 dictSize = compressor->dict->dictSize;
105 }
106
107 memset(&zparams, 0, sizeof(zparams));
108 if (compressor->cparams) {
109 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
110 }
111 else {
112 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize);
113 }
114
115 zparams.fParams = compressor->fparams;
116
117 zresult = ZSTDMT_initCStream_advanced(compressor->mtcctx, dictData, dictSize,
118 zparams, sourceSize);
119
120 if (ZSTD_isError(zresult)) {
121 PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
122 return -1;
123 }
124
125 return 0;
75 }
126 }
76
127
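
Both init helpers above build a complete ZSTD_parameters value, with cParams taken either from the user-supplied CompressionParameters or from ZSTD_getCParams(), plus the frame parameters, and pass it to the *_initCStream_advanced call. A reduced sketch of the single-threaded path, with the reuse/reset branch and the Python error handling dropped (make_cstream is an illustrative name):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <string.h>
    #include <zstd.h>

    static ZSTD_CStream* make_cstream(int level, unsigned long long sourceSize,
                                      const void* dictData, size_t dictSize,
                                      int writeChecksum) {
        ZSTD_CStream* cstream = ZSTD_createCStream();
        ZSTD_parameters zparams;
        size_t zresult;

        if (!cstream) {
            return NULL;
        }

        memset(&zparams, 0, sizeof(zparams));
        zparams.cParams = ZSTD_getCParams(level, sourceSize, dictSize);
        zparams.fParams.checksumFlag = writeChecksum ? 1 : 0;

        zresult = ZSTD_initCStream_advanced(cstream, dictData, dictSize,
                                            zparams, sourceSize);
        if (ZSTD_isError(zresult)) {
            ZSTD_freeCStream(cstream);
            return NULL;
        }

        return cstream;
    }
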
77 PyDoc_STRVAR(ZstdCompressor__doc__,
128 PyDoc_STRVAR(ZstdCompressor__doc__,
78 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n"
129 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n"
79 "\n"
130 "\n"
80 "Create an object used to perform Zstandard compression.\n"
131 "Create an object used to perform Zstandard compression.\n"
81 "\n"
132 "\n"
82 "An instance can compress data various ways. Instances can be used multiple\n"
133 "An instance can compress data various ways. Instances can be used multiple\n"
83 "times. Each compression operation will use the compression parameters\n"
134 "times. Each compression operation will use the compression parameters\n"
84 "defined at construction time.\n"
135 "defined at construction time.\n"
85 "\n"
136 "\n"
86 "Compression can be configured via the following names arguments:\n"
137 "Compression can be configured via the following names arguments:\n"
87 "\n"
138 "\n"
88 "level\n"
139 "level\n"
89 " Integer compression level.\n"
140 " Integer compression level.\n"
90 "dict_data\n"
141 "dict_data\n"
91 " A ``ZstdCompressionDict`` to be used to compress with dictionary data.\n"
142 " A ``ZstdCompressionDict`` to be used to compress with dictionary data.\n"
92 "compression_params\n"
143 "compression_params\n"
93 " A ``CompressionParameters`` instance defining low-level compression"
144 " A ``CompressionParameters`` instance defining low-level compression"
94 " parameters. If defined, this will overwrite the ``level`` argument.\n"
145 " parameters. If defined, this will overwrite the ``level`` argument.\n"
95 "write_checksum\n"
146 "write_checksum\n"
96 " If True, a 4 byte content checksum will be written with the compressed\n"
147 " If True, a 4 byte content checksum will be written with the compressed\n"
97 " data, allowing the decompressor to perform content verification.\n"
148 " data, allowing the decompressor to perform content verification.\n"
98 "write_content_size\n"
149 "write_content_size\n"
99 " If True, the decompressed content size will be included in the header of\n"
150 " If True, the decompressed content size will be included in the header of\n"
100 " the compressed data. This data will only be written if the compressor\n"
151 " the compressed data. This data will only be written if the compressor\n"
101 " knows the size of the input data.\n"
152 " knows the size of the input data.\n"
102 "write_dict_id\n"
153 "write_dict_id\n"
103 " Determines whether the dictionary ID will be written into the compressed\n"
154 " Determines whether the dictionary ID will be written into the compressed\n"
104 " data. Defaults to True. Only adds content to the compressed data if\n"
155 " data. Defaults to True. Only adds content to the compressed data if\n"
105 " a dictionary is being used.\n"
156 " a dictionary is being used.\n"
157 "threads\n"
158 " Number of threads to use to compress data concurrently. When set,\n"
159 " compression operations are performed on multiple threads. The default\n"
160 " value (0) disables multi-threaded compression. A value of ``-1`` means to\n"
161 " set the number of threads to the number of detected logical CPUs.\n"
106 );
162 );
107
163
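
At the C level, the ``threads`` argument described above decides which kind of context is created: a regular ZSTD_CCtx when it is 0, or a ZSTDMT_CCtx from zstd's (at this version, experimental) multi-threading API when it is positive, with -1 first resolved to the detected CPU count. A hedged sketch of just that dispatch, using only calls that appear in this file (compressor_ctx and init_compressor_ctx are illustrative names; the ZSTDMT declarations are assumed to come from the bundled zstdmt_compress.h):

    #include <zstd.h>
    #include "zstdmt_compress.h"   /* assumed location of the ZSTDMT_* declarations */

    typedef struct {
        ZSTD_CCtx* cctx;       /* used when threads == 0 */
        ZSTDMT_CCtx* mtcctx;   /* used when threads > 0 */
    } compressor_ctx;

    /* Assumes a negative thread count was already replaced by cpu_count(). */
    static int init_compressor_ctx(compressor_ctx* c, int threads) {
        c->cctx = NULL;
        c->mtcctx = NULL;

        if (threads > 0) {
            c->mtcctx = ZSTDMT_createCCtx(threads);
            return c->mtcctx ? 0 : -1;
        }

        c->cctx = ZSTD_createCCtx();
        return c->cctx ? 0 : -1;
    }
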
108 static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
164 static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
109 static char* kwlist[] = {
165 static char* kwlist[] = {
110 "level",
166 "level",
111 "dict_data",
167 "dict_data",
112 "compression_params",
168 "compression_params",
113 "write_checksum",
169 "write_checksum",
114 "write_content_size",
170 "write_content_size",
115 "write_dict_id",
171 "write_dict_id",
172 "threads",
116 NULL
173 NULL
117 };
174 };
118
175
119 int level = 3;
176 int level = 3;
120 ZstdCompressionDict* dict = NULL;
177 ZstdCompressionDict* dict = NULL;
121 CompressionParametersObject* params = NULL;
178 CompressionParametersObject* params = NULL;
122 PyObject* writeChecksum = NULL;
179 PyObject* writeChecksum = NULL;
123 PyObject* writeContentSize = NULL;
180 PyObject* writeContentSize = NULL;
124 PyObject* writeDictID = NULL;
181 PyObject* writeDictID = NULL;
182 int threads = 0;
125
183
126 self->cctx = NULL;
184 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor",
127 self->dict = NULL;
128 self->cparams = NULL;
129 self->cdict = NULL;
130
131 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO:ZstdCompressor",
132 kwlist, &level, &ZstdCompressionDictType, &dict,
185 kwlist, &level, &ZstdCompressionDictType, &dict,
133 &CompressionParametersType, &params,
186 &CompressionParametersType, &params,
134 &writeChecksum, &writeContentSize, &writeDictID)) {
187 &writeChecksum, &writeContentSize, &writeDictID, &threads)) {
135 return -1;
188 return -1;
136 }
189 }
137
190
138 if (level < 1) {
191 if (level < 1) {
139 PyErr_SetString(PyExc_ValueError, "level must be greater than 0");
192 PyErr_SetString(PyExc_ValueError, "level must be greater than 0");
140 return -1;
193 return -1;
141 }
194 }
142
195
143 if (level > ZSTD_maxCLevel()) {
196 if (level > ZSTD_maxCLevel()) {
144 PyErr_Format(PyExc_ValueError, "level must be less than %d",
197 PyErr_Format(PyExc_ValueError, "level must be less than %d",
145 ZSTD_maxCLevel() + 1);
198 ZSTD_maxCLevel() + 1);
146 return -1;
199 return -1;
147 }
200 }
148
201
202 if (threads < 0) {
203 threads = cpu_count();
204 }
205
206 self->threads = threads;
207
149 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
208 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
150 overhead of each compression operation. */
209 overhead of each compression operation. */
151 self->cctx = ZSTD_createCCtx();
210 if (threads) {
152 if (!self->cctx) {
211 self->mtcctx = ZSTDMT_createCCtx(threads);
153 PyErr_NoMemory();
212 if (!self->mtcctx) {
154 return -1;
213 PyErr_NoMemory();
214 return -1;
215 }
216 }
217 else {
218 self->cctx = ZSTD_createCCtx();
219 if (!self->cctx) {
220 PyErr_NoMemory();
221 return -1;
222 }
155 }
223 }
156
224
157 self->compressionLevel = level;
225 self->compressionLevel = level;
158
226
159 if (dict) {
227 if (dict) {
160 self->dict = dict;
228 self->dict = dict;
161 Py_INCREF(dict);
229 Py_INCREF(dict);
162 }
230 }
163
231
164 if (params) {
232 if (params) {
165 self->cparams = params;
233 self->cparams = params;
166 Py_INCREF(params);
234 Py_INCREF(params);
167 }
235 }
168
236
169 memset(&self->fparams, 0, sizeof(self->fparams));
237 memset(&self->fparams, 0, sizeof(self->fparams));
170
238
171 if (writeChecksum && PyObject_IsTrue(writeChecksum)) {
239 if (writeChecksum && PyObject_IsTrue(writeChecksum)) {
172 self->fparams.checksumFlag = 1;
240 self->fparams.checksumFlag = 1;
173 }
241 }
174 if (writeContentSize && PyObject_IsTrue(writeContentSize)) {
242 if (writeContentSize && PyObject_IsTrue(writeContentSize)) {
175 self->fparams.contentSizeFlag = 1;
243 self->fparams.contentSizeFlag = 1;
176 }
244 }
177 if (writeDictID && PyObject_Not(writeDictID)) {
245 if (writeDictID && PyObject_Not(writeDictID)) {
178 self->fparams.noDictIDFlag = 1;
246 self->fparams.noDictIDFlag = 1;
179 }
247 }
180
248
181 return 0;
249 return 0;
182 }
250 }
183
251
184 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
252 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
253 if (self->cstream) {
254 ZSTD_freeCStream(self->cstream);
255 self->cstream = NULL;
256 }
257
185 Py_XDECREF(self->cparams);
258 Py_XDECREF(self->cparams);
186 Py_XDECREF(self->dict);
259 Py_XDECREF(self->dict);
187
260
188 if (self->cdict) {
261 if (self->cdict) {
189 ZSTD_freeCDict(self->cdict);
262 ZSTD_freeCDict(self->cdict);
190 self->cdict = NULL;
263 self->cdict = NULL;
191 }
264 }
192
265
193 if (self->cctx) {
266 if (self->cctx) {
194 ZSTD_freeCCtx(self->cctx);
267 ZSTD_freeCCtx(self->cctx);
195 self->cctx = NULL;
268 self->cctx = NULL;
196 }
269 }
197
270
271 if (self->mtcctx) {
272 ZSTDMT_freeCCtx(self->mtcctx);
273 self->mtcctx = NULL;
274 }
275
198 PyObject_Del(self);
276 PyObject_Del(self);
199 }
277 }
200
278
201 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__,
279 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__,
202 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n"
280 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n"
203 "compress data between streams\n"
281 "compress data between streams\n"
204 "\n"
282 "\n"
205 "Data will be read from ``ifh``, compressed, and written to ``ofh``.\n"
283 "Data will be read from ``ifh``, compressed, and written to ``ofh``.\n"
206 "``ifh`` must have a ``read(size)`` method. ``ofh`` must have a ``write(data)``\n"
284 "``ifh`` must have a ``read(size)`` method. ``ofh`` must have a ``write(data)``\n"
207 "method.\n"
285 "method.\n"
208 "\n"
286 "\n"
209 "An optional ``size`` argument specifies the size of the source stream.\n"
287 "An optional ``size`` argument specifies the size of the source stream.\n"
210 "If defined, compression parameters will be tuned based on the size.\n"
288 "If defined, compression parameters will be tuned based on the size.\n"
211 "\n"
289 "\n"
212 "Optional arguments ``read_size`` and ``write_size`` define the chunk sizes\n"
290 "Optional arguments ``read_size`` and ``write_size`` define the chunk sizes\n"
213 "of ``read()`` and ``write()`` operations, respectively. By default, they use\n"
291 "of ``read()`` and ``write()`` operations, respectively. By default, they use\n"
214 "the default compression stream input and output sizes, respectively.\n"
292 "the default compression stream input and output sizes, respectively.\n"
215 );
293 );
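
For reference, a minimal Python usage sketch of copy_stream() (illustrative only: it assumes the extension is importable as ``zstd``, matching the ``zstd.ZstdCompressor`` type name, and uses placeholder file names):

    import zstd

    cctx = zstd.ZstdCompressor(level=3)
    with open('input.bin', 'rb') as ifh, open('output.zst', 'wb') as ofh:
        bytes_read, bytes_written = cctx.copy_stream(ifh, ofh)

The two returned values correspond to the total bytes read and written, which the implementation below packs into a tuple.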
216
294
217 static PyObject* ZstdCompressor_copy_stream(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
295 static PyObject* ZstdCompressor_copy_stream(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
218 static char* kwlist[] = {
296 static char* kwlist[] = {
219 "ifh",
297 "ifh",
220 "ofh",
298 "ofh",
221 "size",
299 "size",
222 "read_size",
300 "read_size",
223 "write_size",
301 "write_size",
224 NULL
302 NULL
225 };
303 };
226
304
227 PyObject* source;
305 PyObject* source;
228 PyObject* dest;
306 PyObject* dest;
229 Py_ssize_t sourceSize = 0;
307 Py_ssize_t sourceSize = 0;
230 size_t inSize = ZSTD_CStreamInSize();
308 size_t inSize = ZSTD_CStreamInSize();
231 size_t outSize = ZSTD_CStreamOutSize();
309 size_t outSize = ZSTD_CStreamOutSize();
232 ZSTD_CStream* cstream;
233 ZSTD_inBuffer input;
310 ZSTD_inBuffer input;
234 ZSTD_outBuffer output;
311 ZSTD_outBuffer output;
235 Py_ssize_t totalRead = 0;
312 Py_ssize_t totalRead = 0;
236 Py_ssize_t totalWrite = 0;
313 Py_ssize_t totalWrite = 0;
237 char* readBuffer;
314 char* readBuffer;
238 Py_ssize_t readSize;
315 Py_ssize_t readSize;
239 PyObject* readResult;
316 PyObject* readResult;
240 PyObject* res = NULL;
317 PyObject* res = NULL;
241 size_t zresult;
318 size_t zresult;
242 PyObject* writeResult;
319 PyObject* writeResult;
243 PyObject* totalReadPy;
320 PyObject* totalReadPy;
244 PyObject* totalWritePy;
321 PyObject* totalWritePy;
245
322
246 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk:copy_stream", kwlist,
323 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk:copy_stream", kwlist,
247 &source, &dest, &sourceSize, &inSize, &outSize)) {
324 &source, &dest, &sourceSize, &inSize, &outSize)) {
248 return NULL;
325 return NULL;
249 }
326 }
250
327
251 if (!PyObject_HasAttrString(source, "read")) {
328 if (!PyObject_HasAttrString(source, "read")) {
252 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
329 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
253 return NULL;
330 return NULL;
254 }
331 }
255
332
256 if (!PyObject_HasAttrString(dest, "write")) {
333 if (!PyObject_HasAttrString(dest, "write")) {
257 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
334 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
258 return NULL;
335 return NULL;
259 }
336 }
260
337
261 /* Prevent free on uninitialized memory in finally. */
338 /* Prevent free on uninitialized memory in finally. */
262 output.dst = NULL;
339 output.dst = NULL;
263
340
264 cstream = CStream_from_ZstdCompressor(self, sourceSize);
341 if (self->mtcctx) {
265 if (!cstream) {
342 if (init_mtcstream(self, sourceSize)) {
266 res = NULL;
343 res = NULL;
267 goto finally;
344 goto finally;
345 }
346 }
347 else {
348 if (0 != init_cstream(self, sourceSize)) {
349 res = NULL;
350 goto finally;
351 }
268 }
352 }
269
353
270 output.dst = PyMem_Malloc(outSize);
354 output.dst = PyMem_Malloc(outSize);
271 if (!output.dst) {
355 if (!output.dst) {
272 PyErr_NoMemory();
356 PyErr_NoMemory();
273 res = NULL;
357 res = NULL;
274 goto finally;
358 goto finally;
275 }
359 }
276 output.size = outSize;
360 output.size = outSize;
277 output.pos = 0;
361 output.pos = 0;
278
362
279 while (1) {
363 while (1) {
280 /* Try to read from source stream. */
364 /* Try to read from source stream. */
281 readResult = PyObject_CallMethod(source, "read", "n", inSize);
365 readResult = PyObject_CallMethod(source, "read", "n", inSize);
282 if (!readResult) {
366 if (!readResult) {
283 PyErr_SetString(ZstdError, "could not read() from source");
367 PyErr_SetString(ZstdError, "could not read() from source");
284 goto finally;
368 goto finally;
285 }
369 }
286
370
287 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
371 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
288
372
289 /* If no data was read, we're at EOF. */
373 /* If no data was read, we're at EOF. */
290 if (0 == readSize) {
374 if (0 == readSize) {
291 break;
375 break;
292 }
376 }
293
377
294 totalRead += readSize;
378 totalRead += readSize;
295
379
296 /* Send data to compressor */
380 /* Send data to compressor */
297 input.src = readBuffer;
381 input.src = readBuffer;
298 input.size = readSize;
382 input.size = readSize;
299 input.pos = 0;
383 input.pos = 0;
300
384
301 while (input.pos < input.size) {
385 while (input.pos < input.size) {
302 Py_BEGIN_ALLOW_THREADS
386 Py_BEGIN_ALLOW_THREADS
303 zresult = ZSTD_compressStream(cstream, &output, &input);
387 if (self->mtcctx) {
388 zresult = ZSTDMT_compressStream(self->mtcctx, &output, &input);
389 }
390 else {
391 zresult = ZSTD_compressStream(self->cstream, &output, &input);
392 }
304 Py_END_ALLOW_THREADS
393 Py_END_ALLOW_THREADS
305
394
306 if (ZSTD_isError(zresult)) {
395 if (ZSTD_isError(zresult)) {
307 res = NULL;
396 res = NULL;
308 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
397 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
309 goto finally;
398 goto finally;
310 }
399 }
311
400
312 if (output.pos) {
401 if (output.pos) {
313 #if PY_MAJOR_VERSION >= 3
402 #if PY_MAJOR_VERSION >= 3
314 writeResult = PyObject_CallMethod(dest, "write", "y#",
403 writeResult = PyObject_CallMethod(dest, "write", "y#",
315 #else
404 #else
316 writeResult = PyObject_CallMethod(dest, "write", "s#",
405 writeResult = PyObject_CallMethod(dest, "write", "s#",
317 #endif
406 #endif
318 output.dst, output.pos);
407 output.dst, output.pos);
319 Py_XDECREF(writeResult);
408 Py_XDECREF(writeResult);
320 totalWrite += output.pos;
409 totalWrite += output.pos;
321 output.pos = 0;
410 output.pos = 0;
322 }
411 }
323 }
412 }
324 }
413 }
325
414
326 /* We've finished reading. Now flush the compressor stream. */
415 /* We've finished reading. Now flush the compressor stream. */
327 while (1) {
416 while (1) {
328 zresult = ZSTD_endStream(cstream, &output);
417 if (self->mtcctx) {
418 zresult = ZSTDMT_endStream(self->mtcctx, &output);
419 }
420 else {
421 zresult = ZSTD_endStream(self->cstream, &output);
422 }
329 if (ZSTD_isError(zresult)) {
423 if (ZSTD_isError(zresult)) {
330 PyErr_Format(ZstdError, "error ending compression stream: %s",
424 PyErr_Format(ZstdError, "error ending compression stream: %s",
331 ZSTD_getErrorName(zresult));
425 ZSTD_getErrorName(zresult));
332 res = NULL;
426 res = NULL;
333 goto finally;
427 goto finally;
334 }
428 }
335
429
336 if (output.pos) {
430 if (output.pos) {
337 #if PY_MAJOR_VERSION >= 3
431 #if PY_MAJOR_VERSION >= 3
338 writeResult = PyObject_CallMethod(dest, "write", "y#",
432 writeResult = PyObject_CallMethod(dest, "write", "y#",
339 #else
433 #else
340 writeResult = PyObject_CallMethod(dest, "write", "s#",
434 writeResult = PyObject_CallMethod(dest, "write", "s#",
341 #endif
435 #endif
342 output.dst, output.pos);
436 output.dst, output.pos);
343 totalWrite += output.pos;
437 totalWrite += output.pos;
344 Py_XDECREF(writeResult);
438 Py_XDECREF(writeResult);
345 output.pos = 0;
439 output.pos = 0;
346 }
440 }
347
441
348 if (!zresult) {
442 if (!zresult) {
349 break;
443 break;
350 }
444 }
351 }
445 }
352
446
353 ZSTD_freeCStream(cstream);
354 cstream = NULL;
355
356 totalReadPy = PyLong_FromSsize_t(totalRead);
447 totalReadPy = PyLong_FromSsize_t(totalRead);
357 totalWritePy = PyLong_FromSsize_t(totalWrite);
448 totalWritePy = PyLong_FromSsize_t(totalWrite);
358 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
449 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
359 Py_DecRef(totalReadPy);
450 Py_DECREF(totalReadPy);
360 Py_DecRef(totalWritePy);
451 Py_DECREF(totalWritePy);
361
452
362 finally:
453 finally:
363 if (output.dst) {
454 if (output.dst) {
364 PyMem_Free(output.dst);
455 PyMem_Free(output.dst);
365 }
456 }
366
457
367 if (cstream) {
368 ZSTD_freeCStream(cstream);
369 }
370
371 return res;
458 return res;
372 }
459 }
373
460
374 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
461 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
375 "compress(data, allow_empty=False)\n"
462 "compress(data, allow_empty=False)\n"
376 "\n"
463 "\n"
377 "Compress data in a single operation.\n"
464 "Compress data in a single operation.\n"
378 "\n"
465 "\n"
379 "This is the simplest mechanism to perform compression: simply pass in a\n"
466 "This is the simplest mechanism to perform compression: simply pass in a\n"
380 "value and get a compressed value back. It is also the mechanism most prone to abuse.\n"
467 "value and get a compressed value back. It is also the mechanism most prone to abuse.\n"
381 "The input and output values must fit in memory, so passing in very large\n"
468 "The input and output values must fit in memory, so passing in very large\n"
382 "values can result in excessive memory usage. For this reason, one of the\n"
469 "values can result in excessive memory usage. For this reason, one of the\n"
383 "streaming based APIs is preferred for larger values.\n"
470 "streaming based APIs is preferred for larger values.\n"
384 );
471 );
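
A minimal sketch of this one-shot API (illustrative; assumes the extension is importable as ``zstd``):

    import zstd

    cctx = zstd.ZstdCompressor(level=3)
    compressed = cctx.compress(b'data to compress')

Per the guard in the implementation below, compressing an empty input while content sizes are written requires passing ``allow_empty=True``.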
385
472
386 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
473 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
387 static char* kwlist[] = {
474 static char* kwlist[] = {
388 "data",
475 "data",
389 "allow_empty",
476 "allow_empty",
390 NULL
477 NULL
391 };
478 };
392
479
393 const char* source;
480 const char* source;
394 Py_ssize_t sourceSize;
481 Py_ssize_t sourceSize;
395 PyObject* allowEmpty = NULL;
482 PyObject* allowEmpty = NULL;
396 size_t destSize;
483 size_t destSize;
397 PyObject* output;
484 PyObject* output;
398 char* dest;
485 char* dest;
399 void* dictData = NULL;
486 void* dictData = NULL;
400 size_t dictSize = 0;
487 size_t dictSize = 0;
401 size_t zresult;
488 size_t zresult;
402 ZSTD_parameters zparams;
489 ZSTD_parameters zparams;
403
490
404 #if PY_MAJOR_VERSION >= 3
491 #if PY_MAJOR_VERSION >= 3
405 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O:compress",
492 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O:compress",
406 #else
493 #else
407 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O:compress",
494 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O:compress",
408 #endif
495 #endif
409 kwlist, &source, &sourceSize, &allowEmpty)) {
496 kwlist, &source, &sourceSize, &allowEmpty)) {
410 return NULL;
497 return NULL;
411 }
498 }
412
499
500 if (self->threads && self->dict) {
501 PyErr_SetString(ZstdError,
502 "compress() cannot be used with both dictionaries and multi-threaded compression");
503 return NULL;
504 }
505
506 if (self->threads && self->cparams) {
507 PyErr_SetString(ZstdError,
508 "compress() cannot be used with both compression parameters and multi-threaded compression");
509 return NULL;
510 }
511
413 /* Limitation in zstd C API doesn't let decompression side distinguish
512 /* Limitation in zstd C API doesn't let decompression side distinguish
414 between content size of 0 and unknown content size. This can make round
513 between content size of 0 and unknown content size. This can make round
415 tripping via Python difficult. Until this is fixed, require a flag
514 tripping via Python difficult. Until this is fixed, require a flag
416 to fire the footgun.
515 to fire the footgun.
417 https://github.com/indygreg/python-zstandard/issues/11 */
516 https://github.com/indygreg/python-zstandard/issues/11 */
418 if (0 == sourceSize && self->fparams.contentSizeFlag
517 if (0 == sourceSize && self->fparams.contentSizeFlag
419 && (!allowEmpty || PyObject_Not(allowEmpty))) {
518 && (!allowEmpty || PyObject_Not(allowEmpty))) {
420 PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes");
519 PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes");
421 return NULL;
520 return NULL;
422 }
521 }
423
522
424 destSize = ZSTD_compressBound(sourceSize);
523 destSize = ZSTD_compressBound(sourceSize);
425 output = PyBytes_FromStringAndSize(NULL, destSize);
524 output = PyBytes_FromStringAndSize(NULL, destSize);
426 if (!output) {
525 if (!output) {
427 return NULL;
526 return NULL;
428 }
527 }
429
528
430 dest = PyBytes_AsString(output);
529 dest = PyBytes_AsString(output);
431
530
432 if (self->dict) {
531 if (self->dict) {
433 dictData = self->dict->dictData;
532 dictData = self->dict->dictData;
434 dictSize = self->dict->dictSize;
533 dictSize = self->dict->dictSize;
435 }
534 }
436
535
437 memset(&zparams, 0, sizeof(zparams));
536 memset(&zparams, 0, sizeof(zparams));
438 if (!self->cparams) {
537 if (!self->cparams) {
439 zparams.cParams = ZSTD_getCParams(self->compressionLevel, sourceSize, dictSize);
538 zparams.cParams = ZSTD_getCParams(self->compressionLevel, sourceSize, dictSize);
440 }
539 }
441 else {
540 else {
442 ztopy_compression_parameters(self->cparams, &zparams.cParams);
541 ztopy_compression_parameters(self->cparams, &zparams.cParams);
443 /* Do NOT call ZSTD_adjustCParams() here because the compression params
542 /* Do NOT call ZSTD_adjustCParams() here because the compression params
444 come from the user. */
543 come from the user. */
445 }
544 }
446
545
447 zparams.fParams = self->fparams;
546 zparams.fParams = self->fparams;
448
547
449 /* The raw dict data has to be processed before it can be used. Since this
548 /* The raw dict data has to be processed before it can be used. Since this
450 adds overhead - especially if multiple dictionary compression operations
549 adds overhead - especially if multiple dictionary compression operations
451 are performed on the same ZstdCompressor instance - we create a
550 are performed on the same ZstdCompressor instance - we create a
452 ZSTD_CDict once and reuse it for all operations.
551 ZSTD_CDict once and reuse it for all operations.
453
552
454 Note: the compression parameters used for the first invocation (possibly
553 Note: the compression parameters used for the first invocation (possibly
455 derived from the source size) will be reused on all subsequent invocations.
554 derived from the source size) will be reused on all subsequent invocations.
456 https://github.com/facebook/zstd/issues/358 contains more info. We could
555 https://github.com/facebook/zstd/issues/358 contains more info. We could
457 potentially add an argument somewhere to control this behavior.
556 potentially add an argument somewhere to control this behavior.
458 */
557 */
459 if (dictData && !self->cdict) {
558 if (0 != populate_cdict(self, &zparams)) {
460 if (populate_cdict(self, dictData, dictSize, &zparams)) {
559 Py_DECREF(output);
461 Py_DECREF(output);
560 return NULL;
462 return NULL;
463 }
464 }
561 }
465
562
466 Py_BEGIN_ALLOW_THREADS
563 Py_BEGIN_ALLOW_THREADS
467 /* By avoiding ZSTD_compress(), we don't necessarily write out content
564 if (self->mtcctx) {
468 size. This means the argument to ZstdCompressor to control frame
565 zresult = ZSTDMT_compressCCtx(self->mtcctx, dest, destSize,
469 parameters is honored. */
566 source, sourceSize, self->compressionLevel);
470 if (self->cdict) {
471 zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize,
472 source, sourceSize, self->cdict);
473 }
567 }
474 else {
568 else {
475 zresult = ZSTD_compress_advanced(self->cctx, dest, destSize,
569 /* By avoiding ZSTD_compress(), we don't necessarily write out content
476 source, sourceSize, dictData, dictSize, zparams);
570 size. This means the argument to ZstdCompressor to control frame
571 parameters is honored. */
572 if (self->cdict) {
573 zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize,
574 source, sourceSize, self->cdict);
575 }
576 else {
577 zresult = ZSTD_compress_advanced(self->cctx, dest, destSize,
578 source, sourceSize, dictData, dictSize, zparams);
579 }
477 }
580 }
478 Py_END_ALLOW_THREADS
581 Py_END_ALLOW_THREADS
479
582
480 if (ZSTD_isError(zresult)) {
583 if (ZSTD_isError(zresult)) {
481 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
584 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
482 Py_CLEAR(output);
585 Py_CLEAR(output);
483 return NULL;
586 return NULL;
484 }
587 }
485 else {
588 else {
486 Py_SIZE(output) = zresult;
589 Py_SIZE(output) = zresult;
487 }
590 }
488
591
489 return output;
592 return output;
490 }
593 }
491
594
492 PyDoc_STRVAR(ZstdCompressionObj__doc__,
595 PyDoc_STRVAR(ZstdCompressionObj__doc__,
493 "compressobj()\n"
596 "compressobj()\n"
494 "\n"
597 "\n"
495 "Return an object exposing ``compress(data)`` and ``flush()`` methods.\n"
598 "Return an object exposing ``compress(data)`` and ``flush()`` methods.\n"
496 "\n"
599 "\n"
497 "The returned object exposes an API similar to ``zlib.compressobj`` and\n"
600 "The returned object exposes an API similar to ``zlib.compressobj`` and\n"
498 "``bz2.BZ2Compressor`` so that callers can swap in the zstd compressor\n"
601 "``bz2.BZ2Compressor`` so that callers can swap in the zstd compressor\n"
499 "without changing how compression is performed.\n"
602 "without changing how compression is performed.\n"
500 );
603 );
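
A sketch of the zlib-style interface described above (illustrative; assumes the extension is importable as ``zstd``):

    import zstd

    cobj = zstd.ZstdCompressor().compressobj()
    compressed = cobj.compress(b'chunk 0') + cobj.compress(b'chunk 1') + cobj.flush()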
501
604
502 static ZstdCompressionObj* ZstdCompressor_compressobj(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
605 static ZstdCompressionObj* ZstdCompressor_compressobj(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
503 static char* kwlist[] = {
606 static char* kwlist[] = {
504 "size",
607 "size",
505 NULL
608 NULL
506 };
609 };
507
610
508 Py_ssize_t inSize = 0;
611 Py_ssize_t inSize = 0;
509 size_t outSize = ZSTD_CStreamOutSize();
612 size_t outSize = ZSTD_CStreamOutSize();
510 ZstdCompressionObj* result = PyObject_New(ZstdCompressionObj, &ZstdCompressionObjType);
613 ZstdCompressionObj* result = NULL;
511 if (!result) {
512 return NULL;
513 }
514
614
515 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:compressobj", kwlist, &inSize)) {
615 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:compressobj", kwlist, &inSize)) {
516 return NULL;
616 return NULL;
517 }
617 }
518
618
519 result->cstream = CStream_from_ZstdCompressor(self, inSize);
619 result = (ZstdCompressionObj*)PyObject_CallObject((PyObject*)&ZstdCompressionObjType, NULL);
520 if (!result->cstream) {
620 if (!result) {
521 Py_DECREF(result);
522 return NULL;
621 return NULL;
523 }
622 }
524
623
624 if (self->mtcctx) {
625 if (init_mtcstream(self, inSize)) {
626 Py_DECREF(result);
627 return NULL;
628 }
629 }
630 else {
631 if (0 != init_cstream(self, inSize)) {
632 Py_DECREF(result);
633 return NULL;
634 }
635 }
636
525 result->output.dst = PyMem_Malloc(outSize);
637 result->output.dst = PyMem_Malloc(outSize);
526 if (!result->output.dst) {
638 if (!result->output.dst) {
527 PyErr_NoMemory();
639 PyErr_NoMemory();
528 Py_DECREF(result);
640 Py_DECREF(result);
529 return NULL;
641 return NULL;
530 }
642 }
531 result->output.size = outSize;
643 result->output.size = outSize;
532 result->output.pos = 0;
533
534 result->compressor = self;
644 result->compressor = self;
535 Py_INCREF(result->compressor);
645 Py_INCREF(result->compressor);
536
646
537 result->finished = 0;
538
539 return result;
647 return result;
540 }
648 }
541
649
542 PyDoc_STRVAR(ZstdCompressor_read_from__doc__,
650 PyDoc_STRVAR(ZstdCompressor_read_from__doc__,
543 "read_from(reader, [size=0, read_size=default, write_size=default])\n"
651 "read_from(reader, [size=0, read_size=default, write_size=default])\n"
544 "Read uncompressed data from a reader and return an iterator\n"
652 "Read uncompressed data from a reader and return an iterator\n"
545 "\n"
653 "\n"
546 "Returns an iterator of compressed data produced from reading from ``reader``.\n"
654 "Returns an iterator of compressed data produced from reading from ``reader``.\n"
547 "\n"
655 "\n"
548 "Uncompressed data will be obtained from ``reader`` by calling the\n"
656 "Uncompressed data will be obtained from ``reader`` by calling the\n"
549 "``read(size)`` method. The source data will be streamed into a\n"
657 "``read(size)`` method. The source data will be streamed into a\n"
550 "compressor. As compressed data is available, it will be exposed to the\n"
658 "compressor. As compressed data is available, it will be exposed to the\n"
551 "iterator.\n"
659 "iterator.\n"
552 "\n"
660 "\n"
553 "Data is read from the source in chunks of ``read_size``. Compressed chunks\n"
661 "Data is read from the source in chunks of ``read_size``. Compressed chunks\n"
554 "are at most ``write_size`` bytes. Both values default to the zstd input and\n"
662 "are at most ``write_size`` bytes. Both values default to the zstd input and\n"
555 "output defaults, respectively.\n"
663 "output defaults, respectively.\n"
556 "\n"
664 "\n"
557 "The caller is partially in control of how fast data is fed into the\n"
665 "The caller is partially in control of how fast data is fed into the\n"
558 "compressor by how it consumes the returned iterator. The compressor will\n"
666 "compressor by how it consumes the returned iterator. The compressor will\n"
559 "not consume from the reader unless the caller consumes from the iterator.\n"
667 "not consume from the reader unless the caller consumes from the iterator.\n"
560 );
668 );
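
A sketch of the iterator-based API (illustrative; assumes the extension is importable as ``zstd`` and uses a placeholder input file):

    import zstd

    cctx = zstd.ZstdCompressor()
    with open('input.bin', 'rb') as fh:
        for chunk in cctx.read_from(fh):
            pass  # each chunk is a bytes object of compressed data, at most write_size bytes long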
561
669
562 static ZstdCompressorIterator* ZstdCompressor_read_from(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
670 static ZstdCompressorIterator* ZstdCompressor_read_from(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
563 static char* kwlist[] = {
671 static char* kwlist[] = {
564 "reader",
672 "reader",
565 "size",
673 "size",
566 "read_size",
674 "read_size",
567 "write_size",
675 "write_size",
568 NULL
676 NULL
569 };
677 };
570
678
571 PyObject* reader;
679 PyObject* reader;
572 Py_ssize_t sourceSize = 0;
680 Py_ssize_t sourceSize = 0;
573 size_t inSize = ZSTD_CStreamInSize();
681 size_t inSize = ZSTD_CStreamInSize();
574 size_t outSize = ZSTD_CStreamOutSize();
682 size_t outSize = ZSTD_CStreamOutSize();
575 ZstdCompressorIterator* result;
683 ZstdCompressorIterator* result;
576
684
577 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk:read_from", kwlist,
685 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk:read_from", kwlist,
578 &reader, &sourceSize, &inSize, &outSize)) {
686 &reader, &sourceSize, &inSize, &outSize)) {
579 return NULL;
687 return NULL;
580 }
688 }
581
689
582 result = PyObject_New(ZstdCompressorIterator, &ZstdCompressorIteratorType);
690 result = (ZstdCompressorIterator*)PyObject_CallObject((PyObject*)&ZstdCompressorIteratorType, NULL);
583 if (!result) {
691 if (!result) {
584 return NULL;
692 return NULL;
585 }
693 }
586
587 result->compressor = NULL;
588 result->reader = NULL;
589 result->buffer = NULL;
590 result->cstream = NULL;
591 result->input.src = NULL;
592 result->output.dst = NULL;
593 result->readResult = NULL;
594
595 if (PyObject_HasAttrString(reader, "read")) {
694 if (PyObject_HasAttrString(reader, "read")) {
596 result->reader = reader;
695 result->reader = reader;
597 Py_INCREF(result->reader);
696 Py_INCREF(result->reader);
598 }
697 }
599 else if (1 == PyObject_CheckBuffer(reader)) {
698 else if (1 == PyObject_CheckBuffer(reader)) {
600 result->buffer = PyMem_Malloc(sizeof(Py_buffer));
699 result->buffer = PyMem_Malloc(sizeof(Py_buffer));
601 if (!result->buffer) {
700 if (!result->buffer) {
602 goto except;
701 goto except;
603 }
702 }
604
703
605 memset(result->buffer, 0, sizeof(Py_buffer));
704 memset(result->buffer, 0, sizeof(Py_buffer));
606
705
607 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
706 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
608 goto except;
707 goto except;
609 }
708 }
610
709
611 result->bufferOffset = 0;
612 sourceSize = result->buffer->len;
710 sourceSize = result->buffer->len;
613 }
711 }
614 else {
712 else {
615 PyErr_SetString(PyExc_ValueError,
713 PyErr_SetString(PyExc_ValueError,
616 "must pass an object with a read() method or one that conforms to the buffer protocol");
714 "must pass an object with a read() method or one that conforms to the buffer protocol");
617 goto except;
715 goto except;
618 }
716 }
619
717
620 result->compressor = self;
718 result->compressor = self;
621 Py_INCREF(result->compressor);
719 Py_INCREF(result->compressor);
622
720
623 result->sourceSize = sourceSize;
721 result->sourceSize = sourceSize;
624 result->cstream = CStream_from_ZstdCompressor(self, sourceSize);
722
625 if (!result->cstream) {
723 if (self->mtcctx) {
626 goto except;
724 if (init_mtcstream(self, sourceSize)) {
725 goto except;
726 }
727 }
728 else {
729 if (0 != init_cstream(self, sourceSize)) {
730 goto except;
731 }
627 }
732 }
628
733
629 result->inSize = inSize;
734 result->inSize = inSize;
630 result->outSize = outSize;
735 result->outSize = outSize;
631
736
632 result->output.dst = PyMem_Malloc(outSize);
737 result->output.dst = PyMem_Malloc(outSize);
633 if (!result->output.dst) {
738 if (!result->output.dst) {
634 PyErr_NoMemory();
739 PyErr_NoMemory();
635 goto except;
740 goto except;
636 }
741 }
637 result->output.size = outSize;
742 result->output.size = outSize;
638 result->output.pos = 0;
639
640 result->input.src = NULL;
641 result->input.size = 0;
642 result->input.pos = 0;
643
644 result->finishedInput = 0;
645 result->finishedOutput = 0;
646
743
647 goto finally;
744 goto finally;
648
745
649 except:
746 except:
650 if (result->cstream) {
747 Py_XDECREF(result->compressor);
651 ZSTD_freeCStream(result->cstream);
748 Py_XDECREF(result->reader);
652 result->cstream = NULL;
653 }
654
655 Py_DecRef((PyObject*)result->compressor);
656 Py_DecRef(result->reader);
657
658 Py_DECREF(result);
749 Py_DECREF(result);
659 result = NULL;
750 result = NULL;
660
751
661 finally:
752 finally:
662 return result;
753 return result;
663 }
754 }
664
755
665 PyDoc_STRVAR(ZstdCompressor_write_to___doc__,
756 PyDoc_STRVAR(ZstdCompressor_write_to___doc__,
666 "Create a context manager to write compressed data to an object.\n"
757 "Create a context manager to write compressed data to an object.\n"
667 "\n"
758 "\n"
668 "The passed object must have a ``write()`` method.\n"
759 "The passed object must have a ``write()`` method.\n"
669 "\n"
760 "\n"
670 "The caller feeds input data to the object by calling ``write(data)``.\n"
761 "The caller feeds input data to the object by calling ``write(data)``.\n"
671 "Compressed data is written to the argument given to this function.\n"
762 "Compressed data is written to the argument given to this function.\n"
672 "\n"
763 "\n"
673 "The function takes an optional ``size`` argument indicating the total size\n"
764 "The function takes an optional ``size`` argument indicating the total size\n"
674 "of the eventual input. If specified, the size will influence compression\n"
765 "of the eventual input. If specified, the size will influence compression\n"
675 "parameter tuning and could result in the size being written into the\n"
766 "parameter tuning and could result in the size being written into the\n"
676 "header of the compressed data.\n"
767 "header of the compressed data.\n"
677 "\n"
768 "\n"
678 "An optional ``write_size`` argument is also accepted. It defines the maximum\n"
769 "An optional ``write_size`` argument is also accepted. It defines the maximum\n"
679 "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n"
770 "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n"
680 "for a compressor output stream.\n"
771 "for a compressor output stream.\n"
681 );
772 );
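
A sketch of the context manager API (illustrative; assumes the extension is importable as ``zstd``; the returned writer object exposes a ``write()`` method):

    import zstd

    cctx = zstd.ZstdCompressor()
    with open('output.zst', 'wb') as fh:
        with cctx.write_to(fh) as compressor:
            compressor.write(b'chunk 0')
            compressor.write(b'chunk 1')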
682
773
683 static ZstdCompressionWriter* ZstdCompressor_write_to(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
774 static ZstdCompressionWriter* ZstdCompressor_write_to(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
684 static char* kwlist[] = {
775 static char* kwlist[] = {
685 "writer",
776 "writer",
686 "size",
777 "size",
687 "write_size",
778 "write_size",
688 NULL
779 NULL
689 };
780 };
690
781
691 PyObject* writer;
782 PyObject* writer;
692 ZstdCompressionWriter* result;
783 ZstdCompressionWriter* result;
693 Py_ssize_t sourceSize = 0;
784 Py_ssize_t sourceSize = 0;
694 size_t outSize = ZSTD_CStreamOutSize();
785 size_t outSize = ZSTD_CStreamOutSize();
695
786
696 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk:write_to", kwlist,
787 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk:write_to", kwlist,
697 &writer, &sourceSize, &outSize)) {
788 &writer, &sourceSize, &outSize)) {
698 return NULL;
789 return NULL;
699 }
790 }
700
791
701 if (!PyObject_HasAttrString(writer, "write")) {
792 if (!PyObject_HasAttrString(writer, "write")) {
702 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
793 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
703 return NULL;
794 return NULL;
704 }
795 }
705
796
706 result = PyObject_New(ZstdCompressionWriter, &ZstdCompressionWriterType);
797 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
707 if (!result) {
798 if (!result) {
708 return NULL;
799 return NULL;
709 }
800 }
710
801
711 result->compressor = self;
802 result->compressor = self;
712 Py_INCREF(result->compressor);
803 Py_INCREF(result->compressor);
713
804
714 result->writer = writer;
805 result->writer = writer;
715 Py_INCREF(result->writer);
806 Py_INCREF(result->writer);
716
807
717 result->sourceSize = sourceSize;
808 result->sourceSize = sourceSize;
718
719 result->outSize = outSize;
809 result->outSize = outSize;
720
810
721 result->entered = 0;
811 return result;
722 result->cstream = NULL;
812 }
813
814 typedef struct {
815 void* sourceData;
816 size_t sourceSize;
817 } DataSource;
818
819 typedef struct {
820 DataSource* sources;
821 Py_ssize_t sourcesSize;
822 unsigned long long totalSourceSize;
823 } DataSources;
824
825 typedef struct {
826 void* dest;
827 Py_ssize_t destSize;
828 BufferSegment* segments;
829 Py_ssize_t segmentsSize;
830 } DestBuffer;
831
832 typedef enum {
833 WorkerError_none = 0,
834 WorkerError_zstd = 1,
835 WorkerError_no_memory = 2,
836 } WorkerError;
837
838 /**
839 * Holds state for an individual worker performing multi_compress_to_buffer work.
840 */
841 typedef struct {
842 /* Used for compression. */
843 ZSTD_CCtx* cctx;
844 ZSTD_CDict* cdict;
845 int cLevel;
846 CompressionParametersObject* cParams;
847 ZSTD_frameParameters fParams;
848
849 /* What to compress. */
850 DataSource* sources;
851 Py_ssize_t sourcesSize;
852 Py_ssize_t startOffset;
853 Py_ssize_t endOffset;
854 unsigned long long totalSourceSize;
855
856 /* Result storage. */
857 DestBuffer* destBuffers;
858 Py_ssize_t destCount;
859
860 /* Error tracking. */
861 WorkerError error;
862 size_t zresult;
863 Py_ssize_t errorOffset;
864 } WorkerState;
865
866 static void compress_worker(WorkerState* state) {
867 Py_ssize_t inputOffset = state->startOffset;
868 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
869 Py_ssize_t currentBufferStartOffset = state->startOffset;
870 size_t zresult;
871 ZSTD_parameters zparams;
872 void* newDest;
873 size_t allocationSize;
874 size_t boundSize;
875 Py_ssize_t destOffset = 0;
876 DataSource* sources = state->sources;
877 DestBuffer* destBuffer;
878
879 assert(!state->destBuffers);
880 assert(0 == state->destCount);
881
882 if (state->cParams) {
883 ztopy_compression_parameters(state->cParams, &zparams.cParams);
884 }
885
886 zparams.fParams = state->fParams;
887
888 /*
889 * The total size of the compressed data is unknown until we actually
890 * compress data. That means we can't pre-allocate the exact size we need.
891 *
892 * There is a cost to every allocation and reallocation. So, it is in our
893 * interest to minimize the number of allocations.
894 *
895 * There is also a cost to too few allocations. If allocations are too
896 * large they may fail. If buffers are shared and all inputs become
897 * irrelevant at different lifetimes, then a reference to one segment
898 * in the buffer will keep the entire buffer alive. This leads to excessive
899 * memory usage.
900 *
901 * Our current strategy is to assume a compression ratio of 16:1 and
902 * allocate buffers of that size, rounded up to the nearest power of 2
903 * (because computers like round numbers). That ratio is greater than what
904 * most inputs achieve. This is by design: we don't want to over-allocate.
905 * But we don't want to under-allocate and lead to too many buffers either.
906 */
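
A small Python sketch of the sizing heuristic described above (illustrative only; ``roundpow2`` here is a local stand-in for the C helper of the same name, and ``first_item_bound`` stands in for ZSTD_compressBound() of the first input):

    def roundpow2(n):
        # smallest power of two >= n
        p = 1
        while p < n:
            p *= 2
        return p

    def initial_allocation_size(total_source_size, first_item_bound):
        size = roundpow2(total_source_size >> 4)  # assume a 16:1 compression ratio
        if first_item_bound > size:               # never below the worst case for the first item
            size = roundpow2(first_item_bound)
        return size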
907
908 state->destCount = 1;
909
910 state->destBuffers = calloc(1, sizeof(DestBuffer));
911 if (NULL == state->destBuffers) {
912 state->error = WorkerError_no_memory;
913 return;
914 }
915
916 destBuffer = &state->destBuffers[state->destCount - 1];
917
918 /*
919 * Rather than track bounds and grow the segments buffer, allocate space
920 * to hold remaining items then truncate when we're done with it.
921 */
922 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
923 if (NULL == destBuffer->segments) {
924 state->error = WorkerError_no_memory;
925 return;
926 }
927
928 destBuffer->segmentsSize = remainingItems;
929
930 allocationSize = roundpow2(state->totalSourceSize >> 4);
931
932 /* If the maximum size of the output is larger than that, round up. */
933 boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize);
934
935 if (boundSize > allocationSize) {
936 allocationSize = roundpow2(boundSize);
937 }
938
939 destBuffer->dest = malloc(allocationSize);
940 if (NULL == destBuffer->dest) {
941 state->error = WorkerError_no_memory;
942 return;
943 }
944
945 destBuffer->destSize = allocationSize;
946
947 for (inputOffset = state->startOffset; inputOffset <= state->endOffset; inputOffset++) {
948 void* source = sources[inputOffset].sourceData;
949 size_t sourceSize = sources[inputOffset].sourceSize;
950 size_t destAvailable;
951 void* dest;
952
953 destAvailable = destBuffer->destSize - destOffset;
954 boundSize = ZSTD_compressBound(sourceSize);
955
956 /*
957 * Not enough space in current buffer to hold largest compressed output.
958 * So allocate and switch to a new output buffer.
959 */
960 if (boundSize > destAvailable) {
961 /*
962 * The downsizing of the existing buffer is optional. It should be cheap
963 * (unlike growing). So we just do it.
964 */
965 if (destAvailable) {
966 newDest = realloc(destBuffer->dest, destOffset);
967 if (NULL == newDest) {
968 state->error = WorkerError_no_memory;
969 return;
970 }
971
972 destBuffer->dest = newDest;
973 destBuffer->destSize = destOffset;
974 }
975
976 /* Truncate segments buffer. */
977 newDest = realloc(destBuffer->segments,
978 (inputOffset - currentBufferStartOffset + 1) * sizeof(BufferSegment));
979 if (NULL == newDest) {
980 state->error = WorkerError_no_memory;
981 return;
982 }
983
984 destBuffer->segments = newDest;
985 destBuffer->segmentsSize = inputOffset - currentBufferStartOffset;
986
987 /* Grow space for new struct. */
988 /* TODO consider over-allocating so we don't do this every time. */
989 newDest = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
990 if (NULL == newDest) {
991 state->error = WorkerError_no_memory;
992 return;
993 }
994
995 state->destBuffers = newDest;
996 state->destCount++;
997
998 destBuffer = &state->destBuffers[state->destCount - 1];
999
1000 /* Don't take any chances with non-NULL pointers. */
1001 memset(destBuffer, 0, sizeof(DestBuffer));
1002
1003 /**
1004 * We could dynamically update allocation size based on work done so far.
1005 * For now, keep it simple.
1006 */
1007 allocationSize = roundpow2(state->totalSourceSize >> 4);
1008
1009 if (boundSize > allocationSize) {
1010 allocationSize = roundpow2(boundSize);
1011 }
1012
1013 destBuffer->dest = malloc(allocationSize);
1014 if (NULL == destBuffer->dest) {
1015 state->error = WorkerError_no_memory;
1016 return;
1017 }
1018
1019 destBuffer->destSize = allocationSize;
1020 destAvailable = allocationSize;
1021 destOffset = 0;
1022
1023 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1024 if (NULL == destBuffer->segments) {
1025 state->error = WorkerError_no_memory;
1026 return;
1027 }
1028
1029 destBuffer->segmentsSize = remainingItems;
1030 currentBufferStartOffset = inputOffset;
1031 }
1032
1033 dest = (char*)destBuffer->dest + destOffset;
1034
1035 if (state->cdict) {
1036 zresult = ZSTD_compress_usingCDict(state->cctx, dest, destAvailable,
1037 source, sourceSize, state->cdict);
1038 }
1039 else {
1040 if (!state->cParams) {
1041 zparams.cParams = ZSTD_getCParams(state->cLevel, sourceSize, 0);
1042 }
1043
1044 zresult = ZSTD_compress_advanced(state->cctx, dest, destAvailable,
1045 source, sourceSize, NULL, 0, zparams);
1046 }
1047
1048 if (ZSTD_isError(zresult)) {
1049 state->error = WorkerError_zstd;
1050 state->zresult = zresult;
1051 state->errorOffset = inputOffset;
1052 break;
1053 }
1054
1055 destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset;
1056 destBuffer->segments[inputOffset - currentBufferStartOffset].length = zresult;
1057
1058 destOffset += zresult;
1059 remainingItems--;
1060 }
1061
1062 if (destBuffer->destSize > destOffset) {
1063 newDest = realloc(destBuffer->dest, destOffset);
1064 if (NULL == newDest) {
1065 state->error = WorkerError_no_memory;
1066 return;
1067 }
1068
1069 destBuffer->dest = newDest;
1070 destBuffer->destSize = destOffset;
1071 }
1072 }
1073
1074 ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor,
1075 DataSources* sources, unsigned int threadCount) {
1076 ZSTD_parameters zparams;
1077 unsigned long long bytesPerWorker;
1078 POOL_ctx* pool = NULL;
1079 WorkerState* workerStates = NULL;
1080 Py_ssize_t i;
1081 unsigned long long workerBytes = 0;
1082 Py_ssize_t workerStartOffset = 0;
1083 size_t currentThread = 0;
1084 int errored = 0;
1085 Py_ssize_t segmentsCount = 0;
1086 Py_ssize_t segmentIndex;
1087 PyObject* segmentsArg = NULL;
1088 ZstdBufferWithSegments* buffer;
1089 ZstdBufferWithSegmentsCollection* result = NULL;
1090
1091 assert(sources->sourcesSize > 0);
1092 assert(sources->totalSourceSize > 0);
1093 assert(threadCount >= 1);
1094
1095 /* More threads than inputs makes no sense. */
1096 threadCount = sources->sourcesSize < threadCount ? (unsigned int)sources->sourcesSize
1097 : threadCount;
1098
1099 /* TODO lower thread count when input size is too small and threads would add
1100 overhead. */
1101
1102 /*
1103 * When dictionaries are used, parameters are derived from the size of the
1104 * first element.
1105 *
1106 * TODO come up with a better mechanism.
1107 */
1108 memset(&zparams, 0, sizeof(zparams));
1109 if (compressor->cparams) {
1110 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
1111 }
1112 else {
1113 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel,
1114 sources->sources[0].sourceSize,
1115 compressor->dict ? compressor->dict->dictSize : 0);
1116 }
1117
1118 zparams.fParams = compressor->fparams;
1119
1120 if (0 != populate_cdict(compressor, &zparams)) {
1121 return NULL;
1122 }
1123
1124 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1125 if (NULL == workerStates) {
1126 PyErr_NoMemory();
1127 goto finally;
1128 }
1129
1130 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1131
1132 if (threadCount > 1) {
1133 pool = POOL_create(threadCount, 1);
1134 if (NULL == pool) {
1135 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1136 goto finally;
1137 }
1138 }
1139
1140 bytesPerWorker = sources->totalSourceSize / threadCount;
1141
1142 for (i = 0; i < threadCount; i++) {
1143 workerStates[i].cctx = ZSTD_createCCtx();
1144 if (!workerStates[i].cctx) {
1145 PyErr_NoMemory();
1146 goto finally;
1147 }
1148
1149 workerStates[i].cdict = compressor->cdict;
1150 workerStates[i].cLevel = compressor->compressionLevel;
1151 workerStates[i].cParams = compressor->cparams;
1152 workerStates[i].fParams = compressor->fparams;
1153
1154 workerStates[i].sources = sources->sources;
1155 workerStates[i].sourcesSize = sources->sourcesSize;
1156 }
1157
1158 Py_BEGIN_ALLOW_THREADS
1159 for (i = 0; i < sources->sourcesSize; i++) {
1160 workerBytes += sources->sources[i].sourceSize;
1161
1162 /*
1163 * The last worker/thread needs to handle all remaining work. Don't
1164 * trigger it prematurely. Defer to the block outside of the loop
1165 * to run the last worker/thread. But do still process this loop
1166 * so workerBytes is correct.
1167 */
1168 if (currentThread == threadCount - 1) {
1169 continue;
1170 }
1171
1172 if (workerBytes >= bytesPerWorker) {
1173 assert(currentThread < threadCount);
1174 workerStates[currentThread].totalSourceSize = workerBytes;
1175 workerStates[currentThread].startOffset = workerStartOffset;
1176 workerStates[currentThread].endOffset = i;
1177
1178 if (threadCount > 1) {
1179 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1180 }
1181 else {
1182 compress_worker(&workerStates[currentThread]);
1183 }
1184
1185 currentThread++;
1186 workerStartOffset = i + 1;
1187 workerBytes = 0;
1188 }
1189 }
1190
1191 if (workerBytes) {
1192 assert(currentThread < threadCount);
1193 workerStates[currentThread].totalSourceSize = workerBytes;
1194 workerStates[currentThread].startOffset = workerStartOffset;
1195 workerStates[currentThread].endOffset = sources->sourcesSize - 1;
1196
1197 if (threadCount > 1) {
1198 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1199 }
1200 else {
1201 compress_worker(&workerStates[currentThread]);
1202 }
1203 }
1204
1205 if (threadCount > 1) {
1206 POOL_free(pool);
1207 pool = NULL;
1208 }
1209
1210 Py_END_ALLOW_THREADS
1211
1212 for (i = 0; i < threadCount; i++) {
1213 switch (workerStates[i].error) {
1214 case WorkerError_no_memory:
1215 PyErr_NoMemory();
1216 errored = 1;
1217 break;
1218
1219 case WorkerError_zstd:
1220 PyErr_Format(ZstdError, "error compressing item %zd: %s",
1221 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1222 errored = 1;
1223 break;
1224 default:
1225 ;
1226 }
1227
1228 if (errored) {
1229 break;
1230 }
1231
1232 }
1233
1234 if (errored) {
1235 goto finally;
1236 }
1237
1238 segmentsCount = 0;
1239 for (i = 0; i < threadCount; i++) {
1240 WorkerState* state = &workerStates[i];
1241 segmentsCount += state->destCount;
1242 }
1243
1244 segmentsArg = PyTuple_New(segmentsCount);
1245 if (NULL == segmentsArg) {
1246 goto finally;
1247 }
1248
1249 segmentIndex = 0;
1250
1251 for (i = 0; i < threadCount; i++) {
1252 Py_ssize_t j;
1253 WorkerState* state = &workerStates[i];
1254
1255 for (j = 0; j < state->destCount; j++) {
1256 DestBuffer* destBuffer = &state->destBuffers[j];
1257 buffer = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1258 destBuffer->segments, destBuffer->segmentsSize);
1259
1260 if (NULL == buffer) {
1261 goto finally;
1262 }
1263
1264 /* Tell instance to use free() instead of PyMem_Free(). */
1265 buffer->useFree = 1;
1266
1267 /*
1268 * BufferWithSegments_FromMemory takes ownership of the backing memory.
1269 * Unset it here so it doesn't get freed below.
1270 */
1271 destBuffer->dest = NULL;
1272 destBuffer->segments = NULL;
1273
1274 PyTuple_SET_ITEM(segmentsArg, segmentIndex++, (PyObject*)buffer);
1275 }
1276 }
1277
1278 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1279 (PyObject*)&ZstdBufferWithSegmentsCollectionType, segmentsArg);
1280
1281 finally:
1282 Py_CLEAR(segmentsArg);
1283
1284 if (pool) {
1285 POOL_free(pool);
1286 }
1287
1288 if (workerStates) {
1289 Py_ssize_t j;
1290
1291 for (i = 0; i < threadCount; i++) {
1292 WorkerState state = workerStates[i];
1293
1294 if (state.cctx) {
1295 ZSTD_freeCCtx(state.cctx);
1296 }
1297
1298 /* malloc() is used in worker thread. */
1299
1300 for (j = 0; j < state.destCount; j++) {
1301 if (state.destBuffers) {
1302 free(state.destBuffers[j].dest);
1303 free(state.destBuffers[j].segments);
1304 }
1305 }
1306
1307
1308 free(state.destBuffers);
1309 }
1310
1311 PyMem_Free(workerStates);
1312 }
1313
1314 return result;
1315 }
1316
1317 PyDoc_STRVAR(ZstdCompressor_multi_compress_to_buffer__doc__,
1318 "Compress multiple pieces of data as a single operation\n"
1319 "\n"
1320 "Receives a ``BufferWithSegmentsCollection``, a ``BufferWithSegments``, or\n"
1321 "a list of bytes-like objects holding data to compress.\n"
1322 "\n"
1323 "Returns a ``BufferWithSegmentsCollection`` holding compressed data.\n"
1324 "\n"
1325 "This function is optimized to perform multiple compression operations with\n"
1326 "as little overhead as possible.\n"
1327 );
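
A sketch of calling this API with a list of byte strings (illustrative; assumes the extension is importable as ``zstd``; ``threads=-1`` maps to cpu_count() per the argument handling below):

    import zstd

    cctx = zstd.ZstdCompressor()
    collection = cctx.multi_compress_to_buffer([b'item 0', b'item 1', b'item 2'], threads=-1)
    # collection is a BufferWithSegmentsCollection holding one compressed frame per input item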
1328
1329 static ZstdBufferWithSegmentsCollection* ZstdCompressor_multi_compress_to_buffer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
1330 static char* kwlist[] = {
1331 "data",
1332 "threads",
1333 NULL
1334 };
1335
1336 PyObject* data;
1337 int threads = 0;
1338 Py_buffer* dataBuffers = NULL;
1339 DataSources sources;
1340 Py_ssize_t i;
1341 Py_ssize_t sourceCount = 0;
1342 ZstdBufferWithSegmentsCollection* result = NULL;
1343
1344 if (self->mtcctx) {
1345 PyErr_SetString(ZstdError,
1346 "function cannot be called on ZstdCompressor configured for multi-threaded compression");
1347 return NULL;
1348 }
1349
1350 memset(&sources, 0, sizeof(sources));
1351
1352 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist,
1353 &data, &threads)) {
1354 return NULL;
1355 }
1356
1357 if (threads < 0) {
1358 threads = cpu_count();
1359 }
1360
1361 if (threads < 2) {
1362 threads = 1;
1363 }
1364
1365 if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsType)) {
1366 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)data;
1367
1368 sources.sources = PyMem_Malloc(buffer->segmentCount * sizeof(DataSource));
1369 if (NULL == sources.sources) {
1370 PyErr_NoMemory();
1371 goto finally;
1372 }
1373
1374 for (i = 0; i < buffer->segmentCount; i++) {
1375 sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset;
1376 sources.sources[i].sourceSize = buffer->segments[i].length;
1377 sources.totalSourceSize += buffer->segments[i].length;
1378 }
1379
1380 sources.sourcesSize = buffer->segmentCount;
1381 }
1382 else if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsCollectionType)) {
1383 Py_ssize_t j;
1384 Py_ssize_t offset = 0;
1385 ZstdBufferWithSegments* buffer;
1386 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)data;
1387
1388 sourceCount = BufferWithSegmentsCollection_length(collection);
1389
1390 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1391 if (NULL == sources.sources) {
1392 PyErr_NoMemory();
1393 goto finally;
1394 }
1395
1396 for (i = 0; i < collection->bufferCount; i++) {
1397 buffer = collection->buffers[i];
1398
1399 for (j = 0; j < buffer->segmentCount; j++) {
1400 sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset;
1401 sources.sources[offset].sourceSize = buffer->segments[j].length;
1402 sources.totalSourceSize += buffer->segments[j].length;
1403
1404 offset++;
1405 }
1406 }
1407
1408 sources.sourcesSize = sourceCount;
1409 }
1410 else if (PyList_Check(data)) {
1411 sourceCount = PyList_GET_SIZE(data);
1412
1413 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1414 if (NULL == sources.sources) {
1415 PyErr_NoMemory();
1416 goto finally;
1417 }
1418
1419 /*
1420 * It isn't clear whether the address referred to by Py_buffer.buf
1421 * is still valid after PyBuffer_Release. So we hold a reference to all
1422 * Py_buffer instances for the duration of the operation.
1423 */
1424 dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer));
1425 if (NULL == dataBuffers) {
1426 PyErr_NoMemory();
1427 goto finally;
1428 }
1429
1430 memset(dataBuffers, 0, sourceCount * sizeof(Py_buffer));
1431
1432 for (i = 0; i < sourceCount; i++) {
1433 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(data, i),
1434 &dataBuffers[i], PyBUF_CONTIG_RO)) {
1435 PyErr_Clear();
1436 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1437 goto finally;
1438 }
1439
1440 sources.sources[i].sourceData = dataBuffers[i].buf;
1441 sources.sources[i].sourceSize = dataBuffers[i].len;
1442 sources.totalSourceSize += dataBuffers[i].len;
1443 }
1444
1445 sources.sourcesSize = sourceCount;
1446 }
1447 else {
1448 PyErr_SetString(PyExc_TypeError, "argument must be list of BufferWithSegments");
1449 goto finally;
1450 }
1451
1452 if (0 == sources.sourcesSize) {
1453 PyErr_SetString(PyExc_ValueError, "no source elements found");
1454 goto finally;
1455 }
1456
1457 if (0 == sources.totalSourceSize) {
1458 PyErr_SetString(PyExc_ValueError, "source elements are empty");
1459 goto finally;
1460 }
1461
1462 result = compress_from_datasources(self, &sources, threads);
1463
1464 finally:
1465 PyMem_Free(sources.sources);
1466
1467 if (dataBuffers) {
1468 for (i = 0; i < sourceCount; i++) {
1469 PyBuffer_Release(&dataBuffers[i]);
1470 }
1471
1472 PyMem_Free(dataBuffers);
1473 }
723
1474
724 return result;
1475 return result;
725 }
1476 }
726
1477
727 static PyMethodDef ZstdCompressor_methods[] = {
1478 static PyMethodDef ZstdCompressor_methods[] = {
728 { "compress", (PyCFunction)ZstdCompressor_compress,
1479 { "compress", (PyCFunction)ZstdCompressor_compress,
729 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
1480 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
730 { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
1481 { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
731 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
1482 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
732 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
1483 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
733 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
1484 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
734 { "read_from", (PyCFunction)ZstdCompressor_read_from,
1485 { "read_from", (PyCFunction)ZstdCompressor_read_from,
735 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_from__doc__ },
1486 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_from__doc__ },
736 { "write_to", (PyCFunction)ZstdCompressor_write_to,
1487 { "write_to", (PyCFunction)ZstdCompressor_write_to,
737 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_write_to___doc__ },
1488 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_write_to___doc__ },
1489 { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer,
1490 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ },
738 { NULL, NULL }
1491 { NULL, NULL }
739 };
1492 };
740
1493
741 PyTypeObject ZstdCompressorType = {
1494 PyTypeObject ZstdCompressorType = {
742 PyVarObject_HEAD_INIT(NULL, 0)
1495 PyVarObject_HEAD_INIT(NULL, 0)
743 "zstd.ZstdCompressor", /* tp_name */
1496 "zstd.ZstdCompressor", /* tp_name */
744 sizeof(ZstdCompressor), /* tp_basicsize */
1497 sizeof(ZstdCompressor), /* tp_basicsize */
745 0, /* tp_itemsize */
1498 0, /* tp_itemsize */
746 (destructor)ZstdCompressor_dealloc, /* tp_dealloc */
1499 (destructor)ZstdCompressor_dealloc, /* tp_dealloc */
747 0, /* tp_print */
1500 0, /* tp_print */
748 0, /* tp_getattr */
1501 0, /* tp_getattr */
749 0, /* tp_setattr */
1502 0, /* tp_setattr */
750 0, /* tp_compare */
1503 0, /* tp_compare */
751 0, /* tp_repr */
1504 0, /* tp_repr */
752 0, /* tp_as_number */
1505 0, /* tp_as_number */
753 0, /* tp_as_sequence */
1506 0, /* tp_as_sequence */
754 0, /* tp_as_mapping */
1507 0, /* tp_as_mapping */
755 0, /* tp_hash */
1508 0, /* tp_hash */
756 0, /* tp_call */
1509 0, /* tp_call */
757 0, /* tp_str */
1510 0, /* tp_str */
758 0, /* tp_getattro */
1511 0, /* tp_getattro */
759 0, /* tp_setattro */
1512 0, /* tp_setattro */
760 0, /* tp_as_buffer */
1513 0, /* tp_as_buffer */
761 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1514 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
762 ZstdCompressor__doc__, /* tp_doc */
1515 ZstdCompressor__doc__, /* tp_doc */
763 0, /* tp_traverse */
1516 0, /* tp_traverse */
764 0, /* tp_clear */
1517 0, /* tp_clear */
765 0, /* tp_richcompare */
1518 0, /* tp_richcompare */
766 0, /* tp_weaklistoffset */
1519 0, /* tp_weaklistoffset */
767 0, /* tp_iter */
1520 0, /* tp_iter */
768 0, /* tp_iternext */
1521 0, /* tp_iternext */
769 ZstdCompressor_methods, /* tp_methods */
1522 ZstdCompressor_methods, /* tp_methods */
770 0, /* tp_members */
1523 0, /* tp_members */
771 0, /* tp_getset */
1524 0, /* tp_getset */
772 0, /* tp_base */
1525 0, /* tp_base */
773 0, /* tp_dict */
1526 0, /* tp_dict */
774 0, /* tp_descr_get */
1527 0, /* tp_descr_get */
775 0, /* tp_descr_set */
1528 0, /* tp_descr_set */
776 0, /* tp_dictoffset */
1529 0, /* tp_dictoffset */
777 (initproc)ZstdCompressor_init, /* tp_init */
1530 (initproc)ZstdCompressor_init, /* tp_init */
778 0, /* tp_alloc */
1531 0, /* tp_alloc */
779 PyType_GenericNew, /* tp_new */
1532 PyType_GenericNew, /* tp_new */
780 };
1533 };
781
1534
782 void compressor_module_init(PyObject* mod) {
1535 void compressor_module_init(PyObject* mod) {
783 Py_TYPE(&ZstdCompressorType) = &PyType_Type;
1536 Py_TYPE(&ZstdCompressorType) = &PyType_Type;
784 if (PyType_Ready(&ZstdCompressorType) < 0) {
1537 if (PyType_Ready(&ZstdCompressorType) < 0) {
785 return;
1538 return;
786 }
1539 }
787
1540
788 Py_INCREF((PyObject*)&ZstdCompressorType);
1541 Py_INCREF((PyObject*)&ZstdCompressorType);
789 PyModule_AddObject(mod, "ZstdCompressor",
1542 PyModule_AddObject(mod, "ZstdCompressor",
790 (PyObject*)&ZstdCompressorType);
1543 (PyObject*)&ZstdCompressorType);
791 }
1544 }
@@ -1,234 +1,247
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 #define min(a, b) (((a) < (b)) ? (a) : (b))
11 #define min(a, b) (((a) < (b)) ? (a) : (b))
12
12
13 extern PyObject* ZstdError;
13 extern PyObject* ZstdError;
14
14
15 PyDoc_STRVAR(ZstdCompressorIterator__doc__,
15 PyDoc_STRVAR(ZstdCompressorIterator__doc__,
16 "Represents an iterator of compressed data.\n"
16 "Represents an iterator of compressed data.\n"
17 );
17 );
18
18
19 static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator* self) {
19 static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator* self) {
20 Py_XDECREF(self->readResult);
20 Py_XDECREF(self->readResult);
21 Py_XDECREF(self->compressor);
21 Py_XDECREF(self->compressor);
22 Py_XDECREF(self->reader);
22 Py_XDECREF(self->reader);
23
23
24 if (self->buffer) {
24 if (self->buffer) {
25 PyBuffer_Release(self->buffer);
25 PyBuffer_Release(self->buffer);
26 PyMem_FREE(self->buffer);
26 PyMem_FREE(self->buffer);
27 self->buffer = NULL;
27 self->buffer = NULL;
28 }
28 }
29
29
30 if (self->cstream) {
31 ZSTD_freeCStream(self->cstream);
32 self->cstream = NULL;
33 }
34
35 if (self->output.dst) {
30 if (self->output.dst) {
36 PyMem_Free(self->output.dst);
31 PyMem_Free(self->output.dst);
37 self->output.dst = NULL;
32 self->output.dst = NULL;
38 }
33 }
39
34
40 PyObject_Del(self);
35 PyObject_Del(self);
41 }
36 }
42
37
43 static PyObject* ZstdCompressorIterator_iter(PyObject* self) {
38 static PyObject* ZstdCompressorIterator_iter(PyObject* self) {
44 Py_INCREF(self);
39 Py_INCREF(self);
45 return self;
40 return self;
46 }
41 }
47
42
48 static PyObject* ZstdCompressorIterator_iternext(ZstdCompressorIterator* self) {
43 static PyObject* ZstdCompressorIterator_iternext(ZstdCompressorIterator* self) {
49 size_t zresult;
44 size_t zresult;
50 PyObject* readResult = NULL;
45 PyObject* readResult = NULL;
51 PyObject* chunk;
46 PyObject* chunk;
52 char* readBuffer;
47 char* readBuffer;
53 Py_ssize_t readSize = 0;
48 Py_ssize_t readSize = 0;
54 Py_ssize_t bufferRemaining;
49 Py_ssize_t bufferRemaining;
55
50
56 if (self->finishedOutput) {
51 if (self->finishedOutput) {
57 PyErr_SetString(PyExc_StopIteration, "output flushed");
52 PyErr_SetString(PyExc_StopIteration, "output flushed");
58 return NULL;
53 return NULL;
59 }
54 }
60
55
61 feedcompressor:
56 feedcompressor:
62
57
63 /* If we have data left in the input, consume it. */
58 /* If we have data left in the input, consume it. */
64 if (self->input.pos < self->input.size) {
59 if (self->input.pos < self->input.size) {
65 Py_BEGIN_ALLOW_THREADS
60 Py_BEGIN_ALLOW_THREADS
66 zresult = ZSTD_compressStream(self->cstream, &self->output, &self->input);
61 if (self->compressor->mtcctx) {
62 zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
63 &self->output, &self->input);
64 }
65 else {
66 zresult = ZSTD_compressStream(self->compressor->cstream, &self->output,
67 &self->input);
68 }
67 Py_END_ALLOW_THREADS
69 Py_END_ALLOW_THREADS
68
70
69 /* Release the Python object holding the input buffer. */
71 /* Release the Python object holding the input buffer. */
70 if (self->input.pos == self->input.size) {
72 if (self->input.pos == self->input.size) {
71 self->input.src = NULL;
73 self->input.src = NULL;
72 self->input.pos = 0;
74 self->input.pos = 0;
73 self->input.size = 0;
75 self->input.size = 0;
74 Py_DECREF(self->readResult);
76 Py_DECREF(self->readResult);
75 self->readResult = NULL;
77 self->readResult = NULL;
76 }
78 }
77
79
78 if (ZSTD_isError(zresult)) {
80 if (ZSTD_isError(zresult)) {
79 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
81 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
80 return NULL;
82 return NULL;
81 }
83 }
82
84
83 /* If it produced output data, emit it. */
85 /* If it produced output data, emit it. */
84 if (self->output.pos) {
86 if (self->output.pos) {
85 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
87 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
86 self->output.pos = 0;
88 self->output.pos = 0;
87 return chunk;
89 return chunk;
88 }
90 }
89 }
91 }
90
92
91 /* We should never have output data sitting around after a previous call. */
93 /* We should never have output data sitting around after a previous call. */
92 assert(self->output.pos == 0);
94 assert(self->output.pos == 0);
93
95
94 /* The code above should have either emitted a chunk and returned or consumed
96 /* The code above should have either emitted a chunk and returned or consumed
95 the entire input buffer. So the state of the input buffer is not
97 the entire input buffer. So the state of the input buffer is not
96 relevant. */
98 relevant. */
97 if (!self->finishedInput) {
99 if (!self->finishedInput) {
98 if (self->reader) {
100 if (self->reader) {
99 readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
101 readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
100 if (!readResult) {
102 if (!readResult) {
101 PyErr_SetString(ZstdError, "could not read() from source");
103 PyErr_SetString(ZstdError, "could not read() from source");
102 return NULL;
104 return NULL;
103 }
105 }
104
106
105 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
107 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
106 }
108 }
107 else {
109 else {
108 assert(self->buffer && self->buffer->buf);
110 assert(self->buffer && self->buffer->buf);
109
111
110 /* Only support contiguous C arrays. */
112 /* Only support contiguous C arrays. */
111 assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL);
113 assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL);
112 assert(self->buffer->itemsize == 1);
114 assert(self->buffer->itemsize == 1);
113
115
114 readBuffer = (char*)self->buffer->buf + self->bufferOffset;
116 readBuffer = (char*)self->buffer->buf + self->bufferOffset;
115 bufferRemaining = self->buffer->len - self->bufferOffset;
117 bufferRemaining = self->buffer->len - self->bufferOffset;
116 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
118 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
117 self->bufferOffset += readSize;
119 self->bufferOffset += readSize;
118 }
120 }
119
121
120 if (0 == readSize) {
122 if (0 == readSize) {
121 Py_XDECREF(readResult);
123 Py_XDECREF(readResult);
122 self->finishedInput = 1;
124 self->finishedInput = 1;
123 }
125 }
124 else {
126 else {
125 self->readResult = readResult;
127 self->readResult = readResult;
126 }
128 }
127 }
129 }
128
130
129 /* EOF */
131 /* EOF */
130 if (0 == readSize) {
132 if (0 == readSize) {
131 zresult = ZSTD_endStream(self->cstream, &self->output);
133 if (self->compressor->mtcctx) {
134 zresult = ZSTDMT_endStream(self->compressor->mtcctx, &self->output);
135 }
136 else {
137 zresult = ZSTD_endStream(self->compressor->cstream, &self->output);
138 }
132 if (ZSTD_isError(zresult)) {
139 if (ZSTD_isError(zresult)) {
133 PyErr_Format(ZstdError, "error ending compression stream: %s",
140 PyErr_Format(ZstdError, "error ending compression stream: %s",
134 ZSTD_getErrorName(zresult));
141 ZSTD_getErrorName(zresult));
135 return NULL;
142 return NULL;
136 }
143 }
137
144
138 assert(self->output.pos);
145 assert(self->output.pos);
139
146
140 if (0 == zresult) {
147 if (0 == zresult) {
141 self->finishedOutput = 1;
148 self->finishedOutput = 1;
142 }
149 }
143
150
144 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
151 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
145 self->output.pos = 0;
152 self->output.pos = 0;
146 return chunk;
153 return chunk;
147 }
154 }
148
155
149 /* New data from reader. Feed into compressor. */
156 /* New data from reader. Feed into compressor. */
150 self->input.src = readBuffer;
157 self->input.src = readBuffer;
151 self->input.size = readSize;
158 self->input.size = readSize;
152 self->input.pos = 0;
159 self->input.pos = 0;
153
160
154 Py_BEGIN_ALLOW_THREADS
161 Py_BEGIN_ALLOW_THREADS
155 zresult = ZSTD_compressStream(self->cstream, &self->output, &self->input);
162 if (self->compressor->mtcctx) {
163 zresult = ZSTDMT_compressStream(self->compressor->mtcctx, &self->output,
164 &self->input);
165 }
166 else {
167 zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, &self->input);
168 }
156 Py_END_ALLOW_THREADS
169 Py_END_ALLOW_THREADS
157
170
158 /* The input buffer currently points to memory managed by Python
171 /* The input buffer currently points to memory managed by Python
159 (readBuffer). This object was allocated by this function. If it wasn't
172 (readBuffer). This object was allocated by this function. If it wasn't
160 fully consumed, we need to release it in a subsequent function call.
173 fully consumed, we need to release it in a subsequent function call.
161 If it is fully consumed, do that now.
174 If it is fully consumed, do that now.
162 */
175 */
163 if (self->input.pos == self->input.size) {
176 if (self->input.pos == self->input.size) {
164 self->input.src = NULL;
177 self->input.src = NULL;
165 self->input.pos = 0;
178 self->input.pos = 0;
166 self->input.size = 0;
179 self->input.size = 0;
167 Py_XDECREF(self->readResult);
180 Py_XDECREF(self->readResult);
168 self->readResult = NULL;
181 self->readResult = NULL;
169 }
182 }
170
183
171 if (ZSTD_isError(zresult)) {
184 if (ZSTD_isError(zresult)) {
172 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
185 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
173 return NULL;
186 return NULL;
174 }
187 }
175
188
176 assert(self->input.pos <= self->input.size);
189 assert(self->input.pos <= self->input.size);
177
190
178 /* If we didn't write anything, start the process over. */
191 /* If we didn't write anything, start the process over. */
179 if (0 == self->output.pos) {
192 if (0 == self->output.pos) {
180 goto feedcompressor;
193 goto feedcompressor;
181 }
194 }
182
195
183 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
196 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
184 self->output.pos = 0;
197 self->output.pos = 0;
185 return chunk;
198 return chunk;
186 }
199 }
187
200
188 PyTypeObject ZstdCompressorIteratorType = {
201 PyTypeObject ZstdCompressorIteratorType = {
189 PyVarObject_HEAD_INIT(NULL, 0)
202 PyVarObject_HEAD_INIT(NULL, 0)
190 "zstd.ZstdCompressorIterator", /* tp_name */
203 "zstd.ZstdCompressorIterator", /* tp_name */
191 sizeof(ZstdCompressorIterator), /* tp_basicsize */
204 sizeof(ZstdCompressorIterator), /* tp_basicsize */
192 0, /* tp_itemsize */
205 0, /* tp_itemsize */
193 (destructor)ZstdCompressorIterator_dealloc, /* tp_dealloc */
206 (destructor)ZstdCompressorIterator_dealloc, /* tp_dealloc */
194 0, /* tp_print */
207 0, /* tp_print */
195 0, /* tp_getattr */
208 0, /* tp_getattr */
196 0, /* tp_setattr */
209 0, /* tp_setattr */
197 0, /* tp_compare */
210 0, /* tp_compare */
198 0, /* tp_repr */
211 0, /* tp_repr */
199 0, /* tp_as_number */
212 0, /* tp_as_number */
200 0, /* tp_as_sequence */
213 0, /* tp_as_sequence */
201 0, /* tp_as_mapping */
214 0, /* tp_as_mapping */
202 0, /* tp_hash */
215 0, /* tp_hash */
203 0, /* tp_call */
216 0, /* tp_call */
204 0, /* tp_str */
217 0, /* tp_str */
205 0, /* tp_getattro */
218 0, /* tp_getattro */
206 0, /* tp_setattro */
219 0, /* tp_setattro */
207 0, /* tp_as_buffer */
220 0, /* tp_as_buffer */
208 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
221 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
209 ZstdCompressorIterator__doc__, /* tp_doc */
222 ZstdCompressorIterator__doc__, /* tp_doc */
210 0, /* tp_traverse */
223 0, /* tp_traverse */
211 0, /* tp_clear */
224 0, /* tp_clear */
212 0, /* tp_richcompare */
225 0, /* tp_richcompare */
213 0, /* tp_weaklistoffset */
226 0, /* tp_weaklistoffset */
214 ZstdCompressorIterator_iter, /* tp_iter */
227 ZstdCompressorIterator_iter, /* tp_iter */
215 (iternextfunc)ZstdCompressorIterator_iternext, /* tp_iternext */
228 (iternextfunc)ZstdCompressorIterator_iternext, /* tp_iternext */
216 0, /* tp_methods */
229 0, /* tp_methods */
217 0, /* tp_members */
230 0, /* tp_members */
218 0, /* tp_getset */
231 0, /* tp_getset */
219 0, /* tp_base */
232 0, /* tp_base */
220 0, /* tp_dict */
233 0, /* tp_dict */
221 0, /* tp_descr_get */
234 0, /* tp_descr_get */
222 0, /* tp_descr_set */
235 0, /* tp_descr_set */
223 0, /* tp_dictoffset */
236 0, /* tp_dictoffset */
224 0, /* tp_init */
237 0, /* tp_init */
225 0, /* tp_alloc */
238 0, /* tp_alloc */
226 PyType_GenericNew, /* tp_new */
239 PyType_GenericNew, /* tp_new */
227 };
240 };
228
241
229 void compressoriterator_module_init(PyObject* mod) {
242 void compressoriterator_module_init(PyObject* mod) {
230 Py_TYPE(&ZstdCompressorIteratorType) = &PyType_Type;
243 Py_TYPE(&ZstdCompressorIteratorType) = &PyType_Type;
231 if (PyType_Ready(&ZstdCompressorIteratorType) < 0) {
244 if (PyType_Ready(&ZstdCompressorIteratorType) < 0) {
232 return;
245 return;
233 }
246 }
234 }
247 }
@@ -1,87 +1,87
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 static char frame_header[] = {
13 static char frame_header[] = {
14 '\x28',
14 '\x28',
15 '\xb5',
15 '\xb5',
16 '\x2f',
16 '\x2f',
17 '\xfd',
17 '\xfd',
18 };
18 };
19
19
20 void constants_module_init(PyObject* mod) {
20 void constants_module_init(PyObject* mod) {
21 PyObject* version;
21 PyObject* version;
22 PyObject* zstdVersion;
22 PyObject* zstdVersion;
23 PyObject* frameHeader;
23 PyObject* frameHeader;
24
24
25 #if PY_MAJOR_VERSION >= 3
25 #if PY_MAJOR_VERSION >= 3
26 version = PyUnicode_FromString(PYTHON_ZSTANDARD_VERSION);
26 version = PyUnicode_FromString(PYTHON_ZSTANDARD_VERSION);
27 #else
27 #else
28 version = PyString_FromString(PYTHON_ZSTANDARD_VERSION);
28 version = PyString_FromString(PYTHON_ZSTANDARD_VERSION);
29 #endif
29 #endif
30 Py_INCREF(version);
30 Py_INCREF(version);
31 PyModule_AddObject(mod, "__version__", version);
31 PyModule_AddObject(mod, "__version__", version);
32
32
33 ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL);
33 ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL);
34 PyModule_AddObject(mod, "ZstdError", ZstdError);
34 PyModule_AddObject(mod, "ZstdError", ZstdError);
35
35
36 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_FINISH", compressorobj_flush_finish);
36 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_FINISH", compressorobj_flush_finish);
37 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_BLOCK", compressorobj_flush_block);
37 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_BLOCK", compressorobj_flush_block);
38
38
39 /* For now, the version is a simple tuple instead of a dedicated type. */
39 /* For now, the version is a simple tuple instead of a dedicated type. */
40 zstdVersion = PyTuple_New(3);
40 zstdVersion = PyTuple_New(3);
41 PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR));
41 PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR));
42 PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR));
42 PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR));
43 PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE));
43 PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE));
44 Py_IncRef(zstdVersion);
44 Py_INCREF(zstdVersion);
45 PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion);
45 PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion);
46
46
47 frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header));
47 frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header));
48 if (frameHeader) {
48 if (frameHeader) {
49 PyModule_AddObject(mod, "FRAME_HEADER", frameHeader);
49 PyModule_AddObject(mod, "FRAME_HEADER", frameHeader);
50 }
50 }
51 else {
51 else {
52 PyErr_Format(PyExc_ValueError, "could not create frame header object");
52 PyErr_Format(PyExc_ValueError, "could not create frame header object");
53 }
53 }
54
54
55 PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel());
55 PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel());
56 PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE",
56 PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE",
57 (long)ZSTD_CStreamInSize());
57 (long)ZSTD_CStreamInSize());
58 PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_OUTPUT_SIZE",
58 PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_OUTPUT_SIZE",
59 (long)ZSTD_CStreamOutSize());
59 (long)ZSTD_CStreamOutSize());
60 PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_INPUT_SIZE",
60 PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_INPUT_SIZE",
61 (long)ZSTD_DStreamInSize());
61 (long)ZSTD_DStreamInSize());
62 PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE",
62 PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE",
63 (long)ZSTD_DStreamOutSize());
63 (long)ZSTD_DStreamOutSize());
64
64
65 PyModule_AddIntConstant(mod, "MAGIC_NUMBER", ZSTD_MAGICNUMBER);
65 PyModule_AddIntConstant(mod, "MAGIC_NUMBER", ZSTD_MAGICNUMBER);
66 PyModule_AddIntConstant(mod, "WINDOWLOG_MIN", ZSTD_WINDOWLOG_MIN);
66 PyModule_AddIntConstant(mod, "WINDOWLOG_MIN", ZSTD_WINDOWLOG_MIN);
67 PyModule_AddIntConstant(mod, "WINDOWLOG_MAX", ZSTD_WINDOWLOG_MAX);
67 PyModule_AddIntConstant(mod, "WINDOWLOG_MAX", ZSTD_WINDOWLOG_MAX);
68 PyModule_AddIntConstant(mod, "CHAINLOG_MIN", ZSTD_CHAINLOG_MIN);
68 PyModule_AddIntConstant(mod, "CHAINLOG_MIN", ZSTD_CHAINLOG_MIN);
69 PyModule_AddIntConstant(mod, "CHAINLOG_MAX", ZSTD_CHAINLOG_MAX);
69 PyModule_AddIntConstant(mod, "CHAINLOG_MAX", ZSTD_CHAINLOG_MAX);
70 PyModule_AddIntConstant(mod, "HASHLOG_MIN", ZSTD_HASHLOG_MIN);
70 PyModule_AddIntConstant(mod, "HASHLOG_MIN", ZSTD_HASHLOG_MIN);
71 PyModule_AddIntConstant(mod, "HASHLOG_MAX", ZSTD_HASHLOG_MAX);
71 PyModule_AddIntConstant(mod, "HASHLOG_MAX", ZSTD_HASHLOG_MAX);
72 PyModule_AddIntConstant(mod, "HASHLOG3_MAX", ZSTD_HASHLOG3_MAX);
72 PyModule_AddIntConstant(mod, "HASHLOG3_MAX", ZSTD_HASHLOG3_MAX);
73 PyModule_AddIntConstant(mod, "SEARCHLOG_MIN", ZSTD_SEARCHLOG_MIN);
73 PyModule_AddIntConstant(mod, "SEARCHLOG_MIN", ZSTD_SEARCHLOG_MIN);
74 PyModule_AddIntConstant(mod, "SEARCHLOG_MAX", ZSTD_SEARCHLOG_MAX);
74 PyModule_AddIntConstant(mod, "SEARCHLOG_MAX", ZSTD_SEARCHLOG_MAX);
75 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN);
75 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN);
76 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX);
76 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX);
77 PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN);
77 PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN);
78 PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX);
78 PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX);
79
79
80 PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast);
80 PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast);
81 PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast);
81 PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast);
82 PyModule_AddIntConstant(mod, "STRATEGY_GREEDY", ZSTD_greedy);
82 PyModule_AddIntConstant(mod, "STRATEGY_GREEDY", ZSTD_greedy);
83 PyModule_AddIntConstant(mod, "STRATEGY_LAZY", ZSTD_lazy);
83 PyModule_AddIntConstant(mod, "STRATEGY_LAZY", ZSTD_lazy);
84 PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2);
84 PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2);
85 PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2);
85 PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2);
86 PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt);
86 PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt);
87 }
87 }
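The constants registered above surface as plain module attributes. A small illustrative sketch (exact values depend on the bundled zstd library):

    import zstd

    print(zstd.__version__)            # version of the bindings
    print(zstd.ZSTD_VERSION)           # (major, minor, release) of bundled zstd
    print(zstd.MAX_COMPRESSION_LEVEL)  # upper bound for compression level

    # FRAME_HEADER holds the four magic bytes that begin every zstd frame;
    # MAGIC_NUMBER exposes the same value as an integer.
    assert zstd.FRAME_HEADER == b"\x28\xb5\x2f\xfd"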
@@ -1,188 +1,179
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(ZstdDecompressionWriter__doc,
13 PyDoc_STRVAR(ZstdDecompressionWriter__doc,
14 """A context manager used for writing decompressed output.\n"
14 """A context manager used for writing decompressed output.\n"
15 );
15 );
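A usage sketch for this context manager from the Python side. The write_to() factory name is an assumption about this version of the bindings; the writer object it returns is what this file implements.

    import io
    import zstd

    compressed = zstd.ZstdCompressor().compress(b"hello world")

    dctx = zstd.ZstdDecompressor()
    destination = io.BytesIO()

    # write() accepts compressed bytes and sends the decompressed output to
    # `destination`; it must be called inside the `with` block.
    with dctx.write_to(destination) as writer:   # write_to() name is assumed
        writer.write(compressed)

    assert destination.getvalue() == b"hello world"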
16
16
17 static void ZstdDecompressionWriter_dealloc(ZstdDecompressionWriter* self) {
17 static void ZstdDecompressionWriter_dealloc(ZstdDecompressionWriter* self) {
18 Py_XDECREF(self->decompressor);
18 Py_XDECREF(self->decompressor);
19 Py_XDECREF(self->writer);
19 Py_XDECREF(self->writer);
20
20
21 if (self->dstream) {
22 ZSTD_freeDStream(self->dstream);
23 self->dstream = NULL;
24 }
25
26 PyObject_Del(self);
21 PyObject_Del(self);
27 }
22 }
28
23
29 static PyObject* ZstdDecompressionWriter_enter(ZstdDecompressionWriter* self) {
24 static PyObject* ZstdDecompressionWriter_enter(ZstdDecompressionWriter* self) {
30 if (self->entered) {
25 if (self->entered) {
31 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
26 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
32 return NULL;
27 return NULL;
33 }
28 }
34
29
35 self->dstream = DStream_from_ZstdDecompressor(self->decompressor);
30 if (0 != init_dstream(self->decompressor)) {
36 if (!self->dstream) {
37 return NULL;
31 return NULL;
38 }
32 }
39
33
40 self->entered = 1;
34 self->entered = 1;
41
35
42 Py_INCREF(self);
36 Py_INCREF(self);
43 return (PyObject*)self;
37 return (PyObject*)self;
44 }
38 }
45
39
46 static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) {
40 static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) {
47 self->entered = 0;
41 self->entered = 0;
48
42
49 if (self->dstream) {
50 ZSTD_freeDStream(self->dstream);
51 self->dstream = NULL;
52 }
53
54 Py_RETURN_FALSE;
43 Py_RETURN_FALSE;
55 }
44 }
56
45
57 static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) {
46 static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) {
58 if (!self->dstream) {
47 if (!self->decompressor->dstream) {
59 PyErr_SetString(ZstdError, "cannot determine size of inactive decompressor; "
48 PyErr_SetString(ZstdError, "cannot determine size of inactive decompressor; "
60 "call when context manager is active");
49 "call when context manager is active");
61 return NULL;
50 return NULL;
62 }
51 }
63
52
64 return PyLong_FromSize_t(ZSTD_sizeof_DStream(self->dstream));
53 return PyLong_FromSize_t(ZSTD_sizeof_DStream(self->decompressor->dstream));
65 }
54 }
66
55
67 static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args) {
56 static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args) {
68 const char* source;
57 const char* source;
69 Py_ssize_t sourceSize;
58 Py_ssize_t sourceSize;
70 size_t zresult = 0;
59 size_t zresult = 0;
71 ZSTD_inBuffer input;
60 ZSTD_inBuffer input;
72 ZSTD_outBuffer output;
61 ZSTD_outBuffer output;
73 PyObject* res;
62 PyObject* res;
74 Py_ssize_t totalWrite = 0;
63 Py_ssize_t totalWrite = 0;
75
64
76 #if PY_MAJOR_VERSION >= 3
65 #if PY_MAJOR_VERSION >= 3
77 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
66 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
78 #else
67 #else
79 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
68 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
80 #endif
69 #endif
81 return NULL;
70 return NULL;
82 }
71 }
83
72
84 if (!self->entered) {
73 if (!self->entered) {
85 PyErr_SetString(ZstdError, "write must be called from an active context manager");
74 PyErr_SetString(ZstdError, "write must be called from an active context manager");
86 return NULL;
75 return NULL;
87 }
76 }
88
77
78 assert(self->decompressor->dstream);
79
89 output.dst = PyMem_Malloc(self->outSize);
80 output.dst = PyMem_Malloc(self->outSize);
90 if (!output.dst) {
81 if (!output.dst) {
91 return PyErr_NoMemory();
82 return PyErr_NoMemory();
92 }
83 }
93 output.size = self->outSize;
84 output.size = self->outSize;
94 output.pos = 0;
85 output.pos = 0;
95
86
96 input.src = source;
87 input.src = source;
97 input.size = sourceSize;
88 input.size = sourceSize;
98 input.pos = 0;
89 input.pos = 0;
99
90
100 while ((ssize_t)input.pos < sourceSize) {
91 while ((ssize_t)input.pos < sourceSize) {
101 Py_BEGIN_ALLOW_THREADS
92 Py_BEGIN_ALLOW_THREADS
102 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
93 zresult = ZSTD_decompressStream(self->decompressor->dstream, &output, &input);
103 Py_END_ALLOW_THREADS
94 Py_END_ALLOW_THREADS
104
95
105 if (ZSTD_isError(zresult)) {
96 if (ZSTD_isError(zresult)) {
106 PyMem_Free(output.dst);
97 PyMem_Free(output.dst);
107 PyErr_Format(ZstdError, "zstd decompress error: %s",
98 PyErr_Format(ZstdError, "zstd decompress error: %s",
108 ZSTD_getErrorName(zresult));
99 ZSTD_getErrorName(zresult));
109 return NULL;
100 return NULL;
110 }
101 }
111
102
112 if (output.pos) {
103 if (output.pos) {
113 #if PY_MAJOR_VERSION >= 3
104 #if PY_MAJOR_VERSION >= 3
114 res = PyObject_CallMethod(self->writer, "write", "y#",
105 res = PyObject_CallMethod(self->writer, "write", "y#",
115 #else
106 #else
116 res = PyObject_CallMethod(self->writer, "write", "s#",
107 res = PyObject_CallMethod(self->writer, "write", "s#",
117 #endif
108 #endif
118 output.dst, output.pos);
109 output.dst, output.pos);
119 Py_XDECREF(res);
110 Py_XDECREF(res);
120 totalWrite += output.pos;
111 totalWrite += output.pos;
121 output.pos = 0;
112 output.pos = 0;
122 }
113 }
123 }
114 }
124
115
125 PyMem_Free(output.dst);
116 PyMem_Free(output.dst);
126
117
127 return PyLong_FromSsize_t(totalWrite);
118 return PyLong_FromSsize_t(totalWrite);
128 }
119 }
129
120
130 static PyMethodDef ZstdDecompressionWriter_methods[] = {
121 static PyMethodDef ZstdDecompressionWriter_methods[] = {
131 { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
122 { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
132 PyDoc_STR("Enter a decompression context.") },
123 PyDoc_STR("Enter a decompression context.") },
133 { "__exit__", (PyCFunction)ZstdDecompressionWriter_exit, METH_VARARGS,
124 { "__exit__", (PyCFunction)ZstdDecompressionWriter_exit, METH_VARARGS,
134 PyDoc_STR("Exit a decompression context.") },
125 PyDoc_STR("Exit a decompression context.") },
135 { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS,
126 { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS,
136 PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") },
127 PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") },
137 { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS,
128 { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS,
138 PyDoc_STR("Decompress data") },
129 PyDoc_STR("Decompress data") },
139 { NULL, NULL }
130 { NULL, NULL }
140 };
131 };
141
132
142 PyTypeObject ZstdDecompressionWriterType = {
133 PyTypeObject ZstdDecompressionWriterType = {
143 PyVarObject_HEAD_INIT(NULL, 0)
134 PyVarObject_HEAD_INIT(NULL, 0)
144 "zstd.ZstdDecompressionWriter", /* tp_name */
135 "zstd.ZstdDecompressionWriter", /* tp_name */
145 sizeof(ZstdDecompressionWriter),/* tp_basicsize */
136 sizeof(ZstdDecompressionWriter),/* tp_basicsize */
146 0, /* tp_itemsize */
137 0, /* tp_itemsize */
147 (destructor)ZstdDecompressionWriter_dealloc, /* tp_dealloc */
138 (destructor)ZstdDecompressionWriter_dealloc, /* tp_dealloc */
148 0, /* tp_print */
139 0, /* tp_print */
149 0, /* tp_getattr */
140 0, /* tp_getattr */
150 0, /* tp_setattr */
141 0, /* tp_setattr */
151 0, /* tp_compare */
142 0, /* tp_compare */
152 0, /* tp_repr */
143 0, /* tp_repr */
153 0, /* tp_as_number */
144 0, /* tp_as_number */
154 0, /* tp_as_sequence */
145 0, /* tp_as_sequence */
155 0, /* tp_as_mapping */
146 0, /* tp_as_mapping */
156 0, /* tp_hash */
147 0, /* tp_hash */
157 0, /* tp_call */
148 0, /* tp_call */
158 0, /* tp_str */
149 0, /* tp_str */
159 0, /* tp_getattro */
150 0, /* tp_getattro */
160 0, /* tp_setattro */
151 0, /* tp_setattro */
161 0, /* tp_as_buffer */
152 0, /* tp_as_buffer */
162 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
153 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
163 ZstdDecompressionWriter__doc, /* tp_doc */
154 ZstdDecompressionWriter__doc, /* tp_doc */
164 0, /* tp_traverse */
155 0, /* tp_traverse */
165 0, /* tp_clear */
156 0, /* tp_clear */
166 0, /* tp_richcompare */
157 0, /* tp_richcompare */
167 0, /* tp_weaklistoffset */
158 0, /* tp_weaklistoffset */
168 0, /* tp_iter */
159 0, /* tp_iter */
169 0, /* tp_iternext */
160 0, /* tp_iternext */
170 ZstdDecompressionWriter_methods,/* tp_methods */
161 ZstdDecompressionWriter_methods,/* tp_methods */
171 0, /* tp_members */
162 0, /* tp_members */
172 0, /* tp_getset */
163 0, /* tp_getset */
173 0, /* tp_base */
164 0, /* tp_base */
174 0, /* tp_dict */
165 0, /* tp_dict */
175 0, /* tp_descr_get */
166 0, /* tp_descr_get */
176 0, /* tp_descr_set */
167 0, /* tp_descr_set */
177 0, /* tp_dictoffset */
168 0, /* tp_dictoffset */
178 0, /* tp_init */
169 0, /* tp_init */
179 0, /* tp_alloc */
170 0, /* tp_alloc */
180 PyType_GenericNew, /* tp_new */
171 PyType_GenericNew, /* tp_new */
181 };
172 };
182
173
183 void decompressionwriter_module_init(PyObject* mod) {
174 void decompressionwriter_module_init(PyObject* mod) {
184 Py_TYPE(&ZstdDecompressionWriterType) = &PyType_Type;
175 Py_TYPE(&ZstdDecompressionWriterType) = &PyType_Type;
185 if (PyType_Ready(&ZstdDecompressionWriterType) < 0) {
176 if (PyType_Ready(&ZstdDecompressionWriterType) < 0) {
186 return;
177 return;
187 }
178 }
188 }
179 }
@@ -1,170 +1,167
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(DecompressionObj__doc__,
13 PyDoc_STRVAR(DecompressionObj__doc__,
14 "Perform decompression using a standard library compatible API.\n"
14 "Perform decompression using a standard library compatible API.\n"
15 );
15 );
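A minimal sketch of the incremental feeding this object supports, assuming it is obtained via ZstdDecompressor.decompressobj() (documented later in this diff):

    import zstd

    data = b"payload" * 200
    frame = zstd.ZstdCompressor().compress(data)

    dobj = zstd.ZstdDecompressor().decompressobj()

    # Feed the frame in small pieces; each call may return b"" or a partial
    # result. A given decompressobj can only consume a single frame.
    out = b"".join(dobj.decompress(frame[i:i + 64])
                   for i in range(0, len(frame), 64))
    assert out == data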
16
16
17 static void DecompressionObj_dealloc(ZstdDecompressionObj* self) {
17 static void DecompressionObj_dealloc(ZstdDecompressionObj* self) {
18 if (self->dstream) {
19 ZSTD_freeDStream(self->dstream);
20 self->dstream = NULL;
21 }
22
23 Py_XDECREF(self->decompressor);
18 Py_XDECREF(self->decompressor);
24
19
25 PyObject_Del(self);
20 PyObject_Del(self);
26 }
21 }
27
22
28 static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args) {
23 static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args) {
29 const char* source;
24 const char* source;
30 Py_ssize_t sourceSize;
25 Py_ssize_t sourceSize;
31 size_t zresult;
26 size_t zresult;
32 ZSTD_inBuffer input;
27 ZSTD_inBuffer input;
33 ZSTD_outBuffer output;
28 ZSTD_outBuffer output;
34 size_t outSize = ZSTD_DStreamOutSize();
29 size_t outSize = ZSTD_DStreamOutSize();
35 PyObject* result = NULL;
30 PyObject* result = NULL;
36 Py_ssize_t resultSize = 0;
31 Py_ssize_t resultSize = 0;
37
32
33 /* Constructor should ensure stream is populated. */
34 assert(self->decompressor->dstream);
35
38 if (self->finished) {
36 if (self->finished) {
39 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
37 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
40 return NULL;
38 return NULL;
41 }
39 }
42
40
43 #if PY_MAJOR_VERSION >= 3
41 #if PY_MAJOR_VERSION >= 3
44 if (!PyArg_ParseTuple(args, "y#:decompress",
42 if (!PyArg_ParseTuple(args, "y#:decompress",
45 #else
43 #else
46 if (!PyArg_ParseTuple(args, "s#:decompress",
44 if (!PyArg_ParseTuple(args, "s#:decompress",
47 #endif
45 #endif
48 &source, &sourceSize)) {
46 &source, &sourceSize)) {
49 return NULL;
47 return NULL;
50 }
48 }
51
49
52 input.src = source;
50 input.src = source;
53 input.size = sourceSize;
51 input.size = sourceSize;
54 input.pos = 0;
52 input.pos = 0;
55
53
56 output.dst = PyMem_Malloc(outSize);
54 output.dst = PyMem_Malloc(outSize);
57 if (!output.dst) {
55 if (!output.dst) {
58 PyErr_NoMemory();
56 PyErr_NoMemory();
59 return NULL;
57 return NULL;
60 }
58 }
61 output.size = outSize;
59 output.size = outSize;
62 output.pos = 0;
60 output.pos = 0;
63
61
64 /* Read input until exhausted. */
62 /* Read input until exhausted. */
65 while (input.pos < input.size) {
63 while (input.pos < input.size) {
66 Py_BEGIN_ALLOW_THREADS
64 Py_BEGIN_ALLOW_THREADS
67 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
65 zresult = ZSTD_decompressStream(self->decompressor->dstream, &output, &input);
68 Py_END_ALLOW_THREADS
66 Py_END_ALLOW_THREADS
69
67
70 if (ZSTD_isError(zresult)) {
68 if (ZSTD_isError(zresult)) {
71 PyErr_Format(ZstdError, "zstd decompressor error: %s",
69 PyErr_Format(ZstdError, "zstd decompressor error: %s",
72 ZSTD_getErrorName(zresult));
70 ZSTD_getErrorName(zresult));
73 result = NULL;
71 result = NULL;
74 goto finally;
72 goto finally;
75 }
73 }
76
74
77 if (0 == zresult) {
75 if (0 == zresult) {
78 self->finished = 1;
76 self->finished = 1;
79 }
77 }
80
78
81 if (output.pos) {
79 if (output.pos) {
82 if (result) {
80 if (result) {
83 resultSize = PyBytes_GET_SIZE(result);
81 resultSize = PyBytes_GET_SIZE(result);
84 if (-1 == _PyBytes_Resize(&result, resultSize + output.pos)) {
82 if (-1 == _PyBytes_Resize(&result, resultSize + output.pos)) {
85 goto except;
83 goto except;
86 }
84 }
87
85
88 memcpy(PyBytes_AS_STRING(result) + resultSize,
86 memcpy(PyBytes_AS_STRING(result) + resultSize,
89 output.dst, output.pos);
87 output.dst, output.pos);
90 }
88 }
91 else {
89 else {
92 result = PyBytes_FromStringAndSize(output.dst, output.pos);
90 result = PyBytes_FromStringAndSize(output.dst, output.pos);
93 if (!result) {
91 if (!result) {
94 goto except;
92 goto except;
95 }
93 }
96 }
94 }
97
95
98 output.pos = 0;
96 output.pos = 0;
99 }
97 }
100 }
98 }
101
99
102 if (!result) {
100 if (!result) {
103 result = PyBytes_FromString("");
101 result = PyBytes_FromString("");
104 }
102 }
105
103
106 goto finally;
104 goto finally;
107
105
108 except:
106 except:
109 Py_DecRef(result);
107 Py_CLEAR(result);
110 result = NULL;
111
108
112 finally:
109 finally:
113 PyMem_Free(output.dst);
110 PyMem_Free(output.dst);
114
111
115 return result;
112 return result;
116 }
113 }
117
114
118 static PyMethodDef DecompressionObj_methods[] = {
115 static PyMethodDef DecompressionObj_methods[] = {
119 { "decompress", (PyCFunction)DecompressionObj_decompress,
116 { "decompress", (PyCFunction)DecompressionObj_decompress,
120 METH_VARARGS, PyDoc_STR("decompress data") },
117 METH_VARARGS, PyDoc_STR("decompress data") },
121 { NULL, NULL }
118 { NULL, NULL }
122 };
119 };
123
120
124 PyTypeObject ZstdDecompressionObjType = {
121 PyTypeObject ZstdDecompressionObjType = {
125 PyVarObject_HEAD_INIT(NULL, 0)
122 PyVarObject_HEAD_INIT(NULL, 0)
126 "zstd.ZstdDecompressionObj", /* tp_name */
123 "zstd.ZstdDecompressionObj", /* tp_name */
127 sizeof(ZstdDecompressionObj), /* tp_basicsize */
124 sizeof(ZstdDecompressionObj), /* tp_basicsize */
128 0, /* tp_itemsize */
125 0, /* tp_itemsize */
129 (destructor)DecompressionObj_dealloc, /* tp_dealloc */
126 (destructor)DecompressionObj_dealloc, /* tp_dealloc */
130 0, /* tp_print */
127 0, /* tp_print */
131 0, /* tp_getattr */
128 0, /* tp_getattr */
132 0, /* tp_setattr */
129 0, /* tp_setattr */
133 0, /* tp_compare */
130 0, /* tp_compare */
134 0, /* tp_repr */
131 0, /* tp_repr */
135 0, /* tp_as_number */
132 0, /* tp_as_number */
136 0, /* tp_as_sequence */
133 0, /* tp_as_sequence */
137 0, /* tp_as_mapping */
134 0, /* tp_as_mapping */
138 0, /* tp_hash */
135 0, /* tp_hash */
139 0, /* tp_call */
136 0, /* tp_call */
140 0, /* tp_str */
137 0, /* tp_str */
141 0, /* tp_getattro */
138 0, /* tp_getattro */
142 0, /* tp_setattro */
139 0, /* tp_setattro */
143 0, /* tp_as_buffer */
140 0, /* tp_as_buffer */
144 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
141 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
145 DecompressionObj__doc__, /* tp_doc */
142 DecompressionObj__doc__, /* tp_doc */
146 0, /* tp_traverse */
143 0, /* tp_traverse */
147 0, /* tp_clear */
144 0, /* tp_clear */
148 0, /* tp_richcompare */
145 0, /* tp_richcompare */
149 0, /* tp_weaklistoffset */
146 0, /* tp_weaklistoffset */
150 0, /* tp_iter */
147 0, /* tp_iter */
151 0, /* tp_iternext */
148 0, /* tp_iternext */
152 DecompressionObj_methods, /* tp_methods */
149 DecompressionObj_methods, /* tp_methods */
153 0, /* tp_members */
150 0, /* tp_members */
154 0, /* tp_getset */
151 0, /* tp_getset */
155 0, /* tp_base */
152 0, /* tp_base */
156 0, /* tp_dict */
153 0, /* tp_dict */
157 0, /* tp_descr_get */
154 0, /* tp_descr_get */
158 0, /* tp_descr_set */
155 0, /* tp_descr_set */
159 0, /* tp_dictoffset */
156 0, /* tp_dictoffset */
160 0, /* tp_init */
157 0, /* tp_init */
161 0, /* tp_alloc */
158 0, /* tp_alloc */
162 PyType_GenericNew, /* tp_new */
159 PyType_GenericNew, /* tp_new */
163 };
160 };
164
161
165 void decompressobj_module_init(PyObject* module) {
162 void decompressobj_module_init(PyObject* module) {
166 Py_TYPE(&ZstdDecompressionObjType) = &PyType_Type;
163 Py_TYPE(&ZstdDecompressionObjType) = &PyType_Type;
167 if (PyType_Ready(&ZstdDecompressionObjType) < 0) {
164 if (PyType_Ready(&ZstdDecompressionObjType) < 0) {
168 return;
165 return;
169 }
166 }
170 }
167 }
@@ -1,845 +1,1580
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10 #include "pool.h"
10
11
11 extern PyObject* ZstdError;
12 extern PyObject* ZstdError;
12
13
13 ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor) {
14 /**
14 ZSTD_DStream* dstream;
15 * Ensure the ZSTD_DStream on a ZstdDecompressor is initialized and reset.
16 *
17 * This should be called before starting a decompression operation with a
18 * ZSTD_DStream on a ZstdDecompressor.
19 */
20 int init_dstream(ZstdDecompressor* decompressor) {
15 void* dictData = NULL;
21 void* dictData = NULL;
16 size_t dictSize = 0;
22 size_t dictSize = 0;
17 size_t zresult;
23 size_t zresult;
18
24
19 dstream = ZSTD_createDStream();
25 /* Simple case of dstream already exists. Just reset it. */
20 if (!dstream) {
26 if (decompressor->dstream) {
27 zresult = ZSTD_resetDStream(decompressor->dstream);
28 if (ZSTD_isError(zresult)) {
29 PyErr_Format(ZstdError, "could not reset DStream: %s",
30 ZSTD_getErrorName(zresult));
31 return -1;
32 }
33
34 return 0;
35 }
36
37 decompressor->dstream = ZSTD_createDStream();
38 if (!decompressor->dstream) {
21 PyErr_SetString(ZstdError, "could not create DStream");
39 PyErr_SetString(ZstdError, "could not create DStream");
22 return NULL;
40 return -1;
23 }
41 }
24
42
25 if (decompressor->dict) {
43 if (decompressor->dict) {
26 dictData = decompressor->dict->dictData;
44 dictData = decompressor->dict->dictData;
27 dictSize = decompressor->dict->dictSize;
45 dictSize = decompressor->dict->dictSize;
28 }
46 }
29
47
30 if (dictData) {
48 if (dictData) {
31 zresult = ZSTD_initDStream_usingDict(dstream, dictData, dictSize);
49 zresult = ZSTD_initDStream_usingDict(decompressor->dstream, dictData, dictSize);
32 }
50 }
33 else {
51 else {
34 zresult = ZSTD_initDStream(dstream);
52 zresult = ZSTD_initDStream(decompressor->dstream);
35 }
53 }
36
54
37 if (ZSTD_isError(zresult)) {
55 if (ZSTD_isError(zresult)) {
56 /* Don't leave a reference to an invalid object. */
57 ZSTD_freeDStream(decompressor->dstream);
58 decompressor->dstream = NULL;
59
38 PyErr_Format(ZstdError, "could not initialize DStream: %s",
60 PyErr_Format(ZstdError, "could not initialize DStream: %s",
39 ZSTD_getErrorName(zresult));
61 ZSTD_getErrorName(zresult));
40 return NULL;
62 return -1;
41 }
63 }
42
64
43 return dstream;
65 return 0;
44 }
66 }
45
67
46 PyDoc_STRVAR(Decompressor__doc__,
68 PyDoc_STRVAR(Decompressor__doc__,
47 "ZstdDecompressor(dict_data=None)\n"
69 "ZstdDecompressor(dict_data=None)\n"
48 "\n"
70 "\n"
49 "Create an object used to perform Zstandard decompression.\n"
71 "Create an object used to perform Zstandard decompression.\n"
50 "\n"
72 "\n"
51 "An instance can perform multiple decompression operations."
73 "An instance can perform multiple decompression operations."
52 );
74 );
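A brief construction sketch. The Python-level name zstd.ZstdCompressionDict is assumed to correspond to the ZstdCompressionDictType accepted below, and "dictionary.bin" is a placeholder path.

    import zstd

    # A plain decompressor; the same instance can be reused for many operations.
    dctx = zstd.ZstdDecompressor()

    # With a prebuilt dictionary (assumed constructor wrapping raw dict bytes).
    with open("dictionary.bin", "rb") as fh:
        dict_data = zstd.ZstdCompressionDict(fh.read())
    dctx_with_dict = zstd.ZstdDecompressor(dict_data=dict_data)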
53
75
54 static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
76 static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
55 static char* kwlist[] = {
77 static char* kwlist[] = {
56 "dict_data",
78 "dict_data",
57 NULL
79 NULL
58 };
80 };
59
81
60 ZstdCompressionDict* dict = NULL;
82 ZstdCompressionDict* dict = NULL;
61
83
62 self->dctx = NULL;
84 self->dctx = NULL;
63 self->dict = NULL;
85 self->dict = NULL;
64 self->ddict = NULL;
86 self->ddict = NULL;
65
87
66 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist,
88 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist,
67 &ZstdCompressionDictType, &dict)) {
89 &ZstdCompressionDictType, &dict)) {
68 return -1;
90 return -1;
69 }
91 }
70
92
71 /* TODO lazily initialize the reference ZSTD_DCtx on first use since
93 /* TODO lazily initialize the reference ZSTD_DCtx on first use since
72 not all instances of ZstdDecompressor will use a ZSTD_DCtx. */
94 not all instances of ZstdDecompressor will use a ZSTD_DCtx. */
73 self->dctx = ZSTD_createDCtx();
95 self->dctx = ZSTD_createDCtx();
74 if (!self->dctx) {
96 if (!self->dctx) {
75 PyErr_NoMemory();
97 PyErr_NoMemory();
76 goto except;
98 goto except;
77 }
99 }
78
100
79 if (dict) {
101 if (dict) {
80 self->dict = dict;
102 self->dict = dict;
81 Py_INCREF(dict);
103 Py_INCREF(dict);
82 }
104 }
83
105
84 return 0;
106 return 0;
85
107
86 except:
108 except:
87 if (self->dctx) {
109 if (self->dctx) {
88 ZSTD_freeDCtx(self->dctx);
110 ZSTD_freeDCtx(self->dctx);
89 self->dctx = NULL;
111 self->dctx = NULL;
90 }
112 }
91
113
92 return -1;
114 return -1;
93 }
115 }
94
116
95 static void Decompressor_dealloc(ZstdDecompressor* self) {
117 static void Decompressor_dealloc(ZstdDecompressor* self) {
96 if (self->dctx) {
118 Py_CLEAR(self->dict);
97 ZSTD_freeDCtx(self->dctx);
98 }
99
100 Py_XDECREF(self->dict);
101
119
102 if (self->ddict) {
120 if (self->ddict) {
103 ZSTD_freeDDict(self->ddict);
121 ZSTD_freeDDict(self->ddict);
104 self->ddict = NULL;
122 self->ddict = NULL;
105 }
123 }
106
124
125 if (self->dstream) {
126 ZSTD_freeDStream(self->dstream);
127 self->dstream = NULL;
128 }
129
130 if (self->dctx) {
131 ZSTD_freeDCtx(self->dctx);
132 self->dctx = NULL;
133 }
134
107 PyObject_Del(self);
135 PyObject_Del(self);
108 }
136 }
109
137
110 PyDoc_STRVAR(Decompressor_copy_stream__doc__,
138 PyDoc_STRVAR(Decompressor_copy_stream__doc__,
111 "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n"
139 "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n"
112 "\n"
140 "\n"
113 "Compressed data will be read from ``ifh``, decompressed, and written to\n"
141 "Compressed data will be read from ``ifh``, decompressed, and written to\n"
114 "``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n"
142 "``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n"
115 "``write(data)`` method.\n"
143 "``write(data)`` method.\n"
116 "\n"
144 "\n"
117 "The optional ``read_size`` and ``write_size`` arguments control the chunk\n"
145 "The optional ``read_size`` and ``write_size`` arguments control the chunk\n"
118 "size of data that is ``read()`` and ``write()`` between streams. They default\n"
146 "size of data that is ``read()`` and ``write()`` between streams. They default\n"
119 "to the default input and output sizes of zstd decompressor streams.\n"
147 "to the default input and output sizes of zstd decompressor streams.\n"
120 );
148 );
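A usage sketch for copy_stream(); the file names are placeholders.

    import zstd

    dctx = zstd.ZstdDecompressor()

    # Streams data between file objects without holding the whole payload in
    # memory; returns a (bytes_read, bytes_written) tuple.
    with open("input.zst", "rb") as ifh, open("output.bin", "wb") as ofh:
        read_count, written_count = dctx.copy_stream(ifh, ofh)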
121
149
122 static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
150 static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
123 static char* kwlist[] = {
151 static char* kwlist[] = {
124 "ifh",
152 "ifh",
125 "ofh",
153 "ofh",
126 "read_size",
154 "read_size",
127 "write_size",
155 "write_size",
128 NULL
156 NULL
129 };
157 };
130
158
131 PyObject* source;
159 PyObject* source;
132 PyObject* dest;
160 PyObject* dest;
133 size_t inSize = ZSTD_DStreamInSize();
161 size_t inSize = ZSTD_DStreamInSize();
134 size_t outSize = ZSTD_DStreamOutSize();
162 size_t outSize = ZSTD_DStreamOutSize();
135 ZSTD_DStream* dstream;
136 ZSTD_inBuffer input;
163 ZSTD_inBuffer input;
137 ZSTD_outBuffer output;
164 ZSTD_outBuffer output;
138 Py_ssize_t totalRead = 0;
165 Py_ssize_t totalRead = 0;
139 Py_ssize_t totalWrite = 0;
166 Py_ssize_t totalWrite = 0;
140 char* readBuffer;
167 char* readBuffer;
141 Py_ssize_t readSize;
168 Py_ssize_t readSize;
142 PyObject* readResult;
169 PyObject* readResult;
143 PyObject* res = NULL;
170 PyObject* res = NULL;
144 size_t zresult = 0;
171 size_t zresult = 0;
145 PyObject* writeResult;
172 PyObject* writeResult;
146 PyObject* totalReadPy;
173 PyObject* totalReadPy;
147 PyObject* totalWritePy;
174 PyObject* totalWritePy;
148
175
149 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
176 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
150 &source, &dest, &inSize, &outSize)) {
177 &source, &dest, &inSize, &outSize)) {
151 return NULL;
178 return NULL;
152 }
179 }
153
180
154 if (!PyObject_HasAttrString(source, "read")) {
181 if (!PyObject_HasAttrString(source, "read")) {
155 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
182 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
156 return NULL;
183 return NULL;
157 }
184 }
158
185
159 if (!PyObject_HasAttrString(dest, "write")) {
186 if (!PyObject_HasAttrString(dest, "write")) {
160 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
187 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
161 return NULL;
188 return NULL;
162 }
189 }
163
190
164 /* Prevent free on uninitialized memory in finally. */
191 /* Prevent free on uninitialized memory in finally. */
165 output.dst = NULL;
192 output.dst = NULL;
166
193
167 dstream = DStream_from_ZstdDecompressor(self);
194 if (0 != init_dstream(self)) {
168 if (!dstream) {
169 res = NULL;
195 res = NULL;
170 goto finally;
196 goto finally;
171 }
197 }
172
198
173 output.dst = PyMem_Malloc(outSize);
199 output.dst = PyMem_Malloc(outSize);
174 if (!output.dst) {
200 if (!output.dst) {
175 PyErr_NoMemory();
201 PyErr_NoMemory();
176 res = NULL;
202 res = NULL;
177 goto finally;
203 goto finally;
178 }
204 }
179 output.size = outSize;
205 output.size = outSize;
180 output.pos = 0;
206 output.pos = 0;
181
207
182 /* Read source stream until EOF */
208 /* Read source stream until EOF */
183 while (1) {
209 while (1) {
184 readResult = PyObject_CallMethod(source, "read", "n", inSize);
210 readResult = PyObject_CallMethod(source, "read", "n", inSize);
185 if (!readResult) {
211 if (!readResult) {
186 PyErr_SetString(ZstdError, "could not read() from source");
212 PyErr_SetString(ZstdError, "could not read() from source");
187 goto finally;
213 goto finally;
188 }
214 }
189
215
190 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
216 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
191
217
192 /* If no data was read, we're at EOF. */
218 /* If no data was read, we're at EOF. */
193 if (0 == readSize) {
219 if (0 == readSize) {
194 break;
220 break;
195 }
221 }
196
222
197 totalRead += readSize;
223 totalRead += readSize;
198
224
199 /* Send data to decompressor */
225 /* Send data to decompressor */
200 input.src = readBuffer;
226 input.src = readBuffer;
201 input.size = readSize;
227 input.size = readSize;
202 input.pos = 0;
228 input.pos = 0;
203
229
204 while (input.pos < input.size) {
230 while (input.pos < input.size) {
205 Py_BEGIN_ALLOW_THREADS
231 Py_BEGIN_ALLOW_THREADS
206 zresult = ZSTD_decompressStream(dstream, &output, &input);
232 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
207 Py_END_ALLOW_THREADS
233 Py_END_ALLOW_THREADS
208
234
209 if (ZSTD_isError(zresult)) {
235 if (ZSTD_isError(zresult)) {
210 PyErr_Format(ZstdError, "zstd decompressor error: %s",
236 PyErr_Format(ZstdError, "zstd decompressor error: %s",
211 ZSTD_getErrorName(zresult));
237 ZSTD_getErrorName(zresult));
212 res = NULL;
238 res = NULL;
213 goto finally;
239 goto finally;
214 }
240 }
215
241
216 if (output.pos) {
242 if (output.pos) {
217 #if PY_MAJOR_VERSION >= 3
243 #if PY_MAJOR_VERSION >= 3
218 writeResult = PyObject_CallMethod(dest, "write", "y#",
244 writeResult = PyObject_CallMethod(dest, "write", "y#",
219 #else
245 #else
220 writeResult = PyObject_CallMethod(dest, "write", "s#",
246 writeResult = PyObject_CallMethod(dest, "write", "s#",
221 #endif
247 #endif
222 output.dst, output.pos);
248 output.dst, output.pos);
223
249
224 Py_XDECREF(writeResult);
250 Py_XDECREF(writeResult);
225 totalWrite += output.pos;
251 totalWrite += output.pos;
226 output.pos = 0;
252 output.pos = 0;
227 }
253 }
228 }
254 }
229 }
255 }
230
256
231 /* Source stream is exhausted. Finish up. */
257 /* Source stream is exhausted. Finish up. */
232
258
233 ZSTD_freeDStream(dstream);
234 dstream = NULL;
235
236 totalReadPy = PyLong_FromSsize_t(totalRead);
259 totalReadPy = PyLong_FromSsize_t(totalRead);
237 totalWritePy = PyLong_FromSsize_t(totalWrite);
260 totalWritePy = PyLong_FromSsize_t(totalWrite);
238 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
261 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
239 Py_DecRef(totalReadPy);
262 Py_DECREF(totalReadPy);
240 Py_DecRef(totalWritePy);
263 Py_DECREF(totalWritePy);
241
264
242 finally:
265 finally:
243 if (output.dst) {
266 if (output.dst) {
244 PyMem_Free(output.dst);
267 PyMem_Free(output.dst);
245 }
268 }
246
269
247 if (dstream) {
248 ZSTD_freeDStream(dstream);
249 }
250
251 return res;
270 return res;
252 }
271 }
253
272
254 PyDoc_STRVAR(Decompressor_decompress__doc__,
273 PyDoc_STRVAR(Decompressor_decompress__doc__,
255 "decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n"
274 "decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n"
256 "\n"
275 "\n"
257 "This method will decompress the entirety of the argument and return the\n"
276 "This method will decompress the entirety of the argument and return the\n"
258 "result.\n"
277 "result.\n"
259 "\n"
278 "\n"
260 "The input bytes are expected to contain a full Zstandard frame (something\n"
279 "The input bytes are expected to contain a full Zstandard frame (something\n"
261 "compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n"
280 "compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n"
262 "not contain a full frame, an exception will be raised.\n"
281 "not contain a full frame, an exception will be raised.\n"
263 "\n"
282 "\n"
264 "If the frame header of the compressed data does not contain the content size\n"
283 "If the frame header of the compressed data does not contain the content size\n"
265 "``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n"
284 "``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n"
266 "allocation of size ``max_output_size`` will be performed and an attempt will\n"
285 "allocation of size ``max_output_size`` will be performed and an attempt will\n"
267 "be made to perform decompression into that buffer. If the buffer is too\n"
286 "be made to perform decompression into that buffer. If the buffer is too\n"
268 "small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n"
287 "small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n"
269 "be resized if it is too large.\n"
288 "be resized if it is too large.\n"
270 "\n"
289 "\n"
271 "Uncompressed data could be much larger than compressed data. As a result,\n"
290 "Uncompressed data could be much larger than compressed data. As a result,\n"
272 "calling this function could result in a very large memory allocation being\n"
291 "calling this function could result in a very large memory allocation being\n"
273 "performed to hold the uncompressed data. Therefore it is **highly**\n"
292 "performed to hold the uncompressed data. Therefore it is **highly**\n"
274 "recommended to use a streaming decompression method instead of this one.\n"
293 "recommended to use a streaming decompression method instead of this one.\n"
275 );
294 );
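A usage sketch covering both cases described above. The write_content_size flag is assumed to be the compressor option that embeds the decompressed size in the frame header, which decompress() relies on when no max_output_size is given.

    import zstd

    frame = zstd.ZstdCompressor(write_content_size=True).compress(b"x" * 4096)

    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress(frame) == b"x" * 4096

    # If the frame header lacked a content size, a cap would be required:
    #   dctx.decompress(frame_without_size, max_output_size=1048576)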
276
295
277 PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
296 PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
278 static char* kwlist[] = {
297 static char* kwlist[] = {
279 "data",
298 "data",
280 "max_output_size",
299 "max_output_size",
281 NULL
300 NULL
282 };
301 };
283
302
284 const char* source;
303 const char* source;
285 Py_ssize_t sourceSize;
304 Py_ssize_t sourceSize;
286 Py_ssize_t maxOutputSize = 0;
305 Py_ssize_t maxOutputSize = 0;
287 unsigned long long decompressedSize;
306 unsigned long long decompressedSize;
288 size_t destCapacity;
307 size_t destCapacity;
289 PyObject* result = NULL;
308 PyObject* result = NULL;
290 void* dictData = NULL;
309 void* dictData = NULL;
291 size_t dictSize = 0;
310 size_t dictSize = 0;
292 size_t zresult;
311 size_t zresult;
293
312
294 #if PY_MAJOR_VERSION >= 3
313 #if PY_MAJOR_VERSION >= 3
295 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n:decompress",
314 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n:decompress",
296 #else
315 #else
297 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n:decompress",
316 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n:decompress",
298 #endif
317 #endif
299 kwlist, &source, &sourceSize, &maxOutputSize)) {
318 kwlist, &source, &sourceSize, &maxOutputSize)) {
300 return NULL;
319 return NULL;
301 }
320 }
302
321
303 if (self->dict) {
322 if (self->dict) {
304 dictData = self->dict->dictData;
323 dictData = self->dict->dictData;
305 dictSize = self->dict->dictSize;
324 dictSize = self->dict->dictSize;
306 }
325 }
307
326
308 if (dictData && !self->ddict) {
327 if (dictData && !self->ddict) {
309 Py_BEGIN_ALLOW_THREADS
328 Py_BEGIN_ALLOW_THREADS
310 self->ddict = ZSTD_createDDict_byReference(dictData, dictSize);
329 self->ddict = ZSTD_createDDict_byReference(dictData, dictSize);
311 Py_END_ALLOW_THREADS
330 Py_END_ALLOW_THREADS
312
331
313 if (!self->ddict) {
332 if (!self->ddict) {
314 PyErr_SetString(ZstdError, "could not create decompression dict");
333 PyErr_SetString(ZstdError, "could not create decompression dict");
315 return NULL;
334 return NULL;
316 }
335 }
317 }
336 }
318
337
319 decompressedSize = ZSTD_getDecompressedSize(source, sourceSize);
338 decompressedSize = ZSTD_getDecompressedSize(source, sourceSize);
320 /* 0 returned if content size not in the zstd frame header */
339 /* 0 returned if content size not in the zstd frame header */
321 if (0 == decompressedSize) {
340 if (0 == decompressedSize) {
322 if (0 == maxOutputSize) {
341 if (0 == maxOutputSize) {
323 PyErr_SetString(ZstdError, "input data invalid or missing content size "
342 PyErr_SetString(ZstdError, "input data invalid or missing content size "
324 "in frame header");
343 "in frame header");
325 return NULL;
344 return NULL;
326 }
345 }
327 else {
346 else {
328 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
347 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
329 destCapacity = maxOutputSize;
348 destCapacity = maxOutputSize;
330 }
349 }
331 }
350 }
332 else {
351 else {
333 result = PyBytes_FromStringAndSize(NULL, decompressedSize);
352 result = PyBytes_FromStringAndSize(NULL, decompressedSize);
334 destCapacity = decompressedSize;
353 destCapacity = decompressedSize;
335 }
354 }
336
355
337 if (!result) {
356 if (!result) {
338 return NULL;
357 return NULL;
339 }
358 }
340
359
341 Py_BEGIN_ALLOW_THREADS
360 Py_BEGIN_ALLOW_THREADS
342 if (self->ddict) {
361 if (self->ddict) {
343 zresult = ZSTD_decompress_usingDDict(self->dctx,
362 zresult = ZSTD_decompress_usingDDict(self->dctx,
344 PyBytes_AsString(result), destCapacity,
363 PyBytes_AsString(result), destCapacity,
345 source, sourceSize, self->ddict);
364 source, sourceSize, self->ddict);
346 }
365 }
347 else {
366 else {
348 zresult = ZSTD_decompressDCtx(self->dctx,
367 zresult = ZSTD_decompressDCtx(self->dctx,
349 PyBytes_AsString(result), destCapacity, source, sourceSize);
368 PyBytes_AsString(result), destCapacity, source, sourceSize);
350 }
369 }
351 Py_END_ALLOW_THREADS
370 Py_END_ALLOW_THREADS
352
371
353 if (ZSTD_isError(zresult)) {
372 if (ZSTD_isError(zresult)) {
354 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
373 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
355 Py_DecRef(result);
374 Py_DECREF(result);
356 return NULL;
375 return NULL;
357 }
376 }
358 else if (decompressedSize && zresult != decompressedSize) {
377 else if (decompressedSize && zresult != decompressedSize) {
359 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
378 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
360 zresult, decompressedSize);
379 zresult, decompressedSize);
361 Py_DecRef(result);
380 Py_DECREF(result);
362 return NULL;
381 return NULL;
363 }
382 }
364 else if (zresult < destCapacity) {
383 else if (zresult < destCapacity) {
365 if (_PyBytes_Resize(&result, zresult)) {
384 if (_PyBytes_Resize(&result, zresult)) {
366 Py_DecRef(result);
385 Py_DECREF(result);
367 return NULL;
386 return NULL;
368 }
387 }
369 }
388 }
370
389
371 return result;
390 return result;
372 }
391 }
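The content-size check above means one-shot decompression needs either a frame header that records the decompressed size or an explicit output cap. A minimal Python-level sketch, assuming the module name ``zstd`` from the type names in this file and the ``max_output_size`` keyword for the cap (the frame variables are placeholders):

    import zstd

    dctx = zstd.ZstdDecompressor()

    # Frame written with its content size in the header: no hint needed.
    data = dctx.decompress(frame_with_size)

    # Frame without an embedded content size: a cap must be supplied,
    # otherwise the "missing content size in frame header" error above
    # is raised.
    data = dctx.decompress(frame_without_size, max_output_size=1 << 20)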
373
392
374 PyDoc_STRVAR(Decompressor_decompressobj__doc__,
393 PyDoc_STRVAR(Decompressor_decompressobj__doc__,
375 "decompressobj()\n"
394 "decompressobj()\n"
376 "\n"
395 "\n"
377 "Incrementally feed data into a decompressor.\n"
396 "Incrementally feed data into a decompressor.\n"
378 "\n"
397 "\n"
379 "The returned object exposes a ``decompress(data)`` method. This makes it\n"
398 "The returned object exposes a ``decompress(data)`` method. This makes it\n"
380 "compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n"
399 "compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n"
381 "callers can swap in the zstd decompressor while using the same API.\n"
400 "callers can swap in the zstd decompressor while using the same API.\n"
382 );
401 );
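A minimal usage sketch of the ``decompressobj()`` API described above (the chunk source and sink are placeholders):

    import zstd

    dobj = zstd.ZstdDecompressor().decompressobj()

    # Same shape as zlib.decompressobj: feed compressed chunks as they
    # arrive and consume whatever output each call produces.
    for compressed_chunk in compressed_chunks:   # placeholder iterable
        output = dobj.decompress(compressed_chunk)
        if output:
            consume(output)                      # placeholder sink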
383
402
384 static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self) {
403 static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self) {
385 ZstdDecompressionObj* result = PyObject_New(ZstdDecompressionObj, &ZstdDecompressionObjType);
404 ZstdDecompressionObj* result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL);
386 if (!result) {
405 if (!result) {
387 return NULL;
406 return NULL;
388 }
407 }
389
408
390 result->dstream = DStream_from_ZstdDecompressor(self);
409 if (0 != init_dstream(self)) {
391 if (!result->dstream) {
410 Py_DECREF(result);
392 Py_DecRef((PyObject*)result);
393 return NULL;
411 return NULL;
394 }
412 }
395
413
396 result->decompressor = self;
414 result->decompressor = self;
397 Py_INCREF(result->decompressor);
415 Py_INCREF(result->decompressor);
398
416
399 result->finished = 0;
400
401 return result;
417 return result;
402 }
418 }
403
419
404 PyDoc_STRVAR(Decompressor_read_from__doc__,
420 PyDoc_STRVAR(Decompressor_read_from__doc__,
405 "read_from(reader[, read_size=default, write_size=default, skip_bytes=0])\n"
421 "read_from(reader[, read_size=default, write_size=default, skip_bytes=0])\n"
406 "Read compressed data and return an iterator\n"
422 "Read compressed data and return an iterator\n"
407 "\n"
423 "\n"
408 "Returns an iterator of decompressed data chunks produced from reading from\n"
424 "Returns an iterator of decompressed data chunks produced from reading from\n"
409 "the ``reader``.\n"
425 "the ``reader``.\n"
410 "\n"
426 "\n"
411 "Compressed data will be obtained from ``reader`` by calling the\n"
427 "Compressed data will be obtained from ``reader`` by calling the\n"
412 "``read(size)`` method of it. The source data will be streamed into a\n"
428 "``read(size)`` method of it. The source data will be streamed into a\n"
413 "decompressor. As decompressed data is available, it will be exposed to the\n"
429 "decompressor. As decompressed data is available, it will be exposed to the\n"
414 "returned iterator.\n"
430 "returned iterator.\n"
415 "\n"
431 "\n"
416 "Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n"
432 "Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n"
417 "iterator in chunks of size ``write_size``. The default values are the input\n"
433 "iterator in chunks of size ``write_size``. The default values are the input\n"
418 "and output sizes for a zstd streaming decompressor.\n"
434 "and output sizes for a zstd streaming decompressor.\n"
419 "\n"
435 "\n"
420 "There is also support for skipping the first ``skip_bytes`` of data from\n"
436 "There is also support for skipping the first ``skip_bytes`` of data from\n"
421 "the source.\n"
437 "the source.\n"
422 );
438 );
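A sketch of ``read_from()`` as described above, assuming a file opened in binary mode as the reader (the file name and consumer are placeholders):

    import zstd

    dctx = zstd.ZstdDecompressor()
    with open('data.zst', 'rb') as fh:
        # Input is read() in read_size pieces and yielded as decompressed
        # chunks of up to write_size bytes.
        for chunk in dctx.read_from(fh, read_size=8192):
            consume(chunk)                       # placeholder consumer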
423
439
424 static ZstdDecompressorIterator* Decompressor_read_from(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
440 static ZstdDecompressorIterator* Decompressor_read_from(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
425 static char* kwlist[] = {
441 static char* kwlist[] = {
426 "reader",
442 "reader",
427 "read_size",
443 "read_size",
428 "write_size",
444 "write_size",
429 "skip_bytes",
445 "skip_bytes",
430 NULL
446 NULL
431 };
447 };
432
448
433 PyObject* reader;
449 PyObject* reader;
434 size_t inSize = ZSTD_DStreamInSize();
450 size_t inSize = ZSTD_DStreamInSize();
435 size_t outSize = ZSTD_DStreamOutSize();
451 size_t outSize = ZSTD_DStreamOutSize();
436 ZstdDecompressorIterator* result;
452 ZstdDecompressorIterator* result;
437 size_t skipBytes = 0;
453 size_t skipBytes = 0;
438
454
439 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_from", kwlist,
455 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_from", kwlist,
440 &reader, &inSize, &outSize, &skipBytes)) {
456 &reader, &inSize, &outSize, &skipBytes)) {
441 return NULL;
457 return NULL;
442 }
458 }
443
459
444 if (skipBytes >= inSize) {
460 if (skipBytes >= inSize) {
445 PyErr_SetString(PyExc_ValueError,
461 PyErr_SetString(PyExc_ValueError,
446 "skip_bytes must be smaller than read_size");
462 "skip_bytes must be smaller than read_size");
447 return NULL;
463 return NULL;
448 }
464 }
449
465
450 result = PyObject_New(ZstdDecompressorIterator, &ZstdDecompressorIteratorType);
466 result = (ZstdDecompressorIterator*)PyObject_CallObject((PyObject*)&ZstdDecompressorIteratorType, NULL);
451 if (!result) {
467 if (!result) {
452 return NULL;
468 return NULL;
453 }
469 }
454
470
455 result->decompressor = NULL;
456 result->reader = NULL;
457 result->buffer = NULL;
458 result->dstream = NULL;
459 result->input.src = NULL;
460 result->output.dst = NULL;
461
462 if (PyObject_HasAttrString(reader, "read")) {
471 if (PyObject_HasAttrString(reader, "read")) {
463 result->reader = reader;
472 result->reader = reader;
464 Py_INCREF(result->reader);
473 Py_INCREF(result->reader);
465 }
474 }
466 else if (1 == PyObject_CheckBuffer(reader)) {
475 else if (1 == PyObject_CheckBuffer(reader)) {
467 /* Object claims it is a buffer. Try to get a handle to it. */
476 /* Object claims it is a buffer. Try to get a handle to it. */
468 result->buffer = PyMem_Malloc(sizeof(Py_buffer));
477 result->buffer = PyMem_Malloc(sizeof(Py_buffer));
469 if (!result->buffer) {
478 if (!result->buffer) {
470 goto except;
479 goto except;
471 }
480 }
472
481
473 memset(result->buffer, 0, sizeof(Py_buffer));
482 memset(result->buffer, 0, sizeof(Py_buffer));
474
483
475 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
484 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
476 goto except;
485 goto except;
477 }
486 }
478
479 result->bufferOffset = 0;
480 }
487 }
481 else {
488 else {
482 PyErr_SetString(PyExc_ValueError,
489 PyErr_SetString(PyExc_ValueError,
483 "must pass an object with a read() method or conforms to buffer protocol");
490 "must pass an object with a read() method or conforms to buffer protocol");
484 goto except;
491 goto except;
485 }
492 }
486
493
487 result->decompressor = self;
494 result->decompressor = self;
488 Py_INCREF(result->decompressor);
495 Py_INCREF(result->decompressor);
489
496
490 result->inSize = inSize;
497 result->inSize = inSize;
491 result->outSize = outSize;
498 result->outSize = outSize;
492 result->skipBytes = skipBytes;
499 result->skipBytes = skipBytes;
493
500
494 result->dstream = DStream_from_ZstdDecompressor(self);
501 if (0 != init_dstream(self)) {
495 if (!result->dstream) {
496 goto except;
502 goto except;
497 }
503 }
498
504
499 result->input.src = PyMem_Malloc(inSize);
505 result->input.src = PyMem_Malloc(inSize);
500 if (!result->input.src) {
506 if (!result->input.src) {
501 PyErr_NoMemory();
507 PyErr_NoMemory();
502 goto except;
508 goto except;
503 }
509 }
504 result->input.size = 0;
505 result->input.pos = 0;
506
507 result->output.dst = NULL;
508 result->output.size = 0;
509 result->output.pos = 0;
510
511 result->readCount = 0;
512 result->finishedInput = 0;
513 result->finishedOutput = 0;
514
510
515 goto finally;
511 goto finally;
516
512
517 except:
513 except:
518 Py_CLEAR(result->reader);
514 Py_CLEAR(result->reader);
519
515
520 if (result->buffer) {
516 if (result->buffer) {
521 PyBuffer_Release(result->buffer);
517 PyBuffer_Release(result->buffer);
522 Py_CLEAR(result->buffer);
518 Py_CLEAR(result->buffer);
523 }
519 }
524
520
525 Py_CLEAR(result);
521 Py_CLEAR(result);
526
522
527 finally:
523 finally:
528
524
529 return result;
525 return result;
530 }
526 }
531
527
532 PyDoc_STRVAR(Decompressor_write_to__doc__,
528 PyDoc_STRVAR(Decompressor_write_to__doc__,
533 "Create a context manager to write decompressed data to an object.\n"
529 "Create a context manager to write decompressed data to an object.\n"
534 "\n"
530 "\n"
535 "The passed object must have a ``write()`` method.\n"
531 "The passed object must have a ``write()`` method.\n"
536 "\n"
532 "\n"
537 "The caller feeds intput data to the object by calling ``write(data)``.\n"
533 "The caller feeds intput data to the object by calling ``write(data)``.\n"
538 "Decompressed data is written to the argument given as it is decompressed.\n"
534 "Decompressed data is written to the argument given as it is decompressed.\n"
539 "\n"
535 "\n"
540 "An optional ``write_size`` argument defines the size of chunks to\n"
536 "An optional ``write_size`` argument defines the size of chunks to\n"
541 "``write()`` to the writer. It defaults to the default output size for a zstd\n"
537 "``write()`` to the writer. It defaults to the default output size for a zstd\n"
542 "streaming decompressor.\n"
538 "streaming decompressor.\n"
543 );
539 );
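A sketch of the ``write_to()`` context manager described above (the output path and the source of compressed bytes are placeholders):

    import zstd

    dctx = zstd.ZstdDecompressor()
    with open('output.bin', 'wb') as ofh:
        with dctx.write_to(ofh) as decompressor:
            # Compressed input goes in via write(); decompressed output is
            # written to ofh in write_size chunks.
            decompressor.write(compressed_bytes)  # placeholder input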
544
540
545 static ZstdDecompressionWriter* Decompressor_write_to(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
541 static ZstdDecompressionWriter* Decompressor_write_to(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
546 static char* kwlist[] = {
542 static char* kwlist[] = {
547 "writer",
543 "writer",
548 "write_size",
544 "write_size",
549 NULL
545 NULL
550 };
546 };
551
547
552 PyObject* writer;
548 PyObject* writer;
553 size_t outSize = ZSTD_DStreamOutSize();
549 size_t outSize = ZSTD_DStreamOutSize();
554 ZstdDecompressionWriter* result;
550 ZstdDecompressionWriter* result;
555
551
556 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:write_to", kwlist,
552 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:write_to", kwlist,
557 &writer, &outSize)) {
553 &writer, &outSize)) {
558 return NULL;
554 return NULL;
559 }
555 }
560
556
561 if (!PyObject_HasAttrString(writer, "write")) {
557 if (!PyObject_HasAttrString(writer, "write")) {
562 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
558 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
563 return NULL;
559 return NULL;
564 }
560 }
565
561
566 result = PyObject_New(ZstdDecompressionWriter, &ZstdDecompressionWriterType);
562 result = (ZstdDecompressionWriter*)PyObject_CallObject((PyObject*)&ZstdDecompressionWriterType, NULL);
567 if (!result) {
563 if (!result) {
568 return NULL;
564 return NULL;
569 }
565 }
570
566
571 result->decompressor = self;
567 result->decompressor = self;
572 Py_INCREF(result->decompressor);
568 Py_INCREF(result->decompressor);
573
569
574 result->writer = writer;
570 result->writer = writer;
575 Py_INCREF(result->writer);
571 Py_INCREF(result->writer);
576
572
577 result->outSize = outSize;
573 result->outSize = outSize;
578
574
579 result->entered = 0;
580 result->dstream = NULL;
581
582 return result;
575 return result;
583 }
576 }
584
577
585 PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
578 PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
586 "Decompress a series of chunks using the content dictionary chaining technique\n"
579 "Decompress a series of chunks using the content dictionary chaining technique\n"
587 );
580 );
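As the implementation below shows, each frame after the first is decompressed using the previous frame's decompressed output as a content dictionary, and every frame must record its content size. A usage sketch (``chain`` is a placeholder list of such frames):

    import zstd

    dctx = zstd.ZstdDecompressor()

    # chain[0] is a plain zstd frame; chain[N] was compressed using the
    # decompressed output of chain[N-1] as a dictionary. The return value
    # is the fully decompressed final entry in the chain.
    final_text = dctx.decompress_content_dict_chain(chain)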
588
581
589 static PyObject* Decompressor_decompress_content_dict_chain(PyObject* self, PyObject* args, PyObject* kwargs) {
582 static PyObject* Decompressor_decompress_content_dict_chain(PyObject* self, PyObject* args, PyObject* kwargs) {
590 static char* kwlist[] = {
583 static char* kwlist[] = {
591 "frames",
584 "frames",
592 NULL
585 NULL
593 };
586 };
594
587
595 PyObject* chunks;
588 PyObject* chunks;
596 Py_ssize_t chunksLen;
589 Py_ssize_t chunksLen;
597 Py_ssize_t chunkIndex;
590 Py_ssize_t chunkIndex;
598 char parity = 0;
591 char parity = 0;
599 PyObject* chunk;
592 PyObject* chunk;
600 char* chunkData;
593 char* chunkData;
601 Py_ssize_t chunkSize;
594 Py_ssize_t chunkSize;
602 ZSTD_DCtx* dctx = NULL;
595 ZSTD_DCtx* dctx = NULL;
603 size_t zresult;
596 size_t zresult;
604 ZSTD_frameParams frameParams;
597 ZSTD_frameParams frameParams;
605 void* buffer1 = NULL;
598 void* buffer1 = NULL;
606 size_t buffer1Size = 0;
599 size_t buffer1Size = 0;
607 size_t buffer1ContentSize = 0;
600 size_t buffer1ContentSize = 0;
608 void* buffer2 = NULL;
601 void* buffer2 = NULL;
609 size_t buffer2Size = 0;
602 size_t buffer2Size = 0;
610 size_t buffer2ContentSize = 0;
603 size_t buffer2ContentSize = 0;
611 void* destBuffer = NULL;
604 void* destBuffer = NULL;
612 PyObject* result = NULL;
605 PyObject* result = NULL;
613
606
614 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
607 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
615 kwlist, &PyList_Type, &chunks)) {
608 kwlist, &PyList_Type, &chunks)) {
616 return NULL;
609 return NULL;
617 }
610 }
618
611
619 chunksLen = PyList_Size(chunks);
612 chunksLen = PyList_Size(chunks);
620 if (!chunksLen) {
613 if (!chunksLen) {
621 PyErr_SetString(PyExc_ValueError, "empty input chain");
614 PyErr_SetString(PyExc_ValueError, "empty input chain");
622 return NULL;
615 return NULL;
623 }
616 }
624
617
625 /* The first chunk should not be using a dictionary. We handle it specially. */
618 /* The first chunk should not be using a dictionary. We handle it specially. */
626 chunk = PyList_GetItem(chunks, 0);
619 chunk = PyList_GetItem(chunks, 0);
627 if (!PyBytes_Check(chunk)) {
620 if (!PyBytes_Check(chunk)) {
628 PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes");
621 PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes");
629 return NULL;
622 return NULL;
630 }
623 }
631
624
632 /* We require that all chunks be zstd frames and that they have content size set. */
625 /* We require that all chunks be zstd frames and that they have content size set. */
633 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
626 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
634 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
627 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
635 if (ZSTD_isError(zresult)) {
628 if (ZSTD_isError(zresult)) {
636 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
629 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
637 return NULL;
630 return NULL;
638 }
631 }
639 else if (zresult) {
632 else if (zresult) {
640 PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame");
633 PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame");
641 return NULL;
634 return NULL;
642 }
635 }
643
636
644 if (0 == frameParams.frameContentSize) {
637 if (0 == frameParams.frameContentSize) {
645 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
638 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
646 return NULL;
639 return NULL;
647 }
640 }
648
641
649 dctx = ZSTD_createDCtx();
642 dctx = ZSTD_createDCtx();
650 if (!dctx) {
643 if (!dctx) {
651 PyErr_NoMemory();
644 PyErr_NoMemory();
652 goto finally;
645 goto finally;
653 }
646 }
654
647
655 buffer1Size = frameParams.frameContentSize;
648 buffer1Size = frameParams.frameContentSize;
656 buffer1 = PyMem_Malloc(buffer1Size);
649 buffer1 = PyMem_Malloc(buffer1Size);
657 if (!buffer1) {
650 if (!buffer1) {
658 goto finally;
651 goto finally;
659 }
652 }
660
653
661 Py_BEGIN_ALLOW_THREADS
654 Py_BEGIN_ALLOW_THREADS
662 zresult = ZSTD_decompressDCtx(dctx, buffer1, buffer1Size, chunkData, chunkSize);
655 zresult = ZSTD_decompressDCtx(dctx, buffer1, buffer1Size, chunkData, chunkSize);
663 Py_END_ALLOW_THREADS
656 Py_END_ALLOW_THREADS
664 if (ZSTD_isError(zresult)) {
657 if (ZSTD_isError(zresult)) {
665 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
658 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
666 goto finally;
659 goto finally;
667 }
660 }
668
661
669 buffer1ContentSize = zresult;
662 buffer1ContentSize = zresult;
670
663
671 /* Special case of a simple chain. */
664 /* Special case of a simple chain. */
672 if (1 == chunksLen) {
665 if (1 == chunksLen) {
673 result = PyBytes_FromStringAndSize(buffer1, buffer1Size);
666 result = PyBytes_FromStringAndSize(buffer1, buffer1Size);
674 goto finally;
667 goto finally;
675 }
668 }
676
669
677 /* This should ideally look at the next chunk. But this is slightly simpler. */
670 /* This should ideally look at the next chunk. But this is slightly simpler. */
678 buffer2Size = frameParams.frameContentSize;
671 buffer2Size = frameParams.frameContentSize;
679 buffer2 = PyMem_Malloc(buffer2Size);
672 buffer2 = PyMem_Malloc(buffer2Size);
680 if (!buffer2) {
673 if (!buffer2) {
681 goto finally;
674 goto finally;
682 }
675 }
683
676
684 /* For each subsequent chunk, use the previous fulltext as a content dictionary.
677 /* For each subsequent chunk, use the previous fulltext as a content dictionary.
685 Our strategy is to have 2 buffers. One holds the previous fulltext (to be
678 Our strategy is to have 2 buffers. One holds the previous fulltext (to be
686 used as a content dictionary) and the other holds the new fulltext. The
679 used as a content dictionary) and the other holds the new fulltext. The
687 buffers grow when needed but never decrease in size. This limits the
680 buffers grow when needed but never decrease in size. This limits the
688 memory allocator overhead.
681 memory allocator overhead.
689 */
682 */
690 for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
683 for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
691 chunk = PyList_GetItem(chunks, chunkIndex);
684 chunk = PyList_GetItem(chunks, chunkIndex);
692 if (!PyBytes_Check(chunk)) {
685 if (!PyBytes_Check(chunk)) {
693 PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
686 PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
694 goto finally;
687 goto finally;
695 }
688 }
696
689
697 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
690 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
698 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
691 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
699 if (ZSTD_isError(zresult)) {
692 if (ZSTD_isError(zresult)) {
700 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
693 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
701 goto finally;
694 goto finally;
702 }
695 }
703 else if (zresult) {
696 else if (zresult) {
704 PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
697 PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
705 goto finally;
698 goto finally;
706 }
699 }
707
700
708 if (0 == frameParams.frameContentSize) {
701 if (0 == frameParams.frameContentSize) {
709 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
702 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
710 goto finally;
703 goto finally;
711 }
704 }
712
705
713 parity = chunkIndex % 2;
706 parity = chunkIndex % 2;
714
707
715 /* This could definitely be abstracted to reduce code duplication. */
708 /* This could definitely be abstracted to reduce code duplication. */
716 if (parity) {
709 if (parity) {
717 /* Resize destination buffer to hold larger content. */
710 /* Resize destination buffer to hold larger content. */
718 if (buffer2Size < frameParams.frameContentSize) {
711 if (buffer2Size < frameParams.frameContentSize) {
719 buffer2Size = frameParams.frameContentSize;
712 buffer2Size = frameParams.frameContentSize;
720 destBuffer = PyMem_Realloc(buffer2, buffer2Size);
713 destBuffer = PyMem_Realloc(buffer2, buffer2Size);
721 if (!destBuffer) {
714 if (!destBuffer) {
722 goto finally;
715 goto finally;
723 }
716 }
724 buffer2 = destBuffer;
717 buffer2 = destBuffer;
725 }
718 }
726
719
727 Py_BEGIN_ALLOW_THREADS
720 Py_BEGIN_ALLOW_THREADS
728 zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size,
721 zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size,
729 chunkData, chunkSize, buffer1, buffer1ContentSize);
722 chunkData, chunkSize, buffer1, buffer1ContentSize);
730 Py_END_ALLOW_THREADS
723 Py_END_ALLOW_THREADS
731 if (ZSTD_isError(zresult)) {
724 if (ZSTD_isError(zresult)) {
732 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
725 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
733 chunkIndex, ZSTD_getErrorName(zresult));
726 chunkIndex, ZSTD_getErrorName(zresult));
734 goto finally;
727 goto finally;
735 }
728 }
736 buffer2ContentSize = zresult;
729 buffer2ContentSize = zresult;
737 }
730 }
738 else {
731 else {
739 if (buffer1Size < frameParams.frameContentSize) {
732 if (buffer1Size < frameParams.frameContentSize) {
740 buffer1Size = frameParams.frameContentSize;
733 buffer1Size = frameParams.frameContentSize;
741 destBuffer = PyMem_Realloc(buffer1, buffer1Size);
734 destBuffer = PyMem_Realloc(buffer1, buffer1Size);
742 if (!destBuffer) {
735 if (!destBuffer) {
743 goto finally;
736 goto finally;
744 }
737 }
745 buffer1 = destBuffer;
738 buffer1 = destBuffer;
746 }
739 }
747
740
748 Py_BEGIN_ALLOW_THREADS
741 Py_BEGIN_ALLOW_THREADS
749 zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size,
742 zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size,
750 chunkData, chunkSize, buffer2, buffer2ContentSize);
743 chunkData, chunkSize, buffer2, buffer2ContentSize);
751 Py_END_ALLOW_THREADS
744 Py_END_ALLOW_THREADS
752 if (ZSTD_isError(zresult)) {
745 if (ZSTD_isError(zresult)) {
753 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
746 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
754 chunkIndex, ZSTD_getErrorName(zresult));
747 chunkIndex, ZSTD_getErrorName(zresult));
755 goto finally;
748 goto finally;
756 }
749 }
757 buffer1ContentSize = zresult;
750 buffer1ContentSize = zresult;
758 }
751 }
759 }
752 }
760
753
761 result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
754 result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
762 parity ? buffer2ContentSize : buffer1ContentSize);
755 parity ? buffer2ContentSize : buffer1ContentSize);
763
756
764 finally:
757 finally:
765 if (buffer2) {
758 if (buffer2) {
766 PyMem_Free(buffer2);
759 PyMem_Free(buffer2);
767 }
760 }
768 if (buffer1) {
761 if (buffer1) {
769 PyMem_Free(buffer1);
762 PyMem_Free(buffer1);
770 }
763 }
771
764
772 if (dctx) {
765 if (dctx) {
773 ZSTD_freeDCtx(dctx);
766 ZSTD_freeDCtx(dctx);
774 }
767 }
775
768
776 return result;
769 return result;
777 }
770 }
778
771
772 typedef struct {
773 void* sourceData;
774 size_t sourceSize;
775 unsigned long long destSize;
776 } FramePointer;
777
778 typedef struct {
779 FramePointer* frames;
780 Py_ssize_t framesSize;
781 unsigned long long compressedSize;
782 } FrameSources;
783
784 typedef struct {
785 void* dest;
786 Py_ssize_t destSize;
787 BufferSegment* segments;
788 Py_ssize_t segmentsSize;
789 } DestBuffer;
790
791 typedef enum {
792 WorkerError_none = 0,
793 WorkerError_zstd = 1,
794 WorkerError_memory = 2,
795 WorkerError_sizeMismatch = 3,
796 WorkerError_unknownSize = 4,
797 } WorkerError;
798
799 typedef struct {
800 /* Source records and length */
801 FramePointer* framePointers;
802 /* Which records to process. */
803 Py_ssize_t startOffset;
804 Py_ssize_t endOffset;
805 unsigned long long totalSourceSize;
806
807 /* Compression state and settings. */
808 ZSTD_DCtx* dctx;
809 ZSTD_DDict* ddict;
810 int requireOutputSizes;
811
812 /* Output storage. */
813 DestBuffer* destBuffers;
814 Py_ssize_t destCount;
815
816 /* Item that error occurred on. */
817 Py_ssize_t errorOffset;
818 /* If an error occurred. */
819 WorkerError error;
820 /* result from zstd decompression operation */
821 size_t zresult;
822 } WorkerState;
823
824 static void decompress_worker(WorkerState* state) {
825 size_t allocationSize;
826 DestBuffer* destBuffer;
827 Py_ssize_t frameIndex;
828 Py_ssize_t localOffset = 0;
829 Py_ssize_t currentBufferStartIndex = state->startOffset;
830 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
831 void* tmpBuf;
832 Py_ssize_t destOffset = 0;
833 FramePointer* framePointers = state->framePointers;
834 size_t zresult;
835 unsigned long long totalOutputSize = 0;
836
837 assert(NULL == state->destBuffers);
838 assert(0 == state->destCount);
839 assert(state->endOffset - state->startOffset >= 0);
840
841 /*
842 * We need to allocate a buffer to hold decompressed data. How we do this
843 * depends on what we know about the output. The following scenarios are
844 * possible:
845 *
846 * 1. All structs defining frames declare the output size.
847 * 2. The decompressed size is embedded within the zstd frame.
848 * 3. The decompressed size is not stored anywhere.
849 *
850 * For now, we only support #1 and #2.
851 */
852
853 /* Resolve output segments. */
854 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
855 FramePointer* fp = &framePointers[frameIndex];
856
857 if (0 == fp->destSize) {
858 fp->destSize = ZSTD_getDecompressedSize(fp->sourceData, fp->sourceSize);
859 if (0 == fp->destSize && state->requireOutputSizes) {
860 state->error = WorkerError_unknownSize;
861 state->errorOffset = frameIndex;
862 return;
863 }
864 }
865
866 totalOutputSize += fp->destSize;
867 }
868
869 state->destBuffers = calloc(1, sizeof(DestBuffer));
870 if (NULL == state->destBuffers) {
871 state->error = WorkerError_memory;
872 return;
873 }
874
875 state->destCount = 1;
876
877 destBuffer = &state->destBuffers[state->destCount - 1];
878
879 assert(framePointers[state->startOffset].destSize > 0); /* For now. */
880
881 allocationSize = roundpow2(state->totalSourceSize);
882
883 if (framePointers[state->startOffset].destSize > allocationSize) {
884 allocationSize = roundpow2(framePointers[state->startOffset].destSize);
885 }
886
887 destBuffer->dest = malloc(allocationSize);
888 if (NULL == destBuffer->dest) {
889 state->error = WorkerError_memory;
890 return;
891 }
892
893 destBuffer->destSize = allocationSize;
894
895 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
896 if (NULL == destBuffer->segments) {
897 /* Caller will free state->dest as part of cleanup. */
898 state->error = WorkerError_memory;
899 return;
900 }
901
902 destBuffer->segmentsSize = remainingItems;
903
904 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
905 const void* source = framePointers[frameIndex].sourceData;
906 const size_t sourceSize = framePointers[frameIndex].sourceSize;
907 void* dest;
908 const size_t decompressedSize = framePointers[frameIndex].destSize;
909 size_t destAvailable = destBuffer->destSize - destOffset;
910
911 assert(decompressedSize > 0); /* For now. */
912
913 /*
914 * Not enough space in the current buffer. Finish the current one, then
915 * allocate and switch to a new one.
916 */
917 if (decompressedSize > destAvailable) {
918 /*
919 * Shrinking the destination buffer is optional. But it should be cheap,
920 * so we just do it.
921 */
922 if (destAvailable) {
923 tmpBuf = realloc(destBuffer->dest, destOffset);
924 if (NULL == tmpBuf) {
925 state->error = WorkerError_memory;
926 return;
927 }
928
929 destBuffer->dest = tmpBuf;
930 destBuffer->destSize = destOffset;
931 }
932
933 /* Truncate segments buffer. */
934 tmpBuf = realloc(destBuffer->segments,
935 (frameIndex - currentBufferStartIndex) * sizeof(BufferSegment));
936 if (NULL == tmpBuf) {
937 state->error = WorkerError_memory;
938 return;
939 }
940
941 destBuffer->segments = tmpBuf;
942 destBuffer->segmentsSize = frameIndex - currentBufferStartIndex;
943
944 /* Grow space for new DestBuffer. */
945 tmpBuf = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
946 if (NULL == tmpBuf) {
947 state->error = WorkerError_memory;
948 return;
949 }
950
951 state->destBuffers = tmpBuf;
952 state->destCount++;
953
954 destBuffer = &state->destBuffers[state->destCount - 1];
955
956 /* Don't take any chances with non-NULL pointers. */
957 memset(destBuffer, 0, sizeof(DestBuffer));
958
959 allocationSize = roundpow2(state->totalSourceSize);
960
961 if (decompressedSize > allocationSize) {
962 allocationSize = roundpow2(decompressedSize);
963 }
964
965 destBuffer->dest = malloc(allocationSize);
966 if (NULL == destBuffer->dest) {
967 state->error = WorkerError_memory;
968 return;
969 }
970
971 destBuffer->destSize = allocationSize;
972 destAvailable = allocationSize;
973 destOffset = 0;
974 localOffset = 0;
975
976 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
977 if (NULL == destBuffer->segments) {
978 state->error = WorkerError_memory;
979 return;
980 }
981
982 destBuffer->segmentsSize = remainingItems;
983 currentBufferStartIndex = frameIndex;
984 }
985
986 dest = (char*)destBuffer->dest + destOffset;
987
988 if (state->ddict) {
989 zresult = ZSTD_decompress_usingDDict(state->dctx, dest, decompressedSize,
990 source, sourceSize, state->ddict);
991 }
992 else {
993 zresult = ZSTD_decompressDCtx(state->dctx, dest, decompressedSize,
994 source, sourceSize);
995 }
996
997 if (ZSTD_isError(zresult)) {
998 state->error = WorkerError_zstd;
999 state->zresult = zresult;
1000 state->errorOffset = frameIndex;
1001 return;
1002 }
1003 else if (zresult != decompressedSize) {
1004 state->error = WorkerError_sizeMismatch;
1005 state->zresult = zresult;
1006 state->errorOffset = frameIndex;
1007 return;
1008 }
1009
1010 destBuffer->segments[localOffset].offset = destOffset;
1011 destBuffer->segments[localOffset].length = decompressedSize;
1012 destOffset += zresult;
1013 localOffset++;
1014 remainingItems--;
1015 }
1016
1017 if (destBuffer->destSize > destOffset) {
1018 tmpBuf = realloc(destBuffer->dest, destOffset);
1019 if (NULL == tmpBuf) {
1020 state->error = WorkerError_memory;
1021 return;
1022 }
1023
1024 destBuffer->dest = tmpBuf;
1025 destBuffer->destSize = destOffset;
1026 }
1027 }
1028
1029 ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames,
1030 unsigned int threadCount) {
1031 void* dictData = NULL;
1032 size_t dictSize = 0;
1033 Py_ssize_t i = 0;
1034 int errored = 0;
1035 Py_ssize_t segmentsCount;
1036 ZstdBufferWithSegments* bws = NULL;
1037 PyObject* resultArg = NULL;
1038 Py_ssize_t resultIndex;
1039 ZstdBufferWithSegmentsCollection* result = NULL;
1040 FramePointer* framePointers = frames->frames;
1041 unsigned long long workerBytes = 0;
1042 int currentThread = 0;
1043 Py_ssize_t workerStartOffset = 0;
1044 POOL_ctx* pool = NULL;
1045 WorkerState* workerStates = NULL;
1046 unsigned long long bytesPerWorker;
1047
1048 /* Caller should normalize 0 and negative values to 1 or larger. */
1049 assert(threadCount >= 1);
1050
1051 /* More threads than inputs makes no sense under any conditions. */
1052 threadCount = frames->framesSize < threadCount ? (unsigned int)frames->framesSize
1053 : threadCount;
1054
1055 /* TODO lower thread count if input size is too small and threads would just
1056 add overhead. */
1057
1058 if (decompressor->dict) {
1059 dictData = decompressor->dict->dictData;
1060 dictSize = decompressor->dict->dictSize;
1061 }
1062
1063 if (dictData && !decompressor->ddict) {
1064 Py_BEGIN_ALLOW_THREADS
1065 decompressor->ddict = ZSTD_createDDict_byReference(dictData, dictSize);
1066 Py_END_ALLOW_THREADS
1067
1068 if (!decompressor->ddict) {
1069 PyErr_SetString(ZstdError, "could not create decompression dict");
1070 return NULL;
1071 }
1072 }
1073
1074 /* If threadCount==1, we don't start a thread pool. But we do leverage the
1075 same API for dispatching work. */
1076 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1077 if (NULL == workerStates) {
1078 PyErr_NoMemory();
1079 goto finally;
1080 }
1081
1082 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1083
1084 if (threadCount > 1) {
1085 pool = POOL_create(threadCount, 1);
1086 if (NULL == pool) {
1087 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1088 goto finally;
1089 }
1090 }
1091
1092 bytesPerWorker = frames->compressedSize / threadCount;
1093
1094 for (i = 0; i < threadCount; i++) {
1095 workerStates[i].dctx = ZSTD_createDCtx();
1096 if (NULL == workerStates[i].dctx) {
1097 PyErr_NoMemory();
1098 goto finally;
1099 }
1100
1101 ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx);
1102
1103 workerStates[i].ddict = decompressor->ddict;
1104 workerStates[i].framePointers = framePointers;
1105 workerStates[i].requireOutputSizes = 1;
1106 }
1107
1108 Py_BEGIN_ALLOW_THREADS
1109 /* There are many ways to split work among workers.
1110
1111 For now, we take a simple approach of splitting work so each worker
1112 gets roughly the same number of input bytes. This will result in more
1113 starvation than running N>threadCount jobs. But it avoids complications
1114 around state tracking, which could involve extra locking.
1115 */
1116 for (i = 0; i < frames->framesSize; i++) {
1117 workerBytes += frames->frames[i].sourceSize;
1118
1119 /*
1120 * The last worker/thread needs to handle all remaining work. Don't
1121 * trigger it prematurely. Defer to the block outside of the loop.
1122 * (But still process this loop so workerBytes is correct.)
1123 */
1124 if (currentThread == threadCount - 1) {
1125 continue;
1126 }
1127
1128 if (workerBytes >= bytesPerWorker) {
1129 workerStates[currentThread].startOffset = workerStartOffset;
1130 workerStates[currentThread].endOffset = i;
1131 workerStates[currentThread].totalSourceSize = workerBytes;
1132
1133 if (threadCount > 1) {
1134 POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
1135 }
1136 else {
1137 decompress_worker(&workerStates[currentThread]);
1138 }
1139 currentThread++;
1140 workerStartOffset = i + 1;
1141 workerBytes = 0;
1142 }
1143 }
1144
1145 if (workerBytes) {
1146 workerStates[currentThread].startOffset = workerStartOffset;
1147 workerStates[currentThread].endOffset = frames->framesSize - 1;
1148 workerStates[currentThread].totalSourceSize = workerBytes;
1149
1150 if (threadCount > 1) {
1151 POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
1152 }
1153 else {
1154 decompress_worker(&workerStates[currentThread]);
1155 }
1156 }
1157
1158 if (threadCount > 1) {
1159 POOL_free(pool);
1160 pool = NULL;
1161 }
1162 Py_END_ALLOW_THREADS
1163
1164 for (i = 0; i < threadCount; i++) {
1165 switch (workerStates[i].error) {
1166 case WorkerError_none:
1167 break;
1168
1169 case WorkerError_zstd:
1170 PyErr_Format(ZstdError, "error decompressing item %zd: %s",
1171 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1172 errored = 1;
1173 break;
1174
1175 case WorkerError_memory:
1176 PyErr_NoMemory();
1177 errored = 1;
1178 break;
1179
1180 case WorkerError_sizeMismatch:
1181 PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %llu",
1182 workerStates[i].errorOffset, workerStates[i].zresult,
1183 framePointers[workerStates[i].errorOffset].destSize);
1184 errored = 1;
1185 break;
1186
1187 case WorkerError_unknownSize:
1188 PyErr_Format(PyExc_ValueError, "could not determine decompressed size of item %zd",
1189 workerStates[i].errorOffset);
1190 errored = 1;
1191 break;
1192
1193 default:
1194 PyErr_Format(ZstdError, "unhandled error type: %d; this is a bug",
1195 workerStates[i].error);
1196 errored = 1;
1197 break;
1198 }
1199
1200 if (errored) {
1201 break;
1202 }
1203 }
1204
1205 if (errored) {
1206 goto finally;
1207 }
1208
1209 segmentsCount = 0;
1210 for (i = 0; i < threadCount; i++) {
1211 segmentsCount += workerStates[i].destCount;
1212 }
1213
1214 resultArg = PyTuple_New(segmentsCount);
1215 if (NULL == resultArg) {
1216 goto finally;
1217 }
1218
1219 resultIndex = 0;
1220
1221 for (i = 0; i < threadCount; i++) {
1222 Py_ssize_t bufferIndex;
1223 WorkerState* state = &workerStates[i];
1224
1225 for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
1226 DestBuffer* destBuffer = &state->destBuffers[bufferIndex];
1227
1228 bws = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1229 destBuffer->segments, destBuffer->segmentsSize);
1230 if (NULL == bws) {
1231 goto finally;
1232 }
1233
1234 /*
1235 * Memory for the buffer and segments was allocated using malloc() in the worker
1236 * and the memory is transferred to the BufferWithSegments instance. So
1237 * tell the instance to use free() and NULL the reference in the state struct
1238 * so it isn't freed below.
1239 */
1240 bws->useFree = 1;
1241 destBuffer->dest = NULL;
1242 destBuffer->segments = NULL;
1243
1244 PyTuple_SET_ITEM(resultArg, resultIndex++, (PyObject*)bws);
1245 }
1246 }
1247
1248 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1249 (PyObject*)&ZstdBufferWithSegmentsCollectionType, resultArg);
1250
1251 finally:
1252 Py_CLEAR(resultArg);
1253
1254 if (workerStates) {
1255 for (i = 0; i < threadCount; i++) {
1256 Py_ssize_t bufferIndex;
1257 WorkerState* state = &workerStates[i];
1258
1259 if (state->dctx) {
1260 ZSTD_freeDCtx(state->dctx);
1261 }
1262
1263 for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
1264 if (state->destBuffers) {
1265 /*
1266 * Will be NULL if memory was transferred to a BufferWithSegments.
1267 * Otherwise it is left over after an error occurred.
1268 */
1269 free(state->destBuffers[bufferIndex].dest);
1270 free(state->destBuffers[bufferIndex].segments);
1271 }
1272 }
1273
1274 free(state->destBuffers);
1275 }
1276
1277 PyMem_Free(workerStates);
1278 }
1279
1280 POOL_free(pool);
1281
1282 return result;
1283 }
1284
1285 PyDoc_STRVAR(Decompressor_multi_decompress_to_buffer__doc__,
1286 "Decompress multiple frames to output buffers\n"
1287 "\n"
1288 "Receives a ``BufferWithSegments``, a ``BufferWithSegmentsCollection`` or a\n"
1289 "list of bytes-like objects. Each item in the passed collection should be a\n"
1290 "compressed zstd frame.\n"
1291 "\n"
1292 "Unless ``decompressed_sizes`` is specified, the content size *must* be\n"
1293 "written into the zstd frame header. If ``decompressed_sizes`` is specified,\n"
1294 "it is an object conforming to the buffer protocol that represents an array\n"
1295 "of 64-bit unsigned integers in the machine's native format. Specifying\n"
1296 "``decompressed_sizes`` avoids a pre-scan of each frame to determine its\n"
1297 "output size.\n"
1298 "\n"
1299 "Returns a ``BufferWithSegmentsCollection`` containing the decompressed\n"
1300 "data. All decompressed data is allocated in a single memory buffer. The\n"
1301 "``BufferWithSegments`` instance tracks which objects are at which offsets\n"
1302 "and their respective lengths.\n"
1303 "\n"
1304 "The ``threads`` argument controls how many threads to use for operations.\n"
1305 "Negative values will use the same number of threads as logical CPUs on the\n"
1306 "machine.\n"
1307 );
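A sketch of ``multi_decompress_to_buffer()`` per the docstring above (``frames`` and ``expected_sizes`` are placeholders):

    import struct
    import zstd

    dctx = zstd.ZstdDecompressor()

    # frames: a list of bytes-like zstd frames, each recording its content
    # size in the frame header. threads=-1 uses one thread per logical CPU.
    collection = dctx.multi_decompress_to_buffer(frames, threads=-1)

    # If the frames do not record their content size, pass the sizes as a
    # native-format array of 64-bit unsigned integers.
    sizes = struct.pack('=' + 'Q' * len(frames), *expected_sizes)
    collection = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)

The returned ``BufferWithSegmentsCollection`` tracks the offset and length of every decompressed frame within its backing buffer(s).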
1308
1309 static ZstdBufferWithSegmentsCollection* Decompressor_multi_decompress_to_buffer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
1310 static char* kwlist[] = {
1311 "frames",
1312 "decompressed_sizes",
1313 "threads",
1314 NULL
1315 };
1316
1317 PyObject* frames;
1318 Py_buffer frameSizes;
1319 int threads = 0;
1320 Py_ssize_t frameCount;
1321 Py_buffer* frameBuffers = NULL;
1322 FramePointer* framePointers = NULL;
1323 unsigned long long* frameSizesP = NULL;
1324 unsigned long long totalInputSize = 0;
1325 FrameSources frameSources;
1326 ZstdBufferWithSegmentsCollection* result = NULL;
1327 Py_ssize_t i;
1328
1329 memset(&frameSizes, 0, sizeof(frameSizes));
1330
1331 #if PY_MAJOR_VERSION >= 3
1332 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|y*i:multi_decompress_to_buffer",
1333 #else
1334 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s*i:multi_decompress_to_buffer",
1335 #endif
1336 kwlist, &frames, &frameSizes, &threads)) {
1337 return NULL;
1338 }
1339
1340 if (frameSizes.buf) {
1341 if (!PyBuffer_IsContiguous(&frameSizes, 'C') || frameSizes.ndim > 1) {
1342 PyErr_SetString(PyExc_ValueError, "decompressed_sizes buffer should be contiguous and have a single dimension");
1343 goto finally;
1344 }
1345
1346 frameSizesP = (unsigned long long*)frameSizes.buf;
1347 }
1348
1349 if (threads < 0) {
1350 threads = cpu_count();
1351 }
1352
1353 if (threads < 2) {
1354 threads = 1;
1355 }
1356
1357 if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsType)) {
1358 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)frames;
1359 frameCount = buffer->segmentCount;
1360
1361 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1362 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1363 frameCount * sizeof(unsigned long long), frameSizes.len);
1364 goto finally;
1365 }
1366
1367 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1368 if (!framePointers) {
1369 PyErr_NoMemory();
1370 goto finally;
1371 }
1372
1373 for (i = 0; i < frameCount; i++) {
1374 void* sourceData;
1375 unsigned long long sourceSize;
1376 unsigned long long decompressedSize = 0;
1377
1378 if (buffer->segments[i].offset + buffer->segments[i].length > buffer->dataSize) {
1379 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", i);
1380 goto finally;
1381 }
1382
1383 sourceData = (char*)buffer->data + buffer->segments[i].offset;
1384 sourceSize = buffer->segments[i].length;
1385 totalInputSize += sourceSize;
1386
1387 if (frameSizesP) {
1388 decompressedSize = frameSizesP[i];
1389 }
1390
1391 framePointers[i].sourceData = sourceData;
1392 framePointers[i].sourceSize = sourceSize;
1393 framePointers[i].destSize = decompressedSize;
1394 }
1395 }
1396 else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) {
1397 Py_ssize_t offset = 0;
1398 ZstdBufferWithSegments* buffer;
1399 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)frames;
1400
1401 frameCount = BufferWithSegmentsCollection_length(collection);
1402
1403 if (frameSizes.buf && frameSizes.len != frameCount) {
1404 PyErr_Format(PyExc_ValueError,
1405 "decompressed_sizes size mismatch; expected %zd; got %zd",
1406 frameCount * sizeof(unsigned long long), frameSizes.len);
1407 goto finally;
1408 }
1409
1410 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1411 if (NULL == framePointers) {
1412 PyErr_NoMemory();
1413 goto finally;
1414 }
1415
1416 /* Iterate the data structure directly because it is faster. */
1417 for (i = 0; i < collection->bufferCount; i++) {
1418 Py_ssize_t segmentIndex;
1419 buffer = collection->buffers[i];
1420
1421 for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) {
1422 if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) {
1423 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area",
1424 offset);
1425 goto finally;
1426 }
1427
1428 totalInputSize += buffer->segments[segmentIndex].length;
1429
1430 framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset;
1431 framePointers[offset].sourceSize = buffer->segments[segmentIndex].length;
1432 framePointers[offset].destSize = frameSizesP ? frameSizesP[offset] : 0;
1433
1434 offset++;
1435 }
1436 }
1437 }
1438 else if (PyList_Check(frames)) {
1439 frameCount = PyList_GET_SIZE(frames);
1440
1441 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1442 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1443 frameCount * sizeof(unsigned long long), frameSizes.len);
1444 goto finally;
1445 }
1446
1447 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1448 if (!framePointers) {
1449 PyErr_NoMemory();
1450 goto finally;
1451 }
1452
1453 /*
1454 * It is not clear whether Py_buffer.buf is still valid after
1455 * PyBuffer_Release. So, we hold a reference to all Py_buffer instances
1456 * for the duration of the operation.
1457 */
1458 frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer));
1459 if (NULL == frameBuffers) {
1460 PyErr_NoMemory();
1461 goto finally;
1462 }
1463
1464 memset(frameBuffers, 0, frameCount * sizeof(Py_buffer));
1465
1466 /* Do a pass to assemble info about our input buffers and output sizes. */
1467 for (i = 0; i < frameCount; i++) {
1468 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i),
1469 &frameBuffers[i], PyBUF_CONTIG_RO)) {
1470 PyErr_Clear();
1471 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1472 goto finally;
1473 }
1474
1475 totalInputSize += frameBuffers[i].len;
1476
1477 framePointers[i].sourceData = frameBuffers[i].buf;
1478 framePointers[i].sourceSize = frameBuffers[i].len;
1479 framePointers[i].destSize = frameSizesP ? frameSizesP[i] : 0;
1480 }
1481 }
1482 else {
1483 PyErr_SetString(PyExc_TypeError, "argument must be list or BufferWithSegments");
1484 goto finally;
1485 }
1486
1487 /* We now have an array with info about our inputs and outputs. Feed it into
1488 our generic decompression function. */
1489 frameSources.frames = framePointers;
1490 frameSources.framesSize = frameCount;
1491 frameSources.compressedSize = totalInputSize;
1492
1493 result = decompress_from_framesources(self, &frameSources, threads);
1494
1495 finally:
1496 if (frameSizes.buf) {
1497 PyBuffer_Release(&frameSizes);
1498 }
1499 PyMem_Free(framePointers);
1500
1501 if (frameBuffers) {
1502 for (i = 0; i < frameCount; i++) {
1503 PyBuffer_Release(&frameBuffers[i]);
1504 }
1505
1506 PyMem_Free(frameBuffers);
1507 }
1508
1509 return result;
1510 }
1511
779 static PyMethodDef Decompressor_methods[] = {
1512 static PyMethodDef Decompressor_methods[] = {
780 { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
1513 { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
781 Decompressor_copy_stream__doc__ },
1514 Decompressor_copy_stream__doc__ },
782 { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS,
1515 { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS,
783 Decompressor_decompress__doc__ },
1516 Decompressor_decompress__doc__ },
784 { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_NOARGS,
1517 { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_NOARGS,
785 Decompressor_decompressobj__doc__ },
1518 Decompressor_decompressobj__doc__ },
786 { "read_from", (PyCFunction)Decompressor_read_from, METH_VARARGS | METH_KEYWORDS,
1519 { "read_from", (PyCFunction)Decompressor_read_from, METH_VARARGS | METH_KEYWORDS,
787 Decompressor_read_from__doc__ },
1520 Decompressor_read_from__doc__ },
788 { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS,
1521 { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS,
789 Decompressor_write_to__doc__ },
1522 Decompressor_write_to__doc__ },
790 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
1523 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
791 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
1524 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
1525 { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer,
1526 METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ },
792 { NULL, NULL }
1527 { NULL, NULL }
793 };
1528 };
794
1529
795 PyTypeObject ZstdDecompressorType = {
1530 PyTypeObject ZstdDecompressorType = {
796 PyVarObject_HEAD_INIT(NULL, 0)
1531 PyVarObject_HEAD_INIT(NULL, 0)
797 "zstd.ZstdDecompressor", /* tp_name */
1532 "zstd.ZstdDecompressor", /* tp_name */
798 sizeof(ZstdDecompressor), /* tp_basicsize */
1533 sizeof(ZstdDecompressor), /* tp_basicsize */
799 0, /* tp_itemsize */
1534 0, /* tp_itemsize */
800 (destructor)Decompressor_dealloc, /* tp_dealloc */
1535 (destructor)Decompressor_dealloc, /* tp_dealloc */
801 0, /* tp_print */
1536 0, /* tp_print */
802 0, /* tp_getattr */
1537 0, /* tp_getattr */
803 0, /* tp_setattr */
1538 0, /* tp_setattr */
804 0, /* tp_compare */
1539 0, /* tp_compare */
805 0, /* tp_repr */
1540 0, /* tp_repr */
806 0, /* tp_as_number */
1541 0, /* tp_as_number */
807 0, /* tp_as_sequence */
1542 0, /* tp_as_sequence */
808 0, /* tp_as_mapping */
1543 0, /* tp_as_mapping */
809 0, /* tp_hash */
1544 0, /* tp_hash */
810 0, /* tp_call */
1545 0, /* tp_call */
811 0, /* tp_str */
1546 0, /* tp_str */
812 0, /* tp_getattro */
1547 0, /* tp_getattro */
813 0, /* tp_setattro */
1548 0, /* tp_setattro */
814 0, /* tp_as_buffer */
1549 0, /* tp_as_buffer */
815 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1550 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
816 Decompressor__doc__, /* tp_doc */
1551 Decompressor__doc__, /* tp_doc */
817 0, /* tp_traverse */
1552 0, /* tp_traverse */
818 0, /* tp_clear */
1553 0, /* tp_clear */
819 0, /* tp_richcompare */
1554 0, /* tp_richcompare */
820 0, /* tp_weaklistoffset */
1555 0, /* tp_weaklistoffset */
821 0, /* tp_iter */
1556 0, /* tp_iter */
822 0, /* tp_iternext */
1557 0, /* tp_iternext */
823 Decompressor_methods, /* tp_methods */
1558 Decompressor_methods, /* tp_methods */
824 0, /* tp_members */
1559 0, /* tp_members */
825 0, /* tp_getset */
1560 0, /* tp_getset */
826 0, /* tp_base */
1561 0, /* tp_base */
827 0, /* tp_dict */
1562 0, /* tp_dict */
828 0, /* tp_descr_get */
1563 0, /* tp_descr_get */
829 0, /* tp_descr_set */
1564 0, /* tp_descr_set */
830 0, /* tp_dictoffset */
1565 0, /* tp_dictoffset */
831 (initproc)Decompressor_init, /* tp_init */
1566 (initproc)Decompressor_init, /* tp_init */
832 0, /* tp_alloc */
1567 0, /* tp_alloc */
833 PyType_GenericNew, /* tp_new */
1568 PyType_GenericNew, /* tp_new */
834 };
1569 };
835
1570
836 void decompressor_module_init(PyObject* mod) {
1571 void decompressor_module_init(PyObject* mod) {
837 Py_TYPE(&ZstdDecompressorType) = &PyType_Type;
1572 Py_TYPE(&ZstdDecompressorType) = &PyType_Type;
838 if (PyType_Ready(&ZstdDecompressorType) < 0) {
1573 if (PyType_Ready(&ZstdDecompressorType) < 0) {
839 return;
1574 return;
840 }
1575 }
841
1576
842 Py_INCREF((PyObject*)&ZstdDecompressorType);
1577 Py_INCREF((PyObject*)&ZstdDecompressorType);
843 PyModule_AddObject(mod, "ZstdDecompressor",
1578 PyModule_AddObject(mod, "ZstdDecompressor",
844 (PyObject*)&ZstdDecompressorType);
1579 (PyObject*)&ZstdDecompressorType);
845 }
1580 }
@@ -1,254 +1,251
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 #define min(a, b) (((a) < (b)) ? (a) : (b))
11 #define min(a, b) (((a) < (b)) ? (a) : (b))
12
12
13 extern PyObject* ZstdError;
13 extern PyObject* ZstdError;
14
14
15 PyDoc_STRVAR(ZstdDecompressorIterator__doc__,
15 PyDoc_STRVAR(ZstdDecompressorIterator__doc__,
16 "Represents an iterator of decompressed data.\n"
16 "Represents an iterator of decompressed data.\n"
17 );
17 );
18
18
19 static void ZstdDecompressorIterator_dealloc(ZstdDecompressorIterator* self) {
19 static void ZstdDecompressorIterator_dealloc(ZstdDecompressorIterator* self) {
20 Py_XDECREF(self->decompressor);
20 Py_XDECREF(self->decompressor);
21 Py_XDECREF(self->reader);
21 Py_XDECREF(self->reader);
22
22
23 if (self->buffer) {
23 if (self->buffer) {
24 PyBuffer_Release(self->buffer);
24 PyBuffer_Release(self->buffer);
25 PyMem_FREE(self->buffer);
25 PyMem_FREE(self->buffer);
26 self->buffer = NULL;
26 self->buffer = NULL;
27 }
27 }
28
28
29 if (self->dstream) {
30 ZSTD_freeDStream(self->dstream);
31 self->dstream = NULL;
32 }
33
34 if (self->input.src) {
29 if (self->input.src) {
35 PyMem_Free((void*)self->input.src);
30 PyMem_Free((void*)self->input.src);
36 self->input.src = NULL;
31 self->input.src = NULL;
37 }
32 }
38
33
39 PyObject_Del(self);
34 PyObject_Del(self);
40 }
35 }
41
36
42 static PyObject* ZstdDecompressorIterator_iter(PyObject* self) {
37 static PyObject* ZstdDecompressorIterator_iter(PyObject* self) {
43 Py_INCREF(self);
38 Py_INCREF(self);
44 return self;
39 return self;
45 }
40 }
46
41
47 static DecompressorIteratorResult read_decompressor_iterator(ZstdDecompressorIterator* self) {
42 static DecompressorIteratorResult read_decompressor_iterator(ZstdDecompressorIterator* self) {
48 size_t zresult;
43 size_t zresult;
49 PyObject* chunk;
44 PyObject* chunk;
50 DecompressorIteratorResult result;
45 DecompressorIteratorResult result;
51 size_t oldInputPos = self->input.pos;
46 size_t oldInputPos = self->input.pos;
52
47
48 assert(self->decompressor->dstream);
49
53 result.chunk = NULL;
50 result.chunk = NULL;
54
51
55 chunk = PyBytes_FromStringAndSize(NULL, self->outSize);
52 chunk = PyBytes_FromStringAndSize(NULL, self->outSize);
56 if (!chunk) {
53 if (!chunk) {
57 result.errored = 1;
54 result.errored = 1;
58 return result;
55 return result;
59 }
56 }
60
57
61 self->output.dst = PyBytes_AsString(chunk);
58 self->output.dst = PyBytes_AsString(chunk);
62 self->output.size = self->outSize;
59 self->output.size = self->outSize;
63 self->output.pos = 0;
60 self->output.pos = 0;
64
61
65 Py_BEGIN_ALLOW_THREADS
62 Py_BEGIN_ALLOW_THREADS
66 zresult = ZSTD_decompressStream(self->dstream, &self->output, &self->input);
63 zresult = ZSTD_decompressStream(self->decompressor->dstream, &self->output, &self->input);
67 Py_END_ALLOW_THREADS
64 Py_END_ALLOW_THREADS
68
65
69 /* We're done with the pointer. Nullify to prevent anyone from getting a
66 /* We're done with the pointer. Nullify to prevent anyone from getting a
70 handle on a Python object. */
67 handle on a Python object. */
71 self->output.dst = NULL;
68 self->output.dst = NULL;
72
69
73 if (ZSTD_isError(zresult)) {
70 if (ZSTD_isError(zresult)) {
74 Py_DECREF(chunk);
71 Py_DECREF(chunk);
75 PyErr_Format(ZstdError, "zstd decompress error: %s",
72 PyErr_Format(ZstdError, "zstd decompress error: %s",
76 ZSTD_getErrorName(zresult));
73 ZSTD_getErrorName(zresult));
77 result.errored = 1;
74 result.errored = 1;
78 return result;
75 return result;
79 }
76 }
80
77
81 self->readCount += self->input.pos - oldInputPos;
78 self->readCount += self->input.pos - oldInputPos;
82
79
83 /* Frame is fully decoded. Input exhausted and output sitting in buffer. */
80 /* Frame is fully decoded. Input exhausted and output sitting in buffer. */
84 if (0 == zresult) {
81 if (0 == zresult) {
85 self->finishedInput = 1;
82 self->finishedInput = 1;
86 self->finishedOutput = 1;
83 self->finishedOutput = 1;
87 }
84 }
88
85
89 /* If it produced output data, return it. */
86 /* If it produced output data, return it. */
90 if (self->output.pos) {
87 if (self->output.pos) {
91 if (self->output.pos < self->outSize) {
88 if (self->output.pos < self->outSize) {
92 if (_PyBytes_Resize(&chunk, self->output.pos)) {
89 if (_PyBytes_Resize(&chunk, self->output.pos)) {
93 result.errored = 1;
90 result.errored = 1;
94 return result;
91 return result;
95 }
92 }
96 }
93 }
97 }
94 }
98 else {
95 else {
99 Py_DECREF(chunk);
96 Py_DECREF(chunk);
100 chunk = NULL;
97 chunk = NULL;
101 }
98 }
102
99
103 result.errored = 0;
100 result.errored = 0;
104 result.chunk = chunk;
101 result.chunk = chunk;
105
102
106 return result;
103 return result;
107 }
104 }
108
105
109 static PyObject* ZstdDecompressorIterator_iternext(ZstdDecompressorIterator* self) {
106 static PyObject* ZstdDecompressorIterator_iternext(ZstdDecompressorIterator* self) {
110 PyObject* readResult = NULL;
107 PyObject* readResult = NULL;
111 char* readBuffer;
108 char* readBuffer;
112 Py_ssize_t readSize;
109 Py_ssize_t readSize;
113 Py_ssize_t bufferRemaining;
110 Py_ssize_t bufferRemaining;
114 DecompressorIteratorResult result;
111 DecompressorIteratorResult result;
115
112
116 if (self->finishedOutput) {
113 if (self->finishedOutput) {
117 PyErr_SetString(PyExc_StopIteration, "output flushed");
114 PyErr_SetString(PyExc_StopIteration, "output flushed");
118 return NULL;
115 return NULL;
119 }
116 }
120
117
121 /* If we have data left in the input, consume it. */
118 /* If we have data left in the input, consume it. */
122 if (self->input.pos < self->input.size) {
119 if (self->input.pos < self->input.size) {
123 result = read_decompressor_iterator(self);
120 result = read_decompressor_iterator(self);
124 if (result.chunk || result.errored) {
121 if (result.chunk || result.errored) {
125 return result.chunk;
122 return result.chunk;
126 }
123 }
127
124
128 /* Else fall through to get more data from input. */
125 /* Else fall through to get more data from input. */
129 }
126 }
130
127
131 read_from_source:
128 read_from_source:
132
129
133 if (!self->finishedInput) {
130 if (!self->finishedInput) {
134 if (self->reader) {
131 if (self->reader) {
135 readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
132 readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
136 if (!readResult) {
133 if (!readResult) {
137 return NULL;
134 return NULL;
138 }
135 }
139
136
140 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
137 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
141 }
138 }
142 else {
139 else {
143 assert(self->buffer && self->buffer->buf);
140 assert(self->buffer && self->buffer->buf);
144
141
145 /* Only support contiguous C arrays for now */
142 /* Only support contiguous C arrays for now */
146 assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL);
143 assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL);
147 assert(self->buffer->itemsize == 1);
144 assert(self->buffer->itemsize == 1);
148
145
149 /* TODO avoid memcpy() below */
146 /* TODO avoid memcpy() below */
150 readBuffer = (char *)self->buffer->buf + self->bufferOffset;
147 readBuffer = (char *)self->buffer->buf + self->bufferOffset;
151 bufferRemaining = self->buffer->len - self->bufferOffset;
148 bufferRemaining = self->buffer->len - self->bufferOffset;
152 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
149 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
153 self->bufferOffset += readSize;
150 self->bufferOffset += readSize;
154 }
151 }
155
152
156 if (readSize) {
153 if (readSize) {
157 if (!self->readCount && self->skipBytes) {
154 if (!self->readCount && self->skipBytes) {
158 assert(self->skipBytes < self->inSize);
155 assert(self->skipBytes < self->inSize);
159 if ((Py_ssize_t)self->skipBytes >= readSize) {
156 if ((Py_ssize_t)self->skipBytes >= readSize) {
160 PyErr_SetString(PyExc_ValueError,
157 PyErr_SetString(PyExc_ValueError,
161 "skip_bytes larger than first input chunk; "
158 "skip_bytes larger than first input chunk; "
162 "this scenario is currently unsupported");
159 "this scenario is currently unsupported");
163 Py_DecRef(readResult);
160 Py_XDECREF(readResult);
164 return NULL;
161 return NULL;
165 }
162 }
166
163
167 readBuffer = readBuffer + self->skipBytes;
164 readBuffer = readBuffer + self->skipBytes;
168 readSize -= self->skipBytes;
165 readSize -= self->skipBytes;
169 }
166 }
170
167
171 /* Copy input into previously allocated buffer because it can live longer
168 /* Copy input into previously allocated buffer because it can live longer
172 than a single function call and we don't want to keep a ref to a Python
169 than a single function call and we don't want to keep a ref to a Python
173 object around. This could be changed... */
170 object around. This could be changed... */
174 memcpy((void*)self->input.src, readBuffer, readSize);
171 memcpy((void*)self->input.src, readBuffer, readSize);
175 self->input.size = readSize;
172 self->input.size = readSize;
176 self->input.pos = 0;
173 self->input.pos = 0;
177 }
174 }
178 /* No bytes on first read must mean an empty input stream. */
175 /* No bytes on first read must mean an empty input stream. */
179 else if (!self->readCount) {
176 else if (!self->readCount) {
180 self->finishedInput = 1;
177 self->finishedInput = 1;
181 self->finishedOutput = 1;
178 self->finishedOutput = 1;
182 Py_DecRef(readResult);
179 Py_XDECREF(readResult);
183 PyErr_SetString(PyExc_StopIteration, "empty input");
180 PyErr_SetString(PyExc_StopIteration, "empty input");
184 return NULL;
181 return NULL;
185 }
182 }
186 else {
183 else {
187 self->finishedInput = 1;
184 self->finishedInput = 1;
188 }
185 }
189
186
190 /* We've copied the data into memory we manage. Discard the Python object. */
187 /* We've copied the data into memory we manage. Discard the Python object. */
191 Py_DecRef(readResult);
188 Py_XDECREF(readResult);
192 }
189 }
193
190
194 result = read_decompressor_iterator(self);
191 result = read_decompressor_iterator(self);
195 if (result.errored || result.chunk) {
192 if (result.errored || result.chunk) {
196 return result.chunk;
193 return result.chunk;
197 }
194 }
198
195
199 /* No new output data. Try again unless we know there is no more data. */
196 /* No new output data. Try again unless we know there is no more data. */
200 if (!self->finishedInput) {
197 if (!self->finishedInput) {
201 goto read_from_source;
198 goto read_from_source;
202 }
199 }
203
200
204 PyErr_SetString(PyExc_StopIteration, "input exhausted");
201 PyErr_SetString(PyExc_StopIteration, "input exhausted");
205 return NULL;
202 return NULL;
206 }
203 }
207
204
208 PyTypeObject ZstdDecompressorIteratorType = {
205 PyTypeObject ZstdDecompressorIteratorType = {
209 PyVarObject_HEAD_INIT(NULL, 0)
206 PyVarObject_HEAD_INIT(NULL, 0)
210 "zstd.ZstdDecompressorIterator", /* tp_name */
207 "zstd.ZstdDecompressorIterator", /* tp_name */
211 sizeof(ZstdDecompressorIterator), /* tp_basicsize */
208 sizeof(ZstdDecompressorIterator), /* tp_basicsize */
212 0, /* tp_itemsize */
209 0, /* tp_itemsize */
213 (destructor)ZstdDecompressorIterator_dealloc, /* tp_dealloc */
210 (destructor)ZstdDecompressorIterator_dealloc, /* tp_dealloc */
214 0, /* tp_print */
211 0, /* tp_print */
215 0, /* tp_getattr */
212 0, /* tp_getattr */
216 0, /* tp_setattr */
213 0, /* tp_setattr */
217 0, /* tp_compare */
214 0, /* tp_compare */
218 0, /* tp_repr */
215 0, /* tp_repr */
219 0, /* tp_as_number */
216 0, /* tp_as_number */
220 0, /* tp_as_sequence */
217 0, /* tp_as_sequence */
221 0, /* tp_as_mapping */
218 0, /* tp_as_mapping */
222 0, /* tp_hash */
219 0, /* tp_hash */
223 0, /* tp_call */
220 0, /* tp_call */
224 0, /* tp_str */
221 0, /* tp_str */
225 0, /* tp_getattro */
222 0, /* tp_getattro */
226 0, /* tp_setattro */
223 0, /* tp_setattro */
227 0, /* tp_as_buffer */
224 0, /* tp_as_buffer */
228 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
225 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
229 ZstdDecompressorIterator__doc__, /* tp_doc */
226 ZstdDecompressorIterator__doc__, /* tp_doc */
230 0, /* tp_traverse */
227 0, /* tp_traverse */
231 0, /* tp_clear */
228 0, /* tp_clear */
232 0, /* tp_richcompare */
229 0, /* tp_richcompare */
233 0, /* tp_weaklistoffset */
230 0, /* tp_weaklistoffset */
234 ZstdDecompressorIterator_iter, /* tp_iter */
231 ZstdDecompressorIterator_iter, /* tp_iter */
235 (iternextfunc)ZstdDecompressorIterator_iternext, /* tp_iternext */
232 (iternextfunc)ZstdDecompressorIterator_iternext, /* tp_iternext */
236 0, /* tp_methods */
233 0, /* tp_methods */
237 0, /* tp_members */
234 0, /* tp_members */
238 0, /* tp_getset */
235 0, /* tp_getset */
239 0, /* tp_base */
236 0, /* tp_base */
240 0, /* tp_dict */
237 0, /* tp_dict */
241 0, /* tp_descr_get */
238 0, /* tp_descr_get */
242 0, /* tp_descr_set */
239 0, /* tp_descr_set */
243 0, /* tp_dictoffset */
240 0, /* tp_dictoffset */
244 0, /* tp_init */
241 0, /* tp_init */
245 0, /* tp_alloc */
242 0, /* tp_alloc */
246 PyType_GenericNew, /* tp_new */
243 PyType_GenericNew, /* tp_new */
247 };
244 };
248
245
249 void decompressoriterator_module_init(PyObject* mod) {
246 void decompressoriterator_module_init(PyObject* mod) {
250 Py_TYPE(&ZstdDecompressorIteratorType) = &PyType_Type;
247 Py_TYPE(&ZstdDecompressorIteratorType) = &PyType_Type;
251 if (PyType_Ready(&ZstdDecompressorIteratorType) < 0) {
248 if (PyType_Ready(&ZstdDecompressorIteratorType) < 0) {
252 return;
249 return;
253 }
250 }
254 }
251 }
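For context, the iterator implemented above is what the decompressor's streaming read API hands back to callers. A minimal round trip might look like the sketch below; the read_from() method name and its acceptance of a file-like source are assumptions drawn from this version of the bindings rather than a documented contract.

    import io
    import zstd

    cctx = zstd.ZstdCompressor(level=3)
    frame = cctx.compress(b'data to round trip' * 1024)

    dctx = zstd.ZstdDecompressor()
    # read_from() (assumed API) reads compressed bytes from a file-like
    # object and yields decompressed chunks via ZstdDecompressorIterator.
    out = io.BytesIO()
    for chunk in dctx.read_from(io.BytesIO(frame)):
        out.write(chunk)

    assert out.getvalue() == b'data to round trip' * 1024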
@@ -1,132 +1,132
1 /**
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(FrameParameters__doc__,
13 PyDoc_STRVAR(FrameParameters__doc__,
14 "FrameParameters: information about a zstd frame");
14 "FrameParameters: information about a zstd frame");
15
15
16 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args) {
16 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args) {
17 const char* source;
17 const char* source;
18 Py_ssize_t sourceSize;
18 Py_ssize_t sourceSize;
19 ZSTD_frameParams params;
19 ZSTD_frameParams params;
20 FrameParametersObject* result = NULL;
20 FrameParametersObject* result = NULL;
21 size_t zresult;
21 size_t zresult;
22
22
23 #if PY_MAJOR_VERSION >= 3
23 #if PY_MAJOR_VERSION >= 3
24 if (!PyArg_ParseTuple(args, "y#:get_frame_parameters",
24 if (!PyArg_ParseTuple(args, "y#:get_frame_parameters",
25 #else
25 #else
26 if (!PyArg_ParseTuple(args, "s#:get_frame_parameters",
26 if (!PyArg_ParseTuple(args, "s#:get_frame_parameters",
27 #endif
27 #endif
28 &source, &sourceSize)) {
28 &source, &sourceSize)) {
29 return NULL;
29 return NULL;
30 }
30 }
31
31
32 /* Needed for Python 2 to reject unicode */
32 /* Needed for Python 2 to reject unicode */
33 if (!PyBytes_Check(PyTuple_GET_ITEM(args, 0))) {
33 if (!PyBytes_Check(PyTuple_GET_ITEM(args, 0))) {
34 PyErr_SetString(PyExc_TypeError, "argument must be bytes");
34 PyErr_SetString(PyExc_TypeError, "argument must be bytes");
35 return NULL;
35 return NULL;
36 }
36 }
37
37
38 zresult = ZSTD_getFrameParams(&params, (void*)source, sourceSize);
38 zresult = ZSTD_getFrameParams(&params, (void*)source, sourceSize);
39
39
40 if (ZSTD_isError(zresult)) {
40 if (ZSTD_isError(zresult)) {
41 PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult));
41 PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult));
42 return NULL;
42 return NULL;
43 }
43 }
44
44
45 if (zresult) {
45 if (zresult) {
46 PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult);
46 PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult);
47 return NULL;
47 return NULL;
48 }
48 }
49
49
50 result = PyObject_New(FrameParametersObject, &FrameParametersType);
50 result = PyObject_New(FrameParametersObject, &FrameParametersType);
51 if (!result) {
51 if (!result) {
52 return NULL;
52 return NULL;
53 }
53 }
54
54
55 result->frameContentSize = params.frameContentSize;
55 result->frameContentSize = params.frameContentSize;
56 result->windowSize = params.windowSize;
56 result->windowSize = params.windowSize;
57 result->dictID = params.dictID;
57 result->dictID = params.dictID;
58 result->checksumFlag = params.checksumFlag ? 1 : 0;
58 result->checksumFlag = params.checksumFlag ? 1 : 0;
59
59
60 return result;
60 return result;
61 }
61 }
62
62
63 static void FrameParameters_dealloc(PyObject* self) {
63 static void FrameParameters_dealloc(PyObject* self) {
64 PyObject_Del(self);
64 PyObject_Del(self);
65 }
65 }
66
66
67 static PyMemberDef FrameParameters_members[] = {
67 static PyMemberDef FrameParameters_members[] = {
68 { "content_size", T_ULONGLONG,
68 { "content_size", T_ULONGLONG,
69 offsetof(FrameParametersObject, frameContentSize), READONLY,
69 offsetof(FrameParametersObject, frameContentSize), READONLY,
70 "frame content size" },
70 "frame content size" },
71 { "window_size", T_UINT,
71 { "window_size", T_UINT,
72 offsetof(FrameParametersObject, windowSize), READONLY,
72 offsetof(FrameParametersObject, windowSize), READONLY,
73 "window size" },
73 "window size" },
74 { "dict_id", T_UINT,
74 { "dict_id", T_UINT,
75 offsetof(FrameParametersObject, dictID), READONLY,
75 offsetof(FrameParametersObject, dictID), READONLY,
76 "dictionary ID" },
76 "dictionary ID" },
77 { "has_checksum", T_BOOL,
77 { "has_checksum", T_BOOL,
78 offsetof(FrameParametersObject, checksumFlag), READONLY,
78 offsetof(FrameParametersObject, checksumFlag), READONLY,
79 "checksum flag" },
79 "checksum flag" },
80 { NULL }
80 { NULL }
81 };
81 };
82
82
83 PyTypeObject FrameParametersType = {
83 PyTypeObject FrameParametersType = {
84 PyVarObject_HEAD_INIT(NULL, 0)
84 PyVarObject_HEAD_INIT(NULL, 0)
85 "FrameParameters", /* tp_name */
85 "FrameParameters", /* tp_name */
86 sizeof(FrameParametersObject), /* tp_basicsize */
86 sizeof(FrameParametersObject), /* tp_basicsize */
87 0, /* tp_itemsize */
87 0, /* tp_itemsize */
88 (destructor)FrameParameters_dealloc, /* tp_dealloc */
88 (destructor)FrameParameters_dealloc, /* tp_dealloc */
89 0, /* tp_print */
89 0, /* tp_print */
90 0, /* tp_getattr */
90 0, /* tp_getattr */
91 0, /* tp_setattr */
91 0, /* tp_setattr */
92 0, /* tp_compare */
92 0, /* tp_compare */
93 0, /* tp_repr */
93 0, /* tp_repr */
94 0, /* tp_as_number */
94 0, /* tp_as_number */
95 0, /* tp_as_sequence */
95 0, /* tp_as_sequence */
96 0, /* tp_as_mapping */
96 0, /* tp_as_mapping */
97 0, /* tp_hash */
97 0, /* tp_hash */
98 0, /* tp_call */
98 0, /* tp_call */
99 0, /* tp_str */
99 0, /* tp_str */
100 0, /* tp_getattro */
100 0, /* tp_getattro */
101 0, /* tp_setattro */
101 0, /* tp_setattro */
102 0, /* tp_as_buffer */
102 0, /* tp_as_buffer */
103 Py_TPFLAGS_DEFAULT, /* tp_flags */
103 Py_TPFLAGS_DEFAULT, /* tp_flags */
104 FrameParameters__doc__, /* tp_doc */
104 FrameParameters__doc__, /* tp_doc */
105 0, /* tp_traverse */
105 0, /* tp_traverse */
106 0, /* tp_clear */
106 0, /* tp_clear */
107 0, /* tp_richcompare */
107 0, /* tp_richcompare */
108 0, /* tp_weaklistoffset */
108 0, /* tp_weaklistoffset */
109 0, /* tp_iter */
109 0, /* tp_iter */
110 0, /* tp_iternext */
110 0, /* tp_iternext */
111 0, /* tp_methods */
111 0, /* tp_methods */
112 FrameParameters_members, /* tp_members */
112 FrameParameters_members, /* tp_members */
113 0, /* tp_getset */
113 0, /* tp_getset */
114 0, /* tp_base */
114 0, /* tp_base */
115 0, /* tp_dict */
115 0, /* tp_dict */
116 0, /* tp_descr_get */
116 0, /* tp_descr_get */
117 0, /* tp_descr_set */
117 0, /* tp_descr_set */
118 0, /* tp_dictoffset */
118 0, /* tp_dictoffset */
119 0, /* tp_init */
119 0, /* tp_init */
120 0, /* tp_alloc */
120 0, /* tp_alloc */
121 0, /* tp_new */
121 0, /* tp_new */
122 };
122 };
123
123
124 void frameparams_module_init(PyObject* mod) {
124 void frameparams_module_init(PyObject* mod) {
125 Py_TYPE(&FrameParametersType) = &PyType_Type;
125 Py_TYPE(&FrameParametersType) = &PyType_Type;
126 if (PyType_Ready(&FrameParametersType) < 0) {
126 if (PyType_Ready(&FrameParametersType) < 0) {
127 return;
127 return;
128 }
128 }
129
129
130 Py_IncRef((PyObject*)&FrameParametersType);
130 Py_INCREF(&FrameParametersType);
131 PyModule_AddObject(mod, "FrameParameters", (PyObject*)&FrameParametersType);
131 PyModule_AddObject(mod, "FrameParameters", (PyObject*)&FrameParametersType);
132 }
132 }
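The FrameParameters object assembled above is what zstd.get_frame_parameters() returns. A short sketch of inspecting a frame, using the attribute names from the members table (content_size, window_size, dict_id, has_checksum); the expected content_size value matches the test assertions further below:

    import zstd

    cctx = zstd.ZstdCompressor(level=1, write_content_size=True,
                               write_checksum=True)
    frame = cctx.compress(b'foobar' * 256)

    params = zstd.get_frame_parameters(frame)
    # content_size is recorded because write_content_size=True was used.
    assert params.content_size == 1536
    # dict_id stays 0 because no dictionary compressed this frame.
    assert params.dict_id == 0
    assert params.has_checksum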
@@ -1,190 +1,285
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #define PY_SSIZE_T_CLEAN
9 #define PY_SSIZE_T_CLEAN
10 #include <Python.h>
10 #include <Python.h>
11 #include "structmember.h"
11 #include "structmember.h"
12
12
13 #define ZSTD_STATIC_LINKING_ONLY
13 #define ZSTD_STATIC_LINKING_ONLY
14 #define ZDICT_STATIC_LINKING_ONLY
14 #define ZDICT_STATIC_LINKING_ONLY
15 #include "mem.h"
15 #include "mem.h"
16 #include "zstd.h"
16 #include "zstd.h"
17 #include "zdict.h"
17 #include "zdict.h"
18 #include "zstdmt_compress.h"
18
19
19 #define PYTHON_ZSTANDARD_VERSION "0.7.0"
20 #define PYTHON_ZSTANDARD_VERSION "0.8.0"
20
21
21 typedef enum {
22 typedef enum {
22 compressorobj_flush_finish,
23 compressorobj_flush_finish,
23 compressorobj_flush_block,
24 compressorobj_flush_block,
24 } CompressorObj_Flush;
25 } CompressorObj_Flush;
25
26
27 /*
28 Represents a CompressionParameters type.
29
30 This type is basically a wrapper around ZSTD_compressionParameters.
31 */
26 typedef struct {
32 typedef struct {
27 PyObject_HEAD
33 PyObject_HEAD
28 unsigned windowLog;
34 unsigned windowLog;
29 unsigned chainLog;
35 unsigned chainLog;
30 unsigned hashLog;
36 unsigned hashLog;
31 unsigned searchLog;
37 unsigned searchLog;
32 unsigned searchLength;
38 unsigned searchLength;
33 unsigned targetLength;
39 unsigned targetLength;
34 ZSTD_strategy strategy;
40 ZSTD_strategy strategy;
35 } CompressionParametersObject;
41 } CompressionParametersObject;
36
42
37 extern PyTypeObject CompressionParametersType;
43 extern PyTypeObject CompressionParametersType;
38
44
45 /*
46 Represents a FrameParameters type.
47
48 This type is basically a wrapper around ZSTD_frameParams.
49 */
39 typedef struct {
50 typedef struct {
40 PyObject_HEAD
51 PyObject_HEAD
41 unsigned long long frameContentSize;
52 unsigned long long frameContentSize;
42 unsigned windowSize;
53 unsigned windowSize;
43 unsigned dictID;
54 unsigned dictID;
44 char checksumFlag;
55 char checksumFlag;
45 } FrameParametersObject;
56 } FrameParametersObject;
46
57
47 extern PyTypeObject FrameParametersType;
58 extern PyTypeObject FrameParametersType;
48
59
49 typedef struct {
60 /*
50 PyObject_HEAD
61 Represents a ZstdCompressionDict type.
51 unsigned selectivityLevel;
52 int compressionLevel;
53 unsigned notificationLevel;
54 unsigned dictID;
55 } DictParametersObject;
56
62
57 extern PyTypeObject DictParametersType;
63 Instances hold data used for a zstd compression dictionary.
58
64 */
59 typedef struct {
65 typedef struct {
60 PyObject_HEAD
66 PyObject_HEAD
61
67
68 /* Pointer to dictionary data. Owned by self. */
62 void* dictData;
69 void* dictData;
70 /* Size of dictionary data. */
63 size_t dictSize;
71 size_t dictSize;
72 /* k parameter for cover dictionaries. Only populated by train_cover_dictionary(). */
73 unsigned k;
74 /* d parameter for cover dictionaries. Only populated by train_cover_dictionary(). */
75 unsigned d;
64 } ZstdCompressionDict;
76 } ZstdCompressionDict;
65
77
66 extern PyTypeObject ZstdCompressionDictType;
78 extern PyTypeObject ZstdCompressionDictType;
67
79
80 /*
81 Represents a ZstdCompressor type.
82 */
68 typedef struct {
83 typedef struct {
69 PyObject_HEAD
84 PyObject_HEAD
70
85
86 /* Configured compression level. Should be always set. */
71 int compressionLevel;
87 int compressionLevel;
88 /* Number of threads to use for operations. */
89 unsigned int threads;
90 /* Pointer to compression dictionary to use. NULL if not using dictionary
91 compression. */
72 ZstdCompressionDict* dict;
92 ZstdCompressionDict* dict;
93 /* Compression context to use. Populated during object construction. NULL
94 if using multi-threaded compression. */
73 ZSTD_CCtx* cctx;
95 ZSTD_CCtx* cctx;
96 /* Multi-threaded compression context to use. Populated during object
97 construction. NULL if not using multi-threaded compression. */
98 ZSTDMT_CCtx* mtcctx;
99 /* Digested compression dictionary. NULL initially. Populated on first use. */
74 ZSTD_CDict* cdict;
100 ZSTD_CDict* cdict;
101 /* Low-level compression parameter control. NULL unless passed to
102 constructor. Takes precedence over `compressionLevel` if defined. */
75 CompressionParametersObject* cparams;
103 CompressionParametersObject* cparams;
104 /* Controls zstd frame options. */
76 ZSTD_frameParameters fparams;
105 ZSTD_frameParameters fparams;
106 /* Holds state for streaming compression. Shared across all invocations.
107 Populated on first use. */
108 ZSTD_CStream* cstream;
77 } ZstdCompressor;
109 } ZstdCompressor;
78
110
79 extern PyTypeObject ZstdCompressorType;
111 extern PyTypeObject ZstdCompressorType;
80
112
81 typedef struct {
113 typedef struct {
82 PyObject_HEAD
114 PyObject_HEAD
83
115
84 ZstdCompressor* compressor;
116 ZstdCompressor* compressor;
85 ZSTD_CStream* cstream;
86 ZSTD_outBuffer output;
117 ZSTD_outBuffer output;
87 int finished;
118 int finished;
88 } ZstdCompressionObj;
119 } ZstdCompressionObj;
89
120
90 extern PyTypeObject ZstdCompressionObjType;
121 extern PyTypeObject ZstdCompressionObjType;
91
122
92 typedef struct {
123 typedef struct {
93 PyObject_HEAD
124 PyObject_HEAD
94
125
95 ZstdCompressor* compressor;
126 ZstdCompressor* compressor;
96 PyObject* writer;
127 PyObject* writer;
97 Py_ssize_t sourceSize;
128 Py_ssize_t sourceSize;
98 size_t outSize;
129 size_t outSize;
99 ZSTD_CStream* cstream;
100 int entered;
130 int entered;
101 } ZstdCompressionWriter;
131 } ZstdCompressionWriter;
102
132
103 extern PyTypeObject ZstdCompressionWriterType;
133 extern PyTypeObject ZstdCompressionWriterType;
104
134
105 typedef struct {
135 typedef struct {
106 PyObject_HEAD
136 PyObject_HEAD
107
137
108 ZstdCompressor* compressor;
138 ZstdCompressor* compressor;
109 PyObject* reader;
139 PyObject* reader;
110 Py_buffer* buffer;
140 Py_buffer* buffer;
111 Py_ssize_t bufferOffset;
141 Py_ssize_t bufferOffset;
112 Py_ssize_t sourceSize;
142 Py_ssize_t sourceSize;
113 size_t inSize;
143 size_t inSize;
114 size_t outSize;
144 size_t outSize;
115
145
116 ZSTD_CStream* cstream;
117 ZSTD_inBuffer input;
146 ZSTD_inBuffer input;
118 ZSTD_outBuffer output;
147 ZSTD_outBuffer output;
119 int finishedOutput;
148 int finishedOutput;
120 int finishedInput;
149 int finishedInput;
121 PyObject* readResult;
150 PyObject* readResult;
122 } ZstdCompressorIterator;
151 } ZstdCompressorIterator;
123
152
124 extern PyTypeObject ZstdCompressorIteratorType;
153 extern PyTypeObject ZstdCompressorIteratorType;
125
154
126 typedef struct {
155 typedef struct {
127 PyObject_HEAD
156 PyObject_HEAD
128
157
129 ZSTD_DCtx* dctx;
158 ZSTD_DCtx* dctx;
130
159
131 ZstdCompressionDict* dict;
160 ZstdCompressionDict* dict;
132 ZSTD_DDict* ddict;
161 ZSTD_DDict* ddict;
162 ZSTD_DStream* dstream;
133 } ZstdDecompressor;
163 } ZstdDecompressor;
134
164
135 extern PyTypeObject ZstdDecompressorType;
165 extern PyTypeObject ZstdDecompressorType;
136
166
137 typedef struct {
167 typedef struct {
138 PyObject_HEAD
168 PyObject_HEAD
139
169
140 ZstdDecompressor* decompressor;
170 ZstdDecompressor* decompressor;
141 ZSTD_DStream* dstream;
142 int finished;
171 int finished;
143 } ZstdDecompressionObj;
172 } ZstdDecompressionObj;
144
173
145 extern PyTypeObject ZstdDecompressionObjType;
174 extern PyTypeObject ZstdDecompressionObjType;
146
175
147 typedef struct {
176 typedef struct {
148 PyObject_HEAD
177 PyObject_HEAD
149
178
150 ZstdDecompressor* decompressor;
179 ZstdDecompressor* decompressor;
151 PyObject* writer;
180 PyObject* writer;
152 size_t outSize;
181 size_t outSize;
153 ZSTD_DStream* dstream;
154 int entered;
182 int entered;
155 } ZstdDecompressionWriter;
183 } ZstdDecompressionWriter;
156
184
157 extern PyTypeObject ZstdDecompressionWriterType;
185 extern PyTypeObject ZstdDecompressionWriterType;
158
186
159 typedef struct {
187 typedef struct {
160 PyObject_HEAD
188 PyObject_HEAD
161
189
162 ZstdDecompressor* decompressor;
190 ZstdDecompressor* decompressor;
163 PyObject* reader;
191 PyObject* reader;
164 Py_buffer* buffer;
192 Py_buffer* buffer;
165 Py_ssize_t bufferOffset;
193 Py_ssize_t bufferOffset;
166 size_t inSize;
194 size_t inSize;
167 size_t outSize;
195 size_t outSize;
168 size_t skipBytes;
196 size_t skipBytes;
169 ZSTD_DStream* dstream;
170 ZSTD_inBuffer input;
197 ZSTD_inBuffer input;
171 ZSTD_outBuffer output;
198 ZSTD_outBuffer output;
172 Py_ssize_t readCount;
199 Py_ssize_t readCount;
173 int finishedInput;
200 int finishedInput;
174 int finishedOutput;
201 int finishedOutput;
175 } ZstdDecompressorIterator;
202 } ZstdDecompressorIterator;
176
203
177 extern PyTypeObject ZstdDecompressorIteratorType;
204 extern PyTypeObject ZstdDecompressorIteratorType;
178
205
179 typedef struct {
206 typedef struct {
180 int errored;
207 int errored;
181 PyObject* chunk;
208 PyObject* chunk;
182 } DecompressorIteratorResult;
209 } DecompressorIteratorResult;
183
210
211 typedef struct {
212 unsigned long long offset;
213 unsigned long long length;
214 } BufferSegment;
215
216 typedef struct {
217 PyObject_HEAD
218
219 PyObject* parent;
220 BufferSegment* segments;
221 Py_ssize_t segmentCount;
222 } ZstdBufferSegments;
223
224 extern PyTypeObject ZstdBufferSegmentsType;
225
226 typedef struct {
227 PyObject_HEAD
228
229 PyObject* parent;
230 void* data;
231 Py_ssize_t dataSize;
232 unsigned long long offset;
233 } ZstdBufferSegment;
234
235 extern PyTypeObject ZstdBufferSegmentType;
236
237 typedef struct {
238 PyObject_HEAD
239
240 Py_buffer parent;
241 void* data;
242 unsigned long long dataSize;
243 BufferSegment* segments;
244 Py_ssize_t segmentCount;
245 int useFree;
246 } ZstdBufferWithSegments;
247
248 extern PyTypeObject ZstdBufferWithSegmentsType;
249
250 /**
251 * An ordered collection of BufferWithSegments exposed as a squashed collection.
252 *
253 * This type provides a virtual view spanning multiple BufferWithSegments
254 * instances. It allows multiple instances to be "chained" together and
255 * exposed as a single collection. e.g. if there are 2 buffers holding
256 * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer.
257 */
258 typedef struct {
259 PyObject_HEAD
260
261 /* An array of buffers that should be exposed through this instance. */
262 ZstdBufferWithSegments** buffers;
263 /* Number of elements in buffers array. */
264 Py_ssize_t bufferCount;
265 /* Array of first offset in each buffer instance. 0th entry corresponds
266 to number of elements in the 0th buffer. 1st entry corresponds to the
267 sum of elements in 0th and 1st buffers. */
268 Py_ssize_t* firstElements;
269 } ZstdBufferWithSegmentsCollection;
270
271 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
272
184 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams);
273 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams);
185 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args);
274 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args);
186 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args);
275 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args);
187 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args);
276 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args);
188 ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize);
277 int init_cstream(ZstdCompressor* compressor, unsigned long long sourceSize);
189 ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor);
278 int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize);
279 int init_dstream(ZstdDecompressor* decompressor);
190 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
280 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
281 ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
282 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize);
283 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*);
284 int cpu_count(void);
285 size_t roundpow2(size_t);
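Several of the new members above (threads, mtcctx, and the init_cstream/init_mtcstream prototypes) exist to support the multi-threaded compression added in 0.8.0. A rough sketch of how that surfaces in Python, with the threads= keyword taken from the tests further below; the max_output_size argument is only needed here because the frame does not record its content size:

    import zstd

    data = b'payload' * (1 << 18)

    # threads > 0 routes compression through the ZSTDMT_CCtx ('mtcctx')
    # instead of the single-threaded ZSTD_CCtx.
    cctx = zstd.ZstdCompressor(level=3, threads=2)
    frame = cctx.compress(data)

    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress(frame, max_output_size=len(data)) == data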
@@ -1,154 +1,187
1 # Copyright (c) 2016-present, Gregory Szorc
1 # Copyright (c) 2016-present, Gregory Szorc
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # This software may be modified and distributed under the terms
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
5 # of the BSD license. See the LICENSE file for details.
6
6
7 from __future__ import absolute_import
7 from __future__ import absolute_import
8
8
9 import cffi
9 import cffi
10 import distutils.ccompiler
10 import distutils.ccompiler
11 import os
11 import os
12 import re
12 import re
13 import subprocess
13 import subprocess
14 import tempfile
14 import tempfile
15
15
16
16
17 HERE = os.path.abspath(os.path.dirname(__file__))
17 HERE = os.path.abspath(os.path.dirname(__file__))
18
18
19 SOURCES = ['zstd/%s' % p for p in (
19 SOURCES = ['zstd/%s' % p for p in (
20 'common/entropy_common.c',
20 'common/entropy_common.c',
21 'common/error_private.c',
21 'common/error_private.c',
22 'common/fse_decompress.c',
22 'common/fse_decompress.c',
23 'common/pool.c',
23 'common/pool.c',
24 'common/threading.c',
24 'common/threading.c',
25 'common/xxhash.c',
25 'common/xxhash.c',
26 'common/zstd_common.c',
26 'common/zstd_common.c',
27 'compress/fse_compress.c',
27 'compress/fse_compress.c',
28 'compress/huf_compress.c',
28 'compress/huf_compress.c',
29 'compress/zstd_compress.c',
29 'compress/zstd_compress.c',
30 'compress/zstdmt_compress.c',
30 'decompress/huf_decompress.c',
31 'decompress/huf_decompress.c',
31 'decompress/zstd_decompress.c',
32 'decompress/zstd_decompress.c',
32 'dictBuilder/cover.c',
33 'dictBuilder/cover.c',
33 'dictBuilder/divsufsort.c',
34 'dictBuilder/divsufsort.c',
34 'dictBuilder/zdict.c',
35 'dictBuilder/zdict.c',
35 )]
36 )]
36
37
38 # Headers whose preprocessed output will be fed into cdef().
37 HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
39 HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
38 ('zstd.h',),
40 ('zstd.h',),
39 ('common', 'pool.h'),
41 ('compress', 'zstdmt_compress.h'),
40 ('dictBuilder', 'zdict.h'),
42 ('dictBuilder', 'zdict.h'),
41 )]
43 )]
42
44
43 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
45 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
44 'zstd',
46 'zstd',
45 'zstd/common',
47 'zstd/common',
46 'zstd/compress',
48 'zstd/compress',
47 'zstd/decompress',
49 'zstd/decompress',
48 'zstd/dictBuilder',
50 'zstd/dictBuilder',
49 )]
51 )]
50
52
51 # cffi can't parse some of the primitives in zstd.h. So we invoke the
53 # cffi can't parse some of the primitives in zstd.h. So we invoke the
52 # preprocessor and feed its output into cffi.
54 # preprocessor and feed its output into cffi.
53 compiler = distutils.ccompiler.new_compiler()
55 compiler = distutils.ccompiler.new_compiler()
54
56
55 # Needed for MSVC.
57 # Needed for MSVC.
56 if hasattr(compiler, 'initialize'):
58 if hasattr(compiler, 'initialize'):
57 compiler.initialize()
59 compiler.initialize()
58
60
59 # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor
61 # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor
60 # manually.
62 # manually.
61 if compiler.compiler_type == 'unix':
63 if compiler.compiler_type == 'unix':
62 args = list(compiler.executables['compiler'])
64 args = list(compiler.executables['compiler'])
63 args.extend([
65 args.extend([
64 '-E',
66 '-E',
65 '-DZSTD_STATIC_LINKING_ONLY',
67 '-DZSTD_STATIC_LINKING_ONLY',
66 '-DZDICT_STATIC_LINKING_ONLY',
68 '-DZDICT_STATIC_LINKING_ONLY',
67 ])
69 ])
68 elif compiler.compiler_type == 'msvc':
70 elif compiler.compiler_type == 'msvc':
69 args = [compiler.cc]
71 args = [compiler.cc]
70 args.extend([
72 args.extend([
71 '/EP',
73 '/EP',
72 '/DZSTD_STATIC_LINKING_ONLY',
74 '/DZSTD_STATIC_LINKING_ONLY',
73 '/DZDICT_STATIC_LINKING_ONLY',
75 '/DZDICT_STATIC_LINKING_ONLY',
74 ])
76 ])
75 else:
77 else:
76 raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
78 raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
77
79
78 def preprocess(path):
80 def preprocess(path):
79 # zstd.h includes <stddef.h>, which is also included by cffi's boilerplate.
80 # This can lead to duplicate declarations. So we strip this include from the
81 # preprocessor invocation.
82 with open(path, 'rb') as fh:
81 with open(path, 'rb') as fh:
83 lines = [l for l in fh if not l.startswith(b'#include <stddef.h>')]
82 lines = []
83 for l in fh:
84 # zstd.h includes <stddef.h>, which is also included by cffi's
85 # boilerplate. This can lead to duplicate declarations. So we strip
86 # this include from the preprocessor invocation.
87 #
88 # The same thing happens for including zstd.h, so give it the same
89 # treatment.
90 #
91 # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline
92 # #define in zstdmt_compress.h and results in a compiler warning. So drop
93 # the inline #define.
94 if l.startswith((b'#include <stddef.h>',
95 b'#include "zstd.h"',
96 b'#define ZSTD_STATIC_LINKING_ONLY')):
97 continue
98
99 # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't
100 # important so just filter it out.
101 if l.startswith(b'ZSTDLIB_API'):
102 l = l[len(b'ZSTDLIB_API '):]
103
104 lines.append(l)
84
105
85 fd, input_file = tempfile.mkstemp(suffix='.h')
106 fd, input_file = tempfile.mkstemp(suffix='.h')
86 os.write(fd, b''.join(lines))
107 os.write(fd, b''.join(lines))
87 os.close(fd)
108 os.close(fd)
88
109
89 try:
110 try:
90 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE)
111 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE)
91 output = process.communicate()[0]
112 output = process.communicate()[0]
92 ret = process.poll()
113 ret = process.poll()
93 if ret:
114 if ret:
94 raise Exception('preprocessor exited with error')
115 raise Exception('preprocessor exited with error')
95
116
96 return output
117 return output
97 finally:
118 finally:
98 os.unlink(input_file)
119 os.unlink(input_file)
99
120
100
121
101 def normalize_output(output):
122 def normalize_output(output):
102 lines = []
123 lines = []
103 for line in output.splitlines():
124 for line in output.splitlines():
104 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
125 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
105 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
126 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
106 line = line[len(b'__attribute__ ((visibility ("default"))) '):]
127 line = line[len(b'__attribute__ ((visibility ("default"))) '):]
107
128
108 if line.startswith(b'__attribute__((deprecated('):
129 if line.startswith(b'__attribute__((deprecated('):
109 continue
130 continue
110 elif b'__declspec(deprecated(' in line:
131 elif b'__declspec(deprecated(' in line:
111 continue
132 continue
112
133
113 lines.append(line)
134 lines.append(line)
114
135
115 return b'\n'.join(lines)
136 return b'\n'.join(lines)
116
137
117
138
118 ffi = cffi.FFI()
139 ffi = cffi.FFI()
140 # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning
141 # when cffi uses the function. Since we statically link against zstd, even
142 # if we use the deprecated functions it shouldn't be a huge problem.
119 ffi.set_source('_zstd_cffi', '''
143 ffi.set_source('_zstd_cffi', '''
120 #include "mem.h"
144 #include "mem.h"
121 #define ZSTD_STATIC_LINKING_ONLY
145 #define ZSTD_STATIC_LINKING_ONLY
122 #include "zstd.h"
146 #include "zstd.h"
123 #define ZDICT_STATIC_LINKING_ONLY
147 #define ZDICT_STATIC_LINKING_ONLY
124 #include "pool.h"
148 #define ZDICT_DISABLE_DEPRECATE_WARNINGS
125 #include "zdict.h"
149 #include "zdict.h"
150 #include "zstdmt_compress.h"
126 ''', sources=SOURCES, include_dirs=INCLUDE_DIRS)
151 ''', sources=SOURCES, include_dirs=INCLUDE_DIRS)
127
152
128 DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
153 DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
129
154
130 sources = []
155 sources = []
131
156
157 # Feed normalized preprocessor output for headers into the cdef parser.
132 for header in HEADERS:
158 for header in HEADERS:
133 preprocessed = preprocess(header)
159 preprocessed = preprocess(header)
134 sources.append(normalize_output(preprocessed))
160 sources.append(normalize_output(preprocessed))
135
161
136 # Do another pass over source and find constants that were preprocessed
162 # #define's are effectively erased as part of going through the preprocessor.
137 # away.
163 # So perform a manual pass to re-add those to the cdef source.
138 with open(header, 'rb') as fh:
164 with open(header, 'rb') as fh:
139 for line in fh:
165 for line in fh:
140 line = line.strip()
166 line = line.strip()
141 m = DEFINE.match(line)
167 m = DEFINE.match(line)
142 if not m:
168 if not m:
143 continue
169 continue
144
170
171 if m.group(1) == b'ZSTD_STATIC_LINKING_ONLY':
172 continue
173
145 # The parser doesn't like some constants with complex values.
174 # The parser doesn't like some constants with complex values.
146 if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'):
175 if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'):
147 continue
176 continue
148
177
178 # The ... is magic syntax by the cdef parser to resolve the
179 # value at compile time.
149 sources.append(m.group(0) + b' ...')
180 sources.append(m.group(0) + b' ...')
150
181
151 ffi.cdef(u'\n'.join(s.decode('latin1') for s in sources))
182 cdeflines = b'\n'.join(sources).splitlines()
183 cdeflines = [l for l in cdeflines if l.strip()]
184 ffi.cdef(b'\n'.join(cdeflines).decode('latin1'))
152
185
153 if __name__ == '__main__':
186 if __name__ == '__main__':
154 ffi.compile()
187 ffi.compile()
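To make the '...' cdef trick described in the comments above concrete, here is a tiny self-contained sketch of how a single #define is re-added after preprocessing; the regex is copied from the script, and ZSTD_MAGICNUMBER is simply one of the constants zstd.h defines:

    import re

    DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')

    line = b'#define ZSTD_MAGICNUMBER 0xFD2FB528'
    m = DEFINE.match(line)

    # Appending ' ...' tells cffi's cdef() parser to resolve the value at
    # compile time rather than trusting whatever the header spelled out.
    cdef_line = m.group(0) + b' ...'
    # cdef_line can now be fed to ffi.cdef() along with the preprocessed
    # function declarations.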
@@ -1,70 +1,76
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # Copyright (c) 2016-present, Gregory Szorc
2 # Copyright (c) 2016-present, Gregory Szorc
3 # All rights reserved.
3 # All rights reserved.
4 #
4 #
5 # This software may be modified and distributed under the terms
5 # This software may be modified and distributed under the terms
6 # of the BSD license. See the LICENSE file for details.
6 # of the BSD license. See the LICENSE file for details.
7
7
8 import sys
8 import sys
9 from setuptools import setup
9 from setuptools import setup
10
10
11 try:
11 try:
12 import cffi
12 import cffi
13 except ImportError:
13 except ImportError:
14 cffi = None
14 cffi = None
15
15
16 import setup_zstd
16 import setup_zstd
17
17
18 SUPPORT_LEGACY = False
18 SUPPORT_LEGACY = False
19
19
20 if "--legacy" in sys.argv:
20 if "--legacy" in sys.argv:
21 SUPPORT_LEGACY = True
21 SUPPORT_LEGACY = True
22 sys.argv.remove("--legacy")
22 sys.argv.remove("--legacy")
23
23
24 # Code for obtaining the Extension instance is in its own module to
24 # Code for obtaining the Extension instance is in its own module to
25 # facilitate reuse in other projects.
25 # facilitate reuse in other projects.
26 extensions = [setup_zstd.get_c_extension(SUPPORT_LEGACY, 'zstd')]
26 extensions = [setup_zstd.get_c_extension(SUPPORT_LEGACY, 'zstd')]
27
27
28 install_requires = []
29
28 if cffi:
30 if cffi:
29 import make_cffi
31 import make_cffi
30 extensions.append(make_cffi.ffi.distutils_extension())
32 extensions.append(make_cffi.ffi.distutils_extension())
31
33
34 # Need change in 1.8 for ffi.from_buffer() behavior.
35 install_requires.append('cffi>=1.8')
36
32 version = None
37 version = None
33
38
34 with open('c-ext/python-zstandard.h', 'r') as fh:
39 with open('c-ext/python-zstandard.h', 'r') as fh:
35 for line in fh:
40 for line in fh:
36 if not line.startswith('#define PYTHON_ZSTANDARD_VERSION'):
41 if not line.startswith('#define PYTHON_ZSTANDARD_VERSION'):
37 continue
42 continue
38
43
39 version = line.split()[2][1:-1]
44 version = line.split()[2][1:-1]
40 break
45 break
41
46
42 if not version:
47 if not version:
43 raise Exception('could not resolve package version; '
48 raise Exception('could not resolve package version; '
44 'this should never happen')
49 'this should never happen')
45
50
46 setup(
51 setup(
47 name='zstandard',
52 name='zstandard',
48 version=version,
53 version=version,
49 description='Zstandard bindings for Python',
54 description='Zstandard bindings for Python',
50 long_description=open('README.rst', 'r').read(),
55 long_description=open('README.rst', 'r').read(),
51 url='https://github.com/indygreg/python-zstandard',
56 url='https://github.com/indygreg/python-zstandard',
52 author='Gregory Szorc',
57 author='Gregory Szorc',
53 author_email='gregory.szorc@gmail.com',
58 author_email='gregory.szorc@gmail.com',
54 license='BSD',
59 license='BSD',
55 classifiers=[
60 classifiers=[
56 'Development Status :: 4 - Beta',
61 'Development Status :: 4 - Beta',
57 'Intended Audience :: Developers',
62 'Intended Audience :: Developers',
58 'License :: OSI Approved :: BSD License',
63 'License :: OSI Approved :: BSD License',
59 'Programming Language :: C',
64 'Programming Language :: C',
60 'Programming Language :: Python :: 2.6',
65 'Programming Language :: Python :: 2.6',
61 'Programming Language :: Python :: 2.7',
66 'Programming Language :: Python :: 2.7',
62 'Programming Language :: Python :: 3.3',
67 'Programming Language :: Python :: 3.3',
63 'Programming Language :: Python :: 3.4',
68 'Programming Language :: Python :: 3.4',
64 'Programming Language :: Python :: 3.5',
69 'Programming Language :: Python :: 3.5',
65 'Programming Language :: Python :: 3.6',
70 'Programming Language :: Python :: 3.6',
66 ],
71 ],
67 keywords='zstandard zstd compression',
72 keywords='zstandard zstd compression',
68 ext_modules=extensions,
73 ext_modules=extensions,
69 test_suite='tests',
74 test_suite='tests',
75 install_requires=install_requires,
70 )
76 )
@@ -1,96 +1,102
1 # Copyright (c) 2016-present, Gregory Szorc
1 # Copyright (c) 2016-present, Gregory Szorc
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # This software may be modified and distributed under the terms
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
5 # of the BSD license. See the LICENSE file for details.
6
6
7 import os
7 import os
8 from distutils.extension import Extension
8 from distutils.extension import Extension
9
9
10
10
11 zstd_sources = ['zstd/%s' % p for p in (
11 zstd_sources = ['zstd/%s' % p for p in (
12 'common/entropy_common.c',
12 'common/entropy_common.c',
13 'common/error_private.c',
13 'common/error_private.c',
14 'common/fse_decompress.c',
14 'common/fse_decompress.c',
15 'common/pool.c',
15 'common/pool.c',
16 'common/threading.c',
16 'common/threading.c',
17 'common/xxhash.c',
17 'common/xxhash.c',
18 'common/zstd_common.c',
18 'common/zstd_common.c',
19 'compress/fse_compress.c',
19 'compress/fse_compress.c',
20 'compress/huf_compress.c',
20 'compress/huf_compress.c',
21 'compress/zstd_compress.c',
21 'compress/zstd_compress.c',
22 'compress/zstdmt_compress.c',
22 'decompress/huf_decompress.c',
23 'decompress/huf_decompress.c',
23 'decompress/zstd_decompress.c',
24 'decompress/zstd_decompress.c',
24 'dictBuilder/cover.c',
25 'dictBuilder/cover.c',
25 'dictBuilder/divsufsort.c',
26 'dictBuilder/divsufsort.c',
26 'dictBuilder/zdict.c',
27 'dictBuilder/zdict.c',
27 )]
28 )]
28
29
29 zstd_sources_legacy = ['zstd/%s' % p for p in (
30 zstd_sources_legacy = ['zstd/%s' % p for p in (
30 'deprecated/zbuff_common.c',
31 'deprecated/zbuff_common.c',
31 'deprecated/zbuff_compress.c',
32 'deprecated/zbuff_compress.c',
32 'deprecated/zbuff_decompress.c',
33 'deprecated/zbuff_decompress.c',
33 'legacy/zstd_v01.c',
34 'legacy/zstd_v01.c',
34 'legacy/zstd_v02.c',
35 'legacy/zstd_v02.c',
35 'legacy/zstd_v03.c',
36 'legacy/zstd_v03.c',
36 'legacy/zstd_v04.c',
37 'legacy/zstd_v04.c',
37 'legacy/zstd_v05.c',
38 'legacy/zstd_v05.c',
38 'legacy/zstd_v06.c',
39 'legacy/zstd_v06.c',
39 'legacy/zstd_v07.c'
40 'legacy/zstd_v07.c'
40 )]
41 )]
41
42
42 zstd_includes = [
43 zstd_includes = [
43 'c-ext',
44 'c-ext',
44 'zstd',
45 'zstd',
45 'zstd/common',
46 'zstd/common',
46 'zstd/compress',
47 'zstd/compress',
47 'zstd/decompress',
48 'zstd/decompress',
48 'zstd/dictBuilder',
49 'zstd/dictBuilder',
49 ]
50 ]
50
51
51 zstd_includes_legacy = [
52 zstd_includes_legacy = [
52 'zstd/deprecated',
53 'zstd/deprecated',
53 'zstd/legacy',
54 'zstd/legacy',
54 ]
55 ]
55
56
56 ext_sources = [
57 ext_sources = [
57 'zstd.c',
58 'zstd.c',
59 'c-ext/bufferutil.c',
58 'c-ext/compressiondict.c',
60 'c-ext/compressiondict.c',
59 'c-ext/compressobj.c',
61 'c-ext/compressobj.c',
60 'c-ext/compressor.c',
62 'c-ext/compressor.c',
61 'c-ext/compressoriterator.c',
63 'c-ext/compressoriterator.c',
62 'c-ext/compressionparams.c',
64 'c-ext/compressionparams.c',
63 'c-ext/compressionwriter.c',
65 'c-ext/compressionwriter.c',
64 'c-ext/constants.c',
66 'c-ext/constants.c',
65 'c-ext/decompressobj.c',
67 'c-ext/decompressobj.c',
66 'c-ext/decompressor.c',
68 'c-ext/decompressor.c',
67 'c-ext/decompressoriterator.c',
69 'c-ext/decompressoriterator.c',
68 'c-ext/decompressionwriter.c',
70 'c-ext/decompressionwriter.c',
69 'c-ext/dictparams.c',
70 'c-ext/frameparams.c',
71 'c-ext/frameparams.c',
71 ]
72 ]
72
73
73 zstd_depends = [
74 zstd_depends = [
74 'c-ext/python-zstandard.h',
75 'c-ext/python-zstandard.h',
75 ]
76 ]
76
77
77
78
78 def get_c_extension(support_legacy=False, name='zstd'):
79 def get_c_extension(support_legacy=False, name='zstd'):
79 """Obtain a distutils.extension.Extension for the C extension."""
80 """Obtain a distutils.extension.Extension for the C extension."""
80 root = os.path.abspath(os.path.dirname(__file__))
81 root = os.path.abspath(os.path.dirname(__file__))
81
82
82 sources = [os.path.join(root, p) for p in zstd_sources + ext_sources]
83 sources = [os.path.join(root, p) for p in zstd_sources + ext_sources]
83 if support_legacy:
84 if support_legacy:
84 sources.extend([os.path.join(root, p) for p in zstd_sources_legacy])
85 sources.extend([os.path.join(root, p) for p in zstd_sources_legacy])
85
86
86 include_dirs = [os.path.join(root, d) for d in zstd_includes]
87 include_dirs = [os.path.join(root, d) for d in zstd_includes]
87 if support_legacy:
88 if support_legacy:
88 include_dirs.extend([os.path.join(root, d) for d in zstd_includes_legacy])
89 include_dirs.extend([os.path.join(root, d) for d in zstd_includes_legacy])
89
90
90 depends = [os.path.join(root, p) for p in zstd_depends]
91 depends = [os.path.join(root, p) for p in zstd_depends]
91
92
93 extra_args = ['-DZSTD_MULTITHREAD']
94
95 if support_legacy:
96 extra_args.append('-DZSTD_LEGACY_SUPPORT=1')
97
92 # TODO compile with optimizations.
98 # TODO compile with optimizations.
93 return Extension(name, sources,
99 return Extension(name, sources,
94 include_dirs=include_dirs,
100 include_dirs=include_dirs,
95 depends=depends,
101 depends=depends,
96 extra_compile_args=["-DZSTD_LEGACY_SUPPORT=1"] if support_legacy else [])
102 extra_compile_args=extra_args)
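setup.py notes above that this module exists so other projects can reuse the Extension definition. A hypothetical downstream setup.py (the 'myproject' name and module path are invented for illustration) might consume it like this:

    from setuptools import setup

    import setup_zstd

    setup(
        name='myproject',
        version='0.1',
        # support_legacy=True also compiles the legacy format sources and
        # adds -DZSTD_LEGACY_SUPPORT=1 on top of -DZSTD_MULTITHREAD.
        ext_modules=[setup_zstd.get_c_extension(support_legacy=True,
                                                name='myproject.zstd')],
    )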
@@ -1,61 +1,88
1 import inspect
1 import inspect
2 import io
2 import io
3 import os
3 import types
4 import types
4
5
5
6
6 def make_cffi(cls):
7 def make_cffi(cls):
7 """Decorator to add CFFI versions of each test method."""
8 """Decorator to add CFFI versions of each test method."""
8
9
9 try:
10 try:
10 import zstd_cffi
11 import zstd_cffi
11 except ImportError:
12 except ImportError:
12 return cls
13 return cls
13
14
14 # If CFFI version is available, dynamically construct test methods
15 # If CFFI version is available, dynamically construct test methods
15 # that use it.
16 # that use it.
16
17
17 for attr in dir(cls):
18 for attr in dir(cls):
18 fn = getattr(cls, attr)
19 fn = getattr(cls, attr)
19 if not inspect.ismethod(fn) and not inspect.isfunction(fn):
20 if not inspect.ismethod(fn) and not inspect.isfunction(fn):
20 continue
21 continue
21
22
22 if not fn.__name__.startswith('test_'):
23 if not fn.__name__.startswith('test_'):
23 continue
24 continue
24
25
25 name = '%s_cffi' % fn.__name__
26 name = '%s_cffi' % fn.__name__
26
27
27 # Replace the "zstd" symbol with the CFFI module instance. Then copy
28 # Replace the "zstd" symbol with the CFFI module instance. Then copy
28 # the function object and install it in a new attribute.
29 # the function object and install it in a new attribute.
29 if isinstance(fn, types.FunctionType):
30 if isinstance(fn, types.FunctionType):
30 globs = dict(fn.__globals__)
31 globs = dict(fn.__globals__)
31 globs['zstd'] = zstd_cffi
32 globs['zstd'] = zstd_cffi
32 new_fn = types.FunctionType(fn.__code__, globs, name,
33 new_fn = types.FunctionType(fn.__code__, globs, name,
33 fn.__defaults__, fn.__closure__)
34 fn.__defaults__, fn.__closure__)
34 new_method = new_fn
35 new_method = new_fn
35 else:
36 else:
36 globs = dict(fn.__func__.func_globals)
37 globs = dict(fn.__func__.func_globals)
37 globs['zstd'] = zstd_cffi
38 globs['zstd'] = zstd_cffi
38 new_fn = types.FunctionType(fn.__func__.func_code, globs, name,
39 new_fn = types.FunctionType(fn.__func__.func_code, globs, name,
39 fn.__func__.func_defaults,
40 fn.__func__.func_defaults,
40 fn.__func__.func_closure)
41 fn.__func__.func_closure)
41 new_method = types.UnboundMethodType(new_fn, fn.im_self,
42 new_method = types.UnboundMethodType(new_fn, fn.im_self,
42 fn.im_class)
43 fn.im_class)
43
44
44 setattr(cls, name, new_method)
45 setattr(cls, name, new_method)
45
46
46 return cls
47 return cls
47
48
48
49
49 class OpCountingBytesIO(io.BytesIO):
50 class OpCountingBytesIO(io.BytesIO):
50 def __init__(self, *args, **kwargs):
51 def __init__(self, *args, **kwargs):
51 self._read_count = 0
52 self._read_count = 0
52 self._write_count = 0
53 self._write_count = 0
53 return super(OpCountingBytesIO, self).__init__(*args, **kwargs)
54 return super(OpCountingBytesIO, self).__init__(*args, **kwargs)
54
55
55 def read(self, *args):
56 def read(self, *args):
56 self._read_count += 1
57 self._read_count += 1
57 return super(OpCountingBytesIO, self).read(*args)
58 return super(OpCountingBytesIO, self).read(*args)
58
59
59 def write(self, data):
60 def write(self, data):
60 self._write_count += 1
61 self._write_count += 1
61 return super(OpCountingBytesIO, self).write(data)
62 return super(OpCountingBytesIO, self).write(data)
63
64
65 _source_files = []
66
67
68 def random_input_data():
69 """Obtain the raw content of source files.
70
71 This is used for generating "random" data to feed into fuzzing, since it is
72 faster than random content generation.
73 """
74 if _source_files:
75 return _source_files
76
77 for root, dirs, files in os.walk(os.path.dirname(__file__)):
78 dirs[:] = list(sorted(dirs))
79 for f in sorted(files):
80 try:
81 with open(os.path.join(root, f), 'rb') as fh:
82 data = fh.read()
83 if data:
84 _source_files.append(data)
85 except OSError:
86 pass
87
88 return _source_files
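random_input_data() above is new and is not exercised by the test module shown next, so the following is a hedged sketch of the kind of round-trip test it is meant to feed; the test class and method names are invented for illustration:

    import unittest

    import zstd

    from .common import (
        make_cffi,
        random_input_data,
    )


    @make_cffi
    class TestRoundTrip(unittest.TestCase):
        def test_source_files(self):
            # Reuse the repository's own files as cheap, varied inputs.
            for data in random_input_data():
                cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
                frame = cctx.compress(data)

                dctx = zstd.ZstdDecompressor()
                self.assertEqual(dctx.decompress(frame), data)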
@@ -1,675 +1,905
1 import hashlib
1 import hashlib
2 import io
2 import io
3 import struct
3 import struct
4 import sys
4 import sys
5
5
6 try:
6 try:
7 import unittest2 as unittest
7 import unittest2 as unittest
8 except ImportError:
8 except ImportError:
9 import unittest
9 import unittest
10
10
11 import zstd
11 import zstd
12
12
13 from .common import (
13 from .common import (
14 make_cffi,
14 make_cffi,
15 OpCountingBytesIO,
15 OpCountingBytesIO,
16 )
16 )
17
17
18
18
19 if sys.version_info[0] >= 3:
19 if sys.version_info[0] >= 3:
20 next = lambda it: it.__next__()
20 next = lambda it: it.__next__()
21 else:
21 else:
22 next = lambda it: it.next()
22 next = lambda it: it.next()
23
23
24
24
25 def multithreaded_chunk_size(level, source_size=0):
26 params = zstd.get_compression_parameters(level, source_size)
27
28 return 1 << (params.window_log + 2)
29
30
25 @make_cffi
31 @make_cffi
26 class TestCompressor(unittest.TestCase):
32 class TestCompressor(unittest.TestCase):
27 def test_level_bounds(self):
33 def test_level_bounds(self):
28 with self.assertRaises(ValueError):
34 with self.assertRaises(ValueError):
29 zstd.ZstdCompressor(level=0)
35 zstd.ZstdCompressor(level=0)
30
36
31 with self.assertRaises(ValueError):
37 with self.assertRaises(ValueError):
32 zstd.ZstdCompressor(level=23)
38 zstd.ZstdCompressor(level=23)
33
39
34
40
35 @make_cffi
41 @make_cffi
36 class TestCompressor_compress(unittest.TestCase):
42 class TestCompressor_compress(unittest.TestCase):
43 def test_multithreaded_unsupported(self):
44 samples = []
45 for i in range(128):
46 samples.append(b'foo' * 64)
47 samples.append(b'bar' * 64)
48
49 d = zstd.train_dictionary(8192, samples)
50
51 cctx = zstd.ZstdCompressor(dict_data=d, threads=2)
52
53 with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both dictionaries and multi-threaded compression'):
54 cctx.compress(b'foo')
55
56 params = zstd.get_compression_parameters(3)
57 cctx = zstd.ZstdCompressor(compression_params=params, threads=2)
58 with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both compression parameters and multi-threaded compression'):
59 cctx.compress(b'foo')
60
37 def test_compress_empty(self):
61 def test_compress_empty(self):
38 cctx = zstd.ZstdCompressor(level=1)
62 cctx = zstd.ZstdCompressor(level=1)
39 result = cctx.compress(b'')
63 result = cctx.compress(b'')
40 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
64 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
41 params = zstd.get_frame_parameters(result)
65 params = zstd.get_frame_parameters(result)
42 self.assertEqual(params.content_size, 0)
66 self.assertEqual(params.content_size, 0)
43 self.assertEqual(params.window_size, 524288)
67 self.assertEqual(params.window_size, 524288)
44 self.assertEqual(params.dict_id, 0)
68 self.assertEqual(params.dict_id, 0)
45 self.assertFalse(params.has_checksum, 0)
69 self.assertFalse(params.has_checksum, 0)
46
70
47 # TODO should be temporary until https://github.com/facebook/zstd/issues/506
71 # TODO should be temporary until https://github.com/facebook/zstd/issues/506
48 # is fixed.
72 # is fixed.
49 cctx = zstd.ZstdCompressor(write_content_size=True)
73 cctx = zstd.ZstdCompressor(write_content_size=True)
50 with self.assertRaises(ValueError):
74 with self.assertRaises(ValueError):
51 cctx.compress(b'')
75 cctx.compress(b'')
52
76
53 cctx.compress(b'', allow_empty=True)
77 cctx.compress(b'', allow_empty=True)
54
78
55 def test_compress_large(self):
79 def test_compress_large(self):
56 chunks = []
80 chunks = []
57 for i in range(255):
81 for i in range(255):
58 chunks.append(struct.Struct('>B').pack(i) * 16384)
82 chunks.append(struct.Struct('>B').pack(i) * 16384)
59
83
60 cctx = zstd.ZstdCompressor(level=3)
84 cctx = zstd.ZstdCompressor(level=3)
61 result = cctx.compress(b''.join(chunks))
85 result = cctx.compress(b''.join(chunks))
62 self.assertEqual(len(result), 999)
86 self.assertEqual(len(result), 999)
63 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
87 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
64
88
65 # This matches the test for read_from() below.
89 # This matches the test for read_from() below.
66 cctx = zstd.ZstdCompressor(level=1)
90 cctx = zstd.ZstdCompressor(level=1)
67 result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
91 result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
68 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
92 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
69 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
93 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
70 b'\x02\x09\x00\x00\x6f')
94 b'\x02\x09\x00\x00\x6f')
71
95
72 def test_write_checksum(self):
96 def test_write_checksum(self):
73 cctx = zstd.ZstdCompressor(level=1)
97 cctx = zstd.ZstdCompressor(level=1)
74 no_checksum = cctx.compress(b'foobar')
98 no_checksum = cctx.compress(b'foobar')
75 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
99 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
76 with_checksum = cctx.compress(b'foobar')
100 with_checksum = cctx.compress(b'foobar')
77
101
78 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
102 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
79
103
80 no_params = zstd.get_frame_parameters(no_checksum)
104 no_params = zstd.get_frame_parameters(no_checksum)
81 with_params = zstd.get_frame_parameters(with_checksum)
105 with_params = zstd.get_frame_parameters(with_checksum)
82
106
83 self.assertFalse(no_params.has_checksum)
107 self.assertFalse(no_params.has_checksum)
84 self.assertTrue(with_params.has_checksum)
108 self.assertTrue(with_params.has_checksum)
85
109
86 def test_write_content_size(self):
110 def test_write_content_size(self):
87 cctx = zstd.ZstdCompressor(level=1)
111 cctx = zstd.ZstdCompressor(level=1)
88 no_size = cctx.compress(b'foobar' * 256)
112 no_size = cctx.compress(b'foobar' * 256)
89 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
113 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
90 with_size = cctx.compress(b'foobar' * 256)
114 with_size = cctx.compress(b'foobar' * 256)
91
115
92 self.assertEqual(len(with_size), len(no_size) + 1)
116 self.assertEqual(len(with_size), len(no_size) + 1)
93
117
94 no_params = zstd.get_frame_parameters(no_size)
118 no_params = zstd.get_frame_parameters(no_size)
95 with_params = zstd.get_frame_parameters(with_size)
119 with_params = zstd.get_frame_parameters(with_size)
96 self.assertEqual(no_params.content_size, 0)
120 self.assertEqual(no_params.content_size, 0)
97 self.assertEqual(with_params.content_size, 1536)
121 self.assertEqual(with_params.content_size, 1536)
98
122
99 def test_no_dict_id(self):
123 def test_no_dict_id(self):
100 samples = []
124 samples = []
101 for i in range(128):
125 for i in range(128):
102 samples.append(b'foo' * 64)
126 samples.append(b'foo' * 64)
103 samples.append(b'bar' * 64)
127 samples.append(b'bar' * 64)
104 samples.append(b'foobar' * 64)
128 samples.append(b'foobar' * 64)
105
129
106 d = zstd.train_dictionary(1024, samples)
130 d = zstd.train_dictionary(1024, samples)
107
131
108 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
132 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
109 with_dict_id = cctx.compress(b'foobarfoobar')
133 with_dict_id = cctx.compress(b'foobarfoobar')
110
134
111 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
135 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
112 no_dict_id = cctx.compress(b'foobarfoobar')
136 no_dict_id = cctx.compress(b'foobarfoobar')
113
137
114 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
138 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
115
139
116 no_params = zstd.get_frame_parameters(no_dict_id)
140 no_params = zstd.get_frame_parameters(no_dict_id)
117 with_params = zstd.get_frame_parameters(with_dict_id)
141 with_params = zstd.get_frame_parameters(with_dict_id)
118 self.assertEqual(no_params.dict_id, 0)
142 self.assertEqual(no_params.dict_id, 0)
119 self.assertEqual(with_params.dict_id, 1584102229)
143 self.assertEqual(with_params.dict_id, 1584102229)
120
144
121 def test_compress_dict_multiple(self):
145 def test_compress_dict_multiple(self):
122 samples = []
146 samples = []
123 for i in range(128):
147 for i in range(128):
124 samples.append(b'foo' * 64)
148 samples.append(b'foo' * 64)
125 samples.append(b'bar' * 64)
149 samples.append(b'bar' * 64)
126 samples.append(b'foobar' * 64)
150 samples.append(b'foobar' * 64)
127
151
128 d = zstd.train_dictionary(8192, samples)
152 d = zstd.train_dictionary(8192, samples)
129
153
130 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
154 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
131
155
132 for i in range(32):
156 for i in range(32):
133 cctx.compress(b'foo bar foobar foo bar foobar')
157 cctx.compress(b'foo bar foobar foo bar foobar')
134
158
159 def test_multithreaded(self):
160 chunk_size = multithreaded_chunk_size(1)
161 source = b''.join([b'x' * chunk_size, b'y' * chunk_size])
162
163 cctx = zstd.ZstdCompressor(level=1, threads=2)
164 compressed = cctx.compress(source)
165
166 params = zstd.get_frame_parameters(compressed)
167 self.assertEqual(params.content_size, chunk_size * 2)
168 self.assertEqual(params.dict_id, 0)
169 self.assertFalse(params.has_checksum)
170
171 dctx = zstd.ZstdDecompressor()
172 self.assertEqual(dctx.decompress(compressed), source)
173
135
174
136 @make_cffi
175 @make_cffi
137 class TestCompressor_compressobj(unittest.TestCase):
176 class TestCompressor_compressobj(unittest.TestCase):
138 def test_compressobj_empty(self):
177 def test_compressobj_empty(self):
139 cctx = zstd.ZstdCompressor(level=1)
178 cctx = zstd.ZstdCompressor(level=1)
140 cobj = cctx.compressobj()
179 cobj = cctx.compressobj()
141 self.assertEqual(cobj.compress(b''), b'')
180 self.assertEqual(cobj.compress(b''), b'')
142 self.assertEqual(cobj.flush(),
181 self.assertEqual(cobj.flush(),
143 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
182 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
144
183
145 def test_compressobj_large(self):
184 def test_compressobj_large(self):
146 chunks = []
185 chunks = []
147 for i in range(255):
186 for i in range(255):
148 chunks.append(struct.Struct('>B').pack(i) * 16384)
187 chunks.append(struct.Struct('>B').pack(i) * 16384)
149
188
150 cctx = zstd.ZstdCompressor(level=3)
189 cctx = zstd.ZstdCompressor(level=3)
151 cobj = cctx.compressobj()
190 cobj = cctx.compressobj()
152
191
153 result = cobj.compress(b''.join(chunks)) + cobj.flush()
192 result = cobj.compress(b''.join(chunks)) + cobj.flush()
154 self.assertEqual(len(result), 999)
193 self.assertEqual(len(result), 999)
155 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
194 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
156
195
157 params = zstd.get_frame_parameters(result)
196 params = zstd.get_frame_parameters(result)
158 self.assertEqual(params.content_size, 0)
197 self.assertEqual(params.content_size, 0)
159 self.assertEqual(params.window_size, 1048576)
198 self.assertEqual(params.window_size, 1048576)
160 self.assertEqual(params.dict_id, 0)
199 self.assertEqual(params.dict_id, 0)
161 self.assertFalse(params.has_checksum)
200 self.assertFalse(params.has_checksum)
162
201
163 def test_write_checksum(self):
202 def test_write_checksum(self):
164 cctx = zstd.ZstdCompressor(level=1)
203 cctx = zstd.ZstdCompressor(level=1)
165 cobj = cctx.compressobj()
204 cobj = cctx.compressobj()
166 no_checksum = cobj.compress(b'foobar') + cobj.flush()
205 no_checksum = cobj.compress(b'foobar') + cobj.flush()
167 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
206 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
168 cobj = cctx.compressobj()
207 cobj = cctx.compressobj()
169 with_checksum = cobj.compress(b'foobar') + cobj.flush()
208 with_checksum = cobj.compress(b'foobar') + cobj.flush()
170
209
171 no_params = zstd.get_frame_parameters(no_checksum)
210 no_params = zstd.get_frame_parameters(no_checksum)
172 with_params = zstd.get_frame_parameters(with_checksum)
211 with_params = zstd.get_frame_parameters(with_checksum)
173 self.assertEqual(no_params.content_size, 0)
212 self.assertEqual(no_params.content_size, 0)
174 self.assertEqual(with_params.content_size, 0)
213 self.assertEqual(with_params.content_size, 0)
175 self.assertEqual(no_params.dict_id, 0)
214 self.assertEqual(no_params.dict_id, 0)
176 self.assertEqual(with_params.dict_id, 0)
215 self.assertEqual(with_params.dict_id, 0)
177 self.assertFalse(no_params.has_checksum)
216 self.assertFalse(no_params.has_checksum)
178 self.assertTrue(with_params.has_checksum)
217 self.assertTrue(with_params.has_checksum)
179
218
180 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
219 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
181
220
182 def test_write_content_size(self):
221 def test_write_content_size(self):
183 cctx = zstd.ZstdCompressor(level=1)
222 cctx = zstd.ZstdCompressor(level=1)
184 cobj = cctx.compressobj(size=len(b'foobar' * 256))
223 cobj = cctx.compressobj(size=len(b'foobar' * 256))
185 no_size = cobj.compress(b'foobar' * 256) + cobj.flush()
224 no_size = cobj.compress(b'foobar' * 256) + cobj.flush()
186 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
225 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
187 cobj = cctx.compressobj(size=len(b'foobar' * 256))
226 cobj = cctx.compressobj(size=len(b'foobar' * 256))
188 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
227 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
189
228
190 no_params = zstd.get_frame_parameters(no_size)
229 no_params = zstd.get_frame_parameters(no_size)
191 with_params = zstd.get_frame_parameters(with_size)
230 with_params = zstd.get_frame_parameters(with_size)
192 self.assertEqual(no_params.content_size, 0)
231 self.assertEqual(no_params.content_size, 0)
193 self.assertEqual(with_params.content_size, 1536)
232 self.assertEqual(with_params.content_size, 1536)
194 self.assertEqual(no_params.dict_id, 0)
233 self.assertEqual(no_params.dict_id, 0)
195 self.assertEqual(with_params.dict_id, 0)
234 self.assertEqual(with_params.dict_id, 0)
196 self.assertFalse(no_params.has_checksum)
235 self.assertFalse(no_params.has_checksum)
197 self.assertFalse(with_params.has_checksum)
236 self.assertFalse(with_params.has_checksum)
198
237
199 self.assertEqual(len(with_size), len(no_size) + 1)
238 self.assertEqual(len(with_size), len(no_size) + 1)
200
239
201 def test_compress_after_finished(self):
240 def test_compress_after_finished(self):
202 cctx = zstd.ZstdCompressor()
241 cctx = zstd.ZstdCompressor()
203 cobj = cctx.compressobj()
242 cobj = cctx.compressobj()
204
243
205 cobj.compress(b'foo')
244 cobj.compress(b'foo')
206 cobj.flush()
245 cobj.flush()
207
246
208 with self.assertRaisesRegexp(zstd.ZstdError, r'cannot call compress\(\) after compressor'):
247 with self.assertRaisesRegexp(zstd.ZstdError, r'cannot call compress\(\) after compressor'):
209 cobj.compress(b'foo')
248 cobj.compress(b'foo')
210
249
211 with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'):
250 with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'):
212 cobj.flush()
251 cobj.flush()
213
252
214 def test_flush_block_repeated(self):
253 def test_flush_block_repeated(self):
215 cctx = zstd.ZstdCompressor(level=1)
254 cctx = zstd.ZstdCompressor(level=1)
216 cobj = cctx.compressobj()
255 cobj = cctx.compressobj()
217
256
218 self.assertEqual(cobj.compress(b'foo'), b'')
257 self.assertEqual(cobj.compress(b'foo'), b'')
219 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
258 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
220 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
259 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
221 self.assertEqual(cobj.compress(b'bar'), b'')
260 self.assertEqual(cobj.compress(b'bar'), b'')
222 # 3 byte header plus content.
261 # 3 byte header plus content.
223 self.assertEqual(cobj.flush(), b'\x19\x00\x00bar')
262 self.assertEqual(cobj.flush(), b'\x19\x00\x00bar')
224
263
225 def test_flush_empty_block(self):
264 def test_flush_empty_block(self):
226 cctx = zstd.ZstdCompressor(write_checksum=True)
265 cctx = zstd.ZstdCompressor(write_checksum=True)
227 cobj = cctx.compressobj()
266 cobj = cctx.compressobj()
228
267
229 cobj.compress(b'foobar')
268 cobj.compress(b'foobar')
230 cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
269 cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
231 # No-op if no block is active (this is internal to zstd).
270 # No-op if no block is active (this is internal to zstd).
232 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'')
271 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'')
233
272
234 trailing = cobj.flush()
273 trailing = cobj.flush()
235 # 3 bytes block header + 4 bytes frame checksum
274 # 3 bytes block header + 4 bytes frame checksum
236 self.assertEqual(len(trailing), 7)
275 self.assertEqual(len(trailing), 7)
237 header = trailing[0:3]
276 header = trailing[0:3]
238 self.assertEqual(header, b'\x01\x00\x00')
277 self.assertEqual(header, b'\x01\x00\x00')
239
278
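The flush tests above rely on the layout of the 3-byte zstd block header: per the zstd frame format it is a little-endian value whose lowest bit marks the last block, the next two bits the block type, and the remaining bits the block size. A small decoder written against that layout reproduces the test vectors, where b'\x19\x00\x00' is a final raw block of 3 bytes ('bar') and b'\x01\x00\x00' is the empty final block that ends a frame:

import struct

def decode_block_header(header):
    # Treat the 3 header bytes as a little-endian 24-bit integer.
    value = struct.unpack('<I', header[:3] + b'\x00')[0]
    return {
        'last_block': bool(value & 0x01),
        'block_type': (value >> 1) & 0x03,  # 0 = raw, 1 = RLE, 2 = compressed
        'block_size': value >> 3,
    }

assert decode_block_header(b'\x19\x00\x00') == {
    'last_block': True, 'block_type': 0, 'block_size': 3}
assert decode_block_header(b'\x01\x00\x00') == {
    'last_block': True, 'block_type': 0, 'block_size': 0}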
279 def test_multithreaded(self):
280 source = io.BytesIO()
281 source.write(b'a' * 1048576)
282 source.write(b'b' * 1048576)
283 source.write(b'c' * 1048576)
284 source.seek(0)
285
286 cctx = zstd.ZstdCompressor(level=1, threads=2)
287 cobj = cctx.compressobj()
288
289 chunks = []
290 while True:
291 d = source.read(8192)
292 if not d:
293 break
294
295 chunks.append(cobj.compress(d))
296
297 chunks.append(cobj.flush())
298
299 compressed = b''.join(chunks)
300
301 self.assertEqual(len(compressed), 295)
302
240
303
241 @make_cffi
304 @make_cffi
242 class TestCompressor_copy_stream(unittest.TestCase):
305 class TestCompressor_copy_stream(unittest.TestCase):
243 def test_no_read(self):
306 def test_no_read(self):
244 source = object()
307 source = object()
245 dest = io.BytesIO()
308 dest = io.BytesIO()
246
309
247 cctx = zstd.ZstdCompressor()
310 cctx = zstd.ZstdCompressor()
248 with self.assertRaises(ValueError):
311 with self.assertRaises(ValueError):
249 cctx.copy_stream(source, dest)
312 cctx.copy_stream(source, dest)
250
313
251 def test_no_write(self):
314 def test_no_write(self):
252 source = io.BytesIO()
315 source = io.BytesIO()
253 dest = object()
316 dest = object()
254
317
255 cctx = zstd.ZstdCompressor()
318 cctx = zstd.ZstdCompressor()
256 with self.assertRaises(ValueError):
319 with self.assertRaises(ValueError):
257 cctx.copy_stream(source, dest)
320 cctx.copy_stream(source, dest)
258
321
259 def test_empty(self):
322 def test_empty(self):
260 source = io.BytesIO()
323 source = io.BytesIO()
261 dest = io.BytesIO()
324 dest = io.BytesIO()
262
325
263 cctx = zstd.ZstdCompressor(level=1)
326 cctx = zstd.ZstdCompressor(level=1)
264 r, w = cctx.copy_stream(source, dest)
327 r, w = cctx.copy_stream(source, dest)
265 self.assertEqual(int(r), 0)
328 self.assertEqual(int(r), 0)
266 self.assertEqual(w, 9)
329 self.assertEqual(w, 9)
267
330
268 self.assertEqual(dest.getvalue(),
331 self.assertEqual(dest.getvalue(),
269 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
332 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
270
333
271 def test_large_data(self):
334 def test_large_data(self):
272 source = io.BytesIO()
335 source = io.BytesIO()
273 for i in range(255):
336 for i in range(255):
274 source.write(struct.Struct('>B').pack(i) * 16384)
337 source.write(struct.Struct('>B').pack(i) * 16384)
275 source.seek(0)
338 source.seek(0)
276
339
277 dest = io.BytesIO()
340 dest = io.BytesIO()
278 cctx = zstd.ZstdCompressor()
341 cctx = zstd.ZstdCompressor()
279 r, w = cctx.copy_stream(source, dest)
342 r, w = cctx.copy_stream(source, dest)
280
343
281 self.assertEqual(r, 255 * 16384)
344 self.assertEqual(r, 255 * 16384)
282 self.assertEqual(w, 999)
345 self.assertEqual(w, 999)
283
346
284 params = zstd.get_frame_parameters(dest.getvalue())
347 params = zstd.get_frame_parameters(dest.getvalue())
285 self.assertEqual(params.content_size, 0)
348 self.assertEqual(params.content_size, 0)
286 self.assertEqual(params.window_size, 1048576)
349 self.assertEqual(params.window_size, 1048576)
287 self.assertEqual(params.dict_id, 0)
350 self.assertEqual(params.dict_id, 0)
288 self.assertFalse(params.has_checksum)
351 self.assertFalse(params.has_checksum)
289
352
290 def test_write_checksum(self):
353 def test_write_checksum(self):
291 source = io.BytesIO(b'foobar')
354 source = io.BytesIO(b'foobar')
292 no_checksum = io.BytesIO()
355 no_checksum = io.BytesIO()
293
356
294 cctx = zstd.ZstdCompressor(level=1)
357 cctx = zstd.ZstdCompressor(level=1)
295 cctx.copy_stream(source, no_checksum)
358 cctx.copy_stream(source, no_checksum)
296
359
297 source.seek(0)
360 source.seek(0)
298 with_checksum = io.BytesIO()
361 with_checksum = io.BytesIO()
299 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
362 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
300 cctx.copy_stream(source, with_checksum)
363 cctx.copy_stream(source, with_checksum)
301
364
302 self.assertEqual(len(with_checksum.getvalue()),
365 self.assertEqual(len(with_checksum.getvalue()),
303 len(no_checksum.getvalue()) + 4)
366 len(no_checksum.getvalue()) + 4)
304
367
305 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
368 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
306 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
369 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
307 self.assertEqual(no_params.content_size, 0)
370 self.assertEqual(no_params.content_size, 0)
308 self.assertEqual(with_params.content_size, 0)
371 self.assertEqual(with_params.content_size, 0)
309 self.assertEqual(no_params.dict_id, 0)
372 self.assertEqual(no_params.dict_id, 0)
310 self.assertEqual(with_params.dict_id, 0)
373 self.assertEqual(with_params.dict_id, 0)
311 self.assertFalse(no_params.has_checksum)
374 self.assertFalse(no_params.has_checksum)
312 self.assertTrue(with_params.has_checksum)
375 self.assertTrue(with_params.has_checksum)
313
376
314 def test_write_content_size(self):
377 def test_write_content_size(self):
315 source = io.BytesIO(b'foobar' * 256)
378 source = io.BytesIO(b'foobar' * 256)
316 no_size = io.BytesIO()
379 no_size = io.BytesIO()
317
380
318 cctx = zstd.ZstdCompressor(level=1)
381 cctx = zstd.ZstdCompressor(level=1)
319 cctx.copy_stream(source, no_size)
382 cctx.copy_stream(source, no_size)
320
383
321 source.seek(0)
384 source.seek(0)
322 with_size = io.BytesIO()
385 with_size = io.BytesIO()
323 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
386 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
324 cctx.copy_stream(source, with_size)
387 cctx.copy_stream(source, with_size)
325
388
326 # Source content size is unknown, so no content size written.
389 # Source content size is unknown, so no content size written.
327 self.assertEqual(len(with_size.getvalue()),
390 self.assertEqual(len(with_size.getvalue()),
328 len(no_size.getvalue()))
391 len(no_size.getvalue()))
329
392
330 source.seek(0)
393 source.seek(0)
331 with_size = io.BytesIO()
394 with_size = io.BytesIO()
332 cctx.copy_stream(source, with_size, size=len(source.getvalue()))
395 cctx.copy_stream(source, with_size, size=len(source.getvalue()))
333
396
334 # We specified source size, so content size header is present.
397 # We specified source size, so content size header is present.
335 self.assertEqual(len(with_size.getvalue()),
398 self.assertEqual(len(with_size.getvalue()),
336 len(no_size.getvalue()) + 1)
399 len(no_size.getvalue()) + 1)
337
400
338 no_params = zstd.get_frame_parameters(no_size.getvalue())
401 no_params = zstd.get_frame_parameters(no_size.getvalue())
339 with_params = zstd.get_frame_parameters(with_size.getvalue())
402 with_params = zstd.get_frame_parameters(with_size.getvalue())
340 self.assertEqual(no_params.content_size, 0)
403 self.assertEqual(no_params.content_size, 0)
341 self.assertEqual(with_params.content_size, 1536)
404 self.assertEqual(with_params.content_size, 1536)
342 self.assertEqual(no_params.dict_id, 0)
405 self.assertEqual(no_params.dict_id, 0)
343 self.assertEqual(with_params.dict_id, 0)
406 self.assertEqual(with_params.dict_id, 0)
344 self.assertFalse(no_params.has_checksum)
407 self.assertFalse(no_params.has_checksum)
345 self.assertFalse(with_params.has_checksum)
408 self.assertFalse(with_params.has_checksum)
346
409
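As the comments in test_write_content_size note, copy_stream() only records a content size when the caller declares it via size=. A sketch of a convenience wrapper that measures seekable sources automatically; copy_stream_with_size is a hypothetical helper, not a library API:

import io
import zstd

def copy_stream_with_size(cctx, source, dest):
    size = 0
    try:
        pos = source.tell()
        source.seek(0, io.SEEK_END)
        size = source.tell() - pos
        source.seek(pos)
    except (AttributeError, IOError, OSError):
        size = 0  # unseekable source; content size stays unknown
    return cctx.copy_stream(source, dest, size=size)

cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
dest = io.BytesIO()
copy_stream_with_size(cctx, io.BytesIO(b'foobar' * 256), dest)
assert zstd.get_frame_parameters(dest.getvalue()).content_size == 1536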
347 def test_read_write_size(self):
410 def test_read_write_size(self):
348 source = OpCountingBytesIO(b'foobarfoobar')
411 source = OpCountingBytesIO(b'foobarfoobar')
349 dest = OpCountingBytesIO()
412 dest = OpCountingBytesIO()
350 cctx = zstd.ZstdCompressor()
413 cctx = zstd.ZstdCompressor()
351 r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1)
414 r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1)
352
415
353 self.assertEqual(r, len(source.getvalue()))
416 self.assertEqual(r, len(source.getvalue()))
354 self.assertEqual(w, 21)
417 self.assertEqual(w, 21)
355 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
418 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
356 self.assertEqual(dest._write_count, len(dest.getvalue()))
419 self.assertEqual(dest._write_count, len(dest.getvalue()))
357
420
421 def test_multithreaded(self):
422 source = io.BytesIO()
423 source.write(b'a' * 1048576)
424 source.write(b'b' * 1048576)
425 source.write(b'c' * 1048576)
426 source.seek(0)
427
428 dest = io.BytesIO()
429 cctx = zstd.ZstdCompressor(threads=2)
430 r, w = cctx.copy_stream(source, dest)
431 self.assertEqual(r, 3145728)
432 self.assertEqual(w, 295)
433
434 params = zstd.get_frame_parameters(dest.getvalue())
435 self.assertEqual(params.content_size, 0)
436 self.assertEqual(params.dict_id, 0)
437 self.assertFalse(params.has_checksum)
438
439 # Writing content size and checksum works.
440 cctx = zstd.ZstdCompressor(threads=2, write_content_size=True,
441 write_checksum=True)
442 dest = io.BytesIO()
443 source.seek(0)
444 cctx.copy_stream(source, dest, size=len(source.getvalue()))
445
446 params = zstd.get_frame_parameters(dest.getvalue())
447 self.assertEqual(params.content_size, 3145728)
448 self.assertEqual(params.dict_id, 0)
449 self.assertTrue(params.has_checksum)
450
358
451
359 def compress(data, level):
452 def compress(data, level):
360 buffer = io.BytesIO()
453 buffer = io.BytesIO()
361 cctx = zstd.ZstdCompressor(level=level)
454 cctx = zstd.ZstdCompressor(level=level)
362 with cctx.write_to(buffer) as compressor:
455 with cctx.write_to(buffer) as compressor:
363 compressor.write(data)
456 compressor.write(data)
364 return buffer.getvalue()
457 return buffer.getvalue()
365
458
366
459
367 @make_cffi
460 @make_cffi
368 class TestCompressor_write_to(unittest.TestCase):
461 class TestCompressor_write_to(unittest.TestCase):
369 def test_empty(self):
462 def test_empty(self):
370 result = compress(b'', 1)
463 result = compress(b'', 1)
371 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
464 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
372
465
373 params = zstd.get_frame_parameters(result)
466 params = zstd.get_frame_parameters(result)
374 self.assertEqual(params.content_size, 0)
467 self.assertEqual(params.content_size, 0)
375 self.assertEqual(params.window_size, 524288)
468 self.assertEqual(params.window_size, 524288)
376 self.assertEqual(params.dict_id, 0)
469 self.assertEqual(params.dict_id, 0)
377 self.assertFalse(params.has_checksum)
470 self.assertFalse(params.has_checksum)
378
471
379 def test_multiple_compress(self):
472 def test_multiple_compress(self):
380 buffer = io.BytesIO()
473 buffer = io.BytesIO()
381 cctx = zstd.ZstdCompressor(level=5)
474 cctx = zstd.ZstdCompressor(level=5)
382 with cctx.write_to(buffer) as compressor:
475 with cctx.write_to(buffer) as compressor:
383 self.assertEqual(compressor.write(b'foo'), 0)
476 self.assertEqual(compressor.write(b'foo'), 0)
384 self.assertEqual(compressor.write(b'bar'), 0)
477 self.assertEqual(compressor.write(b'bar'), 0)
385 self.assertEqual(compressor.write(b'x' * 8192), 0)
478 self.assertEqual(compressor.write(b'x' * 8192), 0)
386
479
387 result = buffer.getvalue()
480 result = buffer.getvalue()
388 self.assertEqual(result,
481 self.assertEqual(result,
389 b'\x28\xb5\x2f\xfd\x00\x50\x75\x00\x00\x38\x66\x6f'
482 b'\x28\xb5\x2f\xfd\x00\x50\x75\x00\x00\x38\x66\x6f'
390 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
483 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
391
484
392 def test_dictionary(self):
485 def test_dictionary(self):
393 samples = []
486 samples = []
394 for i in range(128):
487 for i in range(128):
395 samples.append(b'foo' * 64)
488 samples.append(b'foo' * 64)
396 samples.append(b'bar' * 64)
489 samples.append(b'bar' * 64)
397 samples.append(b'foobar' * 64)
490 samples.append(b'foobar' * 64)
398
491
399 d = zstd.train_dictionary(8192, samples)
492 d = zstd.train_dictionary(8192, samples)
400
493
401 buffer = io.BytesIO()
494 buffer = io.BytesIO()
402 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
495 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
403 with cctx.write_to(buffer) as compressor:
496 with cctx.write_to(buffer) as compressor:
404 self.assertEqual(compressor.write(b'foo'), 0)
497 self.assertEqual(compressor.write(b'foo'), 0)
405 self.assertEqual(compressor.write(b'bar'), 0)
498 self.assertEqual(compressor.write(b'bar'), 0)
406 self.assertEqual(compressor.write(b'foo' * 16384), 634)
499 self.assertEqual(compressor.write(b'foo' * 16384), 634)
407
500
408 compressed = buffer.getvalue()
501 compressed = buffer.getvalue()
409
502
410 params = zstd.get_frame_parameters(compressed)
503 params = zstd.get_frame_parameters(compressed)
411 self.assertEqual(params.content_size, 0)
504 self.assertEqual(params.content_size, 0)
412 self.assertEqual(params.window_size, 1024)
505 self.assertEqual(params.window_size, 1024)
413 self.assertEqual(params.dict_id, d.dict_id())
506 self.assertEqual(params.dict_id, d.dict_id())
414 self.assertFalse(params.has_checksum)
507 self.assertFalse(params.has_checksum)
415
508
416 self.assertEqual(compressed[0:32],
509 self.assertEqual(compressed[0:32],
417 b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00'
510 b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00'
418 b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54'
511 b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54'
419 b'\x00\x00\x18\x6f\x6f\x66\x01\x00')
512 b'\x00\x00\x18\x6f\x6f\x66\x01\x00')
420
513
421 h = hashlib.sha1(compressed).hexdigest()
514 h = hashlib.sha1(compressed).hexdigest()
422 self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92')
515 self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92')
423
516
424 def test_compression_params(self):
517 def test_compression_params(self):
425 params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST)
518 params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST)
426
519
427 buffer = io.BytesIO()
520 buffer = io.BytesIO()
428 cctx = zstd.ZstdCompressor(compression_params=params)
521 cctx = zstd.ZstdCompressor(compression_params=params)
429 with cctx.write_to(buffer) as compressor:
522 with cctx.write_to(buffer) as compressor:
430 self.assertEqual(compressor.write(b'foo'), 0)
523 self.assertEqual(compressor.write(b'foo'), 0)
431 self.assertEqual(compressor.write(b'bar'), 0)
524 self.assertEqual(compressor.write(b'bar'), 0)
432 self.assertEqual(compressor.write(b'foobar' * 16384), 0)
525 self.assertEqual(compressor.write(b'foobar' * 16384), 0)
433
526
434 compressed = buffer.getvalue()
527 compressed = buffer.getvalue()
435
528
436 params = zstd.get_frame_parameters(compressed)
529 params = zstd.get_frame_parameters(compressed)
437 self.assertEqual(params.content_size, 0)
530 self.assertEqual(params.content_size, 0)
438 self.assertEqual(params.window_size, 1048576)
531 self.assertEqual(params.window_size, 1048576)
439 self.assertEqual(params.dict_id, 0)
532 self.assertEqual(params.dict_id, 0)
440 self.assertFalse(params.has_checksum)
533 self.assertFalse(params.has_checksum)
441
534
442 h = hashlib.sha1(compressed).hexdigest()
535 h = hashlib.sha1(compressed).hexdigest()
443 self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99')
536 self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99')
444
537
445 def test_write_checksum(self):
538 def test_write_checksum(self):
446 no_checksum = io.BytesIO()
539 no_checksum = io.BytesIO()
447 cctx = zstd.ZstdCompressor(level=1)
540 cctx = zstd.ZstdCompressor(level=1)
448 with cctx.write_to(no_checksum) as compressor:
541 with cctx.write_to(no_checksum) as compressor:
449 self.assertEqual(compressor.write(b'foobar'), 0)
542 self.assertEqual(compressor.write(b'foobar'), 0)
450
543
451 with_checksum = io.BytesIO()
544 with_checksum = io.BytesIO()
452 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
545 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
453 with cctx.write_to(with_checksum) as compressor:
546 with cctx.write_to(with_checksum) as compressor:
454 self.assertEqual(compressor.write(b'foobar'), 0)
547 self.assertEqual(compressor.write(b'foobar'), 0)
455
548
456 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
549 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
457 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
550 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
458 self.assertEqual(no_params.content_size, 0)
551 self.assertEqual(no_params.content_size, 0)
459 self.assertEqual(with_params.content_size, 0)
552 self.assertEqual(with_params.content_size, 0)
460 self.assertEqual(no_params.dict_id, 0)
553 self.assertEqual(no_params.dict_id, 0)
461 self.assertEqual(with_params.dict_id, 0)
554 self.assertEqual(with_params.dict_id, 0)
462 self.assertFalse(no_params.has_checksum)
555 self.assertFalse(no_params.has_checksum)
463 self.assertTrue(with_params.has_checksum)
556 self.assertTrue(with_params.has_checksum)
464
557
465 self.assertEqual(len(with_checksum.getvalue()),
558 self.assertEqual(len(with_checksum.getvalue()),
466 len(no_checksum.getvalue()) + 4)
559 len(no_checksum.getvalue()) + 4)
467
560
468 def test_write_content_size(self):
561 def test_write_content_size(self):
469 no_size = io.BytesIO()
562 no_size = io.BytesIO()
470 cctx = zstd.ZstdCompressor(level=1)
563 cctx = zstd.ZstdCompressor(level=1)
471 with cctx.write_to(no_size) as compressor:
564 with cctx.write_to(no_size) as compressor:
472 self.assertEqual(compressor.write(b'foobar' * 256), 0)
565 self.assertEqual(compressor.write(b'foobar' * 256), 0)
473
566
474 with_size = io.BytesIO()
567 with_size = io.BytesIO()
475 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
568 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
476 with cctx.write_to(with_size) as compressor:
569 with cctx.write_to(with_size) as compressor:
477 self.assertEqual(compressor.write(b'foobar' * 256), 0)
570 self.assertEqual(compressor.write(b'foobar' * 256), 0)
478
571
479 # Source size is not known in streaming mode, so the content size header
572 # Source size is not known in streaming mode, so the content size header
480 # is not written.
573 # is not written.
481 self.assertEqual(len(with_size.getvalue()),
574 self.assertEqual(len(with_size.getvalue()),
482 len(no_size.getvalue()))
575 len(no_size.getvalue()))
483
576
484 # Declaring size will write the header.
577 # Declaring size will write the header.
485 with_size = io.BytesIO()
578 with_size = io.BytesIO()
486 with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor:
579 with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor:
487 self.assertEqual(compressor.write(b'foobar' * 256), 0)
580 self.assertEqual(compressor.write(b'foobar' * 256), 0)
488
581
489 no_params = zstd.get_frame_parameters(no_size.getvalue())
582 no_params = zstd.get_frame_parameters(no_size.getvalue())
490 with_params = zstd.get_frame_parameters(with_size.getvalue())
583 with_params = zstd.get_frame_parameters(with_size.getvalue())
491 self.assertEqual(no_params.content_size, 0)
584 self.assertEqual(no_params.content_size, 0)
492 self.assertEqual(with_params.content_size, 1536)
585 self.assertEqual(with_params.content_size, 1536)
493 self.assertEqual(no_params.dict_id, 0)
586 self.assertEqual(no_params.dict_id, 0)
494 self.assertEqual(with_params.dict_id, 0)
587 self.assertEqual(with_params.dict_id, 0)
495 self.assertFalse(no_params.has_checksum)
588 self.assertFalse(no_params.has_checksum)
496 self.assertFalse(with_params.has_checksum)
589 self.assertFalse(with_params.has_checksum)
497
590
498 self.assertEqual(len(with_size.getvalue()),
591 self.assertEqual(len(with_size.getvalue()),
499 len(no_size.getvalue()) + 1)
592 len(no_size.getvalue()) + 1)
500
593
501 def test_no_dict_id(self):
594 def test_no_dict_id(self):
502 samples = []
595 samples = []
503 for i in range(128):
596 for i in range(128):
504 samples.append(b'foo' * 64)
597 samples.append(b'foo' * 64)
505 samples.append(b'bar' * 64)
598 samples.append(b'bar' * 64)
506 samples.append(b'foobar' * 64)
599 samples.append(b'foobar' * 64)
507
600
508 d = zstd.train_dictionary(1024, samples)
601 d = zstd.train_dictionary(1024, samples)
509
602
510 with_dict_id = io.BytesIO()
603 with_dict_id = io.BytesIO()
511 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
604 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
512 with cctx.write_to(with_dict_id) as compressor:
605 with cctx.write_to(with_dict_id) as compressor:
513 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
606 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
514
607
515 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
608 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
516 no_dict_id = io.BytesIO()
609 no_dict_id = io.BytesIO()
517 with cctx.write_to(no_dict_id) as compressor:
610 with cctx.write_to(no_dict_id) as compressor:
518 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
611 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
519
612
520 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
613 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
521 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
614 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
522 self.assertEqual(no_params.content_size, 0)
615 self.assertEqual(no_params.content_size, 0)
523 self.assertEqual(with_params.content_size, 0)
616 self.assertEqual(with_params.content_size, 0)
524 self.assertEqual(no_params.dict_id, 0)
617 self.assertEqual(no_params.dict_id, 0)
525 self.assertEqual(with_params.dict_id, d.dict_id())
618 self.assertEqual(with_params.dict_id, d.dict_id())
526 self.assertFalse(no_params.has_checksum)
619 self.assertFalse(no_params.has_checksum)
527 self.assertFalse(with_params.has_checksum)
620 self.assertFalse(with_params.has_checksum)
528
621
529 self.assertEqual(len(with_dict_id.getvalue()),
622 self.assertEqual(len(with_dict_id.getvalue()),
530 len(no_dict_id.getvalue()) + 4)
623 len(no_dict_id.getvalue()) + 4)
531
624
532 def test_memory_size(self):
625 def test_memory_size(self):
533 cctx = zstd.ZstdCompressor(level=3)
626 cctx = zstd.ZstdCompressor(level=3)
534 buffer = io.BytesIO()
627 buffer = io.BytesIO()
535 with cctx.write_to(buffer) as compressor:
628 with cctx.write_to(buffer) as compressor:
536 size = compressor.memory_size()
629 size = compressor.memory_size()
537
630
538 self.assertGreater(size, 100000)
631 self.assertGreater(size, 100000)
539
632
540 def test_write_size(self):
633 def test_write_size(self):
541 cctx = zstd.ZstdCompressor(level=3)
634 cctx = zstd.ZstdCompressor(level=3)
542 dest = OpCountingBytesIO()
635 dest = OpCountingBytesIO()
543 with cctx.write_to(dest, write_size=1) as compressor:
636 with cctx.write_to(dest, write_size=1) as compressor:
544 self.assertEqual(compressor.write(b'foo'), 0)
637 self.assertEqual(compressor.write(b'foo'), 0)
545 self.assertEqual(compressor.write(b'bar'), 0)
638 self.assertEqual(compressor.write(b'bar'), 0)
546 self.assertEqual(compressor.write(b'foobar'), 0)
639 self.assertEqual(compressor.write(b'foobar'), 0)
547
640
548 self.assertEqual(len(dest.getvalue()), dest._write_count)
641 self.assertEqual(len(dest.getvalue()), dest._write_count)
549
642
550 def test_flush_repeated(self):
643 def test_flush_repeated(self):
551 cctx = zstd.ZstdCompressor(level=3)
644 cctx = zstd.ZstdCompressor(level=3)
552 dest = OpCountingBytesIO()
645 dest = OpCountingBytesIO()
553 with cctx.write_to(dest) as compressor:
646 with cctx.write_to(dest) as compressor:
554 self.assertEqual(compressor.write(b'foo'), 0)
647 self.assertEqual(compressor.write(b'foo'), 0)
555 self.assertEqual(dest._write_count, 0)
648 self.assertEqual(dest._write_count, 0)
556 self.assertEqual(compressor.flush(), 12)
649 self.assertEqual(compressor.flush(), 12)
557 self.assertEqual(dest._write_count, 1)
650 self.assertEqual(dest._write_count, 1)
558 self.assertEqual(compressor.write(b'bar'), 0)
651 self.assertEqual(compressor.write(b'bar'), 0)
559 self.assertEqual(dest._write_count, 1)
652 self.assertEqual(dest._write_count, 1)
560 self.assertEqual(compressor.flush(), 6)
653 self.assertEqual(compressor.flush(), 6)
561 self.assertEqual(dest._write_count, 2)
654 self.assertEqual(dest._write_count, 2)
562 self.assertEqual(compressor.write(b'baz'), 0)
655 self.assertEqual(compressor.write(b'baz'), 0)
563
656
564 self.assertEqual(dest._write_count, 3)
657 self.assertEqual(dest._write_count, 3)
565
658
566 def test_flush_empty_block(self):
659 def test_flush_empty_block(self):
567 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
660 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
568 dest = OpCountingBytesIO()
661 dest = OpCountingBytesIO()
569 with cctx.write_to(dest) as compressor:
662 with cctx.write_to(dest) as compressor:
570 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
663 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
571 count = dest._write_count
664 count = dest._write_count
572 offset = dest.tell()
665 offset = dest.tell()
573 self.assertEqual(compressor.flush(), 23)
666 self.assertEqual(compressor.flush(), 23)
574 self.assertGreater(dest._write_count, count)
667 self.assertGreater(dest._write_count, count)
575 self.assertGreater(dest.tell(), offset)
668 self.assertGreater(dest.tell(), offset)
576 offset = dest.tell()
669 offset = dest.tell()
577 # Ending the write here should cause an empty block to be written
670 # Ending the write here should cause an empty block to be written
578 # to denote end of frame.
671 # to denote end of frame.
579
672
580 trailing = dest.getvalue()[offset:]
673 trailing = dest.getvalue()[offset:]
581 # 3 bytes block header + 4 bytes frame checksum
674 # 3 bytes block header + 4 bytes frame checksum
582 self.assertEqual(len(trailing), 7)
675 self.assertEqual(len(trailing), 7)
583
676
584 header = trailing[0:3]
677 header = trailing[0:3]
585 self.assertEqual(header, b'\x01\x00\x00')
678 self.assertEqual(header, b'\x01\x00\x00')
586
679
680 def test_multithreaded(self):
681 dest = io.BytesIO()
682 cctx = zstd.ZstdCompressor(threads=2)
683 with cctx.write_to(dest) as compressor:
684 compressor.write(b'a' * 1048576)
685 compressor.write(b'b' * 1048576)
686 compressor.write(b'c' * 1048576)
687
688 self.assertEqual(len(dest.getvalue()), 295)
689
587
690
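The write_to() tests above show a streaming writer whose flush() pushes the current block downstream without ending the frame; the frame epilogue is written when the context manager exits. A sketch of the same pattern around a real file, flushing after each logical record so readers see it promptly; the file path and record source are illustrative only:

import zstd

def write_records(records, out_path, level=3):
    cctx = zstd.ZstdCompressor(level=level)
    with open(out_path, 'wb') as fh:
        with cctx.write_to(fh) as compressor:
            for record in records:      # each record is a bytes object
                compressor.write(record)
                compressor.flush()      # flush the block; frame ends on exit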
588 @make_cffi
691 @make_cffi
589 class TestCompressor_read_from(unittest.TestCase):
692 class TestCompressor_read_from(unittest.TestCase):
590 def test_type_validation(self):
693 def test_type_validation(self):
591 cctx = zstd.ZstdCompressor()
694 cctx = zstd.ZstdCompressor()
592
695
593 # Object with read() works.
696 # Object with read() works.
594 for chunk in cctx.read_from(io.BytesIO()):
697 for chunk in cctx.read_from(io.BytesIO()):
595 pass
698 pass
596
699
597 # Buffer protocol works.
700 # Buffer protocol works.
598 for chunk in cctx.read_from(b'foobar'):
701 for chunk in cctx.read_from(b'foobar'):
599 pass
702 pass
600
703
601 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
704 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
602 for chunk in cctx.read_from(True):
705 for chunk in cctx.read_from(True):
603 pass
706 pass
604
707
605 def test_read_empty(self):
708 def test_read_empty(self):
606 cctx = zstd.ZstdCompressor(level=1)
709 cctx = zstd.ZstdCompressor(level=1)
607
710
608 source = io.BytesIO()
711 source = io.BytesIO()
609 it = cctx.read_from(source)
712 it = cctx.read_from(source)
610 chunks = list(it)
713 chunks = list(it)
611 self.assertEqual(len(chunks), 1)
714 self.assertEqual(len(chunks), 1)
612 compressed = b''.join(chunks)
715 compressed = b''.join(chunks)
613 self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
716 self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
614
717
615 # And again with the buffer protocol.
718 # And again with the buffer protocol.
616 it = cctx.read_from(b'')
719 it = cctx.read_from(b'')
617 chunks = list(it)
720 chunks = list(it)
618 self.assertEqual(len(chunks), 1)
721 self.assertEqual(len(chunks), 1)
619 compressed2 = b''.join(chunks)
722 compressed2 = b''.join(chunks)
620 self.assertEqual(compressed2, compressed)
723 self.assertEqual(compressed2, compressed)
621
724
622 def test_read_large(self):
725 def test_read_large(self):
623 cctx = zstd.ZstdCompressor(level=1)
726 cctx = zstd.ZstdCompressor(level=1)
624
727
625 source = io.BytesIO()
728 source = io.BytesIO()
626 source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
729 source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
627 source.write(b'o')
730 source.write(b'o')
628 source.seek(0)
731 source.seek(0)
629
732
630 # Creating an iterator should not perform any compression until
733 # Creating an iterator should not perform any compression until
631 # first read.
734 # first read.
632 it = cctx.read_from(source, size=len(source.getvalue()))
735 it = cctx.read_from(source, size=len(source.getvalue()))
633 self.assertEqual(source.tell(), 0)
736 self.assertEqual(source.tell(), 0)
634
737
635 # We should have exactly 2 output chunks.
738 # We should have exactly 2 output chunks.
636 chunks = []
739 chunks = []
637 chunk = next(it)
740 chunk = next(it)
638 self.assertIsNotNone(chunk)
741 self.assertIsNotNone(chunk)
639 self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
742 self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
640 chunks.append(chunk)
743 chunks.append(chunk)
641 chunk = next(it)
744 chunk = next(it)
642 self.assertIsNotNone(chunk)
745 self.assertIsNotNone(chunk)
643 chunks.append(chunk)
746 chunks.append(chunk)
644
747
645 self.assertEqual(source.tell(), len(source.getvalue()))
748 self.assertEqual(source.tell(), len(source.getvalue()))
646
749
647 with self.assertRaises(StopIteration):
750 with self.assertRaises(StopIteration):
648 next(it)
751 next(it)
649
752
650 # And again for good measure.
753 # And again for good measure.
651 with self.assertRaises(StopIteration):
754 with self.assertRaises(StopIteration):
652 next(it)
755 next(it)
653
756
654 # We should get the same output as the one-shot compression mechanism.
757 # We should get the same output as the one-shot compression mechanism.
655 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
758 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
656
759
657 params = zstd.get_frame_parameters(b''.join(chunks))
760 params = zstd.get_frame_parameters(b''.join(chunks))
658 self.assertEqual(params.content_size, 0)
761 self.assertEqual(params.content_size, 0)
659 self.assertEqual(params.window_size, 262144)
762 self.assertEqual(params.window_size, 262144)
660 self.assertEqual(params.dict_id, 0)
763 self.assertEqual(params.dict_id, 0)
661 self.assertFalse(params.has_checksum)
764 self.assertFalse(params.has_checksum)
662
765
663 # Now check the buffer protocol.
766 # Now check the buffer protocol.
664 it = cctx.read_from(source.getvalue())
767 it = cctx.read_from(source.getvalue())
665 chunks = list(it)
768 chunks = list(it)
666 self.assertEqual(len(chunks), 2)
769 self.assertEqual(len(chunks), 2)
667 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
770 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
668
771
669 def test_read_write_size(self):
772 def test_read_write_size(self):
670 source = OpCountingBytesIO(b'foobarfoobar')
773 source = OpCountingBytesIO(b'foobarfoobar')
671 cctx = zstd.ZstdCompressor(level=3)
774 cctx = zstd.ZstdCompressor(level=3)
672 for chunk in cctx.read_from(source, read_size=1, write_size=1):
775 for chunk in cctx.read_from(source, read_size=1, write_size=1):
673 self.assertEqual(len(chunk), 1)
776 self.assertEqual(len(chunk), 1)
674
777
675 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
778 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
779
780 def test_multithreaded(self):
781 source = io.BytesIO()
782 source.write(b'a' * 1048576)
783 source.write(b'b' * 1048576)
784 source.write(b'c' * 1048576)
785 source.seek(0)
786
787 cctx = zstd.ZstdCompressor(threads=2)
788
789 compressed = b''.join(cctx.read_from(source))
790 self.assertEqual(len(compressed), 295)
791
792
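read_from() yields compressed chunks lazily (nothing is read from the source until the first iteration), so it can stream large inputs with bounded memory. A minimal sketch of compressing a file with it; the paths are illustrative only:

import zstd

def compress_file(in_path, out_path, level=3, threads=0):
    cctx = zstd.ZstdCompressor(level=level, threads=threads)
    with open(in_path, 'rb') as ifh, open(out_path, 'wb') as ofh:
        for chunk in cctx.read_from(ifh):
            ofh.write(chunk)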
793 class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
794 def test_multithreaded_unsupported(self):
795 cctx = zstd.ZstdCompressor(threads=2)
796
797 with self.assertRaisesRegexp(zstd.ZstdError, 'function cannot be called on ZstdCompressor configured for multi-threaded compression'):
798 cctx.multi_compress_to_buffer([b'foo'])
799
800 def test_invalid_inputs(self):
801 cctx = zstd.ZstdCompressor()
802
803 with self.assertRaises(TypeError):
804 cctx.multi_compress_to_buffer(True)
805
806 with self.assertRaises(TypeError):
807 cctx.multi_compress_to_buffer((1, 2))
808
809 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
810 cctx.multi_compress_to_buffer([u'foo'])
811
812 def test_empty_input(self):
813 cctx = zstd.ZstdCompressor()
814
815 with self.assertRaisesRegexp(ValueError, 'no source elements found'):
816 cctx.multi_compress_to_buffer([])
817
818 with self.assertRaisesRegexp(ValueError, 'source elements are empty'):
819 cctx.multi_compress_to_buffer([b'', b'', b''])
820
821 def test_list_input(self):
822 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
823
824 original = [b'foo' * 12, b'bar' * 6]
825 frames = [cctx.compress(c) for c in original]
826 b = cctx.multi_compress_to_buffer(original)
827
828 self.assertIsInstance(b, zstd.BufferWithSegmentsCollection)
829
830 self.assertEqual(len(b), 2)
831 self.assertEqual(b.size(), 44)
832
833 self.assertEqual(b[0].tobytes(), frames[0])
834 self.assertEqual(b[1].tobytes(), frames[1])
835
836 def test_buffer_with_segments_input(self):
837 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
838
839 original = [b'foo' * 4, b'bar' * 6]
840 frames = [cctx.compress(c) for c in original]
841
842 offsets = struct.pack('=QQQQ', 0, len(original[0]),
843 len(original[0]), len(original[1]))
844 segments = zstd.BufferWithSegments(b''.join(original), offsets)
845
846 result = cctx.multi_compress_to_buffer(segments)
847
848 self.assertEqual(len(result), 2)
849 self.assertEqual(result.size(), 47)
850
851 self.assertEqual(result[0].tobytes(), frames[0])
852 self.assertEqual(result[1].tobytes(), frames[1])
853
854 def test_buffer_with_segments_collection_input(self):
855 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
856
857 original = [
858 b'foo1',
859 b'foo2' * 2,
860 b'foo3' * 3,
861 b'foo4' * 4,
862 b'foo5' * 5,
863 ]
864
865 frames = [cctx.compress(c) for c in original]
866
867 b = b''.join([original[0], original[1]])
868 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
869 0, len(original[0]),
870 len(original[0]), len(original[1])))
871 b = b''.join([original[2], original[3], original[4]])
872 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
873 0, len(original[2]),
874 len(original[2]), len(original[3]),
875 len(original[2]) + len(original[3]), len(original[4])))
876
877 c = zstd.BufferWithSegmentsCollection(b1, b2)
878
879 result = cctx.multi_compress_to_buffer(c)
880
881 self.assertEqual(len(result), len(frames))
882
883 for i, frame in enumerate(frames):
884 self.assertEqual(result[i].tobytes(), frame)
885
886 def test_multiple_threads(self):
887 # Passing threads to ZstdCompressor would use the multi-threaded ZSTD APIs and
888 # produce different output; parallelism is requested via multi_compress_to_buffer instead.
889 refcctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
890 reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)]
891
892 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
893
894 frames = []
895 frames.extend(b'x' * 64 for i in range(256))
896 frames.extend(b'y' * 64 for i in range(256))
897
898 result = cctx.multi_compress_to_buffer(frames, threads=-1)
899
900 self.assertEqual(len(result), 512)
901 for i in range(512):
902 if i < 256:
903 self.assertEqual(result[i].tobytes(), reference[0])
904 else:
905 self.assertEqual(result[i].tobytes(), reference[1])
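The BufferWithSegments inputs above are described by an array of native-endian unsigned 64-bit (offset, length) pairs packed with struct. A sketch of building that array from a list of byte chunks; make_buffer_with_segments is a hypothetical helper, not part of the library:

import struct
import zstd

def make_buffer_with_segments(chunks):
    offsets = []
    pos = 0
    for chunk in chunks:
        offsets.extend((pos, len(chunk)))   # one (offset, length) pair per chunk
        pos += len(chunk)
    packed = struct.pack('=' + 'QQ' * len(chunks), *offsets)
    return zstd.BufferWithSegments(b''.join(chunks), packed)

segments = make_buffer_with_segments([b'foo' * 4, b'bar' * 6])
cctx = zstd.ZstdCompressor(write_content_size=True)
result = cctx.multi_compress_to_buffer(segments)
assert len(result) == 2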
@@ -1,186 +1,123
1 import io
2
3 try:
1 try:
4 import unittest2 as unittest
2 import unittest2 as unittest
5 except ImportError:
3 except ImportError:
6 import unittest
4 import unittest
7
5
8 try:
9 import hypothesis
10 import hypothesis.strategies as strategies
11 except ImportError:
12 hypothesis = None
13
14 import zstd
6 import zstd
15
7
16 from . common import (
8 from . common import (
17 make_cffi,
9 make_cffi,
18 )
10 )
19
11
20
12
21 @make_cffi
13 @make_cffi
22 class TestCompressionParameters(unittest.TestCase):
14 class TestCompressionParameters(unittest.TestCase):
23 def test_init_bad_arg_type(self):
15 def test_init_bad_arg_type(self):
24 with self.assertRaises(TypeError):
16 with self.assertRaises(TypeError):
25 zstd.CompressionParameters()
17 zstd.CompressionParameters()
26
18
27 with self.assertRaises(TypeError):
19 with self.assertRaises(TypeError):
28 zstd.CompressionParameters(0, 1)
20 zstd.CompressionParameters(0, 1)
29
21
30 def test_bounds(self):
22 def test_bounds(self):
31 zstd.CompressionParameters(zstd.WINDOWLOG_MIN,
23 zstd.CompressionParameters(zstd.WINDOWLOG_MIN,
32 zstd.CHAINLOG_MIN,
24 zstd.CHAINLOG_MIN,
33 zstd.HASHLOG_MIN,
25 zstd.HASHLOG_MIN,
34 zstd.SEARCHLOG_MIN,
26 zstd.SEARCHLOG_MIN,
35 zstd.SEARCHLENGTH_MIN,
27 zstd.SEARCHLENGTH_MIN + 1,
36 zstd.TARGETLENGTH_MIN,
28 zstd.TARGETLENGTH_MIN,
37 zstd.STRATEGY_FAST)
29 zstd.STRATEGY_FAST)
38
30
39 zstd.CompressionParameters(zstd.WINDOWLOG_MAX,
31 zstd.CompressionParameters(zstd.WINDOWLOG_MAX,
40 zstd.CHAINLOG_MAX,
32 zstd.CHAINLOG_MAX,
41 zstd.HASHLOG_MAX,
33 zstd.HASHLOG_MAX,
42 zstd.SEARCHLOG_MAX,
34 zstd.SEARCHLOG_MAX,
43 zstd.SEARCHLENGTH_MAX,
35 zstd.SEARCHLENGTH_MAX - 1,
44 zstd.TARGETLENGTH_MAX,
36 zstd.TARGETLENGTH_MAX,
45 zstd.STRATEGY_BTOPT)
37 zstd.STRATEGY_BTOPT)
46
38
47 def test_get_compression_parameters(self):
39 def test_get_compression_parameters(self):
48 p = zstd.get_compression_parameters(1)
40 p = zstd.get_compression_parameters(1)
49 self.assertIsInstance(p, zstd.CompressionParameters)
41 self.assertIsInstance(p, zstd.CompressionParameters)
50
42
51 self.assertEqual(p.window_log, 19)
43 self.assertEqual(p.window_log, 19)
52
44
53 def test_members(self):
45 def test_members(self):
54 p = zstd.CompressionParameters(10, 6, 7, 4, 5, 8, 1)
46 p = zstd.CompressionParameters(10, 6, 7, 4, 5, 8, 1)
55 self.assertEqual(p.window_log, 10)
47 self.assertEqual(p.window_log, 10)
56 self.assertEqual(p.chain_log, 6)
48 self.assertEqual(p.chain_log, 6)
57 self.assertEqual(p.hash_log, 7)
49 self.assertEqual(p.hash_log, 7)
58 self.assertEqual(p.search_log, 4)
50 self.assertEqual(p.search_log, 4)
59 self.assertEqual(p.search_length, 5)
51 self.assertEqual(p.search_length, 5)
60 self.assertEqual(p.target_length, 8)
52 self.assertEqual(p.target_length, 8)
61 self.assertEqual(p.strategy, 1)
53 self.assertEqual(p.strategy, 1)
62
54
55 def test_estimated_compression_context_size(self):
56 p = zstd.CompressionParameters(20, 16, 17, 1, 5, 16, zstd.STRATEGY_DFAST)
57
58 # 32-bit has slightly different values from 64-bit.
59 self.assertAlmostEqual(p.estimated_compression_context_size(), 1287076,
60 delta=110)
61
63
62
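test_members above fixes the positional order of CompressionParameters (window_log, chain_log, hash_log, search_log, search_length, target_length, strategy). A short usage sketch that reuses the values from test_estimated_compression_context_size and feeds the result to a compressor via compression_params, as the write_to tests do:

import io
import zstd

params = zstd.CompressionParameters(20, 16, 17, 1, 5, 16, zstd.STRATEGY_DFAST)
print(params.estimated_compression_context_size())  # roughly 1.2 MB

cctx = zstd.ZstdCompressor(compression_params=params)
with cctx.write_to(io.BytesIO()) as compressor:
    compressor.write(b'data to compress')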
64 @make_cffi
63 @make_cffi
65 class TestFrameParameters(unittest.TestCase):
64 class TestFrameParameters(unittest.TestCase):
66 def test_invalid_type(self):
65 def test_invalid_type(self):
67 with self.assertRaises(TypeError):
66 with self.assertRaises(TypeError):
68 zstd.get_frame_parameters(None)
67 zstd.get_frame_parameters(None)
69
68
70 with self.assertRaises(TypeError):
69 with self.assertRaises(TypeError):
71 zstd.get_frame_parameters(u'foobarbaz')
70 zstd.get_frame_parameters(u'foobarbaz')
72
71
73 def test_invalid_input_sizes(self):
72 def test_invalid_input_sizes(self):
74 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
73 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
75 zstd.get_frame_parameters(b'')
74 zstd.get_frame_parameters(b'')
76
75
77 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
76 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
78 zstd.get_frame_parameters(zstd.FRAME_HEADER)
77 zstd.get_frame_parameters(zstd.FRAME_HEADER)
79
78
80 def test_invalid_frame(self):
79 def test_invalid_frame(self):
81 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
80 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
82 zstd.get_frame_parameters(b'foobarbaz')
81 zstd.get_frame_parameters(b'foobarbaz')
83
82
84 def test_attributes(self):
83 def test_attributes(self):
85 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
84 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
86 self.assertEqual(params.content_size, 0)
85 self.assertEqual(params.content_size, 0)
87 self.assertEqual(params.window_size, 1024)
86 self.assertEqual(params.window_size, 1024)
88 self.assertEqual(params.dict_id, 0)
87 self.assertEqual(params.dict_id, 0)
89 self.assertFalse(params.has_checksum)
88 self.assertFalse(params.has_checksum)
90
89
91 # Lowest 2 bits indicate whether a dictionary ID is present and how wide its field is. Here, the dict id field is 1 byte.
90 # Lowest 2 bits indicate whether a dictionary ID is present and how wide its field is. Here, the dict id field is 1 byte.
92 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
91 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
93 self.assertEqual(params.content_size, 0)
92 self.assertEqual(params.content_size, 0)
94 self.assertEqual(params.window_size, 1024)
93 self.assertEqual(params.window_size, 1024)
95 self.assertEqual(params.dict_id, 255)
94 self.assertEqual(params.dict_id, 255)
96 self.assertFalse(params.has_checksum)
95 self.assertFalse(params.has_checksum)
97
96
98 # The 3rd lowest bit indicates whether a checksum is present.
97 # The 3rd lowest bit indicates whether a checksum is present.
99 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
98 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
100 self.assertEqual(params.content_size, 0)
99 self.assertEqual(params.content_size, 0)
101 self.assertEqual(params.window_size, 1024)
100 self.assertEqual(params.window_size, 1024)
102 self.assertEqual(params.dict_id, 0)
101 self.assertEqual(params.dict_id, 0)
103 self.assertTrue(params.has_checksum)
102 self.assertTrue(params.has_checksum)
104
103
105 # Upper 2 bits indicate the size of the content size field.
104 # Upper 2 bits indicate the size of the content size field.
106 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00')
105 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00')
107 self.assertEqual(params.content_size, 511)
106 self.assertEqual(params.content_size, 511)
108 self.assertEqual(params.window_size, 1024)
107 self.assertEqual(params.window_size, 1024)
109 self.assertEqual(params.dict_id, 0)
108 self.assertEqual(params.dict_id, 0)
110 self.assertFalse(params.has_checksum)
109 self.assertFalse(params.has_checksum)
111
110
112 # Window descriptor is 2nd byte after frame header.
111 # Window descriptor is 2nd byte after frame header.
113 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
112 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
114 self.assertEqual(params.content_size, 0)
113 self.assertEqual(params.content_size, 0)
115 self.assertEqual(params.window_size, 262144)
114 self.assertEqual(params.window_size, 262144)
116 self.assertEqual(params.dict_id, 0)
115 self.assertEqual(params.dict_id, 0)
117 self.assertFalse(params.has_checksum)
116 self.assertFalse(params.has_checksum)
118
117
119 # Set multiple things.
118 # Set multiple things.
120 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00')
119 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00')
121 self.assertEqual(params.content_size, 272)
120 self.assertEqual(params.content_size, 272)
122 self.assertEqual(params.window_size, 262144)
121 self.assertEqual(params.window_size, 262144)
123 self.assertEqual(params.dict_id, 15)
122 self.assertEqual(params.dict_id, 15)
124 self.assertTrue(params.has_checksum)
123 self.assertTrue(params.has_checksum)
125
126
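As an aside for readers of the assertions above: the byte immediately after the frame magic is the frame header descriptor, and the bit layout the comments describe can be decoded in a few lines of Python. This is an illustrative sketch only (not part of the test suite); the helper name is made up.

import struct

def describe_frame_descriptor(byte):
    # Low 2 bits: dictionary ID field width selector (0 = no dict id).
    dict_id_flag = byte & 0x03
    # 3rd lowest bit: content checksum present?
    has_checksum = bool(byte & 0x04)
    # Upper 2 bits: content size field width selector.
    fcs_flag = (byte & 0xc0) >> 6
    return dict_id_flag, has_checksum, fcs_flag

# 0x45, the descriptor used in the last case above, selects a 1-byte dict id,
# a checksum, and a 2-byte content size field.
assert describe_frame_descriptor(0x45) == (1, True, 1)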
127 if hypothesis:
128 s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
129 max_value=zstd.WINDOWLOG_MAX)
130 s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN,
131 max_value=zstd.CHAINLOG_MAX)
132 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN,
133 max_value=zstd.HASHLOG_MAX)
134 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
135 max_value=zstd.SEARCHLOG_MAX)
136 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
137 max_value=zstd.SEARCHLENGTH_MAX)
138 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
139 max_value=zstd.TARGETLENGTH_MAX)
140 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
141 zstd.STRATEGY_DFAST,
142 zstd.STRATEGY_GREEDY,
143 zstd.STRATEGY_LAZY,
144 zstd.STRATEGY_LAZY2,
145 zstd.STRATEGY_BTLAZY2,
146 zstd.STRATEGY_BTOPT))
147
148
149 @make_cffi
150 class TestCompressionParametersHypothesis(unittest.TestCase):
151 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
152 s_searchlength, s_targetlength, s_strategy)
153 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
154 searchlength, targetlength, strategy):
155 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
156 searchlog, searchlength,
157 targetlength, strategy)
158
159 # Verify we can instantiate a compressor with the supplied values.
160 # ZSTD_checkCParams() enforces stricter, strategy-dependent bounds than
161 # the advertised constants, so adjust the values to match.
162 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
163 searchlength += 1
164 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
165 searchlog, searchlength,
166 targetlength, strategy)
167 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
168 searchlength -= 1
169 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
170 searchlog, searchlength,
171 targetlength, strategy)
172
173 cctx = zstd.ZstdCompressor(compression_params=p)
174 with cctx.write_to(io.BytesIO()):
175 pass
176
177 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
178 s_searchlength, s_targetlength, s_strategy)
179 def test_estimate_compression_context_size(self, windowlog, chainlog,
180 hashlog, searchlog,
181 searchlength, targetlength,
182 strategy):
183 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
184 searchlog, searchlength,
185 targetlength, strategy)
186 size = zstd.estimate_compression_context_size(p)
@@ -1,577 +1,741
1 import io
1 import io
2 import random
2 import random
3 import struct
3 import struct
4 import sys
4 import sys
5
5
6 try:
6 try:
7 import unittest2 as unittest
7 import unittest2 as unittest
8 except ImportError:
8 except ImportError:
9 import unittest
9 import unittest
10
10
11 import zstd
11 import zstd
12
12
13 from .common import (
13 from .common import (
14 make_cffi,
14 make_cffi,
15 OpCountingBytesIO,
15 OpCountingBytesIO,
16 )
16 )
17
17
18
18
19 if sys.version_info[0] >= 3:
19 if sys.version_info[0] >= 3:
20 next = lambda it: it.__next__()
20 next = lambda it: it.__next__()
21 else:
21 else:
22 next = lambda it: it.next()
22 next = lambda it: it.next()
23
23
24
24
25 @make_cffi
25 @make_cffi
26 class TestDecompressor_decompress(unittest.TestCase):
26 class TestDecompressor_decompress(unittest.TestCase):
27 def test_empty_input(self):
27 def test_empty_input(self):
28 dctx = zstd.ZstdDecompressor()
28 dctx = zstd.ZstdDecompressor()
29
29
30 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
30 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
31 dctx.decompress(b'')
31 dctx.decompress(b'')
32
32
33 def test_invalid_input(self):
33 def test_invalid_input(self):
34 dctx = zstd.ZstdDecompressor()
34 dctx = zstd.ZstdDecompressor()
35
35
36 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
36 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
37 dctx.decompress(b'foobar')
37 dctx.decompress(b'foobar')
38
38
39 def test_no_content_size_in_frame(self):
39 def test_no_content_size_in_frame(self):
40 cctx = zstd.ZstdCompressor(write_content_size=False)
40 cctx = zstd.ZstdCompressor(write_content_size=False)
41 compressed = cctx.compress(b'foobar')
41 compressed = cctx.compress(b'foobar')
42
42
43 dctx = zstd.ZstdDecompressor()
43 dctx = zstd.ZstdDecompressor()
44 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
44 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
45 dctx.decompress(compressed)
45 dctx.decompress(compressed)
46
46
47 def test_content_size_present(self):
47 def test_content_size_present(self):
48 cctx = zstd.ZstdCompressor(write_content_size=True)
48 cctx = zstd.ZstdCompressor(write_content_size=True)
49 compressed = cctx.compress(b'foobar')
49 compressed = cctx.compress(b'foobar')
50
50
51 dctx = zstd.ZstdDecompressor()
51 dctx = zstd.ZstdDecompressor()
52 decompressed = dctx.decompress(compressed)
52 decompressed = dctx.decompress(compressed)
53 self.assertEqual(decompressed, b'foobar')
53 self.assertEqual(decompressed, b'foobar')
54
54
55 def test_max_output_size(self):
55 def test_max_output_size(self):
56 cctx = zstd.ZstdCompressor(write_content_size=False)
56 cctx = zstd.ZstdCompressor(write_content_size=False)
57 source = b'foobar' * 256
57 source = b'foobar' * 256
58 compressed = cctx.compress(source)
58 compressed = cctx.compress(source)
59
59
60 dctx = zstd.ZstdDecompressor()
60 dctx = zstd.ZstdDecompressor()
61 # Output will fit into a buffer exactly the size of the input.
61 # Output will fit into a buffer exactly the size of the input.
62 decompressed = dctx.decompress(compressed, max_output_size=len(source))
62 decompressed = dctx.decompress(compressed, max_output_size=len(source))
63 self.assertEqual(decompressed, source)
63 self.assertEqual(decompressed, source)
64
64
65 # Input size - 1 fails
65 # Input size - 1 fails
66 with self.assertRaisesRegexp(zstd.ZstdError, 'Destination buffer is too small'):
66 with self.assertRaisesRegexp(zstd.ZstdError, 'Destination buffer is too small'):
67 dctx.decompress(compressed, max_output_size=len(source) - 1)
67 dctx.decompress(compressed, max_output_size=len(source) - 1)
68
68
69 # Input size + 1 works
69 # Input size + 1 works
70 decompressed = dctx.decompress(compressed, max_output_size=len(source) + 1)
70 decompressed = dctx.decompress(compressed, max_output_size=len(source) + 1)
71 self.assertEqual(decompressed, source)
71 self.assertEqual(decompressed, source)
72
72
73 # A much larger buffer works.
73 # A much larger buffer works.
74 decompressed = dctx.decompress(compressed, max_output_size=len(source) * 64)
74 decompressed = dctx.decompress(compressed, max_output_size=len(source) * 64)
75 self.assertEqual(decompressed, source)
75 self.assertEqual(decompressed, source)
76
76
77 def test_stupidly_large_output_buffer(self):
77 def test_stupidly_large_output_buffer(self):
78 cctx = zstd.ZstdCompressor(write_content_size=False)
78 cctx = zstd.ZstdCompressor(write_content_size=False)
79 compressed = cctx.compress(b'foobar' * 256)
79 compressed = cctx.compress(b'foobar' * 256)
80 dctx = zstd.ZstdDecompressor()
80 dctx = zstd.ZstdDecompressor()
81
81
82 # Will get OverflowError on some Python distributions that can't
82 # Will get OverflowError on some Python distributions that can't
83 # handle really large integers.
83 # handle really large integers.
84 with self.assertRaises((MemoryError, OverflowError)):
84 with self.assertRaises((MemoryError, OverflowError)):
85 dctx.decompress(compressed, max_output_size=2**62)
85 dctx.decompress(compressed, max_output_size=2**62)
86
86
87 def test_dictionary(self):
87 def test_dictionary(self):
88 samples = []
88 samples = []
89 for i in range(128):
89 for i in range(128):
90 samples.append(b'foo' * 64)
90 samples.append(b'foo' * 64)
91 samples.append(b'bar' * 64)
91 samples.append(b'bar' * 64)
92 samples.append(b'foobar' * 64)
92 samples.append(b'foobar' * 64)
93
93
94 d = zstd.train_dictionary(8192, samples)
94 d = zstd.train_dictionary(8192, samples)
95
95
96 orig = b'foobar' * 16384
96 orig = b'foobar' * 16384
97 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True)
97 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True)
98 compressed = cctx.compress(orig)
98 compressed = cctx.compress(orig)
99
99
100 dctx = zstd.ZstdDecompressor(dict_data=d)
100 dctx = zstd.ZstdDecompressor(dict_data=d)
101 decompressed = dctx.decompress(compressed)
101 decompressed = dctx.decompress(compressed)
102
102
103 self.assertEqual(decompressed, orig)
103 self.assertEqual(decompressed, orig)
104
104
105 def test_dictionary_multiple(self):
105 def test_dictionary_multiple(self):
106 samples = []
106 samples = []
107 for i in range(128):
107 for i in range(128):
108 samples.append(b'foo' * 64)
108 samples.append(b'foo' * 64)
109 samples.append(b'bar' * 64)
109 samples.append(b'bar' * 64)
110 samples.append(b'foobar' * 64)
110 samples.append(b'foobar' * 64)
111
111
112 d = zstd.train_dictionary(8192, samples)
112 d = zstd.train_dictionary(8192, samples)
113
113
114 sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192)
114 sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192)
115 compressed = []
115 compressed = []
116 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True)
116 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True)
117 for source in sources:
117 for source in sources:
118 compressed.append(cctx.compress(source))
118 compressed.append(cctx.compress(source))
119
119
120 dctx = zstd.ZstdDecompressor(dict_data=d)
120 dctx = zstd.ZstdDecompressor(dict_data=d)
121 for i in range(len(sources)):
121 for i in range(len(sources)):
122 decompressed = dctx.decompress(compressed[i])
122 decompressed = dctx.decompress(compressed[i])
123 self.assertEqual(decompressed, sources[i])
123 self.assertEqual(decompressed, sources[i])
124
124
125
125
126 @make_cffi
126 @make_cffi
127 class TestDecompressor_copy_stream(unittest.TestCase):
127 class TestDecompressor_copy_stream(unittest.TestCase):
128 def test_no_read(self):
128 def test_no_read(self):
129 source = object()
129 source = object()
130 dest = io.BytesIO()
130 dest = io.BytesIO()
131
131
132 dctx = zstd.ZstdDecompressor()
132 dctx = zstd.ZstdDecompressor()
133 with self.assertRaises(ValueError):
133 with self.assertRaises(ValueError):
134 dctx.copy_stream(source, dest)
134 dctx.copy_stream(source, dest)
135
135
136 def test_no_write(self):
136 def test_no_write(self):
137 source = io.BytesIO()
137 source = io.BytesIO()
138 dest = object()
138 dest = object()
139
139
140 dctx = zstd.ZstdDecompressor()
140 dctx = zstd.ZstdDecompressor()
141 with self.assertRaises(ValueError):
141 with self.assertRaises(ValueError):
142 dctx.copy_stream(source, dest)
142 dctx.copy_stream(source, dest)
143
143
144 def test_empty(self):
144 def test_empty(self):
145 source = io.BytesIO()
145 source = io.BytesIO()
146 dest = io.BytesIO()
146 dest = io.BytesIO()
147
147
148 dctx = zstd.ZstdDecompressor()
148 dctx = zstd.ZstdDecompressor()
149 # TODO should this raise an error?
149 # TODO should this raise an error?
150 r, w = dctx.copy_stream(source, dest)
150 r, w = dctx.copy_stream(source, dest)
151
151
152 self.assertEqual(r, 0)
152 self.assertEqual(r, 0)
153 self.assertEqual(w, 0)
153 self.assertEqual(w, 0)
154 self.assertEqual(dest.getvalue(), b'')
154 self.assertEqual(dest.getvalue(), b'')
155
155
156 def test_large_data(self):
156 def test_large_data(self):
157 source = io.BytesIO()
157 source = io.BytesIO()
158 for i in range(255):
158 for i in range(255):
159 source.write(struct.Struct('>B').pack(i) * 16384)
159 source.write(struct.Struct('>B').pack(i) * 16384)
160 source.seek(0)
160 source.seek(0)
161
161
162 compressed = io.BytesIO()
162 compressed = io.BytesIO()
163 cctx = zstd.ZstdCompressor()
163 cctx = zstd.ZstdCompressor()
164 cctx.copy_stream(source, compressed)
164 cctx.copy_stream(source, compressed)
165
165
166 compressed.seek(0)
166 compressed.seek(0)
167 dest = io.BytesIO()
167 dest = io.BytesIO()
168 dctx = zstd.ZstdDecompressor()
168 dctx = zstd.ZstdDecompressor()
169 r, w = dctx.copy_stream(compressed, dest)
169 r, w = dctx.copy_stream(compressed, dest)
170
170
171 self.assertEqual(r, len(compressed.getvalue()))
171 self.assertEqual(r, len(compressed.getvalue()))
172 self.assertEqual(w, len(source.getvalue()))
172 self.assertEqual(w, len(source.getvalue()))
173
173
174 def test_read_write_size(self):
174 def test_read_write_size(self):
175 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(
175 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(
176 b'foobarfoobar'))
176 b'foobarfoobar'))
177
177
178 dest = OpCountingBytesIO()
178 dest = OpCountingBytesIO()
179 dctx = zstd.ZstdDecompressor()
179 dctx = zstd.ZstdDecompressor()
180 r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1)
180 r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1)
181
181
182 self.assertEqual(r, len(source.getvalue()))
182 self.assertEqual(r, len(source.getvalue()))
183 self.assertEqual(w, len(b'foobarfoobar'))
183 self.assertEqual(w, len(b'foobarfoobar'))
184 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
184 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
185 self.assertEqual(dest._write_count, len(dest.getvalue()))
185 self.assertEqual(dest._write_count, len(dest.getvalue()))
186
186
187
187
188 @make_cffi
188 @make_cffi
189 class TestDecompressor_decompressobj(unittest.TestCase):
189 class TestDecompressor_decompressobj(unittest.TestCase):
190 def test_simple(self):
190 def test_simple(self):
191 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
191 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
192
192
193 dctx = zstd.ZstdDecompressor()
193 dctx = zstd.ZstdDecompressor()
194 dobj = dctx.decompressobj()
194 dobj = dctx.decompressobj()
195 self.assertEqual(dobj.decompress(data), b'foobar')
195 self.assertEqual(dobj.decompress(data), b'foobar')
196
196
197 def test_reuse(self):
197 def test_reuse(self):
198 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
198 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
199
199
200 dctx = zstd.ZstdDecompressor()
200 dctx = zstd.ZstdDecompressor()
201 dobj = dctx.decompressobj()
201 dobj = dctx.decompressobj()
202 dobj.decompress(data)
202 dobj.decompress(data)
203
203
204 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'):
204 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'):
205 dobj.decompress(data)
205 dobj.decompress(data)
206
206
207
207
208 def decompress_via_writer(data):
208 def decompress_via_writer(data):
209 buffer = io.BytesIO()
209 buffer = io.BytesIO()
210 dctx = zstd.ZstdDecompressor()
210 dctx = zstd.ZstdDecompressor()
211 with dctx.write_to(buffer) as decompressor:
211 with dctx.write_to(buffer) as decompressor:
212 decompressor.write(data)
212 decompressor.write(data)
213 return buffer.getvalue()
213 return buffer.getvalue()
214
214
215
215
216 @make_cffi
216 @make_cffi
217 class TestDecompressor_write_to(unittest.TestCase):
217 class TestDecompressor_write_to(unittest.TestCase):
218 def test_empty_roundtrip(self):
218 def test_empty_roundtrip(self):
219 cctx = zstd.ZstdCompressor()
219 cctx = zstd.ZstdCompressor()
220 empty = cctx.compress(b'')
220 empty = cctx.compress(b'')
221 self.assertEqual(decompress_via_writer(empty), b'')
221 self.assertEqual(decompress_via_writer(empty), b'')
222
222
223 def test_large_roundtrip(self):
223 def test_large_roundtrip(self):
224 chunks = []
224 chunks = []
225 for i in range(255):
225 for i in range(255):
226 chunks.append(struct.Struct('>B').pack(i) * 16384)
226 chunks.append(struct.Struct('>B').pack(i) * 16384)
227 orig = b''.join(chunks)
227 orig = b''.join(chunks)
228 cctx = zstd.ZstdCompressor()
228 cctx = zstd.ZstdCompressor()
229 compressed = cctx.compress(orig)
229 compressed = cctx.compress(orig)
230
230
231 self.assertEqual(decompress_via_writer(compressed), orig)
231 self.assertEqual(decompress_via_writer(compressed), orig)
232
232
233 def test_multiple_calls(self):
233 def test_multiple_calls(self):
234 chunks = []
234 chunks = []
235 for i in range(255):
235 for i in range(255):
236 for j in range(255):
236 for j in range(255):
237 chunks.append(struct.Struct('>B').pack(j) * i)
237 chunks.append(struct.Struct('>B').pack(j) * i)
238
238
239 orig = b''.join(chunks)
239 orig = b''.join(chunks)
240 cctx = zstd.ZstdCompressor()
240 cctx = zstd.ZstdCompressor()
241 compressed = cctx.compress(orig)
241 compressed = cctx.compress(orig)
242
242
243 buffer = io.BytesIO()
243 buffer = io.BytesIO()
244 dctx = zstd.ZstdDecompressor()
244 dctx = zstd.ZstdDecompressor()
245 with dctx.write_to(buffer) as decompressor:
245 with dctx.write_to(buffer) as decompressor:
246 pos = 0
246 pos = 0
247 while pos < len(compressed):
247 while pos < len(compressed):
248 pos2 = pos + 8192
248 pos2 = pos + 8192
249 decompressor.write(compressed[pos:pos2])
249 decompressor.write(compressed[pos:pos2])
250 pos += 8192
250 pos += 8192
251 self.assertEqual(buffer.getvalue(), orig)
251 self.assertEqual(buffer.getvalue(), orig)
252
252
253 def test_dictionary(self):
253 def test_dictionary(self):
254 samples = []
254 samples = []
255 for i in range(128):
255 for i in range(128):
256 samples.append(b'foo' * 64)
256 samples.append(b'foo' * 64)
257 samples.append(b'bar' * 64)
257 samples.append(b'bar' * 64)
258 samples.append(b'foobar' * 64)
258 samples.append(b'foobar' * 64)
259
259
260 d = zstd.train_dictionary(8192, samples)
260 d = zstd.train_dictionary(8192, samples)
261
261
262 orig = b'foobar' * 16384
262 orig = b'foobar' * 16384
263 buffer = io.BytesIO()
263 buffer = io.BytesIO()
264 cctx = zstd.ZstdCompressor(dict_data=d)
264 cctx = zstd.ZstdCompressor(dict_data=d)
265 with cctx.write_to(buffer) as compressor:
265 with cctx.write_to(buffer) as compressor:
266 self.assertEqual(compressor.write(orig), 1544)
266 self.assertEqual(compressor.write(orig), 1544)
267
267
268 compressed = buffer.getvalue()
268 compressed = buffer.getvalue()
269 buffer = io.BytesIO()
269 buffer = io.BytesIO()
270
270
271 dctx = zstd.ZstdDecompressor(dict_data=d)
271 dctx = zstd.ZstdDecompressor(dict_data=d)
272 with dctx.write_to(buffer) as decompressor:
272 with dctx.write_to(buffer) as decompressor:
273 self.assertEqual(decompressor.write(compressed), len(orig))
273 self.assertEqual(decompressor.write(compressed), len(orig))
274
274
275 self.assertEqual(buffer.getvalue(), orig)
275 self.assertEqual(buffer.getvalue(), orig)
276
276
277 def test_memory_size(self):
277 def test_memory_size(self):
278 dctx = zstd.ZstdDecompressor()
278 dctx = zstd.ZstdDecompressor()
279 buffer = io.BytesIO()
279 buffer = io.BytesIO()
280 with dctx.write_to(buffer) as decompressor:
280 with dctx.write_to(buffer) as decompressor:
281 size = decompressor.memory_size()
281 size = decompressor.memory_size()
282
282
283 self.assertGreater(size, 100000)
283 self.assertGreater(size, 100000)
284
284
285 def test_write_size(self):
285 def test_write_size(self):
286 source = zstd.ZstdCompressor().compress(b'foobarfoobar')
286 source = zstd.ZstdCompressor().compress(b'foobarfoobar')
287 dest = OpCountingBytesIO()
287 dest = OpCountingBytesIO()
288 dctx = zstd.ZstdDecompressor()
288 dctx = zstd.ZstdDecompressor()
289 with dctx.write_to(dest, write_size=1) as decompressor:
289 with dctx.write_to(dest, write_size=1) as decompressor:
290 s = struct.Struct('>B')
290 s = struct.Struct('>B')
291 for c in source:
291 for c in source:
292 if not isinstance(c, str):
292 if not isinstance(c, str):
293 c = s.pack(c)
293 c = s.pack(c)
294 decompressor.write(c)
294 decompressor.write(c)
295
295
296
297 self.assertEqual(dest.getvalue(), b'foobarfoobar')
296 self.assertEqual(dest.getvalue(), b'foobarfoobar')
298 self.assertEqual(dest._write_count, len(dest.getvalue()))
297 self.assertEqual(dest._write_count, len(dest.getvalue()))
299
298
300
299
301 @make_cffi
300 @make_cffi
302 class TestDecompressor_read_from(unittest.TestCase):
301 class TestDecompressor_read_from(unittest.TestCase):
303 def test_type_validation(self):
302 def test_type_validation(self):
304 dctx = zstd.ZstdDecompressor()
303 dctx = zstd.ZstdDecompressor()
305
304
306 # Object with read() works.
305 # Object with read() works.
307 dctx.read_from(io.BytesIO())
306 dctx.read_from(io.BytesIO())
308
307
309 # Buffer protocol works.
308 # Buffer protocol works.
310 dctx.read_from(b'foobar')
309 dctx.read_from(b'foobar')
311
310
312 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
311 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
313 b''.join(dctx.read_from(True))
312 b''.join(dctx.read_from(True))
314
313
315 def test_empty_input(self):
314 def test_empty_input(self):
316 dctx = zstd.ZstdDecompressor()
315 dctx = zstd.ZstdDecompressor()
317
316
318 source = io.BytesIO()
317 source = io.BytesIO()
319 it = dctx.read_from(source)
318 it = dctx.read_from(source)
320 # TODO this is arguably wrong. Should get an error about a missing frame.
319 # TODO this is arguably wrong. Should get an error about a missing frame.
321 with self.assertRaises(StopIteration):
320 with self.assertRaises(StopIteration):
322 next(it)
321 next(it)
323
322
324 it = dctx.read_from(b'')
323 it = dctx.read_from(b'')
325 with self.assertRaises(StopIteration):
324 with self.assertRaises(StopIteration):
326 next(it)
325 next(it)
327
326
328 def test_invalid_input(self):
327 def test_invalid_input(self):
329 dctx = zstd.ZstdDecompressor()
328 dctx = zstd.ZstdDecompressor()
330
329
331 source = io.BytesIO(b'foobar')
330 source = io.BytesIO(b'foobar')
332 it = dctx.read_from(source)
331 it = dctx.read_from(source)
333 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
332 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
334 next(it)
333 next(it)
335
334
336 it = dctx.read_from(b'foobar')
335 it = dctx.read_from(b'foobar')
337 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
336 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
338 next(it)
337 next(it)
339
338
340 def test_empty_roundtrip(self):
339 def test_empty_roundtrip(self):
341 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
340 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
342 empty = cctx.compress(b'')
341 empty = cctx.compress(b'')
343
342
344 source = io.BytesIO(empty)
343 source = io.BytesIO(empty)
345 source.seek(0)
344 source.seek(0)
346
345
347 dctx = zstd.ZstdDecompressor()
346 dctx = zstd.ZstdDecompressor()
348 it = dctx.read_from(source)
347 it = dctx.read_from(source)
349
348
350 # No chunks should be emitted since there is no data.
349 # No chunks should be emitted since there is no data.
351 with self.assertRaises(StopIteration):
350 with self.assertRaises(StopIteration):
352 next(it)
351 next(it)
353
352
354 # Again for good measure.
353 # Again for good measure.
355 with self.assertRaises(StopIteration):
354 with self.assertRaises(StopIteration):
356 next(it)
355 next(it)
357
356
358 def test_skip_bytes_too_large(self):
357 def test_skip_bytes_too_large(self):
359 dctx = zstd.ZstdDecompressor()
358 dctx = zstd.ZstdDecompressor()
360
359
361 with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
360 with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
362 b''.join(dctx.read_from(b'', skip_bytes=1, read_size=1))
361 b''.join(dctx.read_from(b'', skip_bytes=1, read_size=1))
363
362
364 with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
363 with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
365 b''.join(dctx.read_from(b'foobar', skip_bytes=10))
364 b''.join(dctx.read_from(b'foobar', skip_bytes=10))
366
365
367 def test_skip_bytes(self):
366 def test_skip_bytes(self):
368 cctx = zstd.ZstdCompressor(write_content_size=False)
367 cctx = zstd.ZstdCompressor(write_content_size=False)
369 compressed = cctx.compress(b'foobar')
368 compressed = cctx.compress(b'foobar')
370
369
371 dctx = zstd.ZstdDecompressor()
370 dctx = zstd.ZstdDecompressor()
372 output = b''.join(dctx.read_from(b'hdr' + compressed, skip_bytes=3))
371 output = b''.join(dctx.read_from(b'hdr' + compressed, skip_bytes=3))
373 self.assertEqual(output, b'foobar')
372 self.assertEqual(output, b'foobar')
374
373
375 def test_large_output(self):
374 def test_large_output(self):
376 source = io.BytesIO()
375 source = io.BytesIO()
377 source.write(b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
376 source.write(b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
378 source.write(b'o')
377 source.write(b'o')
379 source.seek(0)
378 source.seek(0)
380
379
381 cctx = zstd.ZstdCompressor(level=1)
380 cctx = zstd.ZstdCompressor(level=1)
382 compressed = io.BytesIO(cctx.compress(source.getvalue()))
381 compressed = io.BytesIO(cctx.compress(source.getvalue()))
383 compressed.seek(0)
382 compressed.seek(0)
384
383
385 dctx = zstd.ZstdDecompressor()
384 dctx = zstd.ZstdDecompressor()
386 it = dctx.read_from(compressed)
385 it = dctx.read_from(compressed)
387
386
388 chunks = []
387 chunks = []
389 chunks.append(next(it))
388 chunks.append(next(it))
390 chunks.append(next(it))
389 chunks.append(next(it))
391
390
392 with self.assertRaises(StopIteration):
391 with self.assertRaises(StopIteration):
393 next(it)
392 next(it)
394
393
395 decompressed = b''.join(chunks)
394 decompressed = b''.join(chunks)
396 self.assertEqual(decompressed, source.getvalue())
395 self.assertEqual(decompressed, source.getvalue())
397
396
398 # And again with buffer protocol.
397 # And again with buffer protocol.
399 it = dctx.read_from(compressed.getvalue())
398 it = dctx.read_from(compressed.getvalue())
400 chunks = []
399 chunks = []
401 chunks.append(next(it))
400 chunks.append(next(it))
402 chunks.append(next(it))
401 chunks.append(next(it))
403
402
404 with self.assertRaises(StopIteration):
403 with self.assertRaises(StopIteration):
405 next(it)
404 next(it)
406
405
407 decompressed = b''.join(chunks)
406 decompressed = b''.join(chunks)
408 self.assertEqual(decompressed, source.getvalue())
407 self.assertEqual(decompressed, source.getvalue())
409
408
410 def test_large_input(self):
409 def test_large_input(self):
411 bytes = list(struct.Struct('>B').pack(i) for i in range(256))
410 bytes = list(struct.Struct('>B').pack(i) for i in range(256))
412 compressed = io.BytesIO()
411 compressed = io.BytesIO()
413 input_size = 0
412 input_size = 0
414 cctx = zstd.ZstdCompressor(level=1)
413 cctx = zstd.ZstdCompressor(level=1)
415 with cctx.write_to(compressed) as compressor:
414 with cctx.write_to(compressed) as compressor:
416 while True:
415 while True:
417 compressor.write(random.choice(bytes))
416 compressor.write(random.choice(bytes))
418 input_size += 1
417 input_size += 1
419
418
420 have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
419 have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
421 have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2
420 have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2
422 if have_compressed and have_raw:
421 if have_compressed and have_raw:
423 break
422 break
424
423
425 compressed.seek(0)
424 compressed.seek(0)
426 self.assertGreater(len(compressed.getvalue()),
425 self.assertGreater(len(compressed.getvalue()),
427 zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE)
426 zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE)
428
427
429 dctx = zstd.ZstdDecompressor()
428 dctx = zstd.ZstdDecompressor()
430 it = dctx.read_from(compressed)
429 it = dctx.read_from(compressed)
431
430
432 chunks = []
431 chunks = []
433 chunks.append(next(it))
432 chunks.append(next(it))
434 chunks.append(next(it))
433 chunks.append(next(it))
435 chunks.append(next(it))
434 chunks.append(next(it))
436
435
437 with self.assertRaises(StopIteration):
436 with self.assertRaises(StopIteration):
438 next(it)
437 next(it)
439
438
440 decompressed = b''.join(chunks)
439 decompressed = b''.join(chunks)
441 self.assertEqual(len(decompressed), input_size)
440 self.assertEqual(len(decompressed), input_size)
442
441
443 # And again with buffer protocol.
442 # And again with buffer protocol.
444 it = dctx.read_from(compressed.getvalue())
443 it = dctx.read_from(compressed.getvalue())
445
444
446 chunks = []
445 chunks = []
447 chunks.append(next(it))
446 chunks.append(next(it))
448 chunks.append(next(it))
447 chunks.append(next(it))
449 chunks.append(next(it))
448 chunks.append(next(it))
450
449
451 with self.assertRaises(StopIteration):
450 with self.assertRaises(StopIteration):
452 next(it)
451 next(it)
453
452
454 decompressed = b''.join(chunks)
453 decompressed = b''.join(chunks)
455 self.assertEqual(len(decompressed), input_size)
454 self.assertEqual(len(decompressed), input_size)
456
455
457 def test_interesting(self):
456 def test_interesting(self):
458 # Found this edge case via fuzzing.
457 # Found this edge case via fuzzing.
459 cctx = zstd.ZstdCompressor(level=1)
458 cctx = zstd.ZstdCompressor(level=1)
460
459
461 source = io.BytesIO()
460 source = io.BytesIO()
462
461
463 compressed = io.BytesIO()
462 compressed = io.BytesIO()
464 with cctx.write_to(compressed) as compressor:
463 with cctx.write_to(compressed) as compressor:
465 for i in range(256):
464 for i in range(256):
466 chunk = b'\0' * 1024
465 chunk = b'\0' * 1024
467 compressor.write(chunk)
466 compressor.write(chunk)
468 source.write(chunk)
467 source.write(chunk)
469
468
470 dctx = zstd.ZstdDecompressor()
469 dctx = zstd.ZstdDecompressor()
471
470
472 simple = dctx.decompress(compressed.getvalue(),
471 simple = dctx.decompress(compressed.getvalue(),
473 max_output_size=len(source.getvalue()))
472 max_output_size=len(source.getvalue()))
474 self.assertEqual(simple, source.getvalue())
473 self.assertEqual(simple, source.getvalue())
475
474
476 compressed.seek(0)
475 compressed.seek(0)
477 streamed = b''.join(dctx.read_from(compressed))
476 streamed = b''.join(dctx.read_from(compressed))
478 self.assertEqual(streamed, source.getvalue())
477 self.assertEqual(streamed, source.getvalue())
479
478
480 def test_read_write_size(self):
479 def test_read_write_size(self):
481 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar'))
480 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar'))
482 dctx = zstd.ZstdDecompressor()
481 dctx = zstd.ZstdDecompressor()
483 for chunk in dctx.read_from(source, read_size=1, write_size=1):
482 for chunk in dctx.read_from(source, read_size=1, write_size=1):
484 self.assertEqual(len(chunk), 1)
483 self.assertEqual(len(chunk), 1)
485
484
486 self.assertEqual(source._read_count, len(source.getvalue()))
485 self.assertEqual(source._read_count, len(source.getvalue()))
487
486
488
487
489 @make_cffi
488 @make_cffi
490 class TestDecompressor_content_dict_chain(unittest.TestCase):
489 class TestDecompressor_content_dict_chain(unittest.TestCase):
491 def test_bad_inputs_simple(self):
490 def test_bad_inputs_simple(self):
492 dctx = zstd.ZstdDecompressor()
491 dctx = zstd.ZstdDecompressor()
493
492
494 with self.assertRaises(TypeError):
493 with self.assertRaises(TypeError):
495 dctx.decompress_content_dict_chain(b'foo')
494 dctx.decompress_content_dict_chain(b'foo')
496
495
497 with self.assertRaises(TypeError):
496 with self.assertRaises(TypeError):
498 dctx.decompress_content_dict_chain((b'foo', b'bar'))
497 dctx.decompress_content_dict_chain((b'foo', b'bar'))
499
498
500 with self.assertRaisesRegexp(ValueError, 'empty input chain'):
499 with self.assertRaisesRegexp(ValueError, 'empty input chain'):
501 dctx.decompress_content_dict_chain([])
500 dctx.decompress_content_dict_chain([])
502
501
503 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
502 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
504 dctx.decompress_content_dict_chain([u'foo'])
503 dctx.decompress_content_dict_chain([u'foo'])
505
504
506 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
505 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
507 dctx.decompress_content_dict_chain([True])
506 dctx.decompress_content_dict_chain([True])
508
507
509 with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'):
508 with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'):
510 dctx.decompress_content_dict_chain([zstd.FRAME_HEADER])
509 dctx.decompress_content_dict_chain([zstd.FRAME_HEADER])
511
510
512 with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'):
511 with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'):
513 dctx.decompress_content_dict_chain([b'foo' * 8])
512 dctx.decompress_content_dict_chain([b'foo' * 8])
514
513
515 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
514 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
516
515
517 with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'):
516 with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'):
518 dctx.decompress_content_dict_chain([no_size])
517 dctx.decompress_content_dict_chain([no_size])
519
518
520 # Corrupt first frame.
519 # Corrupt first frame.
521 frame = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
520 frame = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
522 frame = frame[0:12] + frame[15:]
521 frame = frame[0:12] + frame[15:]
523 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 0'):
522 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 0'):
524 dctx.decompress_content_dict_chain([frame])
523 dctx.decompress_content_dict_chain([frame])
525
524
526 def test_bad_subsequent_input(self):
525 def test_bad_subsequent_input(self):
527 initial = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
526 initial = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
528
527
529 dctx = zstd.ZstdDecompressor()
528 dctx = zstd.ZstdDecompressor()
530
529
531 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
530 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
532 dctx.decompress_content_dict_chain([initial, u'foo'])
531 dctx.decompress_content_dict_chain([initial, u'foo'])
533
532
534 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
533 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
535 dctx.decompress_content_dict_chain([initial, None])
534 dctx.decompress_content_dict_chain([initial, None])
536
535
537 with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'):
536 with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'):
538 dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER])
537 dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER])
539
538
540 with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'):
539 with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'):
541 dctx.decompress_content_dict_chain([initial, b'foo' * 8])
540 dctx.decompress_content_dict_chain([initial, b'foo' * 8])
542
541
543 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
542 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
544
543
545 with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'):
544 with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'):
546 dctx.decompress_content_dict_chain([initial, no_size])
545 dctx.decompress_content_dict_chain([initial, no_size])
547
546
548 # Corrupt second frame.
547 # Corrupt second frame.
549 cctx = zstd.ZstdCompressor(write_content_size=True, dict_data=zstd.ZstdCompressionDict(b'foo' * 64))
548 cctx = zstd.ZstdCompressor(write_content_size=True, dict_data=zstd.ZstdCompressionDict(b'foo' * 64))
550 frame = cctx.compress(b'bar' * 64)
549 frame = cctx.compress(b'bar' * 64)
551 frame = frame[0:12] + frame[15:]
550 frame = frame[0:12] + frame[15:]
552
551
553 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 1'):
552 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 1'):
554 dctx.decompress_content_dict_chain([initial, frame])
553 dctx.decompress_content_dict_chain([initial, frame])
555
554
556 def test_simple(self):
555 def test_simple(self):
557 original = [
556 original = [
558 b'foo' * 64,
557 b'foo' * 64,
559 b'foobar' * 64,
558 b'foobar' * 64,
560 b'baz' * 64,
559 b'baz' * 64,
561 b'foobaz' * 64,
560 b'foobaz' * 64,
562 b'foobarbaz' * 64,
561 b'foobarbaz' * 64,
563 ]
562 ]
564
563
565 chunks = []
564 chunks = []
566 chunks.append(zstd.ZstdCompressor(write_content_size=True).compress(original[0]))
565 chunks.append(zstd.ZstdCompressor(write_content_size=True).compress(original[0]))
567 for i, chunk in enumerate(original[1:]):
566 for i, chunk in enumerate(original[1:]):
568 d = zstd.ZstdCompressionDict(original[i])
567 d = zstd.ZstdCompressionDict(original[i])
569 cctx = zstd.ZstdCompressor(dict_data=d, write_content_size=True)
568 cctx = zstd.ZstdCompressor(dict_data=d, write_content_size=True)
570 chunks.append(cctx.compress(chunk))
569 chunks.append(cctx.compress(chunk))
571
570
572 for i in range(1, len(original)):
571 for i in range(1, len(original)):
573 chain = chunks[0:i]
572 chain = chunks[0:i]
574 expected = original[i - 1]
573 expected = original[i - 1]
575 dctx = zstd.ZstdDecompressor()
574 dctx = zstd.ZstdDecompressor()
576 decompressed = dctx.decompress_content_dict_chain(chain)
575 decompressed = dctx.decompress_content_dict_chain(chain)
577 self.assertEqual(decompressed, expected)
576 self.assertEqual(decompressed, expected)
577
578
579 # TODO enable for CFFI
580 class TestDecompressor_multi_decompress_to_buffer(unittest.TestCase):
581 def test_invalid_inputs(self):
582 dctx = zstd.ZstdDecompressor()
583
584 with self.assertRaises(TypeError):
585 dctx.multi_decompress_to_buffer(True)
586
587 with self.assertRaises(TypeError):
588 dctx.multi_decompress_to_buffer((1, 2))
589
590 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
591 dctx.multi_decompress_to_buffer([u'foo'])
592
593 with self.assertRaisesRegexp(ValueError, 'could not determine decompressed size of item 0'):
594 dctx.multi_decompress_to_buffer([b'foobarbaz'])
595
596 def test_list_input(self):
597 cctx = zstd.ZstdCompressor(write_content_size=True)
598
599 original = [b'foo' * 4, b'bar' * 6]
600 frames = [cctx.compress(d) for d in original]
601
602 dctx = zstd.ZstdDecompressor()
603 result = dctx.multi_decompress_to_buffer(frames)
604
605 self.assertEqual(len(result), len(frames))
606 self.assertEqual(result.size(), sum(map(len, original)))
607
608 for i, data in enumerate(original):
609 self.assertEqual(result[i].tobytes(), data)
610
611 self.assertEqual(result[0].offset, 0)
612 self.assertEqual(len(result[0]), 12)
613 self.assertEqual(result[1].offset, 12)
614 self.assertEqual(len(result[1]), 18)
615
616 def test_list_input_frame_sizes(self):
617 cctx = zstd.ZstdCompressor(write_content_size=False)
618
619 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
620 frames = [cctx.compress(d) for d in original]
621 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
622
623 dctx = zstd.ZstdDecompressor()
624 result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
625
626 self.assertEqual(len(result), len(frames))
627 self.assertEqual(result.size(), sum(map(len, original)))
628
629 for i, data in enumerate(original):
630 self.assertEqual(result[i].tobytes(), data)
631
632 def test_buffer_with_segments_input(self):
633 cctx = zstd.ZstdCompressor(write_content_size=True)
634
635 original = [b'foo' * 4, b'bar' * 6]
636 frames = [cctx.compress(d) for d in original]
637
638 dctx = zstd.ZstdDecompressor()
639
640 segments = struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]), len(frames[1]))
641 b = zstd.BufferWithSegments(b''.join(frames), segments)
642
643 result = dctx.multi_decompress_to_buffer(b)
644
645 self.assertEqual(len(result), len(frames))
646 self.assertEqual(result[0].offset, 0)
647 self.assertEqual(len(result[0]), 12)
648 self.assertEqual(result[1].offset, 12)
649 self.assertEqual(len(result[1]), 18)
650
651 def test_buffer_with_segments_sizes(self):
652 cctx = zstd.ZstdCompressor(write_content_size=False)
653 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
654 frames = [cctx.compress(d) for d in original]
655 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
656
657 segments = struct.pack('=QQQQQQ', 0, len(frames[0]),
658 len(frames[0]), len(frames[1]),
659 len(frames[0]) + len(frames[1]), len(frames[2]))
660 b = zstd.BufferWithSegments(b''.join(frames), segments)
661
662 dctx = zstd.ZstdDecompressor()
663 result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes)
664
665 self.assertEqual(len(result), len(frames))
666 self.assertEqual(result.size(), sum(map(len, original)))
667
668 for i, data in enumerate(original):
669 self.assertEqual(result[i].tobytes(), data)
670
671 def test_buffer_with_segments_collection_input(self):
672 cctx = zstd.ZstdCompressor(write_content_size=True)
673
674 original = [
675 b'foo0' * 2,
676 b'foo1' * 3,
677 b'foo2' * 4,
678 b'foo3' * 5,
679 b'foo4' * 6,
680 ]
681
682 frames = cctx.multi_compress_to_buffer(original)
683
684 # Check round trip.
685 dctx = zstd.ZstdDecompressor()
686 decompressed = dctx.multi_decompress_to_buffer(frames, threads=3)
687
688 self.assertEqual(len(decompressed), len(original))
689
690 for i, data in enumerate(original):
691 self.assertEqual(data, decompressed[i].tobytes())
692
693 # And a manual mode.
694 b = b''.join([frames[0].tobytes(), frames[1].tobytes()])
695 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
696 0, len(frames[0]),
697 len(frames[0]), len(frames[1])))
698
699 b = b''.join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()])
700 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
701 0, len(frames[2]),
702 len(frames[2]), len(frames[3]),
703 len(frames[2]) + len(frames[3]), len(frames[4])))
704
705 c = zstd.BufferWithSegmentsCollection(b1, b2)
706
707 dctx = zstd.ZstdDecompressor()
708 decompressed = dctx.multi_decompress_to_buffer(c)
709
710 self.assertEqual(len(decompressed), 5)
711 for i in range(5):
712 self.assertEqual(decompressed[i].tobytes(), original[i])
713
714 def test_multiple_threads(self):
715 cctx = zstd.ZstdCompressor(write_content_size=True)
716
717 frames = []
718 frames.extend(cctx.compress(b'x' * 64) for i in range(256))
719 frames.extend(cctx.compress(b'y' * 64) for i in range(256))
720
721 dctx = zstd.ZstdDecompressor()
722 result = dctx.multi_decompress_to_buffer(frames, threads=-1)
723
724 self.assertEqual(len(result), len(frames))
725 self.assertEqual(result.size(), 2 * 64 * 256)
726 self.assertEqual(result[0].tobytes(), b'x' * 64)
727 self.assertEqual(result[256].tobytes(), b'y' * 64)
728
729 def test_item_failure(self):
730 cctx = zstd.ZstdCompressor(write_content_size=True)
731 frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)]
732
733 frames[1] = frames[1] + b'extra'
734
735 dctx = zstd.ZstdDecompressor()
736
737 with self.assertRaisesRegexp(zstd.ZstdError, 'error decompressing item 1: Src size incorrect'):
738 dctx.multi_decompress_to_buffer(frames)
739
740 with self.assertRaisesRegexp(zstd.ZstdError, 'error decompressing item 1: Src size incorrect'):
741 dctx.multi_decompress_to_buffer(frames, threads=2)
@@ -1,50 +1,110
1 import sys
1 import sys
2
2
3 try:
3 try:
4 import unittest2 as unittest
4 import unittest2 as unittest
5 except ImportError:
5 except ImportError:
6 import unittest
6 import unittest
7
7
8 import zstd
8 import zstd
9
9
10 from . common import (
10 from . common import (
11 make_cffi,
11 make_cffi,
12 )
12 )
13
13
14 if sys.version_info[0] >= 3:
14 if sys.version_info[0] >= 3:
15 int_type = int
15 int_type = int
16 else:
16 else:
17 int_type = long
17 int_type = long
18
18
19
19
20 @make_cffi
20 @make_cffi
21 class TestTrainDictionary(unittest.TestCase):
21 class TestTrainDictionary(unittest.TestCase):
22 def test_no_args(self):
22 def test_no_args(self):
23 with self.assertRaises(TypeError):
23 with self.assertRaises(TypeError):
24 zstd.train_dictionary()
24 zstd.train_dictionary()
25
25
26 def test_bad_args(self):
26 def test_bad_args(self):
27 with self.assertRaises(TypeError):
27 with self.assertRaises(TypeError):
28 zstd.train_dictionary(8192, u'foo')
28 zstd.train_dictionary(8192, u'foo')
29
29
30 with self.assertRaises(ValueError):
30 with self.assertRaises(ValueError):
31 zstd.train_dictionary(8192, [u'foo'])
31 zstd.train_dictionary(8192, [u'foo'])
32
32
33 def test_basic(self):
33 def test_basic(self):
34 samples = []
34 samples = []
35 for i in range(128):
35 for i in range(128):
36 samples.append(b'foo' * 64)
36 samples.append(b'foo' * 64)
37 samples.append(b'bar' * 64)
37 samples.append(b'bar' * 64)
38 samples.append(b'foobar' * 64)
38 samples.append(b'foobar' * 64)
39 samples.append(b'baz' * 64)
39 samples.append(b'baz' * 64)
40 samples.append(b'foobaz' * 64)
40 samples.append(b'foobaz' * 64)
41 samples.append(b'bazfoo' * 64)
41 samples.append(b'bazfoo' * 64)
42
42
43 d = zstd.train_dictionary(8192, samples)
43 d = zstd.train_dictionary(8192, samples)
44 self.assertLessEqual(len(d), 8192)
44 self.assertLessEqual(len(d), 8192)
45
45
46 dict_id = d.dict_id()
46 dict_id = d.dict_id()
47 self.assertIsInstance(dict_id, int_type)
47 self.assertIsInstance(dict_id, int_type)
48
48
49 data = d.as_bytes()
49 data = d.as_bytes()
50 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
50 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
51
52 def test_set_dict_id(self):
53 samples = []
54 for i in range(128):
55 samples.append(b'foo' * 64)
56 samples.append(b'foobar' * 64)
57
58 d = zstd.train_dictionary(8192, samples, dict_id=42)
59 self.assertEqual(d.dict_id(), 42)
60
61
62 @make_cffi
63 class TestTrainCoverDictionary(unittest.TestCase):
64 def test_no_args(self):
65 with self.assertRaises(TypeError):
66 zstd.train_cover_dictionary()
67
68 def test_bad_args(self):
69 with self.assertRaises(TypeError):
70 zstd.train_cover_dictionary(8192, u'foo')
71
72 with self.assertRaises(ValueError):
73 zstd.train_cover_dictionary(8192, [u'foo'])
74
75 def test_basic(self):
76 samples = []
77 for i in range(128):
78 samples.append(b'foo' * 64)
79 samples.append(b'foobar' * 64)
80
81 d = zstd.train_cover_dictionary(8192, samples, k=64, d=16)
82 self.assertIsInstance(d.dict_id(), int_type)
83
84 data = d.as_bytes()
85 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
86
87 self.assertEqual(d.k, 64)
88 self.assertEqual(d.d, 16)
89
90 def test_set_dict_id(self):
91 samples = []
92 for i in range(128):
93 samples.append(b'foo' * 64)
94 samples.append(b'foobar' * 64)
95
96 d = zstd.train_cover_dictionary(8192, samples, k=64, d=16,
97 dict_id=42)
98 self.assertEqual(d.dict_id(), 42)
99
100 def test_optimize(self):
101 samples = []
102 for i in range(128):
103 samples.append(b'foo' * 64)
104 samples.append(b'foobar' * 64)
105
106 d = zstd.train_cover_dictionary(8192, samples, optimize=True,
107 threads=-1, steps=1, d=16)
108
109 self.assertEqual(d.k, 16)
110 self.assertEqual(d.d, 16)
@@ -1,145 +1,210
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 /* A Python C extension for Zstandard. */
9 /* A Python C extension for Zstandard. */
10
10
11 #if defined(_WIN32)
12 #define WIN32_LEAN_AND_MEAN
13 #include <Windows.h>
14 #endif
15
11 #include "python-zstandard.h"
16 #include "python-zstandard.h"
12
17
13 PyObject *ZstdError;
18 PyObject *ZstdError;
14
19
15 PyDoc_STRVAR(estimate_compression_context_size__doc__,
20 PyDoc_STRVAR(estimate_compression_context_size__doc__,
16 "estimate_compression_context_size(compression_parameters)\n"
21 "estimate_compression_context_size(compression_parameters)\n"
17 "\n"
22 "\n"
18 "Give the amount of memory allocated for a compression context given a\n"
23 "Give the amount of memory allocated for a compression context given a\n"
19 "CompressionParameters instance");
24 "CompressionParameters instance");
20
25
21 PyDoc_STRVAR(estimate_decompression_context_size__doc__,
26 PyDoc_STRVAR(estimate_decompression_context_size__doc__,
22 "estimate_decompression_context_size()\n"
27 "estimate_decompression_context_size()\n"
23 "\n"
28 "\n"
24 "Estimate the amount of memory allocated to a decompression context.\n"
29 "Estimate the amount of memory allocated to a decompression context.\n"
25 );
30 );
26
31
27 static PyObject* estimate_decompression_context_size(PyObject* self) {
32 static PyObject* estimate_decompression_context_size(PyObject* self) {
28 return PyLong_FromSize_t(ZSTD_estimateDCtxSize());
33 return PyLong_FromSize_t(ZSTD_estimateDCtxSize());
29 }
34 }
30
35
31 PyDoc_STRVAR(get_compression_parameters__doc__,
36 PyDoc_STRVAR(get_compression_parameters__doc__,
32 "get_compression_parameters(compression_level[, source_size[, dict_size]])\n"
37 "get_compression_parameters(compression_level[, source_size[, dict_size]])\n"
33 "\n"
38 "\n"
34 "Obtains a ``CompressionParameters`` instance from a compression level and\n"
39 "Obtains a ``CompressionParameters`` instance from a compression level and\n"
35 "optional input size and dictionary size");
40 "optional input size and dictionary size");
36
41
37 PyDoc_STRVAR(get_frame_parameters__doc__,
42 PyDoc_STRVAR(get_frame_parameters__doc__,
38 "get_frame_parameters(data)\n"
43 "get_frame_parameters(data)\n"
39 "\n"
44 "\n"
40 "Obtains a ``FrameParameters`` instance by parsing data.\n");
45 "Obtains a ``FrameParameters`` instance by parsing data.\n");
41
46
42 PyDoc_STRVAR(train_dictionary__doc__,
47 PyDoc_STRVAR(train_dictionary__doc__,
43 "train_dictionary(dict_size, samples)\n"
48 "train_dictionary(dict_size, samples)\n"
44 "\n"
49 "\n"
45 "Train a dictionary from sample data.\n"
50 "Train a dictionary from sample data.\n"
46 "\n"
51 "\n"
47 "A compression dictionary of size ``dict_size`` will be created from the\n"
52 "A compression dictionary of size ``dict_size`` will be created from the\n"
48 "iterable of samples provided by ``samples``.\n"
53 "iterable of samples provided by ``samples``.\n"
49 "\n"
54 "\n"
50 "The raw dictionary content will be returned\n");
55 "The raw dictionary content will be returned\n");
51
56
57 PyDoc_STRVAR(train_cover_dictionary__doc__,
58 "train_cover_dictionary(dict_size, samples, k=None, d=None, notifications=0, dict_id=0, level=0)\n"
59 "\n"
60 "Train a dictionary from sample data using the COVER algorithm.\n"
61 "\n"
62 "This behaves like ``train_dictionary()`` except a different algorithm is\n"
63 "used to create the dictionary. The algorithm has 2 parameters: ``k`` and\n"
64 "``d``. These control the *segment size* and *dmer size*. A reasonable range\n"
65 "for ``k`` is ``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n"
66 "``d`` must be less than or equal to ``k``.\n"
67 );
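As a usage sketch for the docstring above (mirroring the TestTrainCoverDictionary tests earlier in this change; the sample data is illustrative, not meaningful):

import zstd

samples = [b'foo' * 64, b'foobar' * 64] * 128
d = zstd.train_cover_dictionary(8192, samples, k=64, d=16)
assert d.k == 64 and d.d == 16   # the parameters used are exposed on the result

# The trained dictionary plugs into compression contexts like any other.
cctx = zstd.ZstdCompressor(dict_data=d)
frame = cctx.compress(b'foobar' * 64)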
68
52 static char zstd_doc[] = "Interface to zstandard";
69 static char zstd_doc[] = "Interface to zstandard";
53
70
54 static PyMethodDef zstd_methods[] = {
71 static PyMethodDef zstd_methods[] = {
72 /* TODO remove since it is a method on CompressionParameters. */
55 { "estimate_compression_context_size", (PyCFunction)estimate_compression_context_size,
73 { "estimate_compression_context_size", (PyCFunction)estimate_compression_context_size,
56 METH_VARARGS, estimate_compression_context_size__doc__ },
74 METH_VARARGS, estimate_compression_context_size__doc__ },
57 { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
75 { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
58 METH_NOARGS, estimate_decompression_context_size__doc__ },
76 METH_NOARGS, estimate_decompression_context_size__doc__ },
59 { "get_compression_parameters", (PyCFunction)get_compression_parameters,
77 { "get_compression_parameters", (PyCFunction)get_compression_parameters,
60 METH_VARARGS, get_compression_parameters__doc__ },
78 METH_VARARGS, get_compression_parameters__doc__ },
61 { "get_frame_parameters", (PyCFunction)get_frame_parameters,
79 { "get_frame_parameters", (PyCFunction)get_frame_parameters,
62 METH_VARARGS, get_frame_parameters__doc__ },
80 METH_VARARGS, get_frame_parameters__doc__ },
63 { "train_dictionary", (PyCFunction)train_dictionary,
81 { "train_dictionary", (PyCFunction)train_dictionary,
64 METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
82 METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
83 { "train_cover_dictionary", (PyCFunction)train_cover_dictionary,
84 METH_VARARGS | METH_KEYWORDS, train_cover_dictionary__doc__ },
65 { NULL, NULL }
85 { NULL, NULL }
66 };
86 };
67
87
88 void bufferutil_module_init(PyObject* mod);
68 void compressobj_module_init(PyObject* mod);
89 void compressobj_module_init(PyObject* mod);
69 void compressor_module_init(PyObject* mod);
90 void compressor_module_init(PyObject* mod);
70 void compressionparams_module_init(PyObject* mod);
91 void compressionparams_module_init(PyObject* mod);
71 void constants_module_init(PyObject* mod);
92 void constants_module_init(PyObject* mod);
72 void dictparams_module_init(PyObject* mod);
73 void compressiondict_module_init(PyObject* mod);
93 void compressiondict_module_init(PyObject* mod);
74 void compressionwriter_module_init(PyObject* mod);
94 void compressionwriter_module_init(PyObject* mod);
75 void compressoriterator_module_init(PyObject* mod);
95 void compressoriterator_module_init(PyObject* mod);
76 void decompressor_module_init(PyObject* mod);
96 void decompressor_module_init(PyObject* mod);
77 void decompressobj_module_init(PyObject* mod);
97 void decompressobj_module_init(PyObject* mod);
78 void decompressionwriter_module_init(PyObject* mod);
98 void decompressionwriter_module_init(PyObject* mod);
79 void decompressoriterator_module_init(PyObject* mod);
99 void decompressoriterator_module_init(PyObject* mod);
80 void frameparams_module_init(PyObject* mod);
100 void frameparams_module_init(PyObject* mod);
81
101
82 void zstd_module_init(PyObject* m) {
102 void zstd_module_init(PyObject* m) {
83 /* python-zstandard relies on unstable zstd C API features. This means
103 /* python-zstandard relies on unstable zstd C API features. This means
84 that changes in zstd may break expectations in python-zstandard.
104 that changes in zstd may break expectations in python-zstandard.
85
105
86 python-zstandard is distributed with a copy of the zstd sources.
106 python-zstandard is distributed with a copy of the zstd sources.
87 python-zstandard is only guaranteed to work with the bundled version
107 python-zstandard is only guaranteed to work with the bundled version
88 of zstd.
108 of zstd.
89
109
90 However, downstream redistributors or packagers may unbundle zstd
110 However, downstream redistributors or packagers may unbundle zstd
91 from python-zstandard. This can result in a mismatch between zstd
111 from python-zstandard. This can result in a mismatch between zstd
92 versions and API semantics. This essentially "voids the warranty"
112 versions and API semantics. This essentially "voids the warranty"
93 of python-zstandard and may cause undefined behavior.
113 of python-zstandard and may cause undefined behavior.
94
114
95 We detect this mismatch here and refuse to load the module if this
115 We detect this mismatch here and refuse to load the module if this
96 scenario is detected.
116 scenario is detected.
97 */
117 */
98 if (ZSTD_VERSION_NUMBER != 10103 || ZSTD_versionNumber() != 10103) {
118 if (ZSTD_VERSION_NUMBER != 10103 || ZSTD_versionNumber() != 10103) {
99 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
119 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
100 return;
120 return;
101 }
121 }
102
122
123 bufferutil_module_init(m);
103 compressionparams_module_init(m);
124 compressionparams_module_init(m);
104 dictparams_module_init(m);
105 compressiondict_module_init(m);
125 compressiondict_module_init(m);
106 compressobj_module_init(m);
126 compressobj_module_init(m);
107 compressor_module_init(m);
127 compressor_module_init(m);
108 compressionwriter_module_init(m);
128 compressionwriter_module_init(m);
109 compressoriterator_module_init(m);
129 compressoriterator_module_init(m);
110 constants_module_init(m);
130 constants_module_init(m);
111 decompressor_module_init(m);
131 decompressor_module_init(m);
112 decompressobj_module_init(m);
132 decompressobj_module_init(m);
113 decompressionwriter_module_init(m);
133 decompressionwriter_module_init(m);
114 decompressoriterator_module_init(m);
134 decompressoriterator_module_init(m);
115 frameparams_module_init(m);
135 frameparams_module_init(m);
116 }
136 }
117
137
118 #if PY_MAJOR_VERSION >= 3
138 #if PY_MAJOR_VERSION >= 3
119 static struct PyModuleDef zstd_module = {
139 static struct PyModuleDef zstd_module = {
120 PyModuleDef_HEAD_INIT,
140 PyModuleDef_HEAD_INIT,
121 "zstd",
141 "zstd",
122 zstd_doc,
142 zstd_doc,
123 -1,
143 -1,
124 zstd_methods
144 zstd_methods
125 };
145 };
126
146
127 PyMODINIT_FUNC PyInit_zstd(void) {
147 PyMODINIT_FUNC PyInit_zstd(void) {
128 PyObject *m = PyModule_Create(&zstd_module);
148 PyObject *m = PyModule_Create(&zstd_module);
129 if (m) {
149 if (m) {
130 zstd_module_init(m);
150 zstd_module_init(m);
131 if (PyErr_Occurred()) {
151 if (PyErr_Occurred()) {
132 Py_DECREF(m);
152 Py_DECREF(m);
133 m = NULL;
153 m = NULL;
134 }
154 }
135 }
155 }
136 return m;
156 return m;
137 }
157 }
138 #else
158 #else
139 PyMODINIT_FUNC initzstd(void) {
159 PyMODINIT_FUNC initzstd(void) {
140 PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc);
160 PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc);
141 if (m) {
161 if (m) {
142 zstd_module_init(m);
162 zstd_module_init(m);
143 }
163 }
144 }
164 }
145 #endif
165 #endif
166
167 /* Attempt to resolve the number of CPUs in the system. */
168 int cpu_count() {
169 int count = 0;
170
171 #if defined(_WIN32)
172 SYSTEM_INFO si;
173 si.dwNumberOfProcessors = 0;
174 GetSystemInfo(&si);
175 count = si.dwNumberOfProcessors;
176 #elif defined(__APPLE__)
177 int num;
178 size_t size = sizeof(int);
179
180 if (0 == sysctlbyname("hw.logicalcpu", &num, &size, NULL, 0)) {
181 count = num;
182 }
183 #elif defined(__linux__)
184 count = sysconf(_SC_NPROCESSORS_ONLN);
185 #elif defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
186 int mib[2];
187 size_t len = sizeof(count);
188 mib[0] = CTL_HW;
189 mib[1] = HW_NCPU;
190 if (0 != sysctl(mib, 2, &count, &len, NULL, 0)) {
191 count = 0;
192 }
193 #elif defined(__hpux)
194 count = mpctl(MPC_GETNUMSPUS, NULL, NULL);
195 #endif
196
197 return count;
198 }
199
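/* Round up to the next power of two by smearing the highest set bit into all
   lower bits, then adding one. Powers of two map to themselves and 0 maps to 0.
   The shift chain covers 32 bits, so inputs are assumed to fit in 32 bits. */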
200 size_t roundpow2(size_t i) {
201 i--;
202 i |= i >> 1;
203 i |= i >> 2;
204 i |= i >> 4;
205 i |= i >> 8;
206 i |= i >> 16;
207 i++;
208
209 return i;
210 }
@@ -1,1042 +1,1257
1 # Copyright (c) 2016-present, Gregory Szorc
1 # Copyright (c) 2016-present, Gregory Szorc
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # This software may be modified and distributed under the terms
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
5 # of the BSD license. See the LICENSE file for details.
6
6
7 """Python interface to the Zstandard (zstd) compression library."""
7 """Python interface to the Zstandard (zstd) compression library."""
8
8
9 from __future__ import absolute_import, unicode_literals
9 from __future__ import absolute_import, unicode_literals
10
10
11 import os
11 import sys
12 import sys
12
13
13 from _zstd_cffi import (
14 from _zstd_cffi import (
14 ffi,
15 ffi,
15 lib,
16 lib,
16 )
17 )
17
18
18 if sys.version_info[0] == 2:
19 if sys.version_info[0] == 2:
19 bytes_type = str
20 bytes_type = str
20 int_type = long
21 int_type = long
21 else:
22 else:
22 bytes_type = bytes
23 bytes_type = bytes
23 int_type = int
24 int_type = int
24
25
25
26
26 COMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_CStreamInSize()
27 COMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_CStreamInSize()
27 COMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_CStreamOutSize()
28 COMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_CStreamOutSize()
28 DECOMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_DStreamInSize()
29 DECOMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_DStreamInSize()
29 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_DStreamOutSize()
30 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_DStreamOutSize()
30
31
31 new_nonzero = ffi.new_allocator(should_clear_after_alloc=False)
32 new_nonzero = ffi.new_allocator(should_clear_after_alloc=False)
32
33
33
34
34 MAX_COMPRESSION_LEVEL = lib.ZSTD_maxCLevel()
35 MAX_COMPRESSION_LEVEL = lib.ZSTD_maxCLevel()
35 MAGIC_NUMBER = lib.ZSTD_MAGICNUMBER
36 MAGIC_NUMBER = lib.ZSTD_MAGICNUMBER
36 FRAME_HEADER = b'\x28\xb5\x2f\xfd'
37 FRAME_HEADER = b'\x28\xb5\x2f\xfd'
37 ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE)
38 ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE)
38
39
39 WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN
40 WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN
40 WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX
41 WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX
41 CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN
42 CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN
42 CHAINLOG_MAX = lib.ZSTD_CHAINLOG_MAX
43 CHAINLOG_MAX = lib.ZSTD_CHAINLOG_MAX
43 HASHLOG_MIN = lib.ZSTD_HASHLOG_MIN
44 HASHLOG_MIN = lib.ZSTD_HASHLOG_MIN
44 HASHLOG_MAX = lib.ZSTD_HASHLOG_MAX
45 HASHLOG_MAX = lib.ZSTD_HASHLOG_MAX
45 HASHLOG3_MAX = lib.ZSTD_HASHLOG3_MAX
46 HASHLOG3_MAX = lib.ZSTD_HASHLOG3_MAX
46 SEARCHLOG_MIN = lib.ZSTD_SEARCHLOG_MIN
47 SEARCHLOG_MIN = lib.ZSTD_SEARCHLOG_MIN
47 SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX
48 SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX
48 SEARCHLENGTH_MIN = lib.ZSTD_SEARCHLENGTH_MIN
49 SEARCHLENGTH_MIN = lib.ZSTD_SEARCHLENGTH_MIN
49 SEARCHLENGTH_MAX = lib.ZSTD_SEARCHLENGTH_MAX
50 SEARCHLENGTH_MAX = lib.ZSTD_SEARCHLENGTH_MAX
50 TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN
51 TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN
51 TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX
52 TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX
52
53
53 STRATEGY_FAST = lib.ZSTD_fast
54 STRATEGY_FAST = lib.ZSTD_fast
54 STRATEGY_DFAST = lib.ZSTD_dfast
55 STRATEGY_DFAST = lib.ZSTD_dfast
55 STRATEGY_GREEDY = lib.ZSTD_greedy
56 STRATEGY_GREEDY = lib.ZSTD_greedy
56 STRATEGY_LAZY = lib.ZSTD_lazy
57 STRATEGY_LAZY = lib.ZSTD_lazy
57 STRATEGY_LAZY2 = lib.ZSTD_lazy2
58 STRATEGY_LAZY2 = lib.ZSTD_lazy2
58 STRATEGY_BTLAZY2 = lib.ZSTD_btlazy2
59 STRATEGY_BTLAZY2 = lib.ZSTD_btlazy2
59 STRATEGY_BTOPT = lib.ZSTD_btopt
60 STRATEGY_BTOPT = lib.ZSTD_btopt
60
61
61 COMPRESSOBJ_FLUSH_FINISH = 0
62 COMPRESSOBJ_FLUSH_FINISH = 0
62 COMPRESSOBJ_FLUSH_BLOCK = 1
63 COMPRESSOBJ_FLUSH_BLOCK = 1
63
64
64
65
66 def _cpu_count():
67 # os.cpu_count() was introduced in Python 3.4.
68 try:
69 return os.cpu_count() or 0
70 except AttributeError:
71 pass
72
73 # Linux.
74 try:
75 if sys.version_info[0] == 2:
76 return os.sysconf(b'SC_NPROCESSORS_ONLN')
77 else:
78 return os.sysconf(u'SC_NPROCESSORS_ONLN')
79 except (AttributeError, ValueError):
80 pass
81
82 # TODO implement on other platforms.
83 return 0
84
85
65 class ZstdError(Exception):
86 class ZstdError(Exception):
66 pass
87 pass
67
88
68
89
69 class CompressionParameters(object):
90 class CompressionParameters(object):
70 def __init__(self, window_log, chain_log, hash_log, search_log,
91 def __init__(self, window_log, chain_log, hash_log, search_log,
71 search_length, target_length, strategy):
92 search_length, target_length, strategy):
72 if window_log < WINDOWLOG_MIN or window_log > WINDOWLOG_MAX:
93 if window_log < WINDOWLOG_MIN or window_log > WINDOWLOG_MAX:
73 raise ValueError('invalid window log value')
94 raise ValueError('invalid window log value')
74
95
75 if chain_log < CHAINLOG_MIN or chain_log > CHAINLOG_MAX:
96 if chain_log < CHAINLOG_MIN or chain_log > CHAINLOG_MAX:
76 raise ValueError('invalid chain log value')
97 raise ValueError('invalid chain log value')
77
98
78 if hash_log < HASHLOG_MIN or hash_log > HASHLOG_MAX:
99 if hash_log < HASHLOG_MIN or hash_log > HASHLOG_MAX:
79 raise ValueError('invalid hash log value')
100 raise ValueError('invalid hash log value')
80
101
81 if search_log < SEARCHLOG_MIN or search_log > SEARCHLOG_MAX:
102 if search_log < SEARCHLOG_MIN or search_log > SEARCHLOG_MAX:
82 raise ValueError('invalid search log value')
103 raise ValueError('invalid search log value')
83
104
84 if search_length < SEARCHLENGTH_MIN or search_length > SEARCHLENGTH_MAX:
105 if search_length < SEARCHLENGTH_MIN or search_length > SEARCHLENGTH_MAX:
85 raise ValueError('invalid search length value')
106 raise ValueError('invalid search length value')
86
107
87 if target_length < TARGETLENGTH_MIN or target_length > TARGETLENGTH_MAX:
108 if target_length < TARGETLENGTH_MIN or target_length > TARGETLENGTH_MAX:
88 raise ValueError('invalid target length value')
109 raise ValueError('invalid target length value')
89
110
90 if strategy < STRATEGY_FAST or strategy > STRATEGY_BTOPT:
111 if strategy < STRATEGY_FAST or strategy > STRATEGY_BTOPT:
91 raise ValueError('invalid strategy value')
112 raise ValueError('invalid strategy value')
92
113
93 self.window_log = window_log
114 self.window_log = window_log
94 self.chain_log = chain_log
115 self.chain_log = chain_log
95 self.hash_log = hash_log
116 self.hash_log = hash_log
96 self.search_log = search_log
117 self.search_log = search_log
97 self.search_length = search_length
118 self.search_length = search_length
98 self.target_length = target_length
119 self.target_length = target_length
99 self.strategy = strategy
120 self.strategy = strategy
100
121
122 zresult = lib.ZSTD_checkCParams(self.as_compression_parameters())
123 if lib.ZSTD_isError(zresult):
124 raise ValueError('invalid compression parameters: %s' %
125 ffi.string(lib.ZSTD_getErrorName(zresult)))
126
127 def estimated_compression_context_size(self):
128 return lib.ZSTD_estimateCCtxSize(self.as_compression_parameters())
129
101 def as_compression_parameters(self):
130 def as_compression_parameters(self):
102 p = ffi.new('ZSTD_compressionParameters *')[0]
131 p = ffi.new('ZSTD_compressionParameters *')[0]
103 p.windowLog = self.window_log
132 p.windowLog = self.window_log
104 p.chainLog = self.chain_log
133 p.chainLog = self.chain_log
105 p.hashLog = self.hash_log
134 p.hashLog = self.hash_log
106 p.searchLog = self.search_log
135 p.searchLog = self.search_log
107 p.searchLength = self.search_length
136 p.searchLength = self.search_length
108 p.targetLength = self.target_length
137 p.targetLength = self.target_length
109 p.strategy = self.strategy
138 p.strategy = self.strategy
110
139
111 return p
140 return p
112
141
113 def get_compression_parameters(level, source_size=0, dict_size=0):
142 def get_compression_parameters(level, source_size=0, dict_size=0):
114 params = lib.ZSTD_getCParams(level, source_size, dict_size)
143 params = lib.ZSTD_getCParams(level, source_size, dict_size)
115 return CompressionParameters(window_log=params.windowLog,
144 return CompressionParameters(window_log=params.windowLog,
116 chain_log=params.chainLog,
145 chain_log=params.chainLog,
117 hash_log=params.hashLog,
146 hash_log=params.hashLog,
118 search_log=params.searchLog,
147 search_log=params.searchLog,
119 search_length=params.searchLength,
148 search_length=params.searchLength,
120 target_length=params.targetLength,
149 target_length=params.targetLength,
121 strategy=params.strategy)
150 strategy=params.strategy)
122
151
123
152
124 def estimate_compression_context_size(params):
153 def estimate_compression_context_size(params):
125 if not isinstance(params, CompressionParameters):
154 if not isinstance(params, CompressionParameters):
126 raise ValueError('argument must be a CompressionParameters')
155 raise ValueError('argument must be a CompressionParameters')
127
156
128 cparams = params.as_compression_parameters()
157 cparams = params.as_compression_parameters()
129 return lib.ZSTD_estimateCCtxSize(cparams)
158 return lib.ZSTD_estimateCCtxSize(cparams)
130
159
131
160
132 def estimate_decompression_context_size():
161 def estimate_decompression_context_size():
133 return lib.ZSTD_estimateDCtxSize()
162 return lib.ZSTD_estimateDCtxSize()
134
163
135
164
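
A minimal usage sketch for the parameter helpers above, using only names defined in this module (the level and source size are arbitrary examples):

    params = get_compression_parameters(3, source_size=1048576)
    custom = CompressionParameters(params.window_log, params.chain_log,
                                   params.hash_log, params.search_log,
                                   params.search_length, params.target_length,
                                   STRATEGY_LAZY)
    # New in this revision: size estimate straight from the parameters object.
    ctx_size = custom.estimated_compression_context_size()
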
136 class ZstdCompressionWriter(object):
165 class ZstdCompressionWriter(object):
137 def __init__(self, compressor, writer, source_size, write_size):
166 def __init__(self, compressor, writer, source_size, write_size):
138 self._compressor = compressor
167 self._compressor = compressor
139 self._writer = writer
168 self._writer = writer
140 self._source_size = source_size
169 self._source_size = source_size
141 self._write_size = write_size
170 self._write_size = write_size
142 self._entered = False
171 self._entered = False
172 self._mtcctx = compressor._cctx if compressor._multithreaded else None
143
173
144 def __enter__(self):
174 def __enter__(self):
145 if self._entered:
175 if self._entered:
146 raise ZstdError('cannot __enter__ multiple times')
176 raise ZstdError('cannot __enter__ multiple times')
147
177
148 self._cstream = self._compressor._get_cstream(self._source_size)
178 if self._mtcctx:
179 self._compressor._init_mtcstream(self._source_size)
180 else:
181 self._compressor._ensure_cstream(self._source_size)
149 self._entered = True
182 self._entered = True
150 return self
183 return self
151
184
152 def __exit__(self, exc_type, exc_value, exc_tb):
185 def __exit__(self, exc_type, exc_value, exc_tb):
153 self._entered = False
186 self._entered = False
154
187
155 if not exc_type and not exc_value and not exc_tb:
188 if not exc_type and not exc_value and not exc_tb:
156 out_buffer = ffi.new('ZSTD_outBuffer *')
189 out_buffer = ffi.new('ZSTD_outBuffer *')
157 dst_buffer = ffi.new('char[]', self._write_size)
190 dst_buffer = ffi.new('char[]', self._write_size)
158 out_buffer.dst = dst_buffer
191 out_buffer.dst = dst_buffer
159 out_buffer.size = self._write_size
192 out_buffer.size = self._write_size
160 out_buffer.pos = 0
193 out_buffer.pos = 0
161
194
162 while True:
195 while True:
163 zresult = lib.ZSTD_endStream(self._cstream, out_buffer)
196 if self._mtcctx:
197 zresult = lib.ZSTDMT_endStream(self._mtcctx, out_buffer)
198 else:
199 zresult = lib.ZSTD_endStream(self._compressor._cstream, out_buffer)
164 if lib.ZSTD_isError(zresult):
200 if lib.ZSTD_isError(zresult):
165 raise ZstdError('error ending compression stream: %s' %
201 raise ZstdError('error ending compression stream: %s' %
166 ffi.string(lib.ZSTD_getErrorName(zresult)))
202 ffi.string(lib.ZSTD_getErrorName(zresult)))
167
203
168 if out_buffer.pos:
204 if out_buffer.pos:
169 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
205 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
170 out_buffer.pos = 0
206 out_buffer.pos = 0
171
207
172 if zresult == 0:
208 if zresult == 0:
173 break
209 break
174
210
175 self._cstream = None
176 self._compressor = None
211 self._compressor = None
177
212
178 return False
213 return False
179
214
180 def memory_size(self):
215 def memory_size(self):
181 if not self._entered:
216 if not self._entered:
182 raise ZstdError('cannot determine size of an inactive compressor; '
217 raise ZstdError('cannot determine size of an inactive compressor; '
183 'call when a context manager is active')
218 'call when a context manager is active')
184
219
185 return lib.ZSTD_sizeof_CStream(self._cstream)
220 return lib.ZSTD_sizeof_CStream(self._compressor._cstream)
186
221
187 def write(self, data):
222 def write(self, data):
188 if not self._entered:
223 if not self._entered:
189 raise ZstdError('write() must be called from an active context '
224 raise ZstdError('write() must be called from an active context '
190 'manager')
225 'manager')
191
226
192 total_write = 0
227 total_write = 0
193
228
194 data_buffer = ffi.from_buffer(data)
229 data_buffer = ffi.from_buffer(data)
195
230
196 in_buffer = ffi.new('ZSTD_inBuffer *')
231 in_buffer = ffi.new('ZSTD_inBuffer *')
197 in_buffer.src = data_buffer
232 in_buffer.src = data_buffer
198 in_buffer.size = len(data_buffer)
233 in_buffer.size = len(data_buffer)
199 in_buffer.pos = 0
234 in_buffer.pos = 0
200
235
201 out_buffer = ffi.new('ZSTD_outBuffer *')
236 out_buffer = ffi.new('ZSTD_outBuffer *')
202 dst_buffer = ffi.new('char[]', self._write_size)
237 dst_buffer = ffi.new('char[]', self._write_size)
203 out_buffer.dst = dst_buffer
238 out_buffer.dst = dst_buffer
204 out_buffer.size = self._write_size
239 out_buffer.size = self._write_size
205 out_buffer.pos = 0
240 out_buffer.pos = 0
206
241
207 while in_buffer.pos < in_buffer.size:
242 while in_buffer.pos < in_buffer.size:
208 zresult = lib.ZSTD_compressStream(self._cstream, out_buffer, in_buffer)
243 if self._mtcctx:
244 zresult = lib.ZSTDMT_compressStream(self._mtcctx, out_buffer,
245 in_buffer)
246 else:
247 zresult = lib.ZSTD_compressStream(self._compressor._cstream, out_buffer,
248 in_buffer)
209 if lib.ZSTD_isError(zresult):
249 if lib.ZSTD_isError(zresult):
210 raise ZstdError('zstd compress error: %s' %
250 raise ZstdError('zstd compress error: %s' %
211 ffi.string(lib.ZSTD_getErrorName(zresult)))
251 ffi.string(lib.ZSTD_getErrorName(zresult)))
212
252
213 if out_buffer.pos:
253 if out_buffer.pos:
214 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
254 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
215 total_write += out_buffer.pos
255 total_write += out_buffer.pos
216 out_buffer.pos = 0
256 out_buffer.pos = 0
217
257
218 return total_write
258 return total_write
219
259
220 def flush(self):
260 def flush(self):
221 if not self._entered:
261 if not self._entered:
222 raise ZstdError('flush must be called from an active context manager')
262 raise ZstdError('flush must be called from an active context manager')
223
263
224 total_write = 0
264 total_write = 0
225
265
226 out_buffer = ffi.new('ZSTD_outBuffer *')
266 out_buffer = ffi.new('ZSTD_outBuffer *')
227 dst_buffer = ffi.new('char[]', self._write_size)
267 dst_buffer = ffi.new('char[]', self._write_size)
228 out_buffer.dst = dst_buffer
268 out_buffer.dst = dst_buffer
229 out_buffer.size = self._write_size
269 out_buffer.size = self._write_size
230 out_buffer.pos = 0
270 out_buffer.pos = 0
231
271
232 while True:
272 while True:
233 zresult = lib.ZSTD_flushStream(self._cstream, out_buffer)
273 if self._mtcctx:
274 zresult = lib.ZSTDMT_flushStream(self._mtcctx, out_buffer)
275 else:
276 zresult = lib.ZSTD_flushStream(self._compressor._cstream, out_buffer)
234 if lib.ZSTD_isError(zresult):
277 if lib.ZSTD_isError(zresult):
235 raise ZstdError('zstd compress error: %s' %
278 raise ZstdError('zstd compress error: %s' %
236 ffi.string(lib.ZSTD_getErrorName(zresult)))
279 ffi.string(lib.ZSTD_getErrorName(zresult)))
237
280
238 if not out_buffer.pos:
281 if not out_buffer.pos:
239 break
282 break
240
283
241 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
284 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
242 total_write += out_buffer.pos
285 total_write += out_buffer.pos
243 out_buffer.pos = 0
286 out_buffer.pos = 0
244
287
245 return total_write
288 return total_write
246
289
247
290
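
A hedged sketch of driving ZstdCompressionWriter through ZstdCompressor.write_to() (defined later in this file); the output path is illustrative:

    cctx = ZstdCompressor(level=10)
    with open('data.zst', 'wb') as fh:        # illustrative path
        with cctx.write_to(fh) as compressor:
            compressor.write(b'chunk one')
            compressor.write(b'chunk two')
    # Leaving the inner context manager ends the zstd frame and flushes it to fh.
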
248 class ZstdCompressionObj(object):
291 class ZstdCompressionObj(object):
249 def compress(self, data):
292 def compress(self, data):
250 if self._finished:
293 if self._finished:
251 raise ZstdError('cannot call compress() after compressor finished')
294 raise ZstdError('cannot call compress() after compressor finished')
252
295
253 data_buffer = ffi.from_buffer(data)
296 data_buffer = ffi.from_buffer(data)
254 source = ffi.new('ZSTD_inBuffer *')
297 source = ffi.new('ZSTD_inBuffer *')
255 source.src = data_buffer
298 source.src = data_buffer
256 source.size = len(data_buffer)
299 source.size = len(data_buffer)
257 source.pos = 0
300 source.pos = 0
258
301
259 chunks = []
302 chunks = []
260
303
261 while source.pos < len(data):
304 while source.pos < len(data):
262 zresult = lib.ZSTD_compressStream(self._cstream, self._out, source)
305 if self._mtcctx:
306 zresult = lib.ZSTDMT_compressStream(self._mtcctx,
307 self._out, source)
308 else:
309 zresult = lib.ZSTD_compressStream(self._compressor._cstream, self._out,
310 source)
263 if lib.ZSTD_isError(zresult):
311 if lib.ZSTD_isError(zresult):
264 raise ZstdError('zstd compress error: %s' %
312 raise ZstdError('zstd compress error: %s' %
265 ffi.string(lib.ZSTD_getErrorName(zresult)))
313 ffi.string(lib.ZSTD_getErrorName(zresult)))
266
314
267 if self._out.pos:
315 if self._out.pos:
268 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
316 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
269 self._out.pos = 0
317 self._out.pos = 0
270
318
271 return b''.join(chunks)
319 return b''.join(chunks)
272
320
273 def flush(self, flush_mode=COMPRESSOBJ_FLUSH_FINISH):
321 def flush(self, flush_mode=COMPRESSOBJ_FLUSH_FINISH):
274 if flush_mode not in (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK):
322 if flush_mode not in (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK):
275 raise ValueError('flush mode not recognized')
323 raise ValueError('flush mode not recognized')
276
324
277 if self._finished:
325 if self._finished:
278 raise ZstdError('compressor object already finished')
326 raise ZstdError('compressor object already finished')
279
327
280 assert self._out.pos == 0
328 assert self._out.pos == 0
281
329
282 if flush_mode == COMPRESSOBJ_FLUSH_BLOCK:
330 if flush_mode == COMPRESSOBJ_FLUSH_BLOCK:
283 zresult = lib.ZSTD_flushStream(self._cstream, self._out)
331 if self._mtcctx:
332 zresult = lib.ZSTDMT_flushStream(self._mtcctx, self._out)
333 else:
334 zresult = lib.ZSTD_flushStream(self._compressor._cstream, self._out)
284 if lib.ZSTD_isError(zresult):
335 if lib.ZSTD_isError(zresult):
285 raise ZstdError('zstd compress error: %s' %
336 raise ZstdError('zstd compress error: %s' %
286 ffi.string(lib.ZSTD_getErrorName(zresult)))
337 ffi.string(lib.ZSTD_getErrorName(zresult)))
287
338
288 # Output buffer is guaranteed to hold full block.
339 # Output buffer is guaranteed to hold full block.
289 assert zresult == 0
340 assert zresult == 0
290
341
291 if self._out.pos:
342 if self._out.pos:
292 result = ffi.buffer(self._out.dst, self._out.pos)[:]
343 result = ffi.buffer(self._out.dst, self._out.pos)[:]
293 self._out.pos = 0
344 self._out.pos = 0
294 return result
345 return result
295 else:
346 else:
296 return b''
347 return b''
297
348
298 assert flush_mode == COMPRESSOBJ_FLUSH_FINISH
349 assert flush_mode == COMPRESSOBJ_FLUSH_FINISH
299 self._finished = True
350 self._finished = True
300
351
301 chunks = []
352 chunks = []
302
353
303 while True:
354 while True:
304 zresult = lib.ZSTD_endStream(self._cstream, self._out)
355 if self._mtcctx:
356 zresult = lib.ZSTDMT_endStream(self._mtcctx, self._out)
357 else:
358 zresult = lib.ZSTD_endStream(self._compressor._cstream, self._out)
305 if lib.ZSTD_isError(zresult):
359 if lib.ZSTD_isError(zresult):
306 raise ZstdError('error ending compression stream: %s' %
360 raise ZstdError('error ending compression stream: %s' %
307 ffi.string(lib.ZSTD_getErrorName(zresult)))
361 ffi.string(lib.ZSTD_getErrorName(zresult)))
308
362
309 if self._out.pos:
363 if self._out.pos:
310 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
364 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
311 self._out.pos = 0
365 self._out.pos = 0
312
366
313 if not zresult:
367 if not zresult:
314 break
368 break
315
369
316 # GC compression stream immediately.
317 self._cstream = None
318
319 return b''.join(chunks)
370 return b''.join(chunks)
320
371
321
372
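
For reference, a sketch of the compressobj()-style streaming API implemented above, assuming a ZstdCompressor as defined later in this file:

    cctx = ZstdCompressor()
    cobj = cctx.compressobj()
    chunks = [cobj.compress(b'some data'), cobj.compress(b'more data')]
    chunks.append(cobj.flush(COMPRESSOBJ_FLUSH_BLOCK))  # emit a full block, keep the stream open
    chunks.append(cobj.flush())                         # COMPRESSOBJ_FLUSH_FINISH ends the frame
    frame = b''.join(chunks)
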
322 class ZstdCompressor(object):
373 class ZstdCompressor(object):
323 def __init__(self, level=3, dict_data=None, compression_params=None,
374 def __init__(self, level=3, dict_data=None, compression_params=None,
324 write_checksum=False, write_content_size=False,
375 write_checksum=False, write_content_size=False,
325 write_dict_id=True):
376 write_dict_id=True, threads=0):
326 if level < 1:
377 if level < 1:
327 raise ValueError('level must be greater than 0')
378 raise ValueError('level must be greater than 0')
328 elif level > lib.ZSTD_maxCLevel():
379 elif level > lib.ZSTD_maxCLevel():
329 raise ValueError('level must be less than %d' % lib.ZSTD_maxCLevel())
380 raise ValueError('level must be less than %d' % lib.ZSTD_maxCLevel())
330
381
382 if threads < 0:
383 threads = _cpu_count()
384
331 self._compression_level = level
385 self._compression_level = level
332 self._dict_data = dict_data
386 self._dict_data = dict_data
333 self._cparams = compression_params
387 self._cparams = compression_params
334 self._fparams = ffi.new('ZSTD_frameParameters *')[0]
388 self._fparams = ffi.new('ZSTD_frameParameters *')[0]
335 self._fparams.checksumFlag = write_checksum
389 self._fparams.checksumFlag = write_checksum
336 self._fparams.contentSizeFlag = write_content_size
390 self._fparams.contentSizeFlag = write_content_size
337 self._fparams.noDictIDFlag = not write_dict_id
391 self._fparams.noDictIDFlag = not write_dict_id
338
392
339 cctx = lib.ZSTD_createCCtx()
393 if threads:
340 if cctx == ffi.NULL:
394 cctx = lib.ZSTDMT_createCCtx(threads)
341 raise MemoryError()
395 if cctx == ffi.NULL:
396 raise MemoryError()
342
397
343 self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx)
398 self._cctx = ffi.gc(cctx, lib.ZSTDMT_freeCCtx)
399 self._multithreaded = True
400 else:
401 cctx = lib.ZSTD_createCCtx()
402 if cctx == ffi.NULL:
403 raise MemoryError()
404
405 self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx)
406 self._multithreaded = False
407
408 self._cstream = None
344
409
345 def compress(self, data, allow_empty=False):
410 def compress(self, data, allow_empty=False):
346 if len(data) == 0 and self._fparams.contentSizeFlag and not allow_empty:
411 if len(data) == 0 and self._fparams.contentSizeFlag and not allow_empty:
347 raise ValueError('cannot write empty inputs when writing content sizes')
412 raise ValueError('cannot write empty inputs when writing content sizes')
348
413
414 if self._multithreaded and self._dict_data:
415 raise ZstdError('compress() cannot be used with both dictionaries and multi-threaded compression')
416
417 if self._multithreaded and self._cparams:
418 raise ZstdError('compress() cannot be used with both compression parameters and multi-threaded compression')
419
349 # TODO use a CDict for performance.
420 # TODO use a CDict for performance.
350 dict_data = ffi.NULL
421 dict_data = ffi.NULL
351 dict_size = 0
422 dict_size = 0
352
423
353 if self._dict_data:
424 if self._dict_data:
354 dict_data = self._dict_data.as_bytes()
425 dict_data = self._dict_data.as_bytes()
355 dict_size = len(self._dict_data)
426 dict_size = len(self._dict_data)
356
427
357 params = ffi.new('ZSTD_parameters *')[0]
428 params = ffi.new('ZSTD_parameters *')[0]
358 if self._cparams:
429 if self._cparams:
359 params.cParams = self._cparams.as_compression_parameters()
430 params.cParams = self._cparams.as_compression_parameters()
360 else:
431 else:
361 params.cParams = lib.ZSTD_getCParams(self._compression_level, len(data),
432 params.cParams = lib.ZSTD_getCParams(self._compression_level, len(data),
362 dict_size)
433 dict_size)
363 params.fParams = self._fparams
434 params.fParams = self._fparams
364
435
365 dest_size = lib.ZSTD_compressBound(len(data))
436 dest_size = lib.ZSTD_compressBound(len(data))
366 out = new_nonzero('char[]', dest_size)
437 out = new_nonzero('char[]', dest_size)
367
438
368 zresult = lib.ZSTD_compress_advanced(self._cctx,
439 if self._multithreaded:
369 ffi.addressof(out), dest_size,
440 zresult = lib.ZSTDMT_compressCCtx(self._cctx,
370 data, len(data),
441 ffi.addressof(out), dest_size,
371 dict_data, dict_size,
442 data, len(data),
372 params)
443 self._compression_level)
444 else:
445 zresult = lib.ZSTD_compress_advanced(self._cctx,
446 ffi.addressof(out), dest_size,
447 data, len(data),
448 dict_data, dict_size,
449 params)
373
450
374 if lib.ZSTD_isError(zresult):
451 if lib.ZSTD_isError(zresult):
375 raise ZstdError('cannot compress: %s' %
452 raise ZstdError('cannot compress: %s' %
376 ffi.string(lib.ZSTD_getErrorName(zresult)))
453 ffi.string(lib.ZSTD_getErrorName(zresult)))
377
454
378 return ffi.buffer(out, zresult)[:]
455 return ffi.buffer(out, zresult)[:]
379
456
380 def compressobj(self, size=0):
457 def compressobj(self, size=0):
381 cstream = self._get_cstream(size)
458 if self._multithreaded:
459 self._init_mtcstream(size)
460 else:
461 self._ensure_cstream(size)
462
382 cobj = ZstdCompressionObj()
463 cobj = ZstdCompressionObj()
383 cobj._cstream = cstream
384 cobj._out = ffi.new('ZSTD_outBuffer *')
464 cobj._out = ffi.new('ZSTD_outBuffer *')
385 cobj._dst_buffer = ffi.new('char[]', COMPRESSION_RECOMMENDED_OUTPUT_SIZE)
465 cobj._dst_buffer = ffi.new('char[]', COMPRESSION_RECOMMENDED_OUTPUT_SIZE)
386 cobj._out.dst = cobj._dst_buffer
466 cobj._out.dst = cobj._dst_buffer
387 cobj._out.size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE
467 cobj._out.size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE
388 cobj._out.pos = 0
468 cobj._out.pos = 0
389 cobj._compressor = self
469 cobj._compressor = self
390 cobj._finished = False
470 cobj._finished = False
391
471
472 if self._multithreaded:
473 cobj._mtcctx = self._cctx
474 else:
475 cobj._mtcctx = None
476
392 return cobj
477 return cobj
393
478
394 def copy_stream(self, ifh, ofh, size=0,
479 def copy_stream(self, ifh, ofh, size=0,
395 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
480 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
396 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
481 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
397
482
398 if not hasattr(ifh, 'read'):
483 if not hasattr(ifh, 'read'):
399 raise ValueError('first argument must have a read() method')
484 raise ValueError('first argument must have a read() method')
400 if not hasattr(ofh, 'write'):
485 if not hasattr(ofh, 'write'):
401 raise ValueError('second argument must have a write() method')
486 raise ValueError('second argument must have a write() method')
402
487
403 cstream = self._get_cstream(size)
488 mt = self._multithreaded
489 if mt:
490 self._init_mtcstream(size)
491 else:
492 self._ensure_cstream(size)
404
493
405 in_buffer = ffi.new('ZSTD_inBuffer *')
494 in_buffer = ffi.new('ZSTD_inBuffer *')
406 out_buffer = ffi.new('ZSTD_outBuffer *')
495 out_buffer = ffi.new('ZSTD_outBuffer *')
407
496
408 dst_buffer = ffi.new('char[]', write_size)
497 dst_buffer = ffi.new('char[]', write_size)
409 out_buffer.dst = dst_buffer
498 out_buffer.dst = dst_buffer
410 out_buffer.size = write_size
499 out_buffer.size = write_size
411 out_buffer.pos = 0
500 out_buffer.pos = 0
412
501
413 total_read, total_write = 0, 0
502 total_read, total_write = 0, 0
414
503
415 while True:
504 while True:
416 data = ifh.read(read_size)
505 data = ifh.read(read_size)
417 if not data:
506 if not data:
418 break
507 break
419
508
420 data_buffer = ffi.from_buffer(data)
509 data_buffer = ffi.from_buffer(data)
421 total_read += len(data_buffer)
510 total_read += len(data_buffer)
422 in_buffer.src = data_buffer
511 in_buffer.src = data_buffer
423 in_buffer.size = len(data_buffer)
512 in_buffer.size = len(data_buffer)
424 in_buffer.pos = 0
513 in_buffer.pos = 0
425
514
426 while in_buffer.pos < in_buffer.size:
515 while in_buffer.pos < in_buffer.size:
427 zresult = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer)
516 if mt:
517 zresult = lib.ZSTDMT_compressStream(self._cctx, out_buffer, in_buffer)
518 else:
519 zresult = lib.ZSTD_compressStream(self._cstream,
520 out_buffer, in_buffer)
428 if lib.ZSTD_isError(zresult):
521 if lib.ZSTD_isError(zresult):
429 raise ZstdError('zstd compress error: %s' %
522 raise ZstdError('zstd compress error: %s' %
430 ffi.string(lib.ZSTD_getErrorName(zresult)))
523 ffi.string(lib.ZSTD_getErrorName(zresult)))
431
524
432 if out_buffer.pos:
525 if out_buffer.pos:
433 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
526 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
434 total_write += out_buffer.pos
527 total_write += out_buffer.pos
435 out_buffer.pos = 0
528 out_buffer.pos = 0
436
529
437 # We've finished reading. Flush the compressor.
530 # We've finished reading. Flush the compressor.
438 while True:
531 while True:
439 zresult = lib.ZSTD_endStream(cstream, out_buffer)
532 if mt:
533 zresult = lib.ZSTDMT_endStream(self._cctx, out_buffer)
534 else:
535 zresult = lib.ZSTD_endStream(self._cstream, out_buffer)
440 if lib.ZSTD_isError(zresult):
536 if lib.ZSTD_isError(zresult):
441 raise ZstdError('error ending compression stream: %s' %
537 raise ZstdError('error ending compression stream: %s' %
442 ffi.string(lib.ZSTD_getErrorName(zresult)))
538 ffi.string(lib.ZSTD_getErrorName(zresult)))
443
539
444 if out_buffer.pos:
540 if out_buffer.pos:
445 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
541 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
446 total_write += out_buffer.pos
542 total_write += out_buffer.pos
447 out_buffer.pos = 0
543 out_buffer.pos = 0
448
544
449 if zresult == 0:
545 if zresult == 0:
450 break
546 break
451
547
452 return total_read, total_write
548 return total_read, total_write
453
549
454 def write_to(self, writer, size=0,
550 def write_to(self, writer, size=0,
455 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
551 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
456
552
457 if not hasattr(writer, 'write'):
553 if not hasattr(writer, 'write'):
458 raise ValueError('must pass an object with a write() method')
554 raise ValueError('must pass an object with a write() method')
459
555
460 return ZstdCompressionWriter(self, writer, size, write_size)
556 return ZstdCompressionWriter(self, writer, size, write_size)
461
557
462 def read_from(self, reader, size=0,
558 def read_from(self, reader, size=0,
463 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
559 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
464 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
560 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
465 if hasattr(reader, 'read'):
561 if hasattr(reader, 'read'):
466 have_read = True
562 have_read = True
467 elif hasattr(reader, '__getitem__'):
563 elif hasattr(reader, '__getitem__'):
468 have_read = False
564 have_read = False
469 buffer_offset = 0
565 buffer_offset = 0
470 size = len(reader)
566 size = len(reader)
471 else:
567 else:
472 raise ValueError('must pass an object with a read() method or '
568 raise ValueError('must pass an object with a read() method or '
473 'an object conforming to the buffer protocol')
569 'an object conforming to the buffer protocol')
474
570
475 cstream = self._get_cstream(size)
571 if self._multithreaded:
572 self._init_mtcstream(size)
573 else:
574 self._ensure_cstream(size)
476
575
477 in_buffer = ffi.new('ZSTD_inBuffer *')
576 in_buffer = ffi.new('ZSTD_inBuffer *')
478 out_buffer = ffi.new('ZSTD_outBuffer *')
577 out_buffer = ffi.new('ZSTD_outBuffer *')
479
578
480 in_buffer.src = ffi.NULL
579 in_buffer.src = ffi.NULL
481 in_buffer.size = 0
580 in_buffer.size = 0
482 in_buffer.pos = 0
581 in_buffer.pos = 0
483
582
484 dst_buffer = ffi.new('char[]', write_size)
583 dst_buffer = ffi.new('char[]', write_size)
485 out_buffer.dst = dst_buffer
584 out_buffer.dst = dst_buffer
486 out_buffer.size = write_size
585 out_buffer.size = write_size
487 out_buffer.pos = 0
586 out_buffer.pos = 0
488
587
489 while True:
588 while True:
490 # We should never have output data sitting around after a previous
589 # We should never have output data sitting around after a previous
491 # iteration.
590 # iteration.
492 assert out_buffer.pos == 0
591 assert out_buffer.pos == 0
493
592
494 # Collect input data.
593 # Collect input data.
495 if have_read:
594 if have_read:
496 read_result = reader.read(read_size)
595 read_result = reader.read(read_size)
497 else:
596 else:
498 remaining = len(reader) - buffer_offset
597 remaining = len(reader) - buffer_offset
499 slice_size = min(remaining, read_size)
598 slice_size = min(remaining, read_size)
500 read_result = reader[buffer_offset:buffer_offset + slice_size]
599 read_result = reader[buffer_offset:buffer_offset + slice_size]
501 buffer_offset += slice_size
600 buffer_offset += slice_size
502
601
503 # No new input data. Break out of the read loop.
602 # No new input data. Break out of the read loop.
504 if not read_result:
603 if not read_result:
505 break
604 break
506
605
507 # Feed all read data into the compressor and emit output until
606 # Feed all read data into the compressor and emit output until
508 # exhausted.
607 # exhausted.
509 read_buffer = ffi.from_buffer(read_result)
608 read_buffer = ffi.from_buffer(read_result)
510 in_buffer.src = read_buffer
609 in_buffer.src = read_buffer
511 in_buffer.size = len(read_buffer)
610 in_buffer.size = len(read_buffer)
512 in_buffer.pos = 0
611 in_buffer.pos = 0
513
612
514 while in_buffer.pos < in_buffer.size:
613 while in_buffer.pos < in_buffer.size:
515 zresult = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer)
614 if self._multithreaded:
615 zresult = lib.ZSTDMT_compressStream(self._cctx, out_buffer, in_buffer)
616 else:
617 zresult = lib.ZSTD_compressStream(self._cstream, out_buffer, in_buffer)
516 if lib.ZSTD_isError(zresult):
618 if lib.ZSTD_isError(zresult):
517 raise ZstdError('zstd compress error: %s' %
619 raise ZstdError('zstd compress error: %s' %
518 ffi.string(lib.ZSTD_getErrorName(zresult)))
620 ffi.string(lib.ZSTD_getErrorName(zresult)))
519
621
520 if out_buffer.pos:
622 if out_buffer.pos:
521 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
623 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
522 out_buffer.pos = 0
624 out_buffer.pos = 0
523 yield data
625 yield data
524
626
525 assert out_buffer.pos == 0
627 assert out_buffer.pos == 0
526
628
527 # And repeat the loop to collect more data.
629 # And repeat the loop to collect more data.
528 continue
630 continue
529
631
530 # If we get here, input is exhausted. End the stream and emit what
632 # If we get here, input is exhausted. End the stream and emit what
531 # remains.
633 # remains.
532 while True:
634 while True:
533 assert out_buffer.pos == 0
635 assert out_buffer.pos == 0
534 zresult = lib.ZSTD_endStream(cstream, out_buffer)
636 if self._multithreaded:
637 zresult = lib.ZSTDMT_endStream(self._cctx, out_buffer)
638 else:
639 zresult = lib.ZSTD_endStream(self._cstream, out_buffer)
535 if lib.ZSTD_isError(zresult):
640 if lib.ZSTD_isError(zresult):
536 raise ZstdError('error ending compression stream: %s' %
641 raise ZstdError('error ending compression stream: %s' %
537 ffi.string(lib.ZSTD_getErrorName(zresult)))
642 ffi.string(lib.ZSTD_getErrorName(zresult)))
538
643
539 if out_buffer.pos:
644 if out_buffer.pos:
540 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
645 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
541 out_buffer.pos = 0
646 out_buffer.pos = 0
542 yield data
647 yield data
543
648
544 if zresult == 0:
649 if zresult == 0:
545 break
650 break
546
651
547 def _get_cstream(self, size):
652 def _ensure_cstream(self, size):
653 if self._cstream:
654 zresult = lib.ZSTD_resetCStream(self._cstream, size)
655 if lib.ZSTD_isError(zresult):
656 raise ZstdError('could not reset CStream: %s' %
657 ffi.string(lib.ZSTD_getErrorName(zresult)))
658
659 return
660
548 cstream = lib.ZSTD_createCStream()
661 cstream = lib.ZSTD_createCStream()
549 if cstream == ffi.NULL:
662 if cstream == ffi.NULL:
550 raise MemoryError()
663 raise MemoryError()
551
664
552 cstream = ffi.gc(cstream, lib.ZSTD_freeCStream)
665 cstream = ffi.gc(cstream, lib.ZSTD_freeCStream)
553
666
554 dict_data = ffi.NULL
667 dict_data = ffi.NULL
555 dict_size = 0
668 dict_size = 0
556 if self._dict_data:
669 if self._dict_data:
557 dict_data = self._dict_data.as_bytes()
670 dict_data = self._dict_data.as_bytes()
558 dict_size = len(self._dict_data)
671 dict_size = len(self._dict_data)
559
672
560 zparams = ffi.new('ZSTD_parameters *')[0]
673 zparams = ffi.new('ZSTD_parameters *')[0]
561 if self._cparams:
674 if self._cparams:
562 zparams.cParams = self._cparams.as_compression_parameters()
675 zparams.cParams = self._cparams.as_compression_parameters()
563 else:
676 else:
564 zparams.cParams = lib.ZSTD_getCParams(self._compression_level,
677 zparams.cParams = lib.ZSTD_getCParams(self._compression_level,
565 size, dict_size)
678 size, dict_size)
566 zparams.fParams = self._fparams
679 zparams.fParams = self._fparams
567
680
568 zresult = lib.ZSTD_initCStream_advanced(cstream, dict_data, dict_size,
681 zresult = lib.ZSTD_initCStream_advanced(cstream, dict_data, dict_size,
569 zparams, size)
682 zparams, size)
570 if lib.ZSTD_isError(zresult):
683 if lib.ZSTD_isError(zresult):
571 raise ZstdError('cannot init CStream: %s' %
684 raise ZstdError('cannot init CStream: %s' %
572 ffi.string(lib.ZSTD_getErrorName(zresult)))
685 ffi.string(lib.ZSTD_getErrorName(zresult)))
573
686
574 return cstream
687 self._cstream = cstream
688
689 def _init_mtcstream(self, size):
690 assert self._multithreaded
691
692 dict_data = ffi.NULL
693 dict_size = 0
694 if self._dict_data:
695 dict_data = self._dict_data.as_bytes()
696 dict_size = len(self._dict_data)
697
698 zparams = ffi.new('ZSTD_parameters *')[0]
699 if self._cparams:
700 zparams.cParams = self._cparams.as_compression_parameters()
701 else:
702 zparams.cParams = lib.ZSTD_getCParams(self._compression_level,
703 size, dict_size)
704
705 zparams.fParams = self._fparams
706
707 zresult = lib.ZSTDMT_initCStream_advanced(self._cctx, dict_data, dict_size,
708 zparams, size)
709
710 if lib.ZSTD_isError(zresult):
711 raise ZstdError('cannot init CStream: %s' %
712 ffi.string(lib.ZSTD_getErrorName(zresult)))
575
713
576
714
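
A sketch of one-shot compression with the class above, including the new multi-threaded path (threads=-1 resolves to _cpu_count(); the guards in compress() disallow combining it with dict_data or compression_params):

    cctx = ZstdCompressor(level=5, write_checksum=True)
    frame = cctx.compress(b'data to compress')

    mt_cctx = ZstdCompressor(level=5, threads=-1)   # one worker per detected CPU
    mt_frame = mt_cctx.compress(b'data to compress')
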
577 class FrameParameters(object):
715 class FrameParameters(object):
578 def __init__(self, fparams):
716 def __init__(self, fparams):
579 self.content_size = fparams.frameContentSize
717 self.content_size = fparams.frameContentSize
580 self.window_size = fparams.windowSize
718 self.window_size = fparams.windowSize
581 self.dict_id = fparams.dictID
719 self.dict_id = fparams.dictID
582 self.has_checksum = bool(fparams.checksumFlag)
720 self.has_checksum = bool(fparams.checksumFlag)
583
721
584
722
585 def get_frame_parameters(data):
723 def get_frame_parameters(data):
586 if not isinstance(data, bytes_type):
724 if not isinstance(data, bytes_type):
587 raise TypeError('argument must be bytes')
725 raise TypeError('argument must be bytes')
588
726
589 params = ffi.new('ZSTD_frameParams *')
727 params = ffi.new('ZSTD_frameParams *')
590
728
591 zresult = lib.ZSTD_getFrameParams(params, data, len(data))
729 zresult = lib.ZSTD_getFrameParams(params, data, len(data))
592 if lib.ZSTD_isError(zresult):
730 if lib.ZSTD_isError(zresult):
593 raise ZstdError('cannot get frame parameters: %s' %
731 raise ZstdError('cannot get frame parameters: %s' %
594 ffi.string(lib.ZSTD_getErrorName(zresult)))
732 ffi.string(lib.ZSTD_getErrorName(zresult)))
595
733
596 if zresult:
734 if zresult:
597 raise ZstdError('not enough data for frame parameters; need %d bytes' %
735 raise ZstdError('not enough data for frame parameters; need %d bytes' %
598 zresult)
736 zresult)
599
737
600 return FrameParameters(params[0])
738 return FrameParameters(params[0])
601
739
602
740
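
A short sketch tying get_frame_parameters() to a frame produced as above (the values are whatever the frame header records):

    cctx = ZstdCompressor(write_checksum=True, write_content_size=True)
    frame = cctx.compress(b'hello world')
    fparams = get_frame_parameters(frame)
    # fparams.content_size, fparams.window_size, fparams.dict_id, fparams.has_checksum
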
603 class ZstdCompressionDict(object):
741 class ZstdCompressionDict(object):
604 def __init__(self, data):
742 def __init__(self, data, k=0, d=0):
605 assert isinstance(data, bytes_type)
743 assert isinstance(data, bytes_type)
606 self._data = data
744 self._data = data
745 self.k = k
746 self.d = d
607
747
608 def __len__(self):
748 def __len__(self):
609 return len(self._data)
749 return len(self._data)
610
750
611 def dict_id(self):
751 def dict_id(self):
612 return int_type(lib.ZDICT_getDictID(self._data, len(self._data)))
752 return int_type(lib.ZDICT_getDictID(self._data, len(self._data)))
613
753
614 def as_bytes(self):
754 def as_bytes(self):
615 return self._data
755 return self._data
616
756
617
757
618 def train_dictionary(dict_size, samples, parameters=None):
758 def train_dictionary(dict_size, samples, selectivity=0, level=0,
759 notifications=0, dict_id=0):
619 if not isinstance(samples, list):
760 if not isinstance(samples, list):
620 raise TypeError('samples must be a list')
761 raise TypeError('samples must be a list')
621
762
622 total_size = sum(map(len, samples))
763 total_size = sum(map(len, samples))
623
764
624 samples_buffer = new_nonzero('char[]', total_size)
765 samples_buffer = new_nonzero('char[]', total_size)
625 sample_sizes = new_nonzero('size_t[]', len(samples))
766 sample_sizes = new_nonzero('size_t[]', len(samples))
626
767
627 offset = 0
768 offset = 0
628 for i, sample in enumerate(samples):
769 for i, sample in enumerate(samples):
629 if not isinstance(sample, bytes_type):
770 if not isinstance(sample, bytes_type):
630 raise ValueError('samples must be bytes')
771 raise ValueError('samples must be bytes')
631
772
632 l = len(sample)
773 l = len(sample)
633 ffi.memmove(samples_buffer + offset, sample, l)
774 ffi.memmove(samples_buffer + offset, sample, l)
634 offset += l
775 offset += l
635 sample_sizes[i] = l
776 sample_sizes[i] = l
636
777
637 dict_data = new_nonzero('char[]', dict_size)
778 dict_data = new_nonzero('char[]', dict_size)
638
779
639 zresult = lib.ZDICT_trainFromBuffer(ffi.addressof(dict_data), dict_size,
780 dparams = ffi.new('ZDICT_params_t *')[0]
640 ffi.addressof(samples_buffer),
781 dparams.selectivityLevel = selectivity
641 ffi.addressof(sample_sizes, 0),
782 dparams.compressionLevel = level
642 len(samples))
783 dparams.notificationLevel = notifications
784 dparams.dictID = dict_id
785
786 zresult = lib.ZDICT_trainFromBuffer_advanced(
787 ffi.addressof(dict_data), dict_size,
788 ffi.addressof(samples_buffer),
789 ffi.addressof(sample_sizes, 0), len(samples),
790 dparams)
791
643 if lib.ZDICT_isError(zresult):
792 if lib.ZDICT_isError(zresult):
644 raise ZstdError('Cannot train dict: %s' %
793 raise ZstdError('Cannot train dict: %s' %
645 ffi.string(lib.ZDICT_getErrorName(zresult)))
794 ffi.string(lib.ZDICT_getErrorName(zresult)))
646
795
647 return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:])
796 return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:])
648
797
649
798
799 def train_cover_dictionary(dict_size, samples, k=0, d=0,
800 notifications=0, dict_id=0, level=0, optimize=False,
801 steps=0, threads=0):
802 if not isinstance(samples, list):
803 raise TypeError('samples must be a list')
804
805 if threads < 0:
806 threads = _cpu_count()
807
808 total_size = sum(map(len, samples))
809
810 samples_buffer = new_nonzero('char[]', total_size)
811 sample_sizes = new_nonzero('size_t[]', len(samples))
812
813 offset = 0
814 for i, sample in enumerate(samples):
815 if not isinstance(sample, bytes_type):
816 raise ValueError('samples must be bytes')
817
818 l = len(sample)
819 ffi.memmove(samples_buffer + offset, sample, l)
820 offset += l
821 sample_sizes[i] = l
822
823 dict_data = new_nonzero('char[]', dict_size)
824
825 dparams = ffi.new('COVER_params_t *')[0]
826 dparams.k = k
827 dparams.d = d
828 dparams.steps = steps
829 dparams.nbThreads = threads
830 dparams.notificationLevel = notifications
831 dparams.dictID = dict_id
832 dparams.compressionLevel = level
833
834 if optimize:
835 zresult = lib.COVER_optimizeTrainFromBuffer(
836 ffi.addressof(dict_data), dict_size,
837 ffi.addressof(samples_buffer),
838 ffi.addressof(sample_sizes, 0), len(samples),
839 ffi.addressof(dparams))
840 else:
841 zresult = lib.COVER_trainFromBuffer(
842 ffi.addressof(dict_data), dict_size,
843 ffi.addressof(samples_buffer),
844 ffi.addressof(sample_sizes, 0), len(samples),
845 dparams)
846
847 if lib.ZDICT_isError(zresult):
848 raise ZstdError('cannot train dict: %s' %
849 ffi.string(lib.ZDICT_getErrorName(zresult)))
850
851 return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:],
852 k=dparams.k, d=dparams.d)
853
854
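
A hedged sketch of the two dictionary trainers above; the corpus here is a toy, and real training wants many varied samples (degenerate input can make ZDICT report an error):

    samples = [('sample content %d' % i).encode('ascii') * 16 for i in range(1000)]
    plain_dict = train_dictionary(16384, samples)
    cover_dict = train_cover_dictionary(16384, samples, k=64, d=8)
    cctx = ZstdCompressor(dict_data=plain_dict)
    compressed = cctx.compress(b'sample content 42')
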
650 class ZstdDecompressionObj(object):
855 class ZstdDecompressionObj(object):
651 def __init__(self, decompressor):
856 def __init__(self, decompressor):
652 self._decompressor = decompressor
857 self._decompressor = decompressor
653 self._dstream = self._decompressor._get_dstream()
654 self._finished = False
858 self._finished = False
655
859
656 def decompress(self, data):
860 def decompress(self, data):
657 if self._finished:
861 if self._finished:
658 raise ZstdError('cannot use a decompressobj multiple times')
862 raise ZstdError('cannot use a decompressobj multiple times')
659
863
864 assert(self._decompressor._dstream)
865
660 in_buffer = ffi.new('ZSTD_inBuffer *')
866 in_buffer = ffi.new('ZSTD_inBuffer *')
661 out_buffer = ffi.new('ZSTD_outBuffer *')
867 out_buffer = ffi.new('ZSTD_outBuffer *')
662
868
663 data_buffer = ffi.from_buffer(data)
869 data_buffer = ffi.from_buffer(data)
664 in_buffer.src = data_buffer
870 in_buffer.src = data_buffer
665 in_buffer.size = len(data_buffer)
871 in_buffer.size = len(data_buffer)
666 in_buffer.pos = 0
872 in_buffer.pos = 0
667
873
668 dst_buffer = ffi.new('char[]', DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
874 dst_buffer = ffi.new('char[]', DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
669 out_buffer.dst = dst_buffer
875 out_buffer.dst = dst_buffer
670 out_buffer.size = len(dst_buffer)
876 out_buffer.size = len(dst_buffer)
671 out_buffer.pos = 0
877 out_buffer.pos = 0
672
878
673 chunks = []
879 chunks = []
674
880
675 while in_buffer.pos < in_buffer.size:
881 while in_buffer.pos < in_buffer.size:
676 zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
882 zresult = lib.ZSTD_decompressStream(self._decompressor._dstream,
883 out_buffer, in_buffer)
677 if lib.ZSTD_isError(zresult):
884 if lib.ZSTD_isError(zresult):
678 raise ZstdError('zstd decompressor error: %s' %
885 raise ZstdError('zstd decompressor error: %s' %
679 ffi.string(lib.ZSTD_getErrorName(zresult)))
886 ffi.string(lib.ZSTD_getErrorName(zresult)))
680
887
681 if zresult == 0:
888 if zresult == 0:
682 self._finished = True
889 self._finished = True
683 self._dstream = None
684 self._decompressor = None
890 self._decompressor = None
685
891
686 if out_buffer.pos:
892 if out_buffer.pos:
687 chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
893 chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
688 out_buffer.pos = 0
894 out_buffer.pos = 0
689
895
690 return b''.join(chunks)
896 return b''.join(chunks)
691
897
692
898
693 class ZstdDecompressionWriter(object):
899 class ZstdDecompressionWriter(object):
694 def __init__(self, decompressor, writer, write_size):
900 def __init__(self, decompressor, writer, write_size):
695 self._decompressor = decompressor
901 self._decompressor = decompressor
696 self._writer = writer
902 self._writer = writer
697 self._write_size = write_size
903 self._write_size = write_size
698 self._dstream = None
699 self._entered = False
904 self._entered = False
700
905
701 def __enter__(self):
906 def __enter__(self):
702 if self._entered:
907 if self._entered:
703 raise ZstdError('cannot __enter__ multiple times')
908 raise ZstdError('cannot __enter__ multiple times')
704
909
705 self._dstream = self._decompressor._get_dstream()
910 self._decompressor._ensure_dstream()
706 self._entered = True
911 self._entered = True
707
912
708 return self
913 return self
709
914
710 def __exit__(self, exc_type, exc_value, exc_tb):
915 def __exit__(self, exc_type, exc_value, exc_tb):
711 self._entered = False
916 self._entered = False
712 self._dstream = None
713
917
714 def memory_size(self):
918 def memory_size(self):
715 if not self._dstream:
919 if not self._decompressor._dstream:
716 raise ZstdError('cannot determine size of inactive decompressor; '
920 raise ZstdError('cannot determine size of inactive decompressor; '
717 'call when context manager is active')
921 'call when context manager is active')
718
922
719 return lib.ZSTD_sizeof_DStream(self._dstream)
923 return lib.ZSTD_sizeof_DStream(self._decompressor._dstream)
720
924
721 def write(self, data):
925 def write(self, data):
722 if not self._entered:
926 if not self._entered:
723 raise ZstdError('write must be called from an active context manager')
927 raise ZstdError('write must be called from an active context manager')
724
928
725 total_write = 0
929 total_write = 0
726
930
727 in_buffer = ffi.new('ZSTD_inBuffer *')
931 in_buffer = ffi.new('ZSTD_inBuffer *')
728 out_buffer = ffi.new('ZSTD_outBuffer *')
932 out_buffer = ffi.new('ZSTD_outBuffer *')
729
933
730 data_buffer = ffi.from_buffer(data)
934 data_buffer = ffi.from_buffer(data)
731 in_buffer.src = data_buffer
935 in_buffer.src = data_buffer
732 in_buffer.size = len(data_buffer)
936 in_buffer.size = len(data_buffer)
733 in_buffer.pos = 0
937 in_buffer.pos = 0
734
938
735 dst_buffer = ffi.new('char[]', self._write_size)
939 dst_buffer = ffi.new('char[]', self._write_size)
736 out_buffer.dst = dst_buffer
940 out_buffer.dst = dst_buffer
737 out_buffer.size = len(dst_buffer)
941 out_buffer.size = len(dst_buffer)
738 out_buffer.pos = 0
942 out_buffer.pos = 0
739
943
944 dstream = self._decompressor._dstream
945
740 while in_buffer.pos < in_buffer.size:
946 while in_buffer.pos < in_buffer.size:
741 zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
947 zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer)
742 if lib.ZSTD_isError(zresult):
948 if lib.ZSTD_isError(zresult):
743 raise ZstdError('zstd decompress error: %s' %
949 raise ZstdError('zstd decompress error: %s' %
744 ffi.string(lib.ZSTD_getErrorName(zresult)))
950 ffi.string(lib.ZSTD_getErrorName(zresult)))
745
951
746 if out_buffer.pos:
952 if out_buffer.pos:
747 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
953 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
748 total_write += out_buffer.pos
954 total_write += out_buffer.pos
749 out_buffer.pos = 0
955 out_buffer.pos = 0
750
956
751 return total_write
957 return total_write
752
958
753
959
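
A sketch of decompression using the ZstdDecompressor defined just below, assuming `frame` was produced with write_content_size=True as in the earlier sketch:

    dctx = ZstdDecompressor()
    original = dctx.decompress(frame)     # one-shot; needs the content size in the frame header
    dobj = dctx.decompressobj()           # incremental, zlib-style
    original = b''.join([dobj.decompress(frame[:10]), dobj.decompress(frame[10:])])
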
754 class ZstdDecompressor(object):
960 class ZstdDecompressor(object):
755 def __init__(self, dict_data=None):
961 def __init__(self, dict_data=None):
756 self._dict_data = dict_data
962 self._dict_data = dict_data
757
963
758 dctx = lib.ZSTD_createDCtx()
964 dctx = lib.ZSTD_createDCtx()
759 if dctx == ffi.NULL:
965 if dctx == ffi.NULL:
760 raise MemoryError()
966 raise MemoryError()
761
967
762 self._refdctx = ffi.gc(dctx, lib.ZSTD_freeDCtx)
968 self._refdctx = ffi.gc(dctx, lib.ZSTD_freeDCtx)
969 self._dstream = None
763
970
764 @property
971 @property
765 def _ddict(self):
972 def _ddict(self):
766 if self._dict_data:
973 if self._dict_data:
767 dict_data = self._dict_data.as_bytes()
974 dict_data = self._dict_data.as_bytes()
768 dict_size = len(self._dict_data)
975 dict_size = len(self._dict_data)
769
976
770 ddict = lib.ZSTD_createDDict(dict_data, dict_size)
977 ddict = lib.ZSTD_createDDict(dict_data, dict_size)
771 if ddict == ffi.NULL:
978 if ddict == ffi.NULL:
772 raise ZstdError('could not create decompression dict')
979 raise ZstdError('could not create decompression dict')
773 else:
980 else:
774 ddict = None
981 ddict = None
775
982
776 self.__dict__['_ddict'] = ddict
983 self.__dict__['_ddict'] = ddict
777 return ddict
984 return ddict
778
985
779 def decompress(self, data, max_output_size=0):
986 def decompress(self, data, max_output_size=0):
780 data_buffer = ffi.from_buffer(data)
987 data_buffer = ffi.from_buffer(data)
781
988
782 orig_dctx = new_nonzero('char[]', lib.ZSTD_sizeof_DCtx(self._refdctx))
989 orig_dctx = new_nonzero('char[]', lib.ZSTD_sizeof_DCtx(self._refdctx))
783 dctx = ffi.cast('ZSTD_DCtx *', orig_dctx)
990 dctx = ffi.cast('ZSTD_DCtx *', orig_dctx)
784 lib.ZSTD_copyDCtx(dctx, self._refdctx)
991 lib.ZSTD_copyDCtx(dctx, self._refdctx)
785
992
786 ddict = self._ddict
993 ddict = self._ddict
787
994
788 output_size = lib.ZSTD_getDecompressedSize(data_buffer, len(data_buffer))
995 output_size = lib.ZSTD_getDecompressedSize(data_buffer, len(data_buffer))
789 if output_size:
996 if output_size:
790 result_buffer = ffi.new('char[]', output_size)
997 result_buffer = ffi.new('char[]', output_size)
791 result_size = output_size
998 result_size = output_size
792 else:
999 else:
793 if not max_output_size:
1000 if not max_output_size:
794 raise ZstdError('input data invalid or missing content size '
1001 raise ZstdError('input data invalid or missing content size '
795 'in frame header')
1002 'in frame header')
796
1003
797 result_buffer = ffi.new('char[]', max_output_size)
1004 result_buffer = ffi.new('char[]', max_output_size)
798 result_size = max_output_size
1005 result_size = max_output_size
799
1006
800 if ddict:
1007 if ddict:
801 zresult = lib.ZSTD_decompress_usingDDict(dctx,
1008 zresult = lib.ZSTD_decompress_usingDDict(dctx,
802 result_buffer, result_size,
1009 result_buffer, result_size,
803 data_buffer, len(data_buffer),
1010 data_buffer, len(data_buffer),
804 ddict)
1011 ddict)
805 else:
1012 else:
806 zresult = lib.ZSTD_decompressDCtx(dctx,
1013 zresult = lib.ZSTD_decompressDCtx(dctx,
807 result_buffer, result_size,
1014 result_buffer, result_size,
808 data_buffer, len(data_buffer))
1015 data_buffer, len(data_buffer))
809 if lib.ZSTD_isError(zresult):
1016 if lib.ZSTD_isError(zresult):
810 raise ZstdError('decompression error: %s' %
1017 raise ZstdError('decompression error: %s' %
811 ffi.string(lib.ZSTD_getErrorName(zresult)))
1018 ffi.string(lib.ZSTD_getErrorName(zresult)))
812 elif output_size and zresult != output_size:
1019 elif output_size and zresult != output_size:
813 raise ZstdError('decompression error: decompressed %d bytes; expected %d' %
1020 raise ZstdError('decompression error: decompressed %d bytes; expected %d' %
814 (zresult, output_size))
1021 (zresult, output_size))
815
1022
816 return ffi.buffer(result_buffer, zresult)[:]
1023 return ffi.buffer(result_buffer, zresult)[:]
817
1024
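As the branches above show, one-shot decompress() sizes its output from the frame's recorded content size and otherwise requires an explicit cap. A small sketch of both paths (the import name `zstd` and the compressor-side write_content_size flag are assumed, not shown in this diff):

    import zstd  # assumed import name

    data = b'x' * 1000

    # Frame that records its content size: decompress() can size the output itself.
    frame = zstd.ZstdCompressor(write_content_size=True).compress(data)
    assert zstd.ZstdDecompressor().decompress(frame) == data

    # Frame without a content size: a max_output_size bound is required,
    # otherwise the ZstdError raised above is triggered.
    frame = zstd.ZstdCompressor().compress(data)
    assert zstd.ZstdDecompressor().decompress(frame, max_output_size=len(data)) == data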
818 def decompressobj(self):
1025 def decompressobj(self):
1026 self._ensure_dstream()
819 return ZstdDecompressionObj(self)
1027 return ZstdDecompressionObj(self)
820
1028
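decompressobj() provides a zlib-like incremental interface; the _ensure_dstream() call added above guarantees the object starts from a fresh or reset stream. A hedged usage sketch (import name and chunk size assumed):

    import zstd  # assumed import name

    payload = b'a' * 4096
    frame = zstd.ZstdCompressor().compress(payload)

    dobj = zstd.ZstdDecompressor().decompressobj()
    # Feed the frame in arbitrary slices; decompress() returns whatever output
    # is available for each slice.
    chunks = [dobj.decompress(frame[i:i + 512]) for i in range(0, len(frame), 512)]
    assert b''.join(chunks) == payload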
821 def read_from(self, reader, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
1029 def read_from(self, reader, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
822 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
1030 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
823 skip_bytes=0):
1031 skip_bytes=0):
824 if skip_bytes >= read_size:
1032 if skip_bytes >= read_size:
825 raise ValueError('skip_bytes must be smaller than read_size')
1033 raise ValueError('skip_bytes must be smaller than read_size')
826
1034
827 if hasattr(reader, 'read'):
1035 if hasattr(reader, 'read'):
828 have_read = True
1036 have_read = True
829 elif hasattr(reader, '__getitem__'):
1037 elif hasattr(reader, '__getitem__'):
830 have_read = False
1038 have_read = False
831 buffer_offset = 0
1039 buffer_offset = 0
832 size = len(reader)
1040 size = len(reader)
833 else:
1041 else:
834 raise ValueError('must pass an object with a read() method or '
1042 raise ValueError('must pass an object with a read() method or '
835 'that conforms to the buffer protocol')
1043 'that conforms to the buffer protocol')
836
1044
837 if skip_bytes:
1045 if skip_bytes:
838 if have_read:
1046 if have_read:
839 reader.read(skip_bytes)
1047 reader.read(skip_bytes)
840 else:
1048 else:
841 if skip_bytes > size:
1049 if skip_bytes > size:
842 raise ValueError('skip_bytes larger than first input chunk')
1050 raise ValueError('skip_bytes larger than first input chunk')
843
1051
844 buffer_offset = skip_bytes
1052 buffer_offset = skip_bytes
845
1053
846 dstream = self._get_dstream()
1054 self._ensure_dstream()
847
1055
848 in_buffer = ffi.new('ZSTD_inBuffer *')
1056 in_buffer = ffi.new('ZSTD_inBuffer *')
849 out_buffer = ffi.new('ZSTD_outBuffer *')
1057 out_buffer = ffi.new('ZSTD_outBuffer *')
850
1058
851 dst_buffer = ffi.new('char[]', write_size)
1059 dst_buffer = ffi.new('char[]', write_size)
852 out_buffer.dst = dst_buffer
1060 out_buffer.dst = dst_buffer
853 out_buffer.size = len(dst_buffer)
1061 out_buffer.size = len(dst_buffer)
854 out_buffer.pos = 0
1062 out_buffer.pos = 0
855
1063
856 while True:
1064 while True:
857 assert out_buffer.pos == 0
1065 assert out_buffer.pos == 0
858
1066
859 if have_read:
1067 if have_read:
860 read_result = reader.read(read_size)
1068 read_result = reader.read(read_size)
861 else:
1069 else:
862 remaining = size - buffer_offset
1070 remaining = size - buffer_offset
863 slice_size = min(remaining, read_size)
1071 slice_size = min(remaining, read_size)
864 read_result = reader[buffer_offset:buffer_offset + slice_size]
1072 read_result = reader[buffer_offset:buffer_offset + slice_size]
865 buffer_offset += slice_size
1073 buffer_offset += slice_size
866
1074
867 # No new input. Break out of read loop.
1075 # No new input. Break out of read loop.
868 if not read_result:
1076 if not read_result:
869 break
1077 break
870
1078
871 # Feed all read data into decompressor and emit output until
1079 # Feed all read data into decompressor and emit output until
872 # exhausted.
1080 # exhausted.
873 read_buffer = ffi.from_buffer(read_result)
1081 read_buffer = ffi.from_buffer(read_result)
874 in_buffer.src = read_buffer
1082 in_buffer.src = read_buffer
875 in_buffer.size = len(read_buffer)
1083 in_buffer.size = len(read_buffer)
876 in_buffer.pos = 0
1084 in_buffer.pos = 0
877
1085
878 while in_buffer.pos < in_buffer.size:
1086 while in_buffer.pos < in_buffer.size:
879 assert out_buffer.pos == 0
1087 assert out_buffer.pos == 0
880
1088
881 zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer)
1089 zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
882 if lib.ZSTD_isError(zresult):
1090 if lib.ZSTD_isError(zresult):
883 raise ZstdError('zstd decompress error: %s' %
1091 raise ZstdError('zstd decompress error: %s' %
884 ffi.string(lib.ZSTD_getErrorName(zresult)))
1092 ffi.string(lib.ZSTD_getErrorName(zresult)))
885
1093
886 if out_buffer.pos:
1094 if out_buffer.pos:
887 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1095 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
888 out_buffer.pos = 0
1096 out_buffer.pos = 0
889 yield data
1097 yield data
890
1098
891 if zresult == 0:
1099 if zresult == 0:
892 return
1100 return
893
1101
894 # Repeat loop to collect more input data.
1102 # Repeat loop to collect more input data.
895 continue
1103 continue
896
1104
897 # If we get here, input is exhausted.
1105 # If we get here, input is exhausted.
898
1106
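read_from() is a generator: it pulls read_size bytes at a time from a file-like object or buffer, feeds them through the shared DStream, and yields decompressed chunks of up to write_size bytes. A minimal sketch (import name and sample data assumed):

    import io
    import zstd  # assumed import name

    payload = b'streaming payload' * 100
    frame = zstd.ZstdCompressor().compress(payload)

    dctx = zstd.ZstdDecompressor()
    decompressed = b''.join(dctx.read_from(io.BytesIO(frame), read_size=8192))
    assert decompressed == payload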
899 def write_to(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1107 def write_to(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
900 if not hasattr(writer, 'write'):
1108 if not hasattr(writer, 'write'):
901 raise ValueError('must pass an object with a write() method')
1109 raise ValueError('must pass an object with a write() method')
902
1110
903 return ZstdDecompressionWriter(self, writer, write_size)
1111 return ZstdDecompressionWriter(self, writer, write_size)
904
1112
905 def copy_stream(self, ifh, ofh,
1113 def copy_stream(self, ifh, ofh,
906 read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
1114 read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
907 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1115 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
908 if not hasattr(ifh, 'read'):
1116 if not hasattr(ifh, 'read'):
909 raise ValueError('first argument must have a read() method')
1117 raise ValueError('first argument must have a read() method')
910 if not hasattr(ofh, 'write'):
1118 if not hasattr(ofh, 'write'):
911 raise ValueError('second argument must have a write() method')
1119 raise ValueError('second argument must have a write() method')
912
1120
913 dstream = self._get_dstream()
1121 self._ensure_dstream()
914
1122
915 in_buffer = ffi.new('ZSTD_inBuffer *')
1123 in_buffer = ffi.new('ZSTD_inBuffer *')
916 out_buffer = ffi.new('ZSTD_outBuffer *')
1124 out_buffer = ffi.new('ZSTD_outBuffer *')
917
1125
918 dst_buffer = ffi.new('char[]', write_size)
1126 dst_buffer = ffi.new('char[]', write_size)
919 out_buffer.dst = dst_buffer
1127 out_buffer.dst = dst_buffer
920 out_buffer.size = write_size
1128 out_buffer.size = write_size
921 out_buffer.pos = 0
1129 out_buffer.pos = 0
922
1130
923 total_read, total_write = 0, 0
1131 total_read, total_write = 0, 0
924
1132
925 # Read all available input.
1133 # Read all available input.
926 while True:
1134 while True:
927 data = ifh.read(read_size)
1135 data = ifh.read(read_size)
928 if not data:
1136 if not data:
929 break
1137 break
930
1138
931 data_buffer = ffi.from_buffer(data)
1139 data_buffer = ffi.from_buffer(data)
932 total_read += len(data_buffer)
1140 total_read += len(data_buffer)
933 in_buffer.src = data_buffer
1141 in_buffer.src = data_buffer
934 in_buffer.size = len(data_buffer)
1142 in_buffer.size = len(data_buffer)
935 in_buffer.pos = 0
1143 in_buffer.pos = 0
936
1144
937 # Flush all read data to output.
1145 # Flush all read data to output.
938 while in_buffer.pos < in_buffer.size:
1146 while in_buffer.pos < in_buffer.size:
939 zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer)
1147 zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
940 if lib.ZSTD_isError(zresult):
1148 if lib.ZSTD_isError(zresult):
941 raise ZstdError('zstd decompressor error: %s' %
1149 raise ZstdError('zstd decompressor error: %s' %
942 ffi.string(lib.ZSTD_getErrorName(zresult)))
1150 ffi.string(lib.ZSTD_getErrorName(zresult)))
943
1151
944 if out_buffer.pos:
1152 if out_buffer.pos:
945 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
1153 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
946 total_write += out_buffer.pos
1154 total_write += out_buffer.pos
947 out_buffer.pos = 0
1155 out_buffer.pos = 0
948
1156
949 # Continue loop to keep reading.
1157 # Continue loop to keep reading.
950
1158
951 return total_read, total_write
1159 return total_read, total_write
952
1160
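copy_stream() pipes a compressed source into a decompressed sink and reports how many bytes moved in each direction, matching the (total_read, total_write) return above. A hedged sketch using in-memory streams (import name and sample data assumed):

    import io
    import zstd  # assumed import name

    payload = b'copy me' * 50
    src = io.BytesIO(zstd.ZstdCompressor().compress(payload))
    dst = io.BytesIO()

    read_count, write_count = zstd.ZstdDecompressor().copy_stream(src, dst)
    assert dst.getvalue() == payload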
953 def decompress_content_dict_chain(self, frames):
1161 def decompress_content_dict_chain(self, frames):
954 if not isinstance(frames, list):
1162 if not isinstance(frames, list):
955 raise TypeError('argument must be a list')
1163 raise TypeError('argument must be a list')
956
1164
957 if not frames:
1165 if not frames:
958 raise ValueError('empty input chain')
1166 raise ValueError('empty input chain')
959
1167
960 # First chunk should not be using a dictionary. We handle it specially.
1168 # First chunk should not be using a dictionary. We handle it specially.
961 chunk = frames[0]
1169 chunk = frames[0]
962 if not isinstance(chunk, bytes_type):
1170 if not isinstance(chunk, bytes_type):
963 raise ValueError('chunk 0 must be bytes')
1171 raise ValueError('chunk 0 must be bytes')
964
1172
965 # All chunks should be zstd frames and should have content size set.
1173 # All chunks should be zstd frames and should have content size set.
966 chunk_buffer = ffi.from_buffer(chunk)
1174 chunk_buffer = ffi.from_buffer(chunk)
967 params = ffi.new('ZSTD_frameParams *')
1175 params = ffi.new('ZSTD_frameParams *')
968 zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer))
1176 zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer))
969 if lib.ZSTD_isError(zresult):
1177 if lib.ZSTD_isError(zresult):
970 raise ValueError('chunk 0 is not a valid zstd frame')
1178 raise ValueError('chunk 0 is not a valid zstd frame')
971 elif zresult:
1179 elif zresult:
972 raise ValueError('chunk 0 is too small to contain a zstd frame')
1180 raise ValueError('chunk 0 is too small to contain a zstd frame')
973
1181
974 if not params.frameContentSize:
1182 if not params.frameContentSize:
975 raise ValueError('chunk 0 missing content size in frame')
1183 raise ValueError('chunk 0 missing content size in frame')
976
1184
977 dctx = lib.ZSTD_createDCtx()
1185 dctx = lib.ZSTD_createDCtx()
978 if dctx == ffi.NULL:
1186 if dctx == ffi.NULL:
979 raise MemoryError()
1187 raise MemoryError()
980
1188
981 dctx = ffi.gc(dctx, lib.ZSTD_freeDCtx)
1189 dctx = ffi.gc(dctx, lib.ZSTD_freeDCtx)
982
1190
983 last_buffer = ffi.new('char[]', params.frameContentSize)
1191 last_buffer = ffi.new('char[]', params.frameContentSize)
984
1192
985 zresult = lib.ZSTD_decompressDCtx(dctx, last_buffer, len(last_buffer),
1193 zresult = lib.ZSTD_decompressDCtx(dctx, last_buffer, len(last_buffer),
986 chunk_buffer, len(chunk_buffer))
1194 chunk_buffer, len(chunk_buffer))
987 if lib.ZSTD_isError(zresult):
1195 if lib.ZSTD_isError(zresult):
988 raise ZstdError('could not decompress chunk 0: %s' %
1196 raise ZstdError('could not decompress chunk 0: %s' %
989 ffi.string(lib.ZSTD_getErrorName(zresult)))
1197 ffi.string(lib.ZSTD_getErrorName(zresult)))
990
1198
991 # Special case of chain length of 1
1199 # Special case of chain length of 1
992 if len(frames) == 1:
1200 if len(frames) == 1:
993 return ffi.buffer(last_buffer, len(last_buffer))[:]
1201 return ffi.buffer(last_buffer, len(last_buffer))[:]
994
1202
995 i = 1
1203 i = 1
996 while i < len(frames):
1204 while i < len(frames):
997 chunk = frames[i]
1205 chunk = frames[i]
998 if not isinstance(chunk, bytes_type):
1206 if not isinstance(chunk, bytes_type):
999 raise ValueError('chunk %d must be bytes' % i)
1207 raise ValueError('chunk %d must be bytes' % i)
1000
1208
1001 chunk_buffer = ffi.from_buffer(chunk)
1209 chunk_buffer = ffi.from_buffer(chunk)
1002 zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer))
1210 zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer))
1003 if lib.ZSTD_isError(zresult):
1211 if lib.ZSTD_isError(zresult):
1004 raise ValueError('chunk %d is not a valid zstd frame' % i)
1212 raise ValueError('chunk %d is not a valid zstd frame' % i)
1005 elif zresult:
1213 elif zresult:
1006 raise ValueError('chunk %d is too small to contain a zstd frame' % i)
1214 raise ValueError('chunk %d is too small to contain a zstd frame' % i)
1007
1215
1008 if not params.frameContentSize:
1216 if not params.frameContentSize:
1009 raise ValueError('chunk %d missing content size in frame' % i)
1217 raise ValueError('chunk %d missing content size in frame' % i)
1010
1218
1011 dest_buffer = ffi.new('char[]', params.frameContentSize)
1219 dest_buffer = ffi.new('char[]', params.frameContentSize)
1012
1220
1013 zresult = lib.ZSTD_decompress_usingDict(dctx, dest_buffer, len(dest_buffer),
1221 zresult = lib.ZSTD_decompress_usingDict(dctx, dest_buffer, len(dest_buffer),
1014 chunk_buffer, len(chunk_buffer),
1222 chunk_buffer, len(chunk_buffer),
1015 last_buffer, len(last_buffer))
1223 last_buffer, len(last_buffer))
1016 if lib.ZSTD_isError(zresult):
1224 if lib.ZSTD_isError(zresult):
1017 raise ZstdError('could not decompress chunk %d' % i)
1225 raise ZstdError('could not decompress chunk %d' % i)
1018
1226
1019 last_buffer = dest_buffer
1227 last_buffer = dest_buffer
1020 i += 1
1228 i += 1
1021
1229
1022 return ffi.buffer(last_buffer, len(last_buffer))[:]
1230 return ffi.buffer(last_buffer, len(last_buffer))[:]
1023
1231
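decompress_content_dict_chain() walks a delta chain in which every frame after the first was compressed using the previous uncompressed frame as its dictionary, and every frame records its content size. A sketch of producing and consuming such a chain (the revision texts are invented; ZstdCompressionDict and write_content_size are assumed from the compressor half of this package):

    import zstd  # assumed import name

    revisions = [b'first revision of a file', b'first revision of a file, amended']

    frames = [zstd.ZstdCompressor(write_content_size=True).compress(revisions[0])]
    for prev, cur in zip(revisions, revisions[1:]):
        cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(prev),
                                   write_content_size=True)
        frames.append(cctx.compress(cur))

    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress_content_dict_chain(frames) == revisions[-1]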
1024 def _get_dstream(self):
1232 def _ensure_dstream(self):
1025 dstream = lib.ZSTD_createDStream()
1233 if self._dstream:
1026 if dstream == ffi.NULL:
1234 zresult = lib.ZSTD_resetDStream(self._dstream)
1235 if lib.ZSTD_isError(zresult):
1236 raise ZstdError('could not reset DStream: %s' %
1237 ffi.string(lib.ZSTD_getErrorName(zresult)))
1238
1239 return
1240
1241 self._dstream = lib.ZSTD_createDStream()
1242 if self._dstream == ffi.NULL:
1027 raise MemoryError()
1243 raise MemoryError()
1028
1244
1029 dstream = ffi.gc(dstream, lib.ZSTD_freeDStream)
1245 self._dstream = ffi.gc(self._dstream, lib.ZSTD_freeDStream)
1030
1246
1031 if self._dict_data:
1247 if self._dict_data:
1032 zresult = lib.ZSTD_initDStream_usingDict(dstream,
1248 zresult = lib.ZSTD_initDStream_usingDict(self._dstream,
1033 self._dict_data.as_bytes(),
1249 self._dict_data.as_bytes(),
1034 len(self._dict_data))
1250 len(self._dict_data))
1035 else:
1251 else:
1036 zresult = lib.ZSTD_initDStream(dstream)
1252 zresult = lib.ZSTD_initDStream(self._dstream)
1037
1253
1038 if lib.ZSTD_isError(zresult):
1254 if lib.ZSTD_isError(zresult):
1255 self._dstream = None
1039 raise ZstdError('could not initialize DStream: %s' %
1256 raise ZstdError('could not initialize DStream: %s' %
1040 ffi.string(lib.ZSTD_getErrorName(zresult)))
1257 ffi.string(lib.ZSTD_getErrorName(zresult)))
1041
1042 return dstream
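The hunk above replaces _get_dstream(), which allocated a fresh DStream for every streaming call and returned it, with _ensure_dstream(), which caches the stream on self._dstream, resets it with ZSTD_resetDStream on reuse, and clears the attribute if initialization fails. The practical effect, sketched below under the assumed import name `zstd`, is that one ZstdDecompressor can be reused across streaming operations without reallocating the DStream:

    import io
    import zstd  # assumed import name

    frame = zstd.ZstdCompressor().compress(b'reusable')
    dctx = zstd.ZstdDecompressor()

    # Both iterations share the cached DStream; the second call only resets it.
    first = b''.join(dctx.read_from(io.BytesIO(frame)))
    second = b''.join(dctx.read_from(io.BytesIO(frame)))
    assert first == second == b'reusable'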
@@ -1,41 +1,44 @@
1 #require test-repo
1 #require test-repo
2
2
3 $ . "$TESTDIR/helpers-testrepo.sh"
3 $ . "$TESTDIR/helpers-testrepo.sh"
4 $ cd "$TESTDIR"/..
4 $ cd "$TESTDIR"/..
5
5
6 $ hg files 'set:(**.py)' | sed 's|\\|/|g' | xargs python contrib/check-py3-compat.py
6 $ hg files 'set:(**.py)' | sed 's|\\|/|g' | xargs python contrib/check-py3-compat.py
7 contrib/python-zstandard/setup.py not using absolute_import
7 contrib/python-zstandard/setup.py not using absolute_import
8 contrib/python-zstandard/setup_zstd.py not using absolute_import
8 contrib/python-zstandard/setup_zstd.py not using absolute_import
9 contrib/python-zstandard/tests/common.py not using absolute_import
9 contrib/python-zstandard/tests/common.py not using absolute_import
10 contrib/python-zstandard/tests/test_buffer_util.py not using absolute_import
10 contrib/python-zstandard/tests/test_compressor.py not using absolute_import
11 contrib/python-zstandard/tests/test_compressor.py not using absolute_import
12 contrib/python-zstandard/tests/test_compressor_fuzzing.py not using absolute_import
11 contrib/python-zstandard/tests/test_data_structures.py not using absolute_import
13 contrib/python-zstandard/tests/test_data_structures.py not using absolute_import
14 contrib/python-zstandard/tests/test_data_structures_fuzzing.py not using absolute_import
12 contrib/python-zstandard/tests/test_decompressor.py not using absolute_import
15 contrib/python-zstandard/tests/test_decompressor.py not using absolute_import
16 contrib/python-zstandard/tests/test_decompressor_fuzzing.py not using absolute_import
13 contrib/python-zstandard/tests/test_estimate_sizes.py not using absolute_import
17 contrib/python-zstandard/tests/test_estimate_sizes.py not using absolute_import
14 contrib/python-zstandard/tests/test_module_attributes.py not using absolute_import
18 contrib/python-zstandard/tests/test_module_attributes.py not using absolute_import
15 contrib/python-zstandard/tests/test_roundtrip.py not using absolute_import
16 contrib/python-zstandard/tests/test_train_dictionary.py not using absolute_import
19 contrib/python-zstandard/tests/test_train_dictionary.py not using absolute_import
17 i18n/check-translation.py not using absolute_import
20 i18n/check-translation.py not using absolute_import
18 setup.py not using absolute_import
21 setup.py not using absolute_import
19 tests/test-demandimport.py not using absolute_import
22 tests/test-demandimport.py not using absolute_import
20
23
21 #if py3exe
24 #if py3exe
22 $ hg files 'set:(**.py) - grep(pygments)' -X hgext/fsmonitor/pywatchman \
25 $ hg files 'set:(**.py) - grep(pygments)' -X hgext/fsmonitor/pywatchman \
23 > | sed 's|\\|/|g' | xargs $PYTHON3 contrib/check-py3-compat.py \
26 > | sed 's|\\|/|g' | xargs $PYTHON3 contrib/check-py3-compat.py \
24 > | sed 's/[0-9][0-9]*)$/*)/'
27 > | sed 's/[0-9][0-9]*)$/*)/'
25 hgext/convert/transport.py: error importing: <*Error> No module named 'svn.client' (error at transport.py:*) (glob)
28 hgext/convert/transport.py: error importing: <*Error> No module named 'svn.client' (error at transport.py:*) (glob)
26 hgext/fsmonitor/state.py: error importing: <SyntaxError> from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at watchmanclient.py:*)
29 hgext/fsmonitor/state.py: error importing: <SyntaxError> from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at watchmanclient.py:*)
27 hgext/fsmonitor/watchmanclient.py: error importing: <SyntaxError> from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at watchmanclient.py:*)
30 hgext/fsmonitor/watchmanclient.py: error importing: <SyntaxError> from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at watchmanclient.py:*)
28 mercurial/cffi/bdiff.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob)
31 mercurial/cffi/bdiff.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob)
29 mercurial/cffi/mpatch.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob)
32 mercurial/cffi/mpatch.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob)
30 mercurial/cffi/osutil.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob)
33 mercurial/cffi/osutil.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob)
31 mercurial/scmwindows.py: error importing: <*Error> No module named 'msvcrt' (error at win32.py:*) (glob)
34 mercurial/scmwindows.py: error importing: <*Error> No module named 'msvcrt' (error at win32.py:*) (glob)
32 mercurial/win32.py: error importing: <*Error> No module named 'msvcrt' (error at win32.py:*) (glob)
35 mercurial/win32.py: error importing: <*Error> No module named 'msvcrt' (error at win32.py:*) (glob)
33 mercurial/windows.py: error importing: <*Error> No module named 'msvcrt' (error at windows.py:*) (glob)
36 mercurial/windows.py: error importing: <*Error> No module named 'msvcrt' (error at windows.py:*) (glob)
34
37
35 #endif
38 #endif
36
39
37 #if py3exe py3pygments
40 #if py3exe py3pygments
38 $ hg files 'set:(**.py) and grep(pygments)' | sed 's|\\|/|g' \
41 $ hg files 'set:(**.py) and grep(pygments)' | sed 's|\\|/|g' \
39 > | xargs $PYTHON3 contrib/check-py3-compat.py \
42 > | xargs $PYTHON3 contrib/check-py3-compat.py \
40 > | sed 's/[0-9][0-9]*)$/*)/'
43 > | sed 's/[0-9][0-9]*)$/*)/'
41 #endif
44 #endif