##// END OF EJS Templates
freeze: merge default into stable for 4.2 code freeze
Augie Fackler -
r32054:616e7883 merge 4.2-rc stable
parent child Browse files
Show More

The requested changes are too big and content was truncated. Show full diff

This diff has been collapsed because it changes many lines (770 lines changed). Show them Hide them
@@ -0,0 +1,770 b''
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8
9 #include "python-zstandard.h"
10
11 extern PyObject* ZstdError;
12
13 PyDoc_STRVAR(BufferWithSegments__doc__,
14 "BufferWithSegments - A memory buffer holding known sub-segments.\n"
15 "\n"
16 "This type represents a contiguous chunk of memory containing N discrete\n"
17 "items within sub-segments of that memory.\n"
18 "\n"
19 "Segments within the buffer are stored as an array of\n"
20 "``(offset, length)`` pairs, where each element is an unsigned 64-bit\n"
21 "integer using the host/native bit order representation.\n"
22 "\n"
23 "The type exists to facilitate operations against N>1 items without the\n"
24 "overhead of Python object creation and management.\n"
25 );
26
27 static void BufferWithSegments_dealloc(ZstdBufferWithSegments* self) {
28 /* Backing memory is either canonically owned by a Py_buffer or by us. */
29 if (self->parent.buf) {
30 PyBuffer_Release(&self->parent);
31 }
32 else if (self->useFree) {
33 free(self->data);
34 }
35 else {
36 PyMem_Free(self->data);
37 }
38
39 self->data = NULL;
40
41 if (self->useFree) {
42 free(self->segments);
43 }
44 else {
45 PyMem_Free(self->segments);
46 }
47
48 self->segments = NULL;
49
50 PyObject_Del(self);
51 }
52
53 static int BufferWithSegments_init(ZstdBufferWithSegments* self, PyObject* args, PyObject* kwargs) {
54 static char* kwlist[] = {
55 "data",
56 "segments",
57 NULL
58 };
59
60 Py_buffer segments;
61 Py_ssize_t segmentCount;
62 Py_ssize_t i;
63
64 memset(&self->parent, 0, sizeof(self->parent));
65
66 #if PY_MAJOR_VERSION >= 3
67 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*y*:BufferWithSegments",
68 #else
69 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*s*:BufferWithSegments",
70 #endif
71 kwlist, &self->parent, &segments)) {
72 return -1;
73 }
74
75 if (!PyBuffer_IsContiguous(&self->parent, 'C') || self->parent.ndim > 1) {
76 PyErr_SetString(PyExc_ValueError, "data buffer should be contiguous and have a single dimension");
77 goto except;
78 }
79
80 if (!PyBuffer_IsContiguous(&segments, 'C') || segments.ndim > 1) {
81 PyErr_SetString(PyExc_ValueError, "segments buffer should be contiguous and have a single dimension");
82 goto except;
83 }
84
85 if (segments.len % sizeof(BufferSegment)) {
86 PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of %lu",
87 sizeof(BufferSegment));
88 goto except;
89 }
90
91 segmentCount = segments.len / sizeof(BufferSegment);
92
93 /* Validate segments data, as blindly trusting it could lead to arbitrary
94 memory access. */
95 for (i = 0; i < segmentCount; i++) {
96 BufferSegment* segment = &((BufferSegment*)(segments.buf))[i];
97
98 if (segment->offset + segment->length > (unsigned long long)self->parent.len) {
99 PyErr_SetString(PyExc_ValueError, "offset within segments array references memory outside buffer");
100 goto except;
101 return -1;
102 }
103 }
104
105 /* Make a copy of the segments data. It is cheap to do so and is a guard
106 against caller changing offsets, which has security implications. */
107 self->segments = PyMem_Malloc(segments.len);
108 if (!self->segments) {
109 PyErr_NoMemory();
110 goto except;
111 }
112
113 memcpy(self->segments, segments.buf, segments.len);
114 PyBuffer_Release(&segments);
115
116 self->data = self->parent.buf;
117 self->dataSize = self->parent.len;
118 self->segmentCount = segmentCount;
119
120 return 0;
121
122 except:
123 PyBuffer_Release(&self->parent);
124 PyBuffer_Release(&segments);
125 return -1;
126 };
127
128 /**
129 * Construct a BufferWithSegments from existing memory and offsets.
130 *
131 * Ownership of the backing memory and BufferSegments will be transferred to
132 * the created object and freed when the BufferWithSegments is destroyed.
133 */
134 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize,
135 BufferSegment* segments, Py_ssize_t segmentsSize) {
136 ZstdBufferWithSegments* result = NULL;
137 Py_ssize_t i;
138
139 if (NULL == data) {
140 PyErr_SetString(PyExc_ValueError, "data is NULL");
141 return NULL;
142 }
143
144 if (NULL == segments) {
145 PyErr_SetString(PyExc_ValueError, "segments is NULL");
146 return NULL;
147 }
148
149 for (i = 0; i < segmentsSize; i++) {
150 BufferSegment* segment = &segments[i];
151
152 if (segment->offset + segment->length > dataSize) {
153 PyErr_SetString(PyExc_ValueError, "offset in segments overflows buffer size");
154 return NULL;
155 }
156 }
157
158 result = PyObject_New(ZstdBufferWithSegments, &ZstdBufferWithSegmentsType);
159 if (NULL == result) {
160 return NULL;
161 }
162
163 result->useFree = 0;
164
165 memset(&result->parent, 0, sizeof(result->parent));
166 result->data = data;
167 result->dataSize = dataSize;
168 result->segments = segments;
169 result->segmentCount = segmentsSize;
170
171 return result;
172 }
173
174 static Py_ssize_t BufferWithSegments_length(ZstdBufferWithSegments* self) {
175 return self->segmentCount;
176 }
177
178 static ZstdBufferSegment* BufferWithSegments_item(ZstdBufferWithSegments* self, Py_ssize_t i) {
179 ZstdBufferSegment* result = NULL;
180
181 if (i < 0) {
182 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
183 return NULL;
184 }
185
186 if (i >= self->segmentCount) {
187 PyErr_Format(PyExc_IndexError, "offset must be less than %zd", self->segmentCount);
188 return NULL;
189 }
190
191 result = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL);
192 if (NULL == result) {
193 return NULL;
194 }
195
196 result->parent = (PyObject*)self;
197 Py_INCREF(self);
198
199 result->data = (char*)self->data + self->segments[i].offset;
200 result->dataSize = self->segments[i].length;
201 result->offset = self->segments[i].offset;
202
203 return result;
204 }
205
206 #if PY_MAJOR_VERSION >= 3
207 static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) {
208 return PyBuffer_FillInfo(view, (PyObject*)self, self->data, self->dataSize, 1, flags);
209 }
210 #else
211 static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) {
212 if (segment != 0) {
213 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
214 return -1;
215 }
216
217 *ptrptr = self->data;
218 return self->dataSize;
219 }
220
221 static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) {
222 if (len) {
223 *len = 1;
224 }
225
226 return 1;
227 }
228 #endif
229
230 PyDoc_STRVAR(BufferWithSegments_tobytes__doc__,
231 "Obtain a bytes instance for this buffer.\n"
232 );
233
234 static PyObject* BufferWithSegments_tobytes(ZstdBufferWithSegments* self) {
235 return PyBytes_FromStringAndSize(self->data, self->dataSize);
236 }
237
238 PyDoc_STRVAR(BufferWithSegments_segments__doc__,
239 "Obtain a BufferSegments describing segments in this sintance.\n"
240 );
241
242 static ZstdBufferSegments* BufferWithSegments_segments(ZstdBufferWithSegments* self) {
243 ZstdBufferSegments* result = (ZstdBufferSegments*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentsType, NULL);
244 if (NULL == result) {
245 return NULL;
246 }
247
248 result->parent = (PyObject*)self;
249 Py_INCREF(self);
250 result->segments = self->segments;
251 result->segmentCount = self->segmentCount;
252
253 return result;
254 }
255
256 static PySequenceMethods BufferWithSegments_sq = {
257 (lenfunc)BufferWithSegments_length, /* sq_length */
258 0, /* sq_concat */
259 0, /* sq_repeat */
260 (ssizeargfunc)BufferWithSegments_item, /* sq_item */
261 0, /* sq_ass_item */
262 0, /* sq_contains */
263 0, /* sq_inplace_concat */
264 0 /* sq_inplace_repeat */
265 };
266
267 static PyBufferProcs BufferWithSegments_as_buffer = {
268 #if PY_MAJOR_VERSION >= 3
269 (getbufferproc)BufferWithSegments_getbuffer, /* bf_getbuffer */
270 0 /* bf_releasebuffer */
271 #else
272 (readbufferproc)BufferWithSegments_getreadbuffer, /* bf_getreadbuffer */
273 0, /* bf_getwritebuffer */
274 (segcountproc)BufferWithSegments_getsegcount, /* bf_getsegcount */
275 0 /* bf_getcharbuffer */
276 #endif
277 };
278
279 static PyMethodDef BufferWithSegments_methods[] = {
280 { "segments", (PyCFunction)BufferWithSegments_segments,
281 METH_NOARGS, BufferWithSegments_segments__doc__ },
282 { "tobytes", (PyCFunction)BufferWithSegments_tobytes,
283 METH_NOARGS, BufferWithSegments_tobytes__doc__ },
284 { NULL, NULL }
285 };
286
287 static PyMemberDef BufferWithSegments_members[] = {
288 { "size", T_ULONGLONG, offsetof(ZstdBufferWithSegments, dataSize),
289 READONLY, "total size of the buffer in bytes" },
290 { NULL }
291 };
292
293 PyTypeObject ZstdBufferWithSegmentsType = {
294 PyVarObject_HEAD_INIT(NULL, 0)
295 "zstd.BufferWithSegments", /* tp_name */
296 sizeof(ZstdBufferWithSegments),/* tp_basicsize */
297 0, /* tp_itemsize */
298 (destructor)BufferWithSegments_dealloc, /* tp_dealloc */
299 0, /* tp_print */
300 0, /* tp_getattr */
301 0, /* tp_setattr */
302 0, /* tp_compare */
303 0, /* tp_repr */
304 0, /* tp_as_number */
305 &BufferWithSegments_sq, /* tp_as_sequence */
306 0, /* tp_as_mapping */
307 0, /* tp_hash */
308 0, /* tp_call */
309 0, /* tp_str */
310 0, /* tp_getattro */
311 0, /* tp_setattro */
312 &BufferWithSegments_as_buffer, /* tp_as_buffer */
313 Py_TPFLAGS_DEFAULT, /* tp_flags */
314 BufferWithSegments__doc__, /* tp_doc */
315 0, /* tp_traverse */
316 0, /* tp_clear */
317 0, /* tp_richcompare */
318 0, /* tp_weaklistoffset */
319 0, /* tp_iter */
320 0, /* tp_iternext */
321 BufferWithSegments_methods, /* tp_methods */
322 BufferWithSegments_members, /* tp_members */
323 0, /* tp_getset */
324 0, /* tp_base */
325 0, /* tp_dict */
326 0, /* tp_descr_get */
327 0, /* tp_descr_set */
328 0, /* tp_dictoffset */
329 (initproc)BufferWithSegments_init, /* tp_init */
330 0, /* tp_alloc */
331 PyType_GenericNew, /* tp_new */
332 };
333
334 PyDoc_STRVAR(BufferSegments__doc__,
335 "BufferSegments - Represents segments/offsets within a BufferWithSegments\n"
336 );
337
338 static void BufferSegments_dealloc(ZstdBufferSegments* self) {
339 Py_CLEAR(self->parent);
340 PyObject_Del(self);
341 }
342
343 #if PY_MAJOR_VERSION >= 3
344 static int BufferSegments_getbuffer(ZstdBufferSegments* self, Py_buffer* view, int flags) {
345 return PyBuffer_FillInfo(view, (PyObject*)self,
346 (void*)self->segments, self->segmentCount * sizeof(BufferSegment),
347 1, flags);
348 }
349 #else
350 static Py_ssize_t BufferSegments_getreadbuffer(ZstdBufferSegments* self, Py_ssize_t segment, void **ptrptr) {
351 if (segment != 0) {
352 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
353 return -1;
354 }
355
356 *ptrptr = (void*)self->segments;
357 return self->segmentCount * sizeof(BufferSegment);
358 }
359
360 static Py_ssize_t BufferSegments_getsegcount(ZstdBufferSegments* self, Py_ssize_t* len) {
361 if (len) {
362 *len = 1;
363 }
364
365 return 1;
366 }
367 #endif
368
369 static PyBufferProcs BufferSegments_as_buffer = {
370 #if PY_MAJOR_VERSION >= 3
371 (getbufferproc)BufferSegments_getbuffer,
372 0
373 #else
374 (readbufferproc)BufferSegments_getreadbuffer,
375 0,
376 (segcountproc)BufferSegments_getsegcount,
377 0
378 #endif
379 };
380
381 PyTypeObject ZstdBufferSegmentsType = {
382 PyVarObject_HEAD_INIT(NULL, 0)
383 "zstd.BufferSegments", /* tp_name */
384 sizeof(ZstdBufferSegments),/* tp_basicsize */
385 0, /* tp_itemsize */
386 (destructor)BufferSegments_dealloc, /* tp_dealloc */
387 0, /* tp_print */
388 0, /* tp_getattr */
389 0, /* tp_setattr */
390 0, /* tp_compare */
391 0, /* tp_repr */
392 0, /* tp_as_number */
393 0, /* tp_as_sequence */
394 0, /* tp_as_mapping */
395 0, /* tp_hash */
396 0, /* tp_call */
397 0, /* tp_str */
398 0, /* tp_getattro */
399 0, /* tp_setattro */
400 &BufferSegments_as_buffer, /* tp_as_buffer */
401 Py_TPFLAGS_DEFAULT, /* tp_flags */
402 BufferSegments__doc__, /* tp_doc */
403 0, /* tp_traverse */
404 0, /* tp_clear */
405 0, /* tp_richcompare */
406 0, /* tp_weaklistoffset */
407 0, /* tp_iter */
408 0, /* tp_iternext */
409 0, /* tp_methods */
410 0, /* tp_members */
411 0, /* tp_getset */
412 0, /* tp_base */
413 0, /* tp_dict */
414 0, /* tp_descr_get */
415 0, /* tp_descr_set */
416 0, /* tp_dictoffset */
417 0, /* tp_init */
418 0, /* tp_alloc */
419 PyType_GenericNew, /* tp_new */
420 };
421
422 PyDoc_STRVAR(BufferSegment__doc__,
423 "BufferSegment - Represents a segment within a BufferWithSegments\n"
424 );
425
426 static void BufferSegment_dealloc(ZstdBufferSegment* self) {
427 Py_CLEAR(self->parent);
428 PyObject_Del(self);
429 }
430
431 static Py_ssize_t BufferSegment_length(ZstdBufferSegment* self) {
432 return self->dataSize;
433 }
434
435 #if PY_MAJOR_VERSION >= 3
436 static int BufferSegment_getbuffer(ZstdBufferSegment* self, Py_buffer* view, int flags) {
437 return PyBuffer_FillInfo(view, (PyObject*)self,
438 self->data, self->dataSize, 1, flags);
439 }
440 #else
441 static Py_ssize_t BufferSegment_getreadbuffer(ZstdBufferSegment* self, Py_ssize_t segment, void **ptrptr) {
442 if (segment != 0) {
443 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
444 return -1;
445 }
446
447 *ptrptr = self->data;
448 return self->dataSize;
449 }
450
451 static Py_ssize_t BufferSegment_getsegcount(ZstdBufferSegment* self, Py_ssize_t* len) {
452 if (len) {
453 *len = 1;
454 }
455
456 return 1;
457 }
458 #endif
459
460 PyDoc_STRVAR(BufferSegment_tobytes__doc__,
461 "Obtain a bytes instance for this segment.\n"
462 );
463
464 static PyObject* BufferSegment_tobytes(ZstdBufferSegment* self) {
465 return PyBytes_FromStringAndSize(self->data, self->dataSize);
466 }
467
468 static PySequenceMethods BufferSegment_sq = {
469 (lenfunc)BufferSegment_length, /* sq_length */
470 0, /* sq_concat */
471 0, /* sq_repeat */
472 0, /* sq_item */
473 0, /* sq_ass_item */
474 0, /* sq_contains */
475 0, /* sq_inplace_concat */
476 0 /* sq_inplace_repeat */
477 };
478
479 static PyBufferProcs BufferSegment_as_buffer = {
480 #if PY_MAJOR_VERSION >= 3
481 (getbufferproc)BufferSegment_getbuffer,
482 0
483 #else
484 (readbufferproc)BufferSegment_getreadbuffer,
485 0,
486 (segcountproc)BufferSegment_getsegcount,
487 0
488 #endif
489 };
490
491 static PyMethodDef BufferSegment_methods[] = {
492 { "tobytes", (PyCFunction)BufferSegment_tobytes,
493 METH_NOARGS, BufferSegment_tobytes__doc__ },
494 { NULL, NULL }
495 };
496
497 static PyMemberDef BufferSegment_members[] = {
498 { "offset", T_ULONGLONG, offsetof(ZstdBufferSegment, offset), READONLY,
499 "offset of segment within parent buffer" },
500 { NULL }
501 };
502
503 PyTypeObject ZstdBufferSegmentType = {
504 PyVarObject_HEAD_INIT(NULL, 0)
505 "zstd.BufferSegment", /* tp_name */
506 sizeof(ZstdBufferSegment),/* tp_basicsize */
507 0, /* tp_itemsize */
508 (destructor)BufferSegment_dealloc, /* tp_dealloc */
509 0, /* tp_print */
510 0, /* tp_getattr */
511 0, /* tp_setattr */
512 0, /* tp_compare */
513 0, /* tp_repr */
514 0, /* tp_as_number */
515 &BufferSegment_sq, /* tp_as_sequence */
516 0, /* tp_as_mapping */
517 0, /* tp_hash */
518 0, /* tp_call */
519 0, /* tp_str */
520 0, /* tp_getattro */
521 0, /* tp_setattro */
522 &BufferSegment_as_buffer, /* tp_as_buffer */
523 Py_TPFLAGS_DEFAULT, /* tp_flags */
524 BufferSegment__doc__, /* tp_doc */
525 0, /* tp_traverse */
526 0, /* tp_clear */
527 0, /* tp_richcompare */
528 0, /* tp_weaklistoffset */
529 0, /* tp_iter */
530 0, /* tp_iternext */
531 BufferSegment_methods, /* tp_methods */
532 BufferSegment_members, /* tp_members */
533 0, /* tp_getset */
534 0, /* tp_base */
535 0, /* tp_dict */
536 0, /* tp_descr_get */
537 0, /* tp_descr_set */
538 0, /* tp_dictoffset */
539 0, /* tp_init */
540 0, /* tp_alloc */
541 PyType_GenericNew, /* tp_new */
542 };
543
544 PyDoc_STRVAR(BufferWithSegmentsCollection__doc__,
545 "Represents a collection of BufferWithSegments.\n"
546 );
547
548 static void BufferWithSegmentsCollection_dealloc(ZstdBufferWithSegmentsCollection* self) {
549 Py_ssize_t i;
550
551 if (self->firstElements) {
552 PyMem_Free(self->firstElements);
553 self->firstElements = NULL;
554 }
555
556 if (self->buffers) {
557 for (i = 0; i < self->bufferCount; i++) {
558 Py_CLEAR(self->buffers[i]);
559 }
560
561 PyMem_Free(self->buffers);
562 self->buffers = NULL;
563 }
564
565 PyObject_Del(self);
566 }
567
568 static int BufferWithSegmentsCollection_init(ZstdBufferWithSegmentsCollection* self, PyObject* args) {
569 Py_ssize_t size;
570 Py_ssize_t i;
571 Py_ssize_t offset = 0;
572
573 size = PyTuple_Size(args);
574 if (-1 == size) {
575 return -1;
576 }
577
578 if (0 == size) {
579 PyErr_SetString(PyExc_ValueError, "must pass at least 1 argument");
580 return -1;
581 }
582
583 for (i = 0; i < size; i++) {
584 PyObject* item = PyTuple_GET_ITEM(args, i);
585 if (!PyObject_TypeCheck(item, &ZstdBufferWithSegmentsType)) {
586 PyErr_SetString(PyExc_TypeError, "arguments must be BufferWithSegments instances");
587 return -1;
588 }
589
590 if (0 == ((ZstdBufferWithSegments*)item)->segmentCount ||
591 0 == ((ZstdBufferWithSegments*)item)->dataSize) {
592 PyErr_SetString(PyExc_ValueError, "ZstdBufferWithSegments cannot be empty");
593 return -1;
594 }
595 }
596
597 self->buffers = PyMem_Malloc(size * sizeof(ZstdBufferWithSegments*));
598 if (NULL == self->buffers) {
599 PyErr_NoMemory();
600 return -1;
601 }
602
603 self->firstElements = PyMem_Malloc(size * sizeof(Py_ssize_t));
604 if (NULL == self->firstElements) {
605 PyMem_Free(self->buffers);
606 self->buffers = NULL;
607 PyErr_NoMemory();
608 return -1;
609 }
610
611 self->bufferCount = size;
612
613 for (i = 0; i < size; i++) {
614 ZstdBufferWithSegments* item = (ZstdBufferWithSegments*)PyTuple_GET_ITEM(args, i);
615
616 self->buffers[i] = item;
617 Py_INCREF(item);
618
619 if (i > 0) {
620 self->firstElements[i - 1] = offset;
621 }
622
623 offset += item->segmentCount;
624 }
625
626 self->firstElements[size - 1] = offset;
627
628 return 0;
629 }
630
631 static PyObject* BufferWithSegmentsCollection_size(ZstdBufferWithSegmentsCollection* self) {
632 Py_ssize_t i;
633 Py_ssize_t j;
634 unsigned long long size = 0;
635
636 for (i = 0; i < self->bufferCount; i++) {
637 for (j = 0; j < self->buffers[i]->segmentCount; j++) {
638 size += self->buffers[i]->segments[j].length;
639 }
640 }
641
642 return PyLong_FromUnsignedLongLong(size);
643 }
644
645 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection* self) {
646 return self->firstElements[self->bufferCount - 1];
647 }
648
649 static ZstdBufferSegment* BufferWithSegmentsCollection_item(ZstdBufferWithSegmentsCollection* self, Py_ssize_t i) {
650 Py_ssize_t bufferOffset;
651
652 if (i < 0) {
653 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
654 return NULL;
655 }
656
657 if (i >= BufferWithSegmentsCollection_length(self)) {
658 PyErr_Format(PyExc_IndexError, "offset must be less than %zd",
659 BufferWithSegmentsCollection_length(self));
660 return NULL;
661 }
662
663 for (bufferOffset = 0; bufferOffset < self->bufferCount; bufferOffset++) {
664 Py_ssize_t offset = 0;
665
666 if (i < self->firstElements[bufferOffset]) {
667 if (bufferOffset > 0) {
668 offset = self->firstElements[bufferOffset - 1];
669 }
670
671 return BufferWithSegments_item(self->buffers[bufferOffset], i - offset);
672 }
673 }
674
675 PyErr_SetString(ZstdError, "error resolving segment; this should not happen");
676 return NULL;
677 }
678
679 static PySequenceMethods BufferWithSegmentsCollection_sq = {
680 (lenfunc)BufferWithSegmentsCollection_length, /* sq_length */
681 0, /* sq_concat */
682 0, /* sq_repeat */
683 (ssizeargfunc)BufferWithSegmentsCollection_item, /* sq_item */
684 0, /* sq_ass_item */
685 0, /* sq_contains */
686 0, /* sq_inplace_concat */
687 0 /* sq_inplace_repeat */
688 };
689
690 static PyMethodDef BufferWithSegmentsCollection_methods[] = {
691 { "size", (PyCFunction)BufferWithSegmentsCollection_size,
692 METH_NOARGS, PyDoc_STR("total size in bytes of all segments") },
693 { NULL, NULL }
694 };
695
696 PyTypeObject ZstdBufferWithSegmentsCollectionType = {
697 PyVarObject_HEAD_INIT(NULL, 0)
698 "zstd.BufferWithSegmentsCollection", /* tp_name */
699 sizeof(ZstdBufferWithSegmentsCollection),/* tp_basicsize */
700 0, /* tp_itemsize */
701 (destructor)BufferWithSegmentsCollection_dealloc, /* tp_dealloc */
702 0, /* tp_print */
703 0, /* tp_getattr */
704 0, /* tp_setattr */
705 0, /* tp_compare */
706 0, /* tp_repr */
707 0, /* tp_as_number */
708 &BufferWithSegmentsCollection_sq, /* tp_as_sequence */
709 0, /* tp_as_mapping */
710 0, /* tp_hash */
711 0, /* tp_call */
712 0, /* tp_str */
713 0, /* tp_getattro */
714 0, /* tp_setattro */
715 0, /* tp_as_buffer */
716 Py_TPFLAGS_DEFAULT, /* tp_flags */
717 BufferWithSegmentsCollection__doc__, /* tp_doc */
718 0, /* tp_traverse */
719 0, /* tp_clear */
720 0, /* tp_richcompare */
721 0, /* tp_weaklistoffset */
722 /* TODO implement iterator for performance. */
723 0, /* tp_iter */
724 0, /* tp_iternext */
725 BufferWithSegmentsCollection_methods, /* tp_methods */
726 0, /* tp_members */
727 0, /* tp_getset */
728 0, /* tp_base */
729 0, /* tp_dict */
730 0, /* tp_descr_get */
731 0, /* tp_descr_set */
732 0, /* tp_dictoffset */
733 (initproc)BufferWithSegmentsCollection_init, /* tp_init */
734 0, /* tp_alloc */
735 PyType_GenericNew, /* tp_new */
736 };
737
738 void bufferutil_module_init(PyObject* mod) {
739 Py_TYPE(&ZstdBufferWithSegmentsType) = &PyType_Type;
740 if (PyType_Ready(&ZstdBufferWithSegmentsType) < 0) {
741 return;
742 }
743
744 Py_INCREF(&ZstdBufferWithSegmentsType);
745 PyModule_AddObject(mod, "BufferWithSegments", (PyObject*)&ZstdBufferWithSegmentsType);
746
747 Py_TYPE(&ZstdBufferSegmentsType) = &PyType_Type;
748 if (PyType_Ready(&ZstdBufferSegmentsType) < 0) {
749 return;
750 }
751
752 Py_INCREF(&ZstdBufferSegmentsType);
753 PyModule_AddObject(mod, "BufferSegments", (PyObject*)&ZstdBufferSegmentsType);
754
755 Py_TYPE(&ZstdBufferSegmentType) = &PyType_Type;
756 if (PyType_Ready(&ZstdBufferSegmentType) < 0) {
757 return;
758 }
759
760 Py_INCREF(&ZstdBufferSegmentType);
761 PyModule_AddObject(mod, "BufferSegment", (PyObject*)&ZstdBufferSegmentType);
762
763 Py_TYPE(&ZstdBufferWithSegmentsCollectionType) = &PyType_Type;
764 if (PyType_Ready(&ZstdBufferWithSegmentsCollectionType) < 0) {
765 return;
766 }
767
768 Py_INCREF(&ZstdBufferWithSegmentsCollectionType);
769 PyModule_AddObject(mod, "BufferWithSegmentsCollection", (PyObject*)&ZstdBufferWithSegmentsCollectionType);
770 }
@@ -0,0 +1,132 b''
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8
9 #include "python-zstandard.h"
10
11 extern PyObject* ZstdError;
12
13 PyDoc_STRVAR(FrameParameters__doc__,
14 "FrameParameters: information about a zstd frame");
15
16 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args) {
17 const char* source;
18 Py_ssize_t sourceSize;
19 ZSTD_frameParams params;
20 FrameParametersObject* result = NULL;
21 size_t zresult;
22
23 #if PY_MAJOR_VERSION >= 3
24 if (!PyArg_ParseTuple(args, "y#:get_frame_parameters",
25 #else
26 if (!PyArg_ParseTuple(args, "s#:get_frame_parameters",
27 #endif
28 &source, &sourceSize)) {
29 return NULL;
30 }
31
32 /* Needed for Python 2 to reject unicode */
33 if (!PyBytes_Check(PyTuple_GET_ITEM(args, 0))) {
34 PyErr_SetString(PyExc_TypeError, "argument must be bytes");
35 return NULL;
36 }
37
38 zresult = ZSTD_getFrameParams(&params, (void*)source, sourceSize);
39
40 if (ZSTD_isError(zresult)) {
41 PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult));
42 return NULL;
43 }
44
45 if (zresult) {
46 PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult);
47 return NULL;
48 }
49
50 result = PyObject_New(FrameParametersObject, &FrameParametersType);
51 if (!result) {
52 return NULL;
53 }
54
55 result->frameContentSize = params.frameContentSize;
56 result->windowSize = params.windowSize;
57 result->dictID = params.dictID;
58 result->checksumFlag = params.checksumFlag ? 1 : 0;
59
60 return result;
61 }
62
63 static void FrameParameters_dealloc(PyObject* self) {
64 PyObject_Del(self);
65 }
66
67 static PyMemberDef FrameParameters_members[] = {
68 { "content_size", T_ULONGLONG,
69 offsetof(FrameParametersObject, frameContentSize), READONLY,
70 "frame content size" },
71 { "window_size", T_UINT,
72 offsetof(FrameParametersObject, windowSize), READONLY,
73 "window size" },
74 { "dict_id", T_UINT,
75 offsetof(FrameParametersObject, dictID), READONLY,
76 "dictionary ID" },
77 { "has_checksum", T_BOOL,
78 offsetof(FrameParametersObject, checksumFlag), READONLY,
79 "checksum flag" },
80 { NULL }
81 };
82
83 PyTypeObject FrameParametersType = {
84 PyVarObject_HEAD_INIT(NULL, 0)
85 "FrameParameters", /* tp_name */
86 sizeof(FrameParametersObject), /* tp_basicsize */
87 0, /* tp_itemsize */
88 (destructor)FrameParameters_dealloc, /* tp_dealloc */
89 0, /* tp_print */
90 0, /* tp_getattr */
91 0, /* tp_setattr */
92 0, /* tp_compare */
93 0, /* tp_repr */
94 0, /* tp_as_number */
95 0, /* tp_as_sequence */
96 0, /* tp_as_mapping */
97 0, /* tp_hash */
98 0, /* tp_call */
99 0, /* tp_str */
100 0, /* tp_getattro */
101 0, /* tp_setattro */
102 0, /* tp_as_buffer */
103 Py_TPFLAGS_DEFAULT, /* tp_flags */
104 FrameParameters__doc__, /* tp_doc */
105 0, /* tp_traverse */
106 0, /* tp_clear */
107 0, /* tp_richcompare */
108 0, /* tp_weaklistoffset */
109 0, /* tp_iter */
110 0, /* tp_iternext */
111 0, /* tp_methods */
112 FrameParameters_members, /* tp_members */
113 0, /* tp_getset */
114 0, /* tp_base */
115 0, /* tp_dict */
116 0, /* tp_descr_get */
117 0, /* tp_descr_set */
118 0, /* tp_dictoffset */
119 0, /* tp_init */
120 0, /* tp_alloc */
121 0, /* tp_new */
122 };
123
124 void frameparams_module_init(PyObject* mod) {
125 Py_TYPE(&FrameParametersType) = &PyType_Type;
126 if (PyType_Ready(&FrameParametersType) < 0) {
127 return;
128 }
129
130 Py_INCREF(&FrameParametersType);
131 PyModule_AddObject(mod, "FrameParameters", (PyObject*)&FrameParametersType);
132 }
@@ -0,0 +1,112 b''
1 import struct
2
3 try:
4 import unittest2 as unittest
5 except ImportError:
6 import unittest
7
8 import zstd
9
# Packs/unpacks a single (offset, length) segment record: two native-order,
# standard-size unsigned 64-bit integers (16 bytes total).
ss = struct.Struct('=QQ')
11
12
class TestBufferWithSegments(unittest.TestCase):
    def test_arguments(self):
        # Both constructor arguments are required.
        with self.assertRaises(TypeError):
            zstd.BufferWithSegments()

        with self.assertRaises(TypeError):
            zstd.BufferWithSegments(b'foo')

        # Segments data should be a multiple of 16.
        with self.assertRaisesRegexp(ValueError, 'segments array size is not a multiple of 16'):
            zstd.BufferWithSegments(b'foo', b'\x00\x00')

    def test_invalid_offset(self):
        # A segment extending past the end of the data must be rejected.
        with self.assertRaisesRegexp(ValueError, 'offset within segments array references memory'):
            zstd.BufferWithSegments(b'foo', ss.pack(0, 4))

    def test_invalid_getitem(self):
        b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))

        with self.assertRaisesRegexp(IndexError, 'offset must be non-negative'):
            test = b[-10]

        with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'):
            test = b[1]

        with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'):
            test = b[2]

    def test_single(self):
        b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))

        self.assertEqual(len(b), 1)
        self.assertEqual(b.size, 3)
        self.assertEqual(b.tobytes(), b'foo')

        # Index 0 exposes the lone segment.
        self.assertEqual(len(b[0]), 3)
        self.assertEqual(b[0].offset, 0)
        self.assertEqual(b[0].tobytes(), b'foo')

    def test_multiple(self):
        segments = b''.join([ss.pack(0, 3), ss.pack(3, 4), ss.pack(7, 5)])
        b = zstd.BufferWithSegments(b'foofooxfooxy', segments)

        self.assertEqual(len(b), 3)
        self.assertEqual(b.size, 12)
        self.assertEqual(b.tobytes(), b'foofooxfooxy')

        self.assertEqual(b[0].tobytes(), b'foo')
        self.assertEqual(b[1].tobytes(), b'foox')
        self.assertEqual(b[2].tobytes(), b'fooxy')
63
class TestBufferWithSegmentsCollection(unittest.TestCase):
    def test_empty_constructor(self):
        with self.assertRaisesRegexp(ValueError, 'must pass at least 1 argument'):
            zstd.BufferWithSegmentsCollection()

    def test_argument_validation(self):
        # Every positional argument must be a BufferWithSegments.
        with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
            zstd.BufferWithSegmentsCollection(None)

        with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
            zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'foo', ss.pack(0, 3)),
                                              None)

        # Empty buffers are rejected outright.
        with self.assertRaisesRegexp(ValueError, 'ZstdBufferWithSegments cannot be empty'):
            zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b''))

    def test_length(self):
        b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
        b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
                                                          ss.pack(3, 3)]))

        c = zstd.BufferWithSegmentsCollection(b1)
        self.assertEqual(len(c), 1)
        self.assertEqual(c.size(), 3)

        c = zstd.BufferWithSegmentsCollection(b2)
        self.assertEqual(len(c), 2)
        self.assertEqual(c.size(), 6)

        # len() counts segments across all buffers; size() sums their bytes.
        c = zstd.BufferWithSegmentsCollection(b1, b2)
        self.assertEqual(len(c), 3)
        self.assertEqual(c.size(), 9)

    def test_getitem(self):
        b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
        b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
                                                          ss.pack(3, 3)]))

        c = zstd.BufferWithSegmentsCollection(b1, b2)

        with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'):
            c[3]

        with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'):
            c[4]

        # Global indexes resolve across buffer boundaries.
        self.assertEqual(c[0].tobytes(), b'foo')
        self.assertEqual(c[1].tobytes(), b'bar')
        self.assertEqual(c[2].tobytes(), b'baz')
@@ -0,0 +1,143 b''
1 import io
2 import os
3
4 try:
5 import unittest2 as unittest
6 except ImportError:
7 import unittest
8
9 try:
10 import hypothesis
11 import hypothesis.strategies as strategies
12 except ImportError:
13 raise unittest.SkipTest('hypothesis not available')
14
15 import zstd
16
17 from . common import (
18 make_cffi,
19 random_input_data,
20 )
21
22
23 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
24 @make_cffi
25 class TestCompressor_write_to_fuzzing(unittest.TestCase):
26 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
27 level=strategies.integers(min_value=1, max_value=5),
28 write_size=strategies.integers(min_value=1, max_value=1048576))
29 def test_write_size_variance(self, original, level, write_size):
30 refctx = zstd.ZstdCompressor(level=level)
31 ref_frame = refctx.compress(original)
32
33 cctx = zstd.ZstdCompressor(level=level)
34 b = io.BytesIO()
35 with cctx.write_to(b, size=len(original), write_size=write_size) as compressor:
36 compressor.write(original)
37
38 self.assertEqual(b.getvalue(), ref_frame)
39
40
41 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
42 @make_cffi
43 class TestCompressor_copy_stream_fuzzing(unittest.TestCase):
44 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
45 level=strategies.integers(min_value=1, max_value=5),
46 read_size=strategies.integers(min_value=1, max_value=1048576),
47 write_size=strategies.integers(min_value=1, max_value=1048576))
48 def test_read_write_size_variance(self, original, level, read_size, write_size):
49 refctx = zstd.ZstdCompressor(level=level)
50 ref_frame = refctx.compress(original)
51
52 cctx = zstd.ZstdCompressor(level=level)
53 source = io.BytesIO(original)
54 dest = io.BytesIO()
55
56 cctx.copy_stream(source, dest, size=len(original), read_size=read_size,
57 write_size=write_size)
58
59 self.assertEqual(dest.getvalue(), ref_frame)
60
61
62 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
63 @make_cffi
64 class TestCompressor_compressobj_fuzzing(unittest.TestCase):
65 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
66 level=strategies.integers(min_value=1, max_value=5),
67 chunk_sizes=strategies.streaming(
68 strategies.integers(min_value=1, max_value=4096)))
69 def test_random_input_sizes(self, original, level, chunk_sizes):
70 chunk_sizes = iter(chunk_sizes)
71
72 refctx = zstd.ZstdCompressor(level=level)
73 ref_frame = refctx.compress(original)
74
75 cctx = zstd.ZstdCompressor(level=level)
76 cobj = cctx.compressobj(size=len(original))
77
78 chunks = []
79 i = 0
80 while True:
81 chunk_size = next(chunk_sizes)
82 source = original[i:i + chunk_size]
83 if not source:
84 break
85
86 chunks.append(cobj.compress(source))
87 i += chunk_size
88
89 chunks.append(cobj.flush())
90
91 self.assertEqual(b''.join(chunks), ref_frame)
92
93
94 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
95 @make_cffi
96 class TestCompressor_read_from_fuzzing(unittest.TestCase):
97 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
98 level=strategies.integers(min_value=1, max_value=5),
99 read_size=strategies.integers(min_value=1, max_value=4096),
100 write_size=strategies.integers(min_value=1, max_value=4096))
101 def test_read_write_size_variance(self, original, level, read_size, write_size):
102 refcctx = zstd.ZstdCompressor(level=level)
103 ref_frame = refcctx.compress(original)
104
105 source = io.BytesIO(original)
106
107 cctx = zstd.ZstdCompressor(level=level)
108 chunks = list(cctx.read_from(source, size=len(original), read_size=read_size,
109 write_size=write_size))
110
111 self.assertEqual(b''.join(chunks), ref_frame)
112
113
114 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
115 class TestCompressor_multi_compress_to_buffer_fuzzing(unittest.TestCase):
116 @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()),
117 min_size=1, max_size=1024),
118 threads=strategies.integers(min_value=1, max_value=8),
119 use_dict=strategies.booleans())
120 def test_data_equivalence(self, original, threads, use_dict):
121 kwargs = {}
122
123 # Use a content dictionary because it is cheap to create.
124 if use_dict:
125 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
126
127 cctx = zstd.ZstdCompressor(level=1,
128 write_content_size=True,
129 write_checksum=True,
130 **kwargs)
131
132 result = cctx.multi_compress_to_buffer(original, threads=-1)
133
134 self.assertEqual(len(result), len(original))
135
136 # The frame produced via the batch APIs may not be bit identical to that
137 # produced by compress() because compression parameters are adjusted
138 # from the first input in batch mode. So the only thing we can do is
139 # verify the decompressed data matches the input.
140 dctx = zstd.ZstdDecompressor(**kwargs)
141
142 for i, frame in enumerate(result):
143 self.assertEqual(dctx.decompress(frame), original[i])
@@ -0,0 +1,79 b''
1 import io
2 import os
3
4 try:
5 import unittest2 as unittest
6 except ImportError:
7 import unittest
8
9 try:
10 import hypothesis
11 import hypothesis.strategies as strategies
12 except ImportError:
13 raise unittest.SkipTest('hypothesis not available')
14
15 import zstd
16
17 from .common import (
18 make_cffi,
19 )
20
21
22 s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
23 max_value=zstd.WINDOWLOG_MAX)
24 s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN,
25 max_value=zstd.CHAINLOG_MAX)
26 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN,
27 max_value=zstd.HASHLOG_MAX)
28 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
29 max_value=zstd.SEARCHLOG_MAX)
30 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
31 max_value=zstd.SEARCHLENGTH_MAX)
32 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
33 max_value=zstd.TARGETLENGTH_MAX)
34 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
35 zstd.STRATEGY_DFAST,
36 zstd.STRATEGY_GREEDY,
37 zstd.STRATEGY_LAZY,
38 zstd.STRATEGY_LAZY2,
39 zstd.STRATEGY_BTLAZY2,
40 zstd.STRATEGY_BTOPT))
41
42
43 @make_cffi
44 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
45 class TestCompressionParametersHypothesis(unittest.TestCase):
46 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
47 s_searchlength, s_targetlength, s_strategy)
48 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
49 searchlength, targetlength, strategy):
50 # ZSTD_checkCParams moves the goal posts on us from what's advertised
51 # in the constants. So move along with them.
52 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
53 searchlength += 1
54 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
55 searchlength -= 1
56
57 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
58 searchlog, searchlength,
59 targetlength, strategy)
60
61 cctx = zstd.ZstdCompressor(compression_params=p)
62 with cctx.write_to(io.BytesIO()):
63 pass
64
65 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
66 s_searchlength, s_targetlength, s_strategy)
67 def test_estimate_compression_context_size(self, windowlog, chainlog,
68 hashlog, searchlog,
69 searchlength, targetlength,
70 strategy):
71 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
72 searchlength += 1
73 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
74 searchlength -= 1
75
76 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
77 searchlog, searchlength,
78 targetlength, strategy)
79 size = zstd.estimate_compression_context_size(p)
@@ -0,0 +1,151 b''
1 import io
2 import os
3
4 try:
5 import unittest2 as unittest
6 except ImportError:
7 import unittest
8
9 try:
10 import hypothesis
11 import hypothesis.strategies as strategies
12 except ImportError:
13 raise unittest.SkipTest('hypothesis not available')
14
15 import zstd
16
17 from . common import (
18 make_cffi,
19 random_input_data,
20 )
21
22
23 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
24 @make_cffi
25 class TestDecompressor_write_to_fuzzing(unittest.TestCase):
26 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
27 level=strategies.integers(min_value=1, max_value=5),
28 write_size=strategies.integers(min_value=1, max_value=8192),
29 input_sizes=strategies.streaming(
30 strategies.integers(min_value=1, max_value=4096)))
31 def test_write_size_variance(self, original, level, write_size, input_sizes):
32 input_sizes = iter(input_sizes)
33
34 cctx = zstd.ZstdCompressor(level=level)
35 frame = cctx.compress(original)
36
37 dctx = zstd.ZstdDecompressor()
38 source = io.BytesIO(frame)
39 dest = io.BytesIO()
40
41 with dctx.write_to(dest, write_size=write_size) as decompressor:
42 while True:
43 chunk = source.read(next(input_sizes))
44 if not chunk:
45 break
46
47 decompressor.write(chunk)
48
49 self.assertEqual(dest.getvalue(), original)
50
51
52 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
53 @make_cffi
54 class TestDecompressor_copy_stream_fuzzing(unittest.TestCase):
55 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
56 level=strategies.integers(min_value=1, max_value=5),
57 read_size=strategies.integers(min_value=1, max_value=8192),
58 write_size=strategies.integers(min_value=1, max_value=8192))
59 def test_read_write_size_variance(self, original, level, read_size, write_size):
60 cctx = zstd.ZstdCompressor(level=level)
61 frame = cctx.compress(original)
62
63 source = io.BytesIO(frame)
64 dest = io.BytesIO()
65
66 dctx = zstd.ZstdDecompressor()
67 dctx.copy_stream(source, dest, read_size=read_size, write_size=write_size)
68
69 self.assertEqual(dest.getvalue(), original)
70
71
72 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
73 @make_cffi
74 class TestDecompressor_decompressobj_fuzzing(unittest.TestCase):
75 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
76 level=strategies.integers(min_value=1, max_value=5),
77 chunk_sizes=strategies.streaming(
78 strategies.integers(min_value=1, max_value=4096)))
79 def test_random_input_sizes(self, original, level, chunk_sizes):
80 chunk_sizes = iter(chunk_sizes)
81
82 cctx = zstd.ZstdCompressor(level=level)
83 frame = cctx.compress(original)
84
85 source = io.BytesIO(frame)
86
87 dctx = zstd.ZstdDecompressor()
88 dobj = dctx.decompressobj()
89
90 chunks = []
91 while True:
92 chunk = source.read(next(chunk_sizes))
93 if not chunk:
94 break
95
96 chunks.append(dobj.decompress(chunk))
97
98 self.assertEqual(b''.join(chunks), original)
99
100
101 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
102 @make_cffi
103 class TestDecompressor_read_from_fuzzing(unittest.TestCase):
104 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
105 level=strategies.integers(min_value=1, max_value=5),
106 read_size=strategies.integers(min_value=1, max_value=4096),
107 write_size=strategies.integers(min_value=1, max_value=4096))
108 def test_read_write_size_variance(self, original, level, read_size, write_size):
109 cctx = zstd.ZstdCompressor(level=level)
110 frame = cctx.compress(original)
111
112 source = io.BytesIO(frame)
113
114 dctx = zstd.ZstdDecompressor()
115 chunks = list(dctx.read_from(source, read_size=read_size, write_size=write_size))
116
117 self.assertEqual(b''.join(chunks), original)
118
119
120 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
121 class TestDecompressor_multi_decompress_to_buffer_fuzzing(unittest.TestCase):
122 @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()),
123 min_size=1, max_size=1024),
124 threads=strategies.integers(min_value=1, max_value=8),
125 use_dict=strategies.booleans())
126 def test_data_equivalence(self, original, threads, use_dict):
127 kwargs = {}
128 if use_dict:
129 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
130
131 cctx = zstd.ZstdCompressor(level=1,
132 write_content_size=True,
133 write_checksum=True,
134 **kwargs)
135
136 frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1)
137
138 dctx = zstd.ZstdDecompressor(**kwargs)
139
140 result = dctx.multi_decompress_to_buffer(frames_buffer)
141
142 self.assertEqual(len(result), len(original))
143 for i, frame in enumerate(result):
144 self.assertEqual(frame.tobytes(), original[i])
145
146 frames_list = [f.tobytes() for f in frames_buffer]
147 result = dctx.multi_decompress_to_buffer(frames_list)
148
149 self.assertEqual(len(result), len(original))
150 for i, frame in enumerate(result):
151 self.assertEqual(frame.tobytes(), original[i])
@@ -0,0 +1,194 b''
1 /**
2 * Copyright (c) 2016-present, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10
11 /* ====== Dependencies ======= */
12 #include <stddef.h> /* size_t */
13 #include <stdlib.h> /* malloc, calloc, free */
14 #include "pool.h"
15
16 /* ====== Compiler specifics ====== */
17 #if defined(_MSC_VER)
18 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
19 #endif
20
21
22 #ifdef ZSTD_MULTITHREAD
23
24 #include "threading.h" /* pthread adaptation */
25
26 /* A job is a function and an opaque argument */
27 typedef struct POOL_job_s {
28 POOL_function function;
29 void *opaque;
30 } POOL_job;
31
32 struct POOL_ctx_s {
33 /* Keep track of the threads */
34 pthread_t *threads;
35 size_t numThreads;
36
37 /* The queue is a circular buffer */
38 POOL_job *queue;
39 size_t queueHead;
40 size_t queueTail;
41 size_t queueSize;
42 /* The mutex protects the queue */
43 pthread_mutex_t queueMutex;
44 /* Condition variable for pushers to wait on when the queue is full */
45 pthread_cond_t queuePushCond;
46 /* Condition variables for poppers to wait on when the queue is empty */
47 pthread_cond_t queuePopCond;
48 /* Indicates if the queue is shutting down */
49 int shutdown;
50 };
51
52 /* POOL_thread() :
53 Work thread for the thread pool.
54 Waits for jobs and executes them.
55 @returns : NULL on failure else non-null.
56 */
57 static void* POOL_thread(void* opaque) {
58 POOL_ctx* const ctx = (POOL_ctx*)opaque;
59 if (!ctx) { return NULL; }
60 for (;;) {
61 /* Lock the mutex and wait for a non-empty queue or until shutdown */
62 pthread_mutex_lock(&ctx->queueMutex);
63 while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) {
64 pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
65 }
66 /* empty => shutting down: so stop */
67 if (ctx->queueHead == ctx->queueTail) {
68 pthread_mutex_unlock(&ctx->queueMutex);
69 return opaque;
70 }
71 /* Pop a job off the queue */
72 { POOL_job const job = ctx->queue[ctx->queueHead];
73 ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
74 /* Unlock the mutex, signal a pusher, and run the job */
75 pthread_mutex_unlock(&ctx->queueMutex);
76 pthread_cond_signal(&ctx->queuePushCond);
77 job.function(job.opaque);
78 }
79 }
80 /* Unreachable */
81 }
82
83 POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
84 POOL_ctx *ctx;
85 /* Check the parameters */
86 if (!numThreads || !queueSize) { return NULL; }
87 /* Allocate the context and zero initialize */
88 ctx = (POOL_ctx *)calloc(1, sizeof(POOL_ctx));
89 if (!ctx) { return NULL; }
90 /* Initialize the job queue.
91 * It needs one extra space since one space is wasted to differentiate empty
92 * and full queues.
93 */
94 ctx->queueSize = queueSize + 1;
95 ctx->queue = (POOL_job *)malloc(ctx->queueSize * sizeof(POOL_job));
96 ctx->queueHead = 0;
97 ctx->queueTail = 0;
98 pthread_mutex_init(&ctx->queueMutex, NULL);
99 pthread_cond_init(&ctx->queuePushCond, NULL);
100 pthread_cond_init(&ctx->queuePopCond, NULL);
101 ctx->shutdown = 0;
102 /* Allocate space for the thread handles */
103 ctx->threads = (pthread_t *)malloc(numThreads * sizeof(pthread_t));
104 ctx->numThreads = 0;
105 /* Check for errors */
106 if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
107 /* Initialize the threads */
108 { size_t i;
109 for (i = 0; i < numThreads; ++i) {
110 if (pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
111 ctx->numThreads = i;
112 POOL_free(ctx);
113 return NULL;
114 } }
115 ctx->numThreads = numThreads;
116 }
117 return ctx;
118 }
119
120 /*! POOL_join() :
121 Shutdown the queue, wake any sleeping threads, and join all of the threads.
122 */
123 static void POOL_join(POOL_ctx *ctx) {
124 /* Shut down the queue */
125 pthread_mutex_lock(&ctx->queueMutex);
126 ctx->shutdown = 1;
127 pthread_mutex_unlock(&ctx->queueMutex);
128 /* Wake up sleeping threads */
129 pthread_cond_broadcast(&ctx->queuePushCond);
130 pthread_cond_broadcast(&ctx->queuePopCond);
131 /* Join all of the threads */
132 { size_t i;
133 for (i = 0; i < ctx->numThreads; ++i) {
134 pthread_join(ctx->threads[i], NULL);
135 } }
136 }
137
138 void POOL_free(POOL_ctx *ctx) {
139 if (!ctx) { return; }
140 POOL_join(ctx);
141 pthread_mutex_destroy(&ctx->queueMutex);
142 pthread_cond_destroy(&ctx->queuePushCond);
143 pthread_cond_destroy(&ctx->queuePopCond);
144 if (ctx->queue) free(ctx->queue);
145 if (ctx->threads) free(ctx->threads);
146 free(ctx);
147 }
148
149 void POOL_add(void *ctxVoid, POOL_function function, void *opaque) {
150 POOL_ctx *ctx = (POOL_ctx *)ctxVoid;
151 if (!ctx) { return; }
152
153 pthread_mutex_lock(&ctx->queueMutex);
154 { POOL_job const job = {function, opaque};
155 /* Wait until there is space in the queue for the new job */
156 size_t newTail = (ctx->queueTail + 1) % ctx->queueSize;
157 while (ctx->queueHead == newTail && !ctx->shutdown) {
158 pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
159 newTail = (ctx->queueTail + 1) % ctx->queueSize;
160 }
161 /* The queue is still going => there is space */
162 if (!ctx->shutdown) {
163 ctx->queue[ctx->queueTail] = job;
164 ctx->queueTail = newTail;
165 }
166 }
167 pthread_mutex_unlock(&ctx->queueMutex);
168 pthread_cond_signal(&ctx->queuePopCond);
169 }
170
171 #else /* ZSTD_MULTITHREAD not defined */
172 /* No multi-threading support */
173
174 /* We don't need any data, but if it is empty malloc() might return NULL. */
175 struct POOL_ctx_s {
176 int data;
177 };
178
179 POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
180 (void)numThreads;
181 (void)queueSize;
182 return (POOL_ctx *)malloc(sizeof(POOL_ctx));
183 }
184
185 void POOL_free(POOL_ctx *ctx) {
186 if (ctx) free(ctx);
187 }
188
189 void POOL_add(void *ctx, POOL_function function, void *opaque) {
190 (void)ctx;
191 function(opaque);
192 }
193
194 #endif /* ZSTD_MULTITHREAD */
@@ -0,0 +1,56 b''
1 /**
2 * Copyright (c) 2016-present, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9 #ifndef POOL_H
10 #define POOL_H
11
12 #if defined (__cplusplus)
13 extern "C" {
14 #endif
15
16
17 #include <stddef.h> /* size_t */
18
19 typedef struct POOL_ctx_s POOL_ctx;
20
21 /*! POOL_create() :
22 Create a thread pool with at most `numThreads` threads.
23 `numThreads` must be at least 1.
24 The maximum number of queued jobs before blocking is `queueSize`.
25 `queueSize` must be at least 1.
26 @return : The POOL_ctx pointer on success else NULL.
27 */
28 POOL_ctx *POOL_create(size_t numThreads, size_t queueSize);
29
30 /*! POOL_free() :
31 Free a thread pool returned by POOL_create().
32 */
33 void POOL_free(POOL_ctx *ctx);
34
35 /*! POOL_function :
36 The function type that can be added to a thread pool.
37 */
38 typedef void (*POOL_function)(void *);
39 /*! POOL_add_function :
40 The function type for a generic thread pool add function.
41 */
42 typedef void (*POOL_add_function)(void *, POOL_function, void *);
43
44 /*! POOL_add() :
45 Add the job `function(opaque)` to the thread pool.
46 Possibly blocks until there is room in the queue.
47 Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
48 */
49 void POOL_add(void *ctx, POOL_function function, void *opaque);
50
51
52 #if defined (__cplusplus)
53 }
54 #endif
55
56 #endif
@@ -0,0 +1,79 b''
1
2 /**
3 * Copyright (c) 2016 Tino Reichardt
4 * All rights reserved.
5 *
6 * This source code is licensed under the BSD-style license found in the
7 * LICENSE file in the root directory of this source tree. An additional grant
8 * of patent rights can be found in the PATENTS file in the same directory.
9 *
10 * You can contact the author at:
11 * - zstdmt source repository: https://github.com/mcmilk/zstdmt
12 */
13
14 /**
15 * This file will hold wrapper for systems, which do not support pthreads
16 */
17
18 /* ====== Compiler specifics ====== */
19 #if defined(_MSC_VER)
20 # pragma warning(disable : 4206) /* disable: C4206: translation unit is empty (when ZSTD_MULTITHREAD is not defined) */
21 #endif
22
23
24 #if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
25
26 /**
27 * Windows minimalist Pthread Wrapper, based on :
28 * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
29 */
30
31
32 /* === Dependencies === */
33 #include <process.h>
34 #include <errno.h>
35 #include "threading.h"
36
37
38 /* === Implementation === */
39
40 static unsigned __stdcall worker(void *arg)
41 {
42 pthread_t* const thread = (pthread_t*) arg;
43 thread->arg = thread->start_routine(thread->arg);
44 return 0;
45 }
46
47 int pthread_create(pthread_t* thread, const void* unused,
48 void* (*start_routine) (void*), void* arg)
49 {
50 (void)unused;
51 thread->arg = arg;
52 thread->start_routine = start_routine;
53 thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL);
54
55 if (!thread->handle)
56 return errno;
57 else
58 return 0;
59 }
60
61 int _pthread_join(pthread_t * thread, void **value_ptr)
62 {
63 DWORD result;
64
65 if (!thread->handle) return 0;
66
67 result = WaitForSingleObject(thread->handle, INFINITE);
68 switch (result) {
69 case WAIT_OBJECT_0:
70 if (value_ptr) *value_ptr = thread->arg;
71 return 0;
72 case WAIT_ABANDONED:
73 return EINVAL;
74 default:
75 return GetLastError();
76 }
77 }
78
79 #endif /* ZSTD_MULTITHREAD */
@@ -0,0 +1,104 b''
1
2 /**
3 * Copyright (c) 2016 Tino Reichardt
4 * All rights reserved.
5 *
6 * This source code is licensed under the BSD-style license found in the
7 * LICENSE file in the root directory of this source tree. An additional grant
8 * of patent rights can be found in the PATENTS file in the same directory.
9 *
10 * You can contact the author at:
11 * - zstdmt source repository: https://github.com/mcmilk/zstdmt
12 */
13
14 #ifndef THREADING_H_938743
15 #define THREADING_H_938743
16
17 #if defined (__cplusplus)
18 extern "C" {
19 #endif
20
21 #if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
22
23 /**
24 * Windows minimalist Pthread Wrapper, based on :
25 * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
26 */
27 #ifdef WINVER
28 # undef WINVER
29 #endif
30 #define WINVER 0x0600
31
32 #ifdef _WIN32_WINNT
33 # undef _WIN32_WINNT
34 #endif
35 #define _WIN32_WINNT 0x0600
36
37 #ifndef WIN32_LEAN_AND_MEAN
38 # define WIN32_LEAN_AND_MEAN
39 #endif
40
41 #include <windows.h>
42
43 /* mutex */
44 #define pthread_mutex_t CRITICAL_SECTION
45 #define pthread_mutex_init(a,b) InitializeCriticalSection((a))
46 #define pthread_mutex_destroy(a) DeleteCriticalSection((a))
47 #define pthread_mutex_lock(a) EnterCriticalSection((a))
48 #define pthread_mutex_unlock(a) LeaveCriticalSection((a))
49
50 /* condition variable */
51 #define pthread_cond_t CONDITION_VARIABLE
52 #define pthread_cond_init(a, b) InitializeConditionVariable((a))
53 #define pthread_cond_destroy(a) /* No delete */
54 #define pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE)
55 #define pthread_cond_signal(a) WakeConditionVariable((a))
56 #define pthread_cond_broadcast(a) WakeAllConditionVariable((a))
57
58 /* pthread_create() and pthread_join() */
59 typedef struct {
60 HANDLE handle;
61 void* (*start_routine)(void*);
62 void* arg;
63 } pthread_t;
64
65 int pthread_create(pthread_t* thread, const void* unused,
66 void* (*start_routine) (void*), void* arg);
67
68 #define pthread_join(a, b) _pthread_join(&(a), (b))
69 int _pthread_join(pthread_t* thread, void** value_ptr);
70
71 /**
72 * add here more wrappers as required
73 */
74
75
76 #elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection mathod */
77 /* === POSIX Systems === */
78 # include <pthread.h>
79
80 #else /* ZSTD_MULTITHREAD not defined */
81 /* No multithreading support */
82
83 #define pthread_mutex_t int /* #define rather than typedef, as sometimes pthread support is implicit, resulting in duplicated symbols */
84 #define pthread_mutex_init(a,b)
85 #define pthread_mutex_destroy(a)
86 #define pthread_mutex_lock(a)
87 #define pthread_mutex_unlock(a)
88
89 #define pthread_cond_t int
90 #define pthread_cond_init(a,b)
91 #define pthread_cond_destroy(a)
92 #define pthread_cond_wait(a,b)
93 #define pthread_cond_signal(a)
94 #define pthread_cond_broadcast(a)
95
96 /* do not use pthread_t */
97
98 #endif /* ZSTD_MULTITHREAD */
99
100 #if defined (__cplusplus)
101 }
102 #endif
103
104 #endif /* THREADING_H_938743 */
This diff has been collapsed as it changes many lines, (740 lines changed) Show them Hide them
@@ -0,0 +1,740 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10
11 /* ====== Tuning parameters ====== */
12 #define ZSTDMT_NBTHREADS_MAX 128
13
14
15 /* ====== Compiler specifics ====== */
16 #if defined(_MSC_VER)
17 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
18 #endif
19
20
21 /* ====== Dependencies ====== */
22 #include <stdlib.h> /* malloc */
23 #include <string.h> /* memcpy */
24 #include "pool.h" /* threadpool */
25 #include "threading.h" /* mutex */
26 #include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
27 #include "zstdmt_compress.h"
28 #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
29 #include "xxhash.h"
30
31
32 /* ====== Debug ====== */
33 #if 0
34
35 # include <stdio.h>
36 # include <unistd.h>
37 # include <sys/times.h>
38 static unsigned g_debugLevel = 3;
39 # define DEBUGLOGRAW(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); }
40 # define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); }
41
42 # define DEBUG_PRINTHEX(l,p,n) { \
43 unsigned debug_u; \
44 for (debug_u=0; debug_u<(n); debug_u++) \
45 DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
46 DEBUGLOGRAW(l, " \n"); \
47 }
48
49 static unsigned long long GetCurrentClockTimeMicroseconds()
50 {
51 static clock_t _ticksPerSecond = 0;
52 if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
53
54 struct tms junk; clock_t newTicks = (clock_t) times(&junk);
55 return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
56 }
57
58 #define MUTEX_WAIT_TIME_DLEVEL 5
59 #define PTHREAD_MUTEX_LOCK(mutex) \
60 if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \
61 unsigned long long beforeTime = GetCurrentClockTimeMicroseconds(); \
62 pthread_mutex_lock(mutex); \
63 unsigned long long afterTime = GetCurrentClockTimeMicroseconds(); \
64 unsigned long long elapsedTime = (afterTime-beforeTime); \
65 if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
66 DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
67 elapsedTime, #mutex); \
68 } \
69 } else pthread_mutex_lock(mutex);
70
71 #else
72
73 # define DEBUGLOG(l, ...) {} /* disabled */
74 # define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m)
75 # define DEBUG_PRINTHEX(l,p,n) {}
76
77 #endif
78
79
80 /* ===== Buffer Pool ===== */
81
82 typedef struct buffer_s {
83 void* start;
84 size_t size;
85 } buffer_t;
86
87 static const buffer_t g_nullBuffer = { NULL, 0 };
88
89 typedef struct ZSTDMT_bufferPool_s {
90 unsigned totalBuffers;
91 unsigned nbBuffers;
92 buffer_t bTable[1]; /* variable size */
93 } ZSTDMT_bufferPool;
94
95 static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads)
96 {
97 unsigned const maxNbBuffers = 2*nbThreads + 2;
98 ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)calloc(1, sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t));
99 if (bufPool==NULL) return NULL;
100 bufPool->totalBuffers = maxNbBuffers;
101 bufPool->nbBuffers = 0;
102 return bufPool;
103 }
104
105 static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
106 {
107 unsigned u;
108 if (!bufPool) return; /* compatibility with free on NULL */
109 for (u=0; u<bufPool->totalBuffers; u++)
110 free(bufPool->bTable[u].start);
111 free(bufPool);
112 }
113
114 /* assumption : invocation from main thread only ! */
115 static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
116 {
117 if (pool->nbBuffers) { /* try to use an existing buffer */
118 buffer_t const buf = pool->bTable[--(pool->nbBuffers)];
119 size_t const availBufferSize = buf.size;
120 if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) /* large enough, but not too much */
121 return buf;
122 free(buf.start); /* size conditions not respected : scratch this buffer and create a new one */
123 }
124 /* create new buffer */
125 { buffer_t buffer;
126 void* const start = malloc(bSize);
127 if (start==NULL) bSize = 0;
128 buffer.start = start; /* note : start can be NULL if malloc fails ! */
129 buffer.size = bSize;
130 return buffer;
131 }
132 }
133
134 /* store buffer for later re-use, up to pool capacity */
135 static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
136 {
137 if (buf.start == NULL) return; /* release on NULL */
138 if (pool->nbBuffers < pool->totalBuffers) {
139 pool->bTable[pool->nbBuffers++] = buf; /* store for later re-use */
140 return;
141 }
142 /* Reached bufferPool capacity (should not happen) */
143 free(buf.start);
144 }
145
146
147 /* ===== CCtx Pool ===== */
148
149 typedef struct {
150 unsigned totalCCtx;
151 unsigned availCCtx;
152 ZSTD_CCtx* cctx[1]; /* variable size */
153 } ZSTDMT_CCtxPool;
154
155 /* assumption : CCtxPool invocation only from main thread */
156
157 /* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
158 static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
159 {
160 unsigned u;
161 for (u=0; u<pool->totalCCtx; u++)
162 ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */
163 free(pool);
164 }
165
166 /* ZSTDMT_createCCtxPool() :
167 * implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */
168 static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads)
169 {
170 ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*));
171 if (!cctxPool) return NULL;
172 cctxPool->totalCCtx = nbThreads;
173 cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
174 cctxPool->cctx[0] = ZSTD_createCCtx();
175 if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
176 DEBUGLOG(1, "cctxPool created, with %u threads", nbThreads);
177 return cctxPool;
178 }
179
180 static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool)
181 {
182 if (pool->availCCtx) {
183 pool->availCCtx--;
184 return pool->cctx[pool->availCCtx];
185 }
186 return ZSTD_createCCtx(); /* note : can be NULL, when creation fails ! */
187 }
188
189 static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
190 {
191 if (cctx==NULL) return; /* compatibility with release on NULL */
192 if (pool->availCCtx < pool->totalCCtx)
193 pool->cctx[pool->availCCtx++] = cctx;
194 else
195 /* pool overflow : should not happen, since totalCCtx==nbThreads */
196 ZSTD_freeCCtx(cctx);
197 }
198
199
200 /* ===== Thread worker ===== */
201
/* Streaming-mode input accumulator :
 * `buffer` is the owned storage, `filled` the number of bytes currently loaded. */
typedef struct {
    buffer_t buffer;
    size_t filled;
} inBuff_t;
206
/* Description of one compression job, filled by the producer and consumed by
 * a worker thread running ZSTDMT_compressChunk().
 * Completion is signalled through the shared mutex/cond pair. */
typedef struct {
    ZSTD_CCtx* cctx;                       /* context borrowed from the CCtx pool */
    buffer_t src;                          /* owned input buffer (streaming mode) ; returned to pool after scan */
    const void* srcStart;                  /* start of (overlap-dict + payload) region */
    size_t srcSize;                        /* payload bytes to compress (after the dictSize prefix) */
    size_t dictSize;                       /* size of overlap prefix used as dictionary */
    buffer_t dstBuff;                      /* destination for the compressed section */
    size_t cSize;                          /* result : compressed size, or a ZSTD error code */
    size_t dstFlushed;                     /* bytes of dstBuff already copied out by the flush path */
    unsigned firstChunk;                   /* non-zero for the first section of a frame */
    unsigned lastChunk;                    /* non-zero for the last section (uses ZSTD_compressEnd()) */
    unsigned jobCompleted;                 /* set by worker, under mutex, when done */
    unsigned jobScanned;                   /* set once flush has examined the result */
    pthread_mutex_t* jobCompleted_mutex;   /* shared with the owning ZSTDMT_CCtx */
    pthread_cond_t* jobCompleted_cond;     /* shared with the owning ZSTDMT_CCtx */
    ZSTD_parameters params;
    ZSTD_CDict* cdict;                     /* optional digested dictionary (first job only) */
    unsigned long long fullFrameSize;      /* pledged total frame size, for the frame header */
} ZSTDMT_jobDescription;
226
227 /* ZSTDMT_compressChunk() : POOL_function type */
228 void ZSTDMT_compressChunk(void* jobDescription)
229 {
230 ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
231 const void* const src = (const char*)job->srcStart + job->dictSize;
232 buffer_t const dstBuff = job->dstBuff;
233 DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
234 if (job->cdict) {
235 size_t const initError = ZSTD_compressBegin_usingCDict(job->cctx, job->cdict, job->fullFrameSize);
236 if (job->cdict) DEBUGLOG(3, "using CDict ");
237 if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
238 } else {
239 size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
240 if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
241 ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
242 }
243 if (!job->firstChunk) { /* flush frame header */
244 size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
245 if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
246 ZSTD_invalidateRepCodes(job->cctx);
247 }
248
249 DEBUGLOG(4, "Compressing : ");
250 DEBUG_PRINTHEX(4, job->srcStart, 12);
251 job->cSize = (job->lastChunk) ? /* last chunk signal */
252 ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
253 ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
254 DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
255
256 _endJob:
257 PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
258 job->jobCompleted = 1;
259 job->jobScanned = 0;
260 pthread_cond_signal(job->jobCompleted_cond);
261 pthread_mutex_unlock(job->jobCompleted_mutex);
262 }
263
264
265 /* ------------------------------------------ */
266 /* ===== Multi-threaded compression ===== */
267 /* ------------------------------------------ */
268
/* Multi-threaded compression context.
 * Owns the thread pool, the buffer/CCtx pools, and a power-of-2-sized
 * circular table of job descriptions indexed through jobIDMask. */
struct ZSTDMT_CCtx_s {
    POOL_ctx* factory;                   /* worker thread pool */
    ZSTDMT_bufferPool* buffPool;
    ZSTDMT_CCtxPool* cctxPool;
    pthread_mutex_t jobCompleted_mutex;  /* guards jobs[].jobCompleted */
    pthread_cond_t jobCompleted_cond;    /* signalled by workers on completion */
    size_t targetSectionSize;            /* payload size of each job */
    size_t marginSize;                   /* extra input slack beyond a full section */
    size_t inBuffSize;                   /* = targetDictSize + targetSectionSize + marginSize */
    size_t dictSize;                     /* current overlap prefix length inside inBuff */
    size_t targetDictSize;               /* desired overlap, derived from overlapRLog */
    inBuff_t inBuff;
    ZSTD_parameters params;
    XXH64_state_t xxhState;              /* running checksum across sections */
    unsigned nbThreads;
    unsigned jobIDMask;                  /* nbJobs-1 ; nbJobs is a power of 2 */
    unsigned doneJobID;                  /* next job to flush (monotonic, masked for indexing) */
    unsigned nextJobID;                  /* next job to post (monotonic, masked for indexing) */
    unsigned frameEnded;                 /* last section posted ; only flush allowed */
    unsigned allJobsCompleted;
    unsigned overlapRLog;                /* 9 - overlapSectionLog ; >=9 means no overlap */
    unsigned long long frameContentSize; /* pledged source size, 0 == unknown */
    size_t sectionSize;                  /* user-set section size ; 0 == auto */
    ZSTD_CDict* cdict;
    ZSTD_CStream* cstream;               /* single-thread fallback (nbThreads==1 only) */
    ZSTDMT_jobDescription jobs[1];   /* variable size (must lies at the end) */
};
296
297 ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads)
298 {
299 ZSTDMT_CCtx* cctx;
300 U32 const minNbJobs = nbThreads + 2;
301 U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1;
302 U32 const nbJobs = 1 << nbJobsLog2;
303 DEBUGLOG(5, "nbThreads : %u ; minNbJobs : %u ; nbJobsLog2 : %u ; nbJobs : %u \n",
304 nbThreads, minNbJobs, nbJobsLog2, nbJobs);
305 if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL;
306 cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbJobs*sizeof(ZSTDMT_jobDescription));
307 if (!cctx) return NULL;
308 cctx->nbThreads = nbThreads;
309 cctx->jobIDMask = nbJobs - 1;
310 cctx->allJobsCompleted = 1;
311 cctx->sectionSize = 0;
312 cctx->overlapRLog = 3;
313 cctx->factory = POOL_create(nbThreads, 1);
314 cctx->buffPool = ZSTDMT_createBufferPool(nbThreads);
315 cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads);
316 if (!cctx->factory | !cctx->buffPool | !cctx->cctxPool) { /* one object was not created */
317 ZSTDMT_freeCCtx(cctx);
318 return NULL;
319 }
320 if (nbThreads==1) {
321 cctx->cstream = ZSTD_createCStream();
322 if (!cctx->cstream) {
323 ZSTDMT_freeCCtx(cctx); return NULL;
324 } }
325 pthread_mutex_init(&cctx->jobCompleted_mutex, NULL); /* Todo : check init function return */
326 pthread_cond_init(&cctx->jobCompleted_cond, NULL);
327 DEBUGLOG(4, "mt_cctx created, for %u threads \n", nbThreads);
328 return cctx;
329 }
330
331 /* ZSTDMT_releaseAllJobResources() :
332 * Ensure all workers are killed first. */
333 static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
334 {
335 unsigned jobID;
336 for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
337 ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff);
338 mtctx->jobs[jobID].dstBuff = g_nullBuffer;
339 ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].src);
340 mtctx->jobs[jobID].src = g_nullBuffer;
341 ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx);
342 mtctx->jobs[jobID].cctx = NULL;
343 }
344 memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
345 ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer);
346 mtctx->inBuff.buffer = g_nullBuffer;
347 mtctx->allJobsCompleted = 1;
348 }
349
/* ZSTDMT_freeCCtx() :
 * Release a multi-threaded context and everything it owns.
 * Order matters : workers are terminated first, then outstanding job
 * resources are parked back into the pools, then the pools themselves go.
 * @return : 0 (never fails) */
size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
{
    if (mtctx==NULL) return 0;   /* compatible with free on NULL */
    POOL_free(mtctx->factory);   /* waits for and terminates worker threads */
    if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx);   /* stop workers first */
    ZSTDMT_freeBufferPool(mtctx->buffPool);   /* release job resources into pools first */
    ZSTDMT_freeCCtxPool(mtctx->cctxPool);
    ZSTD_freeCDict(mtctx->cdict);
    ZSTD_freeCStream(mtctx->cstream);
    pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
    pthread_cond_destroy(&mtctx->jobCompleted_cond);
    free(mtctx);
    return 0;
}
364
365 size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)
366 {
367 switch(parameter)
368 {
369 case ZSTDMT_p_sectionSize :
370 mtctx->sectionSize = value;
371 return 0;
372 case ZSTDMT_p_overlapSectionLog :
373 DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value);
374 mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value;
375 return 0;
376 default :
377 return ERROR(compressionParameter_unsupported);
378 }
379 }
380
381
382 /* ------------------------------------------ */
383 /* ===== Multi-threaded compression ===== */
384 /* ------------------------------------------ */
385
/* ZSTDMT_compressCCtx() :
 * One-pass multi-threaded compression : split `src` into up to nbThreads
 * chunks, compress them in parallel as sections of a single frame, then
 * concatenate the results in order into `dst`.
 * Falls back to single-thread ZSTD_compressCCtx() when one chunk suffices.
 * @return : compressed size written into dst, or an error code */
size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
                           void* dst, size_t dstCapacity,
                     const void* src, size_t srcSize,
                           int compressionLevel)
{
    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
    size_t const chunkTargetSize = (size_t)1 << (params.cParams.windowLog + 2);   /* 4x window per chunk */
    unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + (srcSize < chunkTargetSize) /* min 1 */;
    unsigned nbChunks = MIN(nbChunksMax, mtctx->nbThreads);
    size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
    size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0xFFFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize;   /* avoid too small last block */
    size_t remainingSrcSize = srcSize;
    const char* const srcStart = (const char*)src;
    size_t frameStartPos = 0;   /* offset of the next chunk within src */

    DEBUGLOG(3, "windowLog : %2u => chunkTargetSize : %u bytes ", params.cParams.windowLog, (U32)chunkTargetSize);
    DEBUGLOG(2, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize);
    params.fParams.contentSizeFlag = 1;

    if (nbChunks==1) {   /* fallback to single-thread mode */
        ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
        return ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
    }

    /* phase 1 : post one job per chunk.
     * Chunk 0 writes directly into dst ; later chunks get pool buffers. */
    { unsigned u;
        for (u=0; u<nbChunks; u++) {
            size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize);
            size_t const dstBufferCapacity = u ? ZSTD_compressBound(chunkSize) : dstCapacity;
            buffer_t const dstAsBuffer = { dst, dstCapacity };
            buffer_t const dstBuffer = u ? ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity) : dstAsBuffer;
            ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool);

            if ((cctx==NULL) || (dstBuffer.start==NULL)) {
                mtctx->jobs[u].cSize = ERROR(memory_allocation);   /* job result */
                mtctx->jobs[u].jobCompleted = 1;
                nbChunks = u+1;
                break;   /* let's wait for previous jobs to complete, but don't start new ones */
            }

            mtctx->jobs[u].srcStart = srcStart + frameStartPos;
            mtctx->jobs[u].srcSize = chunkSize;
            mtctx->jobs[u].fullFrameSize = srcSize;
            mtctx->jobs[u].params = params;
            mtctx->jobs[u].dstBuff = dstBuffer;
            mtctx->jobs[u].cctx = cctx;
            mtctx->jobs[u].firstChunk = (u==0);
            mtctx->jobs[u].lastChunk = (u==nbChunks-1);
            mtctx->jobs[u].jobCompleted = 0;
            mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
            mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;

            DEBUGLOG(3, "posting job %u (%u bytes)", u, (U32)chunkSize);
            DEBUG_PRINTHEX(3, mtctx->jobs[u].srcStart, 12);
            POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);

            frameStartPos += chunkSize;
            remainingSrcSize -= chunkSize;
    }   }
    /* note : since nbChunks <= nbThreads, all jobs should be running immediately in parallel */

    /* phase 2 : collect results in order, copying sections after chunk 0
     * into place behind it.  On error, keep draining so dstPos stays sane. */
    { unsigned chunkID;
        size_t error = 0, dstPos = 0;
        for (chunkID=0; chunkID<nbChunks; chunkID++) {
            DEBUGLOG(3, "waiting for chunk %u ", chunkID);
            PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
            while (mtctx->jobs[chunkID].jobCompleted==0) {
                DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", chunkID);
                pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
            }
            pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
            DEBUGLOG(3, "ready to write chunk %u ", chunkID);

            ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
            mtctx->jobs[chunkID].cctx = NULL;
            mtctx->jobs[chunkID].srcStart = NULL;
            { size_t const cSize = mtctx->jobs[chunkID].cSize;
                if (ZSTD_isError(cSize)) error = cSize;
                if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
                if (chunkID) {   /* note : chunk 0 is already written directly into dst */
                    if (!error) memcpy((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize);
                    ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
                    mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
                }
                dstPos += cSize ;
            }
        }
        if (!error) DEBUGLOG(3, "compressed size : %u  ", (U32)dstPos);
        return error ? error : dstPos;
    }

}
477
478
479 /* ====================================== */
480 /* ======= Streaming API ======= */
481 /* ====================================== */
482
/* ZSTDMT_waitForAllJobsCompleted() :
 * Block until every posted job (doneJobID .. nextJobID-1) has signalled
 * completion.  Advances doneJobID but does not release job resources. */
static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) {
    while (zcs->doneJobID < zcs->nextJobID) {
        unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
        PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
        while (zcs->jobs[jobID].jobCompleted==0) {
            DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID);   /* we want to block when waiting for data to flush */
            pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
        }
        pthread_mutex_unlock(&zcs->jobCompleted_mutex);
        zcs->doneJobID++;
    }
}
495
496
/* ZSTDMT_initCStream_internal() :
 * (Re)start a streaming compression session.
 * updateDict : when non-zero, rebuild the internal CDict from (dict,dictSize) ;
 *              when zero, reuse the previous one (reset path).
 * Computes the overlap ("dict") size, the per-job section size, and
 * allocates the rolling input buffer.
 * @return : 0, or an error code */
static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
                                    const void* dict, size_t dictSize, unsigned updateDict,
                                    ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
    ZSTD_customMem const cmem = { NULL, NULL, NULL };
    DEBUGLOG(3, "Started new compression, with windowLog : %u", params.cParams.windowLog);
    if (zcs->nbThreads==1) return ZSTD_initCStream_advanced(zcs->cstream, dict, dictSize, params, pledgedSrcSize);
    if (zcs->allJobsCompleted == 0) {   /* previous job not correctly finished */
        ZSTDMT_waitForAllJobsCompleted(zcs);
        ZSTDMT_releaseAllJobResources(zcs);
        zcs->allJobsCompleted = 1;
    }
    zcs->params = params;
    if (updateDict) {
        ZSTD_freeCDict(zcs->cdict); zcs->cdict = NULL;
        if (dict && dictSize) {
            zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, 0, params, cmem);
            if (zcs->cdict == NULL) return ERROR(memory_allocation);
    }   }
    zcs->frameContentSize = pledgedSrcSize;
    /* overlapRLog >= 9 disables overlap entirely */
    zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog);
    DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog);
    DEBUGLOG(3, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
    /* section size : user override, else 4x window ; clamped below by the
     * minimum job size and by the overlap size */
    zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);
    zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
    zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);
    DEBUGLOG(3, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
    zcs->marginSize = zcs->targetSectionSize >> 2;
    zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize;
    zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
    if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
    zcs->inBuff.filled = 0;
    zcs->dictSize = 0;
    zcs->doneJobID = 0;
    zcs->nextJobID = 0;
    zcs->frameEnded = 0;
    zcs->allJobsCompleted = 0;
    if (params.fParams.checksumFlag) XXH64_reset(&zcs->xxhState, 0);
    return 0;
}
537
/* ZSTDMT_initCStream_advanced() :
 * Start a session with explicit parameters and an optional dictionary ;
 * updateDict=1 : (re)build the internal CDict from (dict, dictSize). */
size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs,
                                const void* dict, size_t dictSize,
                                ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
    return ZSTDMT_initCStream_internal(zcs, dict, dictSize, 1, params, pledgedSrcSize);
}
544
/* ZSTDMT_resetCStream() :
 * Restart a session keeping the previous parameters and dictionary
 * (updateDict=0).
 * pledgedSrcSize is optional and can be zero == unknown */
size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
{
    if (zcs->nbThreads==1) return ZSTD_resetCStream(zcs->cstream, pledgedSrcSize);
    return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize);
}
552
/* ZSTDMT_initCStream() :
 * Start a session from a compression level only (no dictionary,
 * unknown source size). */
size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
    return ZSTDMT_initCStream_internal(zcs, NULL, 0, 1, params, 0);
}
557
558
/* ZSTDMT_createCompressionJob() :
 * Package the current input buffer (overlap prefix + srcSize payload) into
 * the next job slot and post it to the worker pool.
 * Ownership of zcs->inBuff.buffer transfers to the job ; unless endFrame,
 * a fresh input buffer is allocated and pre-filled with the new overlap
 * (tail of the previous input) plus any unconsumed bytes.
 * @return : 0, or an error code (memory_allocation) */
static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
{
    size_t const dstBufferCapacity = ZSTD_compressBound(srcSize);
    buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity);
    ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool);
    unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;

    if ((cctx==NULL) || (dstBuffer.start==NULL)) {
        /* mark the slot complete so the drain below terminates, then abort */
        zcs->jobs[jobID].jobCompleted = 1;
        zcs->nextJobID++;
        ZSTDMT_waitForAllJobsCompleted(zcs);
        ZSTDMT_releaseAllJobResources(zcs);
        return ERROR(memory_allocation);
    }

    DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ", zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
    zcs->jobs[jobID].src = zcs->inBuff.buffer;
    zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
    zcs->jobs[jobID].srcSize = srcSize;
    zcs->jobs[jobID].dictSize = zcs->dictSize;   /* note : zcs->inBuff.filled is presumed >= srcSize + dictSize */
    zcs->jobs[jobID].params = zcs->params;
    if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;   /* do not calculate checksum within sections, just keep it in header for first section */
    zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;   /* dictionary applies to the first section only */
    zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
    zcs->jobs[jobID].dstBuff = dstBuffer;
    zcs->jobs[jobID].cctx = cctx;
    zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
    zcs->jobs[jobID].lastChunk = endFrame;
    zcs->jobs[jobID].jobCompleted = 0;
    zcs->jobs[jobID].dstFlushed = 0;
    zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
    zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;

    /* get a new buffer for next input */
    if (!endFrame) {
        size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
        zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
        if (zcs->inBuff.buffer.start == NULL) {   /* not enough memory to allocate next input buffer */
            zcs->jobs[jobID].jobCompleted = 1;
            zcs->nextJobID++;
            ZSTDMT_waitForAllJobsCompleted(zcs);
            ZSTDMT_releaseAllJobResources(zcs);
            return ERROR(memory_allocation);
        }
        DEBUGLOG(5, "inBuff filled to %u", (U32)zcs->inBuff.filled);
        zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
        DEBUGLOG(5, "new job : filled to %u, with %u dict and %u src", (U32)zcs->inBuff.filled, (U32)newDictSize, (U32)(zcs->inBuff.filled - newDictSize));
        /* carry the overlap tail + leftover input into the new buffer ;
         * source region still belongs to the old (job-owned) buffer */
        memmove(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize, zcs->inBuff.filled);
        DEBUGLOG(5, "new inBuff pre-filled");
        zcs->dictSize = newDictSize;
    } else {   /* if (endFrame==1) */
        zcs->inBuff.buffer = g_nullBuffer;
        zcs->inBuff.filled = 0;
        zcs->dictSize = 0;
        zcs->frameEnded = 1;
        if (zcs->nextJobID == 0)
            zcs->params.fParams.checksumFlag = 0;   /* single chunk : checksum is calculated directly within worker thread */
    }

    DEBUGLOG(3, "posting job %u : %u bytes  (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask);
    POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]);   /* this call is blocking when thread worker pool is exhausted */
    zcs->nextJobID++;
    return 0;
}
623
624
/* ZSTDMT_flushNextJob() :
 * output : will be updated with amount of data flushed .
 * blockToFlush : if >0, the function will block and wait if there is no data available to flush .
 * On the first visit to a completed job ("scan"), checks for errors,
 * returns its cctx/src buffer to the pools, and appends the frame checksum
 * after the last section when enabled.
 * @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more, or an error code */
static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
{
    unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
    if (zcs->doneJobID == zcs->nextJobID) return 0;   /* all flushed ! */
    PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
    while (zcs->jobs[wJobID].jobCompleted==0) {
        DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID);
        if (!blockToFlush) { pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; }   /* nothing ready to be flushed => skip */
        pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);   /* block when nothing available to flush */
    }
    pthread_mutex_unlock(&zcs->jobCompleted_mutex);
    /* compression job completed : output can be flushed */
    { ZSTDMT_jobDescription job = zcs->jobs[wJobID];   /* local copy ; relevant fields written back below */
        if (!job.jobScanned) {   /* first visit : validate result and release input-side resources */
            if (ZSTD_isError(job.cSize)) {
                DEBUGLOG(5, "compression error detected ");
                ZSTDMT_waitForAllJobsCompleted(zcs);
                ZSTDMT_releaseAllJobResources(zcs);
                return job.cSize;
            }
            ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx);
            zcs->jobs[wJobID].cctx = NULL;
            DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag);
            if (zcs->params.fParams.checksumFlag) {
                XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);
                if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) {   /* write checksum at end of last section */
                    U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
                    DEBUGLOG(4, "writing checksum : %08X \n", checksum);
                    MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
                    job.cSize += 4;
                    zcs->jobs[wJobID].cSize += 4;
            }   }
            ZSTDMT_releaseBuffer(zcs->buffPool, job.src);
            zcs->jobs[wJobID].srcStart = NULL;
            zcs->jobs[wJobID].src = g_nullBuffer;
            zcs->jobs[wJobID].jobScanned = 1;
        }
        { size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
            DEBUGLOG(4, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
            memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
            output->pos += toWrite;
            job.dstFlushed += toWrite;
        }
        if (job.dstFlushed == job.cSize) {   /* output buffer fully flushed => move to next one */
            ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff);
            zcs->jobs[wJobID].dstBuff = g_nullBuffer;
            zcs->jobs[wJobID].jobCompleted = 0;
            zcs->doneJobID++;
        } else {
            zcs->jobs[wJobID].dstFlushed = job.dstFlushed;   /* partial flush : remember progress */
        }
        /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */
        if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed);
        if (zcs->doneJobID < zcs->nextJobID) return 1;   /* still some buffer to flush */
        zcs->allJobsCompleted = zcs->frameEnded;   /* frame completed and entirely flushed */
        return 0;   /* everything flushed */
}   }
686
687
/* ZSTDMT_compressStream() :
 * Accumulate input into the internal buffer ; when a full section (plus
 * overlap and margin) is available and a job slot is free, post a new
 * compression job ; then opportunistically flush completed output.
 * @return : recommended next input size, or an error code */
size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
    size_t const newJobThreshold = zcs->dictSize + zcs->targetSectionSize + zcs->marginSize;
    if (zcs->frameEnded) return ERROR(stage_wrong);   /* current frame being ended. Only flush is allowed. Restart with init */
    if (zcs->nbThreads==1) return ZSTD_compressStream(zcs->cstream, output, input);

    /* fill input buffer */
    { size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.filled);
        memcpy((char*)zcs->inBuff.buffer.start + zcs->inBuff.filled, input->src, toLoad);
        input->pos += toLoad;
        zcs->inBuff.filled += toLoad;
    }

    if ( (zcs->inBuff.filled >= newJobThreshold)   /* filled enough : let's compress */
        && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {   /* avoid overwriting job round buffer */
        CHECK_F( ZSTDMT_createCompressionJob(zcs, zcs->targetSectionSize, 0) );
    }

    /* check for data to flush */
    CHECK_F( ZSTDMT_flushNextJob(zcs, output, (zcs->inBuff.filled == zcs->inBuffSize)) );   /* block if it wasn't possible to create new job due to saturation */

    /* recommended next input size : fill current input buffer */
    return zcs->inBuffSize - zcs->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
}
712
713
/* ZSTDMT_flushStream_internal() :
 * Post a final (possibly partial) job for any pending input — or an empty
 * last job when endFrame is requested and the frame is still open — then
 * block-flush the oldest completed job.
 * @return : bytes still buffered (0 == done), or an error code */
static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned endFrame)
{
    size_t const srcSize = zcs->inBuff.filled - zcs->dictSize;   /* payload beyond the overlap prefix */

    if (srcSize) DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize);
    if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded))
        && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {
        CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) );
    }

    /* check if there is any data available to flush */
    DEBUGLOG(5, "zcs->doneJobID : %u  ; zcs->nextJobID : %u ", zcs->doneJobID, zcs->nextJobID);
    return ZSTDMT_flushNextJob(zcs, output, 1);
}
728
729
/* ZSTDMT_flushStream() :
 * Flush whatever is ready without ending the frame.
 * @return : bytes still buffered, or an error code */
size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
{
    if (zcs->nbThreads==1) return ZSTD_flushStream(zcs->cstream, output);
    return ZSTDMT_flushStream_internal(zcs, output, 0);
}
735
/* ZSTDMT_endStream() :
 * Flush remaining data and close the frame (epilogue + optional checksum).
 * @return : bytes still buffered (call again until 0), or an error code */
size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
{
    if (zcs->nbThreads==1) return ZSTD_endStream(zcs->cstream, output);
    return ZSTDMT_flushStream_internal(zcs, output, 1);
}
@@ -0,0 +1,78 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
#ifndef ZSTDMT_COMPRESS_H
#define ZSTDMT_COMPRESS_H

#if defined (__cplusplus)
extern "C" {
#endif


/* Note : All prototypes defined in this file shall be considered experimental.
 *        There is no guarantee of API continuity (yet) on any of these prototypes */

/* ===   Dependencies   === */
#include <stddef.h>   /* size_t */
#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters */
#include "zstd.h"   /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */


/* ===   Simple one-pass functions   === */

typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads);
ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* cctx);

/* One-shot parallel compression of src into dst. */
ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx,
                           void* dst, size_t dstCapacity,
                     const void* src, size_t srcSize,
                           int compressionLevel);


/* ===   Streaming functions   === */

ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be zero == unknown */

ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);

ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);   /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);   /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */


/* ===   Advanced functions and parameters  === */

#ifndef ZSTDMT_SECTION_SIZE_MIN
#  define ZSTDMT_SECTION_SIZE_MIN (1U << 20)   /* 1 MB - Minimum size of each compression job */
#endif

ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, const void* dict, size_t dictSize,   /**< dict can be released after init, a local copy is preserved within zcs */
                                          ZSTD_parameters params, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be zero == unknown */

/* ZSDTMT_parameter :
 * List of parameters that can be set using ZSTDMT_setMTCtxParameter()
 * NOTE(review) : the "ZSDTMT" spelling (D/T transposed) is a typo, but it is
 * part of this experimental public API ; renaming would break callers. */
typedef enum {
    ZSTDMT_p_sectionSize,        /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */
    ZSTDMT_p_overlapSectionLog   /* Log of overlapped section; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
} ZSDTMT_parameter;

/* ZSTDMT_setMTCtxParameter() :
 * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
 * The function must be called typically after ZSTD_createCCtx().
 * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value);


#if defined (__cplusplus)
}
#endif

#endif   /* ZSTDMT_COMPRESS_H */
This diff has been collapsed as it changes many lines, (1021 lines changed) Show them Hide them
@@ -0,0 +1,1021 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10 /*-*************************************
11 * Dependencies
12 ***************************************/
13 #include <stdio.h> /* fprintf */
14 #include <stdlib.h> /* malloc, free, qsort */
15 #include <string.h> /* memset */
16 #include <time.h> /* clock */
17
18 #include "mem.h" /* read */
19 #include "pool.h"
20 #include "threading.h"
21 #include "zstd_internal.h" /* includes zstd.h */
22 #ifndef ZDICT_STATIC_LINKING_ONLY
23 #define ZDICT_STATIC_LINKING_ONLY
24 #endif
25 #include "zdict.h"
26
/*-*************************************
*  Constants
***************************************/
/* Cap on the total size of all samples: 4 GB - 1 on 64-bit hosts, 1 GB on
 * 32-bit hosts (GB is defined by the zstd internal headers). */
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))

/*-*************************************
*  Console display
***************************************/
static int g_displayLevel = 2;
/* Print to stderr unconditionally and flush.
 * All display macros are wrapped in do { ... } while (0) so each expands to a
 * single statement and is safe inside unbraced if/else bodies. */
#define DISPLAY(...)                                                           \
  do {                                                                         \
    fprintf(stderr, __VA_ARGS__);                                              \
    fflush(stderr);                                                            \
  } while (0)
/* 0 : no display;   1: errors;   2: default;   3: details;   4: debug */
#define LOCALDISPLAYLEVEL(displayLevel, l, ...)                                \
  do {                                                                         \
    if (displayLevel >= l) {                                                   \
      DISPLAY(__VA_ARGS__);                                                    \
    }                                                                          \
  } while (0)
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)

/* Rate-limited progress output: refreshes at most once per refreshRate tick
 * unless the display level is debug (>= 4). */
#define LOCALDISPLAYUPDATE(displayLevel, l, ...)                               \
  do {                                                                         \
    if (displayLevel >= l) {                                                   \
      if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) {           \
        g_time = clock();                                                      \
        DISPLAY(__VA_ARGS__);                                                  \
        if (displayLevel >= 4)                                                 \
          fflush(stdout);                                                      \
      }                                                                        \
    }                                                                          \
  } while (0)
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
/* Minimum interval between progress refreshes (~150 ms). */
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
static clock_t g_time = 0;
59
60 /*-*************************************
61 * Hash table
62 ***************************************
63 * A small specialized hash map for storing activeDmers.
64 * The map does not resize, so if it becomes full it will loop forever.
65 * Thus, the map must be large enough to store every value.
66 * The map implements linear probing and keeps its load less than 0.5.
67 */
68
/* Sentinel stored in `value` marking an empty slot. It is all-one bits, so it
 * doubles as the memset fill byte used by COVER_map_clear(). */
#define MAP_EMPTY_VALUE ((U32)-1)
/* One open-addressing slot: a key and its associated counter value. */
typedef struct COVER_map_pair_t_s {
  U32 key;
  U32 value;
} COVER_map_pair_t;

/* Fixed-capacity linear-probing hash map (see the Hash table notes above).
 * It never resizes; COVER_map_init() over-allocates to keep load < 0.5. */
typedef struct COVER_map_s {
  COVER_map_pair_t *data; /* array of `size` slots */
  U32 sizeLog;            /* log2(size) */
  U32 size;               /* slot count; always a power of two */
  U32 sizeMask;           /* size - 1, for cheap wrap-around */
} COVER_map_t;
81
82 /**
83 * Clear the map.
84 */
85 static void COVER_map_clear(COVER_map_t *map) {
86 memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t));
87 }
88
89 /**
90 * Initializes a map of the given size.
91 * Returns 1 on success and 0 on failure.
92 * The map must be destroyed with COVER_map_destroy().
93 * The map is only guaranteed to be large enough to hold size elements.
94 */
95 static int COVER_map_init(COVER_map_t *map, U32 size) {
96 map->sizeLog = ZSTD_highbit32(size) + 2;
97 map->size = (U32)1 << map->sizeLog;
98 map->sizeMask = map->size - 1;
99 map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t));
100 if (!map->data) {
101 map->sizeLog = 0;
102 map->size = 0;
103 return 0;
104 }
105 COVER_map_clear(map);
106 return 1;
107 }
108
/**
 * Internal hash function (multiplicative hashing with a 32-bit prime).
 */
static const U32 prime4bytes = 2654435761U;
/* Maps a 32-bit key to a slot index in [0, map->size). */
static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
  return (key * prime4bytes) >> (32 - map->sizeLog);
}
116
117 /**
118 * Helper function that returns the index that a key should be placed into.
119 */
120 static U32 COVER_map_index(COVER_map_t *map, U32 key) {
121 const U32 hash = COVER_map_hash(map, key);
122 U32 i;
123 for (i = hash;; i = (i + 1) & map->sizeMask) {
124 COVER_map_pair_t *pos = &map->data[i];
125 if (pos->value == MAP_EMPTY_VALUE) {
126 return i;
127 }
128 if (pos->key == key) {
129 return i;
130 }
131 }
132 }
133
134 /**
135 * Returns the pointer to the value for key.
136 * If key is not in the map, it is inserted and the value is set to 0.
137 * The map must not be full.
138 */
139 static U32 *COVER_map_at(COVER_map_t *map, U32 key) {
140 COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)];
141 if (pos->value == MAP_EMPTY_VALUE) {
142 pos->key = key;
143 pos->value = 0;
144 }
145 return &pos->value;
146 }
147
/**
 * Deletes key from the map if present.
 * Uses backward-shift deletion: entries later in the probe chain are moved
 * into the vacated slot so that lookups never stop early at a false hole.
 */
static void COVER_map_remove(COVER_map_t *map, U32 key) {
  U32 i = COVER_map_index(map, key);
  COVER_map_pair_t *del = &map->data[i];
  U32 shift = 1; /* probe distance between `del` and the slot being examined */
  if (del->value == MAP_EMPTY_VALUE) {
    /* Key not present: nothing to do. */
    return;
  }
  for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) {
    COVER_map_pair_t *const pos = &map->data[i];
    /* If the position is empty we are done */
    if (pos->value == MAP_EMPTY_VALUE) {
      del->value = MAP_EMPTY_VALUE;
      return;
    }
    /* If pos can be moved to del do so */
    /* pos may fill the hole only if its home slot is at least `shift`
     * positions behind it, i.e. moving it does not break its probe chain. */
    if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) {
      del->key = pos->key;
      del->value = pos->value;
      del = pos;
      shift = 1;
    } else {
      ++shift;
    }
  }
}
176
177 /**
178 * Destroyes a map that is inited with COVER_map_init().
179 */
180 static void COVER_map_destroy(COVER_map_t *map) {
181 if (map->data) {
182 free(map->data);
183 }
184 map->data = NULL;
185 map->size = 0;
186 }
187
/*-*************************************
*  Context
***************************************/

/* Shared, read-mostly state for one value of `d`: the samples, a partial
 * suffix array over them, and per-dmer bookkeeping. Built by COVER_ctx_init()
 * and released by COVER_ctx_destroy(). */
typedef struct {
  const BYTE *samples;        /* concatenated sample bytes (caller-owned) */
  size_t *offsets;            /* nbSamples+1 cumulative sample end offsets */
  const size_t *samplesSizes; /* size of each sample (caller-owned) */
  size_t nbSamples;
  U32 *suffix;                /* partial suffix array; NULL once converted */
  size_t suffixSize;
  U32 *freqs;                 /* per-dmer frequencies (reuses suffix memory) */
  U32 *dmerAt;                /* position -> dmer id */
  unsigned d;                 /* dmer length this context was built for */
} COVER_ctx_t;

/* We need a global context for qsort... */
static COVER_ctx_t *g_ctx = NULL;
206
/*-*************************************
*  Helper functions
***************************************/

/**
 * Returns the sum of the sample sizes.
 */
static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
  size_t total = 0;
  const size_t *cur = samplesSizes;
  const size_t *const end = samplesSizes + nbSamples;
  while (cur != end) {
    total += *cur++;
  }
  return total;
}
222
223 /**
224 * Returns -1 if the dmer at lp is less than the dmer at rp.
225 * Return 0 if the dmers at lp and rp are equal.
226 * Returns 1 if the dmer at lp is greater than the dmer at rp.
227 */
228 static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
229 const U32 lhs = *(const U32 *)lp;
230 const U32 rhs = *(const U32 *)rp;
231 return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
232 }
233
234 /**
235 * Same as COVER_cmp() except ties are broken by pointer value
236 * NOTE: g_ctx must be set to call this function. A global is required because
237 * qsort doesn't take an opaque pointer.
238 */
239 static int COVER_strict_cmp(const void *lp, const void *rp) {
240 int result = COVER_cmp(g_ctx, lp, rp);
241 if (result == 0) {
242 result = lp < rp ? -1 : 1;
243 }
244 return result;
245 }
246
/**
 * Returns the first pointer in [first, last) whose element does not compare
 * less than value. If no such element exists it returns last.
 * The range must be sorted; this is a binary search.
 */
static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
                                       size_t value) {
  size_t remaining = (size_t)(last - first);
  while (remaining > 0) {
    const size_t half = remaining / 2;
    const size_t *const mid = first + half;
    if (*mid < value) {
      /* The answer lies strictly after mid. */
      first = mid + 1;
      remaining -= half + 1;
    } else {
      /* mid is a candidate; keep searching the left half. */
      remaining = half;
    }
  }
  return first;
}
267
/**
 * Generic groupBy function.
 * Groups an array sorted by cmp into groups with equivalent values.
 * Calls grp for each group, passing pointers to the group's first element
 * and one past its last element.
 *
 * `data` points at `count` elements of `size` bytes each.
 */
static void
COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx,
              int (*cmp)(COVER_ctx_t *, const void *, const void *),
              void (*grp)(COVER_ctx_t *, const void *, const void *)) {
  const BYTE *ptr = (const BYTE *)data;
  size_t num = 0; /* elements consumed so far */
  while (num < count) {
    /* Extend grpEnd while the next element compares equal to the first. */
    const BYTE *grpEnd = ptr + size;
    ++num;
    while (num < count && cmp(ctx, ptr, grpEnd) == 0) {
      grpEnd += size;
      ++num;
    }
    grp(ctx, ptr, grpEnd);
    ptr = grpEnd;
  }
}
290
/*-*************************************
*  Cover functions
***************************************/

/**
 * Called on each group of positions with the same dmer.
 * Counts the frequency of each dmer and saves it in the suffix array.
 * Fills `ctx->dmerAt`.
 *
 * `group`/`groupEnd` delimit a run of suffix-array entries whose positions
 * share the same first d bytes (produced by COVER_groupBy()).
 */
static void COVER_group(COVER_ctx_t *ctx, const void *group,
                        const void *groupEnd) {
  /* The group consists of all the positions with the same first d bytes. */
  const U32 *grpPtr = (const U32 *)group;
  const U32 *grpEnd = (const U32 *)groupEnd;
  /* The dmerId is how we will reference this dmer.
   * This allows us to map the whole dmer space to a much smaller space, the
   * size of the suffix array.
   */
  const U32 dmerId = (U32)(grpPtr - ctx->suffix);
  /* Count the number of samples this dmer shows up in */
  U32 freq = 0;
  /* Details */
  const size_t *curOffsetPtr = ctx->offsets;
  const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples;
  /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a
   * different sample than the last.
   */
  size_t curSampleEnd = ctx->offsets[0];
  for (; grpPtr != grpEnd; ++grpPtr) {
    /* Save the dmerId for this position so we can get back to it. */
    ctx->dmerAt[*grpPtr] = dmerId;
    /* Dictionaries only help for the first reference to the dmer.
     * After that zstd can reference the match from the previous reference.
     * So only count each dmer once for each sample it is in.
     */
    if (*grpPtr < curSampleEnd) {
      continue;
    }
    freq += 1;
    /* Binary search to find the end of the sample *grpPtr is in.
     * In the common case that grpPtr + 1 == grpEnd we can skip the binary
     * search because the loop is over.
     */
    if (grpPtr + 1 != grpEnd) {
      const size_t *sampleEndPtr =
          COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr);
      curSampleEnd = *sampleEndPtr;
      curOffsetPtr = sampleEndPtr + 1;
    }
  }
  /* At this point we are never going to look at this segment of the suffix
   * array again. We take advantage of this fact to save memory.
   * We store the frequency of the dmer in the first position of the group,
   * which is dmerId.
   */
  ctx->suffix[dmerId] = freq;
}
348
/**
 * A segment is a range in the source as well as the score of the segment.
 */
typedef struct {
  U32 begin;    /* first dmer position of the segment (inclusive) */
  U32 end;      /* one past the last dmer position (exclusive) */
  double score; /* sum of frequencies of the segment's distinct dmers */
} COVER_segment_t;
357
/**
 * Selects the best segment in an epoch.
 * Segments of are scored according to the function:
 *
 * Let F(d) be the frequency of dmer d.
 * Let S_i be the dmer at position i of segment S which has length k.
 *
 *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
 *
 * Once the dmer d is in the dictionary we set F(d) = 0.
 *
 * Slides a window of k-d+1 dmers over [begin, end), maintaining the set of
 * dmers inside the window in `activeDmers` so each distinct dmer is scored
 * once. Zeroes the frequencies covered by the winning segment (in `freqs`)
 * before returning it.
 */
static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
                                           COVER_map_t *activeDmers, U32 begin,
                                           U32 end, COVER_params_t parameters) {
  /* Constants */
  const U32 k = parameters.k;
  const U32 d = parameters.d;
  const U32 dmersInK = k - d + 1;
  /* Try each segment (activeSegment) and save the best (bestSegment) */
  COVER_segment_t bestSegment = {0, 0, 0};
  COVER_segment_t activeSegment;
  /* Reset the activeDmers in the segment */
  COVER_map_clear(activeDmers);
  /* The activeSegment starts at the beginning of the epoch. */
  activeSegment.begin = begin;
  activeSegment.end = begin;
  activeSegment.score = 0;
  /* Slide the activeSegment through the whole epoch.
   * Save the best segment in bestSegment.
   */
  while (activeSegment.end < end) {
    /* The dmerId for the dmer at the next position */
    U32 newDmer = ctx->dmerAt[activeSegment.end];
    /* The entry in activeDmers for this dmerId */
    U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer);
    /* If the dmer isn't already present in the segment add its score. */
    if (*newDmerOcc == 0) {
      /* The paper suggest using the L-0.5 norm, but experiments show that it
       * doesn't help.
       */
      activeSegment.score += freqs[newDmer];
    }
    /* Add the dmer to the segment */
    activeSegment.end += 1;
    *newDmerOcc += 1;

    /* If the window is now too large, drop the first position */
    if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
      U32 delDmer = ctx->dmerAt[activeSegment.begin];
      U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
      activeSegment.begin += 1;
      *delDmerOcc -= 1;
      /* If this is the last occurence of the dmer, subtract its score */
      if (*delDmerOcc == 0) {
        COVER_map_remove(activeDmers, delDmer);
        activeSegment.score -= freqs[delDmer];
      }
    }

    /* If this segment is the best so far save it */
    if (activeSegment.score > bestSegment.score) {
      bestSegment = activeSegment;
    }
  }
  {
    /* Trim off the zero frequency head and tail from the segment. */
    U32 newBegin = bestSegment.end;
    U32 newEnd = bestSegment.begin;
    U32 pos;
    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
      U32 freq = freqs[ctx->dmerAt[pos]];
      if (freq != 0) {
        newBegin = MIN(newBegin, pos);
        newEnd = pos + 1;
      }
    }
    bestSegment.begin = newBegin;
    bestSegment.end = newEnd;
  }
  {
    /* Zero out the frequency of each dmer covered by the chosen segment. */
    U32 pos;
    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
      freqs[ctx->dmerAt[pos]] = 0;
    }
  }
  return bestSegment;
}
446
447 /**
448 * Check the validity of the parameters.
449 * Returns non-zero if the parameters are valid and 0 otherwise.
450 */
451 static int COVER_checkParameters(COVER_params_t parameters) {
452 /* k and d are required parameters */
453 if (parameters.d == 0 || parameters.k == 0) {
454 return 0;
455 }
456 /* d <= k */
457 if (parameters.d > parameters.k) {
458 return 0;
459 }
460 return 1;
461 }
462
463 /**
464 * Clean up a context initialized with `COVER_ctx_init()`.
465 */
466 static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
467 if (!ctx) {
468 return;
469 }
470 if (ctx->suffix) {
471 free(ctx->suffix);
472 ctx->suffix = NULL;
473 }
474 if (ctx->freqs) {
475 free(ctx->freqs);
476 ctx->freqs = NULL;
477 }
478 if (ctx->dmerAt) {
479 free(ctx->dmerAt);
480 ctx->dmerAt = NULL;
481 }
482 if (ctx->offsets) {
483 free(ctx->offsets);
484 ctx->offsets = NULL;
485 }
486 }
487
/**
 * Prepare a context for dictionary building.
 * The context is only dependent on the parameter `d` and can used multiple
 * times.
 * Returns 1 on success or zero on error.
 * The context must be destroyed with `COVER_ctx_destroy()`.
 *
 * On success, `ctx->freqs` holds per-dmer frequencies and `ctx->suffix` is
 * NULL (the suffix buffer is reused for the frequencies).
 */
static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
                          const size_t *samplesSizes, unsigned nbSamples,
                          unsigned d) {
  const BYTE *const samples = (const BYTE *)samplesBuffer;
  const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
  /* Checks */
  /* Need at least one full dmer, and the total must fit the U32 position
   * space used by the suffix array. */
  if (totalSamplesSize < d ||
      totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
    DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
                 (COVER_MAX_SAMPLES_SIZE >> 20));
    return 0;
  }
  /* Zero the context */
  memset(ctx, 0, sizeof(*ctx));
  DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbSamples,
               (U32)totalSamplesSize);
  ctx->samples = samples;
  ctx->samplesSizes = samplesSizes;
  ctx->nbSamples = nbSamples;
  /* Partial suffix array */
  ctx->suffixSize = totalSamplesSize - d + 1;
  ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
  /* Maps index to the dmerID */
  ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
  /* The offsets of each file */
  ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
  if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
    DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
    COVER_ctx_destroy(ctx);
    return 0;
  }
  ctx->freqs = NULL;
  ctx->d = d;

  /* Fill offsets from the samplesSizes */
  {
    U32 i;
    ctx->offsets[0] = 0;
    for (i = 1; i <= nbSamples; ++i) {
      ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
    }
  }
  DISPLAYLEVEL(2, "Constructing partial suffix array\n");
  {
    /* suffix is a partial suffix array.
     * It only sorts suffixes by their first parameters.d bytes.
     * The sort is stable, so each dmer group is sorted by position in input.
     */
    U32 i;
    for (i = 0; i < ctx->suffixSize; ++i) {
      ctx->suffix[i] = i;
    }
    /* qsort doesn't take an opaque pointer, so pass as a global */
    g_ctx = ctx;
    qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), &COVER_strict_cmp);
  }
  DISPLAYLEVEL(2, "Computing frequencies\n");
  /* For each dmer group (group of positions with the same first d bytes):
   * 1. For each position we set dmerAt[position] = dmerID. The dmerID is
   *    (groupBeginPtr - suffix). This allows us to go from position to
   *    dmerID so we can look up values in freq.
   * 2. We calculate how many samples the dmer occurs in and save it in
   *    freqs[dmerId].
   */
  COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, &COVER_cmp,
                &COVER_group);
  ctx->freqs = ctx->suffix;
  ctx->suffix = NULL;
  return 1;
}
565
566 /**
567 * Given the prepared context build the dictionary.
568 */
569 static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
570 COVER_map_t *activeDmers, void *dictBuffer,
571 size_t dictBufferCapacity,
572 COVER_params_t parameters) {
573 BYTE *const dict = (BYTE *)dictBuffer;
574 size_t tail = dictBufferCapacity;
575 /* Divide the data up into epochs of equal size.
576 * We will select at least one segment from each epoch.
577 */
578 const U32 epochs = (U32)(dictBufferCapacity / parameters.k);
579 const U32 epochSize = (U32)(ctx->suffixSize / epochs);
580 size_t epoch;
581 DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
582 epochSize);
583 /* Loop through the epochs until there are no more segments or the dictionary
584 * is full.
585 */
586 for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
587 const U32 epochBegin = (U32)(epoch * epochSize);
588 const U32 epochEnd = epochBegin + epochSize;
589 size_t segmentSize;
590 /* Select a segment */
591 COVER_segment_t segment = COVER_selectSegment(
592 ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
593 /* Trim the segment if necessary and if it is empty then we are done */
594 segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
595 if (segmentSize == 0) {
596 break;
597 }
598 /* We fill the dictionary from the back to allow the best segments to be
599 * referenced with the smallest offsets.
600 */
601 tail -= segmentSize;
602 memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
603 DISPLAYUPDATE(
604 2, "\r%u%% ",
605 (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
606 }
607 DISPLAYLEVEL(2, "\r%79s\r", "");
608 return tail;
609 }
610
611 /**
612 * Translate from COVER_params_t to ZDICT_params_t required for finalizing the
613 * dictionary.
614 */
615 static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
616 ZDICT_params_t zdictParams;
617 memset(&zdictParams, 0, sizeof(zdictParams));
618 zdictParams.notificationLevel = 1;
619 zdictParams.dictID = parameters.dictID;
620 zdictParams.compressionLevel = parameters.compressionLevel;
621 return zdictParams;
622 }
623
/**
 * Constructs a dictionary using a heuristic based on the following paper:
 *
 * Liao, Petri, Moffat, Wirth
 * Effective Construction of Relative Lempel-Ziv Dictionaries
 * Published in WWW 2016.
 *
 * Returns the size of the dictionary written into `dictBuffer`, or a zstd
 * error code (test with ZSTD_isError()).
 */
ZDICTLIB_API size_t COVER_trainFromBuffer(
    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
    const size_t *samplesSizes, unsigned nbSamples, COVER_params_t parameters) {
  BYTE *const dict = (BYTE *)dictBuffer;
  COVER_ctx_t ctx;
  COVER_map_t activeDmers;
  /* Checks */
  if (!COVER_checkParameters(parameters)) {
    DISPLAYLEVEL(1, "Cover parameters incorrect\n");
    return ERROR(GENERIC);
  }
  if (nbSamples == 0) {
    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
    return ERROR(GENERIC);
  }
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
                 ZDICT_DICTSIZE_MIN);
    return ERROR(dstSize_tooSmall);
  }
  /* Initialize global data */
  g_displayLevel = parameters.notificationLevel;
  /* Initialize context and activeDmers */
  if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
                      parameters.d)) {
    return ERROR(GENERIC);
  }
  /* The map must hold one entry per dmer in a k-sized window (k - d + 1). */
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
    COVER_ctx_destroy(&ctx);
    return ERROR(GENERIC);
  }

  DISPLAYLEVEL(2, "Building dictionary\n");
  {
    /* Raw segment content is packed at the back of the buffer; finalize then
     * prepends the entropy tables and header in the remaining front space. */
    const size_t tail =
        COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
                              dictBufferCapacity, parameters);
    ZDICT_params_t zdictParams = COVER_translateParams(parameters);
    const size_t dictionarySize = ZDICT_finalizeDictionary(
        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
        samplesBuffer, samplesSizes, nbSamples, zdictParams);
    if (!ZSTD_isError(dictionarySize)) {
      DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
                   (U32)dictionarySize);
    }
    COVER_ctx_destroy(&ctx);
    COVER_map_destroy(&activeDmers);
    return dictionarySize;
  }
}
682
/**
 * COVER_best_t is used for two purposes:
 * 1. Synchronizing threads.
 * 2. Saving the best parameters and dictionary.
 *
 * All of the methods except COVER_best_init() are thread safe if zstd is
 * compiled with multithreaded support.
 */
typedef struct COVER_best_s {
  pthread_mutex_t mutex; /* guards all fields below */
  pthread_cond_t cond;   /* broadcast when liveJobs drops to 0 */
  size_t liveJobs;       /* number of parameter trials still running */
  void *dict;            /* best dictionary found so far (malloc'd) */
  size_t dictSize;
  COVER_params_t parameters; /* parameters that produced `dict` */
  size_t compressedSize;     /* total compressed size with `dict`; lower wins */
} COVER_best_t;
700
701 /**
702 * Initialize the `COVER_best_t`.
703 */
704 static void COVER_best_init(COVER_best_t *best) {
705 if (!best) {
706 return;
707 }
708 pthread_mutex_init(&best->mutex, NULL);
709 pthread_cond_init(&best->cond, NULL);
710 best->liveJobs = 0;
711 best->dict = NULL;
712 best->dictSize = 0;
713 best->compressedSize = (size_t)-1;
714 memset(&best->parameters, 0, sizeof(best->parameters));
715 }
716
/**
 * Wait until liveJobs == 0, i.e. every launched trial has finished.
 */
static void COVER_best_wait(COVER_best_t *best) {
  if (!best) {
    return;
  }
  pthread_mutex_lock(&best->mutex);
  while (best->liveJobs != 0) {
    /* Re-checked on every wakeup to tolerate spurious wakeups. */
    pthread_cond_wait(&best->cond, &best->mutex);
  }
  pthread_mutex_unlock(&best->mutex);
}
730
731 /**
732 * Call COVER_best_wait() and then destroy the COVER_best_t.
733 */
734 static void COVER_best_destroy(COVER_best_t *best) {
735 if (!best) {
736 return;
737 }
738 COVER_best_wait(best);
739 if (best->dict) {
740 free(best->dict);
741 }
742 pthread_mutex_destroy(&best->mutex);
743 pthread_cond_destroy(&best->cond);
744 }
745
/**
 * Called when a thread is about to be launched.
 * Increments liveJobs; every start must be paired with COVER_best_finish().
 */
static void COVER_best_start(COVER_best_t *best) {
  if (!best) {
    return;
  }
  pthread_mutex_lock(&best->mutex);
  ++best->liveJobs;
  pthread_mutex_unlock(&best->mutex);
}
758
759 /**
760 * Called when a thread finishes executing, both on error or success.
761 * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
762 * If this dictionary is the best so far save it and its parameters.
763 */
764 static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
765 COVER_params_t parameters, void *dict,
766 size_t dictSize) {
767 if (!best) {
768 return;
769 }
770 {
771 size_t liveJobs;
772 pthread_mutex_lock(&best->mutex);
773 --best->liveJobs;
774 liveJobs = best->liveJobs;
775 /* If the new dictionary is better */
776 if (compressedSize < best->compressedSize) {
777 /* Allocate space if necessary */
778 if (!best->dict || best->dictSize < dictSize) {
779 if (best->dict) {
780 free(best->dict);
781 }
782 best->dict = malloc(dictSize);
783 if (!best->dict) {
784 best->compressedSize = ERROR(GENERIC);
785 best->dictSize = 0;
786 return;
787 }
788 }
789 /* Save the dictionary, parameters, and size */
790 memcpy(best->dict, dict, dictSize);
791 best->dictSize = dictSize;
792 best->parameters = parameters;
793 best->compressedSize = compressedSize;
794 }
795 pthread_mutex_unlock(&best->mutex);
796 if (liveJobs == 0) {
797 pthread_cond_broadcast(&best->cond);
798 }
799 }
800 }
801
/**
 * Parameters for COVER_tryParameters().
 * Heap-allocated by the caller; ownership transfers to COVER_tryParameters(),
 * which frees it.
 */
typedef struct COVER_tryParameters_data_s {
  const COVER_ctx_t *ctx; /* shared, read-only context for this value of d */
  COVER_best_t *best;     /* shared accumulator of the best result */
  size_t dictBufferCapacity;
  COVER_params_t parameters;
} COVER_tryParameters_data_t;
811
/**
 * Tries a set of parameters and updates the COVER_best_t with the results.
 * This function is thread safe if zstd is compiled with multithreaded support.
 * It takes its parameters as an *OWNING* opaque pointer to support threading
 * (the POOL job signature is void(*)(void*)); `opaque` is freed here.
 */
static void COVER_tryParameters(void *opaque) {
  /* Save parameters as local variables */
  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
  const COVER_ctx_t *const ctx = data->ctx;
  const COVER_params_t parameters = data->parameters;
  size_t dictBufferCapacity = data->dictBufferCapacity;
  /* Start pessimistic: any early exit reports an error to COVER_best_finish */
  size_t totalCompressedSize = ERROR(GENERIC);
  /* Allocate space for hash table, dict, and freqs */
  COVER_map_t activeDmers;
  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
    goto _cleanup;
  }
  if (!dict || !freqs) {
    DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
    goto _cleanup;
  }
  /* Copy the frequencies because we need to modify them */
  memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32));
  /* Build the dictionary */
  {
    const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
                                              dictBufferCapacity, parameters);
    const ZDICT_params_t zdictParams = COVER_translateParams(parameters);
    dictBufferCapacity = ZDICT_finalizeDictionary(
        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples, zdictParams);
    if (ZDICT_isError(dictBufferCapacity)) {
      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
      goto _cleanup;
    }
  }
  /* Check total compressed size */
  {
    /* Pointers */
    ZSTD_CCtx *cctx;
    ZSTD_CDict *cdict;
    void *dst;
    /* Local variables */
    size_t dstCapacity;
    size_t i;
    /* Allocate dst with enough space to compress the maximum sized sample */
    {
      size_t maxSampleSize = 0;
      for (i = 0; i < ctx->nbSamples; ++i) {
        maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
      }
      dstCapacity = ZSTD_compressBound(maxSampleSize);
      dst = malloc(dstCapacity);
    }
    /* Create the cctx and cdict */
    cctx = ZSTD_createCCtx();
    cdict =
        ZSTD_createCDict(dict, dictBufferCapacity, parameters.compressionLevel);
    if (!dst || !cctx || !cdict) {
      goto _compressCleanup;
    }
    /* Compress each sample and sum their sizes (or error) */
    totalCompressedSize = 0;
    for (i = 0; i < ctx->nbSamples; ++i) {
      const size_t size = ZSTD_compress_usingCDict(
          cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
          ctx->samplesSizes[i], cdict);
      if (ZSTD_isError(size)) {
        totalCompressedSize = ERROR(GENERIC);
        goto _compressCleanup;
      }
      totalCompressedSize += size;
    }
  _compressCleanup:
    /* ZSTD_freeCCtx / ZSTD_freeCDict accept NULL */
    ZSTD_freeCCtx(cctx);
    ZSTD_freeCDict(cdict);
    if (dst) {
      free(dst);
    }
  }

_cleanup:
  /* Always report (even errors) so best->liveJobs is decremented exactly once
   * per COVER_best_start(). */
  COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
                    dictBufferCapacity);
  free(data);
  COVER_map_destroy(&activeDmers);
  if (dict) {
    free(dict);
  }
  if (freqs) {
    free(freqs);
  }
}
908
909 ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
910 size_t dictBufferCapacity,
911 const void *samplesBuffer,
912 const size_t *samplesSizes,
913 unsigned nbSamples,
914 COVER_params_t *parameters) {
915 /* constants */
916 const unsigned nbThreads = parameters->nbThreads;
917 const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
918 const unsigned kMaxD = parameters->d == 0 ? 16 : parameters->d;
919 const unsigned kMinK = parameters->k == 0 ? kMaxD : parameters->k;
920 const unsigned kMaxK = parameters->k == 0 ? 2048 : parameters->k;
921 const unsigned kSteps = parameters->steps == 0 ? 32 : parameters->steps;
922 const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
923 const unsigned kIterations =
924 (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
925 /* Local variables */
926 const int displayLevel = parameters->notificationLevel;
927 unsigned iteration = 1;
928 unsigned d;
929 unsigned k;
930 COVER_best_t best;
931 POOL_ctx *pool = NULL;
932 /* Checks */
933 if (kMinK < kMaxD || kMaxK < kMinK) {
934 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
935 return ERROR(GENERIC);
936 }
937 if (nbSamples == 0) {
938 DISPLAYLEVEL(1, "Cover must have at least one input file\n");
939 return ERROR(GENERIC);
940 }
941 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
942 DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
943 ZDICT_DICTSIZE_MIN);
944 return ERROR(dstSize_tooSmall);
945 }
946 if (nbThreads > 1) {
947 pool = POOL_create(nbThreads, 1);
948 if (!pool) {
949 return ERROR(memory_allocation);
950 }
951 }
952 /* Initialization */
953 COVER_best_init(&best);
954 /* Turn down global display level to clean up display at level 2 and below */
955 g_displayLevel = parameters->notificationLevel - 1;
956 /* Loop through d first because each new value needs a new context */
957 LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
958 kIterations);
959 for (d = kMinD; d <= kMaxD; d += 2) {
960 /* Initialize the context for this value of d */
961 COVER_ctx_t ctx;
962 LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
963 if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d)) {
964 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
965 COVER_best_destroy(&best);
966 return ERROR(GENERIC);
967 }
968 /* Loop through k reusing the same context */
969 for (k = kMinK; k <= kMaxK; k += kStepSize) {
970 /* Prepare the arguments */
971 COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc(
972 sizeof(COVER_tryParameters_data_t));
973 LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
974 if (!data) {
975 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
976 COVER_best_destroy(&best);
977 COVER_ctx_destroy(&ctx);
978 return ERROR(GENERIC);
979 }
980 data->ctx = &ctx;
981 data->best = &best;
982 data->dictBufferCapacity = dictBufferCapacity;
983 data->parameters = *parameters;
984 data->parameters.k = k;
985 data->parameters.d = d;
986 data->parameters.steps = kSteps;
987 /* Check the parameters */
988 if (!COVER_checkParameters(data->parameters)) {
989 DISPLAYLEVEL(1, "Cover parameters incorrect\n");
990 continue;
991 }
992 /* Call the function and pass ownership of data to it */
993 COVER_best_start(&best);
994 if (pool) {
995 POOL_add(pool, &COVER_tryParameters, data);
996 } else {
997 COVER_tryParameters(data);
998 }
999 /* Print status */
1000 LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
1001 (U32)((iteration * 100) / kIterations));
1002 ++iteration;
1003 }
1004 COVER_best_wait(&best);
1005 COVER_ctx_destroy(&ctx);
1006 }
1007 LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
1008 /* Fill the output buffer and parameters with output of the best parameters */
1009 {
1010 const size_t dictSize = best.dictSize;
1011 if (ZSTD_isError(best.compressedSize)) {
1012 COVER_best_destroy(&best);
1013 return best.compressedSize;
1014 }
1015 *parameters = best.parameters;
1016 memcpy(dictBuffer, best.dict, dictSize);
1017 COVER_best_destroy(&best);
1018 POOL_free(pool);
1019 return dictSize;
1020 }
1021 }
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100755
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
@@ -159,10 +159,21 b' i18n/hg.pot: $(PYFILES) $(DOCFILES) i18n'
159 159 # Packaging targets
160 160
161 161 osx:
162 rm -rf build/mercurial
162 163 /usr/bin/python2.7 setup.py install --optimize=1 \
163 164 --root=build/mercurial/ --prefix=/usr/local/ \
164 165 --install-lib=/Library/Python/2.7/site-packages/
165 166 make -C doc all install DESTDIR="$(PWD)/build/mercurial/"
167 # install zsh completions - this location appears to be
168 # searched by default as of macOS Sierra.
169 install -d build/mercurial/usr/local/share/zsh/site-functions/
170 install -m 0644 contrib/zsh_completion build/mercurial/usr/local/share/zsh/site-functions/hg
171 # install bash completions - there doesn't appear to be a
172 # place that's searched by default for bash, so we'll follow
173 # the lead of Apple's git install and just put it in a
174 # location of our own.
175 install -d build/mercurial/usr/local/hg/contrib/
176 install -m 0644 contrib/bash_completion build/mercurial/usr/local/hg/contrib/hg-completion.bash
166 177 mkdir -p $${OUTPUTDIR:-dist}
167 178 HGVER=$$((cat build/mercurial/Library/Python/2.7/site-packages/mercurial/__version__.py; echo 'print(version)') | python) && \
168 179 OSXVER=$$(sw_vers -productVersion | cut -d. -f1,2) && \
@@ -262,5 +273,9 b' docker-centos7:'
262 273 .PHONY: help all local build doc cleanbutpackages clean install install-bin \
263 274 install-doc install-home install-home-bin install-home-doc \
264 275 dist dist-notests check tests check-code update-pot \
265 osx fedora20 docker-fedora20 fedora21 docker-fedora21 \
276 osx deb ppa docker-debian-jessie \
277 docker-ubuntu-trusty docker-ubuntu-trusty-ppa \
278 docker-ubuntu-xenial docker-ubuntu-xenial-ppa \
279 docker-ubuntu-yakkety docker-ubuntu-yakkety-ppa \
280 fedora20 docker-fedora20 fedora21 docker-fedora21 \
266 281 centos5 docker-centos5 centos6 docker-centos6 centos7 docker-centos7
@@ -100,7 +100,7 b' def rephere(m):'
100 100
101 101 testpats = [
102 102 [
103 (r'pushd|popd', "don't use 'pushd' or 'popd', use 'cd'"),
103 (r'\b(push|pop)d\b', "don't use 'pushd' or 'popd', use 'cd'"),
104 104 (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
105 105 (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
106 106 (r'(?<!hg )grep.* -a', "don't use 'grep -a', use in-line python"),
@@ -190,8 +190,10 b' utestpats = ['
190 190 (r'^ .*: largefile \S+ not available from file:.*/.*[^)]$', winglobmsg),
191 191 (r'^ .*file://\$TESTTMP',
192 192 'write "file:/*/$TESTTMP" + (glob) to match on windows too'),
193 (r'^ [^$>].*27\.0\.0\.1.*[^)]$',
194 'use (glob) to match localhost IP on hosts without 127.0.0.1 too'),
193 (r'^ [^$>].*27\.0\.0\.1',
194 'use $LOCALIP not an explicit loopback address'),
195 (r'^ [^$>].*\$LOCALIP.*[^)]$',
196 'mark $LOCALIP output lines with (glob) to help tests in BSD jails'),
195 197 (r'^ (cat|find): .*: No such file or directory',
196 198 'use test -f to test for file existence'),
197 199 (r'^ diff -[^ -]*p',
@@ -210,8 +212,8 b' utestpats = ['
210 212 ],
211 213 # warnings
212 214 [
213 (r'^ (?!.*127\.0\.0\.1)[^*?/\n]* \(glob\)$',
214 "glob match with no glob string (?, *, /, and 127.0.0.1)"),
215 (r'^ (?!.*\$LOCALIP)[^*?/\n]* \(glob\)$',
216 "glob match with no glob string (?, *, /, and $LOCALIP)"),
215 217 ]
216 218 ]
217 219
@@ -237,7 +239,7 b' pypats = ['
237 239 (r'lambda\s*\(.*,.*\)',
238 240 "tuple parameter unpacking not available in Python 3+"),
239 241 (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
240 (r'\breduce\s*\(.*', "reduce is not available in Python 3+"),
242 (r'(?<!\.)\breduce\s*\(.*', "reduce is not available in Python 3+"),
241 243 (r'\bdict\(.*=', 'dict() is different in Py2 and 3 and is slower than {}',
242 244 'dict-from-generator'),
243 245 (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
@@ -318,7 +320,7 b' pypats = ['
318 320 'legacy exception syntax; use "as" instead of ","'),
319 321 (r':\n( )*( ){1,3}[^ ]', "must indent 4 spaces"),
320 322 (r'release\(.*wlock, .*lock\)', "wrong lock release order"),
321 (r'\b__bool__\b', "__bool__ should be __nonzero__ in Python 2"),
323 (r'\bdef\s+__bool__\b', "__bool__ should be __nonzero__ in Python 2"),
322 324 (r'os\.path\.join\(.*, *(""|\'\')\)',
323 325 "use pathutil.normasprefix(path) instead of os.path.join(path, '')"),
324 326 (r'\s0[0-7]+\b', 'legacy octal syntax; use "0o" prefix instead of "0"'),
@@ -330,13 +332,15 b' pypats = ['
330 332 (r'^import cStringIO', "don't use cStringIO.StringIO, use util.stringio"),
331 333 (r'^import urllib', "don't use urllib, use util.urlreq/util.urlerr"),
332 334 (r'^import SocketServer', "don't use SockerServer, use util.socketserver"),
333 (r'^import urlparse', "don't use urlparse, use util.urlparse"),
335 (r'^import urlparse', "don't use urlparse, use util.urlreq"),
334 336 (r'^import xmlrpclib', "don't use xmlrpclib, use util.xmlrpclib"),
335 337 (r'^import cPickle', "don't use cPickle, use util.pickle"),
336 338 (r'^import pickle', "don't use pickle, use util.pickle"),
337 339 (r'^import httplib', "don't use httplib, use util.httplib"),
338 340 (r'^import BaseHTTPServer', "use util.httpserver instead"),
339 341 (r'\.next\(\)', "don't use .next(), use next(...)"),
342 (r'([a-z]*).revision\(\1\.node\(',
343 "don't convert rev to node before passing to revision(nodeorrev)"),
340 344
341 345 # rules depending on implementation of repquote()
342 346 (r' x+[xpqo%APM][\'"]\n\s+[\'"]x',
@@ -371,6 +375,13 b' pyfilters = ['
371 375 (?P=quote))""", reppython),
372 376 ]
373 377
378 # extension non-filter patterns
379 pyextnfpats = [
380 [(r'^"""\n?[A-Z]', "don't capitalize docstring title")],
381 # warnings
382 [],
383 ]
384
374 385 txtfilters = []
375 386
376 387 txtpats = [
@@ -480,6 +491,7 b' py3pats = ['
480 491
481 492 checks = [
482 493 ('python', r'.*\.(py|cgi)$', r'^#!.*python', pyfilters, pypats),
494 ('python', r'.*hgext.*\.py$', '', [], pyextnfpats),
483 495 ('python 3', r'.*(hgext|mercurial).*(?<!pycompat)\.py', '',
484 496 pyfilters, py3pats),
485 497 ('test script', r'(.*/)?test-[^.~]*$', '', testfilters, testpats),
@@ -661,7 +673,7 b' def checkfile(f, logfunc=_defaultlogger.'
661 673 return result
662 674
663 675 def main():
664 parser = optparse.OptionParser("%prog [options] [files]")
676 parser = optparse.OptionParser("%prog [options] [files | -]")
665 677 parser.add_option("-w", "--warnings", action="store_true",
666 678 help="include warning-level checks")
667 679 parser.add_option("-p", "--per-file", type="int",
@@ -679,6 +691,9 b' def main():'
679 691
680 692 if len(args) == 0:
681 693 check = glob.glob("*")
694 elif args == ['-']:
695 # read file list from stdin
696 check = sys.stdin.read().splitlines()
682 697 else:
683 698 check = args
684 699
@@ -128,6 +128,24 b' static void preparesockdir(const char *s'
128 128 abortmsg("insecure sockdir %s", sockdir);
129 129 }
130 130
131 /*
132 * Check if a socket directory exists and is only owned by the current user.
133 * Return 1 if so, 0 if not. This is used to check if XDG_RUNTIME_DIR can be
134 * used or not. According to the specification [1], XDG_RUNTIME_DIR should be
135 * ignored if the directory is not owned by the user with mode 0700.
136 * [1]: https://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
137 */
138 static int checkruntimedir(const char *sockdir)
139 {
140 struct stat st;
141 int r = lstat(sockdir, &st);
142 if (r < 0) /* ex. does not exist */
143 return 0;
144 if (!S_ISDIR(st.st_mode)) /* ex. is a file, not a directory */
145 return 0;
146 return st.st_uid == geteuid() && (st.st_mode & 0777) == 0700;
147 }
148
131 149 static void getdefaultsockdir(char sockdir[], size_t size)
132 150 {
133 151 /* by default, put socket file in secure directory
@@ -135,7 +153,7 b' static void getdefaultsockdir(char sockd'
135 153 * (permission of socket file may be ignored on some Unices) */
136 154 const char *runtimedir = getenv("XDG_RUNTIME_DIR");
137 155 int r;
138 if (runtimedir) {
156 if (runtimedir && checkruntimedir(runtimedir)) {
139 157 r = snprintf(sockdir, size, "%s/chg", runtimedir);
140 158 } else {
141 159 const char *tmpdir = getenv("TMPDIR");
@@ -429,11 +447,11 b' int main(int argc, const char *argv[], c'
429 447 }
430 448
431 449 setupsignalhandler(hgc_peerpid(hgc), hgc_peerpgid(hgc));
450 atexit(waitpager);
432 451 int exitcode = hgc_runcommand(hgc, argv + 1, argc - 1);
433 452 restoresignalhandler();
434 453 hgc_close(hgc);
435 454 freecmdserveropts(&opts);
436 waitpager();
437 455
438 456 return exitcode;
439 457 }
@@ -252,7 +252,7 b' static void handlesystemrequest(hgclient'
252 252 ctx->datasize = sizeof(r_n);
253 253 writeblock(hgc);
254 254 } else if (strcmp(args[0], "pager") == 0) {
255 setuppager(args[1]);
255 setuppager(args[1], args + 3);
256 256 if (hgc->capflags & CAP_ATTACHIO)
257 257 attachio(hgc);
258 258 /* unblock the server */
@@ -91,11 +91,15 b' void setupsignalhandler(pid_t pid, pid_t'
91 91
92 92 struct sigaction sa;
93 93 memset(&sa, 0, sizeof(sa));
94
95 /* deadly signals meant to be sent to a process group:
96 * - SIGHUP: usually generated by the kernel, when termination of a
97 * process causes that process group to become orphaned
98 * - SIGINT: usually generated by the terminal */
94 99 sa.sa_handler = forwardsignaltogroup;
95 100 sa.sa_flags = SA_RESTART;
96 101 if (sigemptyset(&sa.sa_mask) < 0)
97 102 goto error;
98
99 103 if (sigaction(SIGHUP, &sa, NULL) < 0)
100 104 goto error;
101 105 if (sigaction(SIGINT, &sa, NULL) < 0)
@@ -111,6 +115,11 b' void setupsignalhandler(pid_t pid, pid_t'
111 115 sa.sa_flags = SA_RESTART;
112 116 if (sigaction(SIGWINCH, &sa, NULL) < 0)
113 117 goto error;
118 /* forward user-defined signals */
119 if (sigaction(SIGUSR1, &sa, NULL) < 0)
120 goto error;
121 if (sigaction(SIGUSR2, &sa, NULL) < 0)
122 goto error;
114 123 /* propagate job control requests to worker */
115 124 sa.sa_handler = forwardsignal;
116 125 sa.sa_flags = SA_RESTART;
@@ -168,7 +177,7 b' error:'
168 177
169 178 /* This implementation is based on hgext/pager.py (post 369741ef7253)
170 179 * Return 0 if pager is not started, or pid of the pager */
171 pid_t setuppager(const char *pagercmd)
180 pid_t setuppager(const char *pagercmd, const char *envp[])
172 181 {
173 182 assert(pagerpid == 0);
174 183 if (!pagercmd)
@@ -196,7 +205,8 b' pid_t setuppager(const char *pagercmd)'
196 205 close(pipefds[0]);
197 206 close(pipefds[1]);
198 207
199 int r = execlp("/bin/sh", "/bin/sh", "-c", pagercmd, NULL);
208 int r = execle("/bin/sh", "/bin/sh", "-c", pagercmd, NULL,
209 envp);
200 210 if (r < 0) {
201 211 abortmsgerrno("cannot start pager '%s'", pagercmd);
202 212 }
@@ -15,7 +15,7 b''
15 15 void restoresignalhandler(void);
16 16 void setupsignalhandler(pid_t pid, pid_t pgid);
17 17
18 pid_t setuppager(const char *pagercmd);
18 pid_t setuppager(const char *pagercmd, const char *envp[]);
19 19 void waitpager(void);
20 20
21 21 #endif /* PROCUTIL_H_ */
@@ -55,17 +55,15 b' except ImportError:'
55 55 import mercurial.util
56 56 import mercurial.dispatch
57 57
58 import time
59
60 58 def timer(func, title=None):
61 59 results = []
62 begin = time.time()
60 begin = mercurial.util.timer()
63 61 count = 0
64 62 while True:
65 63 ostart = os.times()
66 cstart = time.time()
64 cstart = mercurial.util.timer()
67 65 r = func()
68 cstop = time.time()
66 cstop = mercurial.util.timer()
69 67 ostop = os.times()
70 68 count += 1
71 69 a, b = ostart, ostop
@@ -12,7 +12,6 b' prints it to ``stderr`` on exit.'
12 12 '''
13 13
14 14 from __future__ import absolute_import
15 import atexit
16 15
17 16 def memusage(ui):
18 17 """Report memory usage of the current process."""
@@ -29,4 +28,4 b' def memusage(ui):'
29 28 for k, v in result.iteritems()]) + "\n")
30 29
31 30 def extsetup(ui):
32 atexit.register(memusage, ui)
31 ui.atexit(memusage, ui)
@@ -20,6 +20,7 b''
20 20
21 21 from __future__ import absolute_import
22 22 import functools
23 import gc
23 24 import os
24 25 import random
25 26 import sys
@@ -66,6 +67,16 b' def safehasattr(thing, attr):'
66 67 setattr(util, 'safehasattr', safehasattr)
67 68
68 69 # for "historical portability":
70 # define util.timer forcibly, because util.timer has been available
71 # since ae5d60bb70c9
72 if safehasattr(time, 'perf_counter'):
73 util.timer = time.perf_counter
74 elif os.name == 'nt':
75 util.timer = time.clock
76 else:
77 util.timer = time.time
78
79 # for "historical portability":
69 80 # use locally defined empty option list, if formatteropts isn't
70 81 # available, because commands.formatteropts has been available since
71 82 # 3.2 (or 7a7eed5176a4), even though formatting itself has been
@@ -164,6 +175,7 b' def gettimer(ui, opts=None):'
164 175 self.hexfunc = node.short
165 176 def __nonzero__(self):
166 177 return False
178 __bool__ = __nonzero__
167 179 def startitem(self):
168 180 pass
169 181 def data(self, **data):
@@ -189,14 +201,15 b' def stub_timer(fm, func, title=None):'
189 201 func()
190 202
191 203 def _timer(fm, func, title=None):
204 gc.collect()
192 205 results = []
193 begin = time.time()
206 begin = util.timer()
194 207 count = 0
195 208 while True:
196 209 ostart = os.times()
197 cstart = time.time()
210 cstart = util.timer()
198 211 r = func()
199 cstop = time.time()
212 cstop = util.timer()
200 213 ostop = os.times()
201 214 count += 1
202 215 a, b = ostart, ostop
@@ -993,6 +1006,26 b' def perfrevlogrevision(ui, repo, file_, '
993 1006 node = r.lookup(rev)
994 1007 rev = r.rev(node)
995 1008
1009 def getrawchunks(data, chain):
1010 start = r.start
1011 length = r.length
1012 inline = r._inline
1013 iosize = r._io.size
1014 buffer = util.buffer
1015 offset = start(chain[0])
1016
1017 chunks = []
1018 ladd = chunks.append
1019
1020 for rev in chain:
1021 chunkstart = start(rev)
1022 if inline:
1023 chunkstart += (rev + 1) * iosize
1024 chunklength = length(rev)
1025 ladd(buffer(data, chunkstart - offset, chunklength))
1026
1027 return chunks
1028
996 1029 def dodeltachain(rev):
997 1030 if not cache:
998 1031 r.clearcaches()
@@ -1003,24 +1036,15 b' def perfrevlogrevision(ui, repo, file_, '
1003 1036 r.clearcaches()
1004 1037 r._chunkraw(chain[0], chain[-1])
1005 1038
1006 def dodecompress(data, chain):
1039 def dorawchunks(data, chain):
1007 1040 if not cache:
1008 1041 r.clearcaches()
1009
1010 start = r.start
1011 length = r.length
1012 inline = r._inline
1013 iosize = r._io.size
1014 buffer = util.buffer
1015 offset = start(chain[0])
1042 getrawchunks(data, chain)
1016 1043
1017 for rev in chain:
1018 chunkstart = start(rev)
1019 if inline:
1020 chunkstart += (rev + 1) * iosize
1021 chunklength = length(rev)
1022 b = buffer(data, chunkstart - offset, chunklength)
1023 r.decompress(b)
1044 def dodecompress(chunks):
1045 decomp = r.decompress
1046 for chunk in chunks:
1047 decomp(chunk)
1024 1048
1025 1049 def dopatch(text, bins):
1026 1050 if not cache:
@@ -1039,6 +1063,7 b' def perfrevlogrevision(ui, repo, file_, '
1039 1063
1040 1064 chain = r._deltachain(rev)[0]
1041 1065 data = r._chunkraw(chain[0], chain[-1])[1]
1066 rawchunks = getrawchunks(data, chain)
1042 1067 bins = r._chunks(chain)
1043 1068 text = str(bins[0])
1044 1069 bins = bins[1:]
@@ -1048,7 +1073,8 b' def perfrevlogrevision(ui, repo, file_, '
1048 1073 (lambda: dorevision(), 'full'),
1049 1074 (lambda: dodeltachain(rev), 'deltachain'),
1050 1075 (lambda: doread(chain), 'read'),
1051 (lambda: dodecompress(data, chain), 'decompress'),
1076 (lambda: dorawchunks(data, chain), 'rawchunks'),
1077 (lambda: dodecompress(rawchunks), 'decompress'),
1052 1078 (lambda: dopatch(text, bins), 'patch'),
1053 1079 (lambda: dohash(text), 'hash'),
1054 1080 ]
@@ -1256,6 +1282,17 b' def perflrucache(ui, size=4, gets=10000,'
1256 1282 timer(fn, title=title)
1257 1283 fm.end()
1258 1284
1285 @command('perfwrite', formatteropts)
1286 def perfwrite(ui, repo, **opts):
1287 """microbenchmark ui.write
1288 """
1289 timer, fm = gettimer(ui, opts)
1290 def write():
1291 for i in range(100000):
1292 ui.write(('Testing write performance\n'))
1293 timer(write)
1294 fm.end()
1295
1259 1296 def uisetup(ui):
1260 1297 if (util.safehasattr(cmdutil, 'openrevlog') and
1261 1298 not util.safehasattr(commands, 'debugrevlogopts')):
@@ -1,6 +1,66 b''
1 1 Version History
2 2 ===============
3 3
4 0.8.1 (released 2017-04-08)
5 ---------------------------
6
7 * Add #includes so compilation on OS X and BSDs works (#20).
8
9 0.8.0 (released 2017-03-08)
10 ---------------------------
11
12 * CompressionParameters now has a estimated_compression_context_size() method.
13 zstd.estimate_compression_context_size() is now deprecated and slated for
14 removal.
15 * Implemented a lot of fuzzing tests.
16 * CompressionParameters instances now perform extra validation by calling
17 ZSTD_checkCParams() at construction time.
18 * multi_compress_to_buffer() API for compressing multiple inputs as a
19 single operation, as efficiently as possible.
20 * ZSTD_CStream instances are now used across multiple operations on
21 ZstdCompressor instances, resulting in much better performance for
22 APIs that do streaming.
23 * ZSTD_DStream instances are now used across multiple operations on
24 ZstdDecompressor instances, resulting in much better performance for
25 APIs that do streaming.
26 * train_dictionary() now releases the GIL.
27 * Support for training dictionaries using the COVER algorithm.
28 * multi_decompress_to_buffer() API for decompressing multiple frames as a
29 single operation, as efficiently as possible.
30 * Support for multi-threaded compression.
31 * Disable deprecation warnings when compiling CFFI module.
32 * Fixed memory leak in train_dictionary().
33 * Removed DictParameters type.
34 * train_dictionary() now accepts keyword arguments instead of a
35 DictParameters instance to control dictionary generation.
36
37 0.7.0 (released 2017-02-07)
38 ---------------------------
39
40 * Added zstd.get_frame_parameters() to obtain info about a zstd frame.
41 * Added ZstdDecompressor.decompress_content_dict_chain() for efficient
42 decompression of *content-only dictionary chains*.
43 * CFFI module fully implemented; all tests run against both C extension and
44 CFFI implementation.
45 * Vendored version of zstd updated to 1.1.3.
46 * Use ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference()
47 to avoid extra memory allocation of dict data.
48 * Add function names to error messages (by using ":name" in PyArg_Parse*
49 functions).
50 * Reuse decompression context across operations. Previously, we created a
51 new ZSTD_DCtx for each decompress(). This was measured to slow down
52 decompression by 40-200MB/s. The API guarantees say ZstdDecompressor
53 is not thread safe. So we reuse the ZSTD_DCtx across operations and make
54 things faster in the process.
55 * ZstdCompressor.write_to()'s compress() and flush() methods now return number
56 of bytes written.
57 * ZstdDecompressor.write_to()'s write() method now returns the number of bytes
58 written to the underlying output object.
59 * CompressionParameters instances now expose their values as attributes.
60 * CompressionParameters instances no longer are subscriptable nor behave
61 as tuples (backwards incompatible). Use attributes to obtain values.
62 * DictParameters instances now expose their values as attributes.
63
4 64 0.6.0 (released 2017-01-14)
5 65 ---------------------------
6 66
This diff has been collapsed as it changes many lines, (720 lines changed) Show them Hide them
@@ -4,10 +4,11 b' python-zstandard'
4 4
5 5 This project provides Python bindings for interfacing with the
6 6 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
7 and CFFI interface is provided.
7 and CFFI interface are provided.
8 8
9 The primary goal of the extension is to provide a Pythonic interface to
10 the underlying C API. This means exposing most of the features and flexibility
9 The primary goal of the project is to provide a rich interface to the
10 underlying C API through a Pythonic interface while not sacrificing
11 performance. This means exposing most of the features and flexibility
11 12 of the C API while not sacrificing usability or safety that Python provides.
12 13
13 14 The canonical home for this project is
@@ -19,15 +20,24 b' State of Project'
19 20 ================
20 21
21 22 The project is officially in beta state. The author is reasonably satisfied
22 with the current API and that functionality works as advertised. There
23 may be some backwards incompatible changes before 1.0. Though the author
24 does not intend to make any major changes to the Python API.
23 that functionality works as advertised. **There will be some backwards
24 incompatible changes before 1.0, probably in the 0.9 release.** This may
25 involve renaming the main module from *zstd* to *zstandard* and renaming
26 various types and methods. Pin the package version to prevent unwanted
27 breakage when this change occurs!
28
29 This project is vendored and distributed with Mercurial 4.1, where it is
30 used in a production capacity.
25 31
26 32 There is continuous integration for Python versions 2.6, 2.7, and 3.3+
27 33 on Linux x86_x64 and Windows x86 and x86_64. The author is reasonably
28 34 confident the extension is stable and works as advertised on these
29 35 platforms.
30 36
37 The CFFI bindings are mostly feature complete. Where a feature is implemented
38 in CFFI, unit tests run against both C extension and CFFI implementation to
39 ensure behavior parity.
40
31 41 Expected Changes
32 42 ----------------
33 43
@@ -43,19 +53,27 b" sizes using zstd's preferred defaults)."
43 53 There should be an API that accepts an object that conforms to the buffer
44 54 interface and returns an iterator over compressed or decompressed output.
45 55
56 There should be an API that exposes an ``io.RawIOBase`` interface to
57 compressor and decompressor streams, like how ``gzip.GzipFile`` from
58 the standard library works (issue 13).
59
46 60 The author is on the fence as to whether to support the extremely
47 61 low level compression and decompression APIs. It could be useful to
48 62 support compression without the framing headers. But the author doesn't
49 63 believe it a high priority at this time.
50 64
51 The CFFI bindings are half-baked and need to be finished.
65 There will likely be a refactoring of the module names. Currently,
66 ``zstd`` is a C extension and ``zstd_cffi`` is the CFFI interface.
67 This means that all code for the C extension must be implemented in
68 C. ``zstd`` may be converted to a Python module so code can be reused
69 between CFFI and C and so not all code in the C extension has to be C.
52 70
53 71 Requirements
54 72 ============
55 73
56 This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, and 3.5
57 on common platforms (Linux, Windows, and OS X). Only x86_64 is currently
58 well-tested as an architecture.
74 This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, 3.5, and
75 3.6 on common platforms (Linux, Windows, and OS X). Only x86_64 is
76 currently well-tested as an architecture.
59 77
60 78 Installing
61 79 ==========
@@ -106,15 +124,11 b' compressing at several hundred MB/s and '
106 124 Comparison to Other Python Bindings
107 125 ===================================
108 126
109 https://pypi.python.org/pypi/zstd is an alternative Python binding to
127 https://pypi.python.org/pypi/zstd is an alternate Python binding to
110 128 Zstandard. At the time this was written, the latest release of that
111 package (1.0.0.2) had the following significant differences from this package:
112
113 * It only exposes the simple API for compression and decompression operations.
114 This extension exposes the streaming API, dictionary training, and more.
115 * It adds a custom framing header to compressed data and there is no way to
116 disable it. This means that data produced with that module cannot be used by
117 other Zstandard implementations.
129 package (1.1.2) only exposed the simple APIs for compression and decompression.
130 This package exposes much more of the zstd API, including streaming and
131 dictionary compression. This package also has CFFI support.
118 132
119 133 Bundling of Zstandard Source Code
120 134 =================================
@@ -151,10 +165,13 b' A Tox configuration is present to test a'
151 165 $ tox
152 166
153 167 Tests use the ``hypothesis`` Python package to perform fuzzing. If you
154 don't have it, those tests won't run.
168 don't have it, those tests won't run. Since the fuzzing tests take longer
169 to execute than normal tests, you'll need to opt in to running them by
170 setting the ``ZSTD_SLOW_TESTS`` environment variable. This is set
171 automatically when using ``tox``.
155 172
156 There is also an experimental CFFI module. You need the ``cffi`` Python
157 package installed to build and test that.
173 The ``cffi`` Python package needs to be installed in order to build the CFFI
174 bindings. If it isn't present, the CFFI bindings won't be built.
158 175
159 176 To create a virtualenv with all development dependencies, do something
160 177 like the following::
@@ -171,8 +188,16 b' like the following::'
171 188 API
172 189 ===
173 190
174 The compiled C extension provides a ``zstd`` Python module. This module
175 exposes the following interfaces.
191 The compiled C extension provides a ``zstd`` Python module. The CFFI
192 bindings provide a ``zstd_cffi`` module. Both provide an identical API
193 interface. The types, functions, and attributes exposed by these modules
194 are documented in the sections below.
195
196 .. note::
197
198 The documentation in this section makes references to various zstd
199 concepts and functionality. The ``Concepts`` section below explains
200 these concepts in more detail.
176 201
177 202 ZstdCompressor
178 203 --------------
@@ -208,6 +233,14 b' write_dict_id'
208 233 Whether to write the dictionary ID into the compressed data.
209 234 Defaults to True. The dictionary ID is only written if a dictionary
210 235 is being used.
236 threads
237 Enables and sets the number of threads to use for multi-threaded compression
238 operations. Defaults to 0, which means to use single-threaded compression.
239 Negative values will resolve to the number of logical CPUs in the system.
240 Read below for more info on multi-threaded compression. This argument only
241 controls thread count for operations that operate on individual pieces of
242 data. APIs that spawn multiple threads for working on multiple pieces of
243 data have their own ``threads`` argument.
211 244
212 245 Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
213 246 instances can be called from multiple Python threads simultaneously. In other
@@ -221,6 +254,8 b' Simple API'
221 254 cctx = zstd.ZstdCompressor()
222 255 compressed = cctx.compress(b'data to compress')
223 256
257 The ``data`` argument can be any object that implements the *buffer protocol*.
258
224 259 Unless ``compression_params`` or ``dict_data`` are passed to the
225 260 ``ZstdCompressor``, each invocation of ``compress()`` will calculate the
226 261 optimal compression parameters for the configured compression ``level`` and
@@ -260,6 +295,10 b' A ``flush()`` method can be called to ev'
260 295 compressor's internal state into the output object. This may result in 0 or
261 296 more ``write()`` calls to the output object.
262 297
298 Both ``write()`` and ``flush()`` return the number of bytes written to the
299 object's ``write()``. In many cases, small inputs do not accumulate enough
300 data to cause a write and ``write()`` will return ``0``.
301
263 302 If the size of the data being fed to this streaming compressor is known,
264 303 you can declare it before compression begins::
265 304
@@ -406,6 +445,42 b' the compressor::'
406 445 data = cobj.compress(b'foobar')
407 446 data = cobj.flush()
408 447
448 Batch Compression API
449 ^^^^^^^^^^^^^^^^^^^^^
450
451 (Experimental. Not yet supported in CFFI bindings.)
452
453 ``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple
454 inputs as a single operation.
455
456 Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a
457 ``BufferWithSegments``, or a list containing byte like objects. Each element of
458 the container will be compressed individually using the configured parameters
459 on the ``ZstdCompressor`` instance.
460
461 The ``threads`` argument controls how many threads to use for compression. The
462 default is ``0`` which means to use a single thread. Negative values use the
463 number of logical CPUs in the machine.
464
465 The function returns a ``BufferWithSegmentsCollection``. This type represents
466 N discrete memory allocations, each holding 1 or more compressed frames.
467
468 Output data is written to shared memory buffers. This means that unlike
469 regular Python objects, a reference to *any* object within the collection
470 keeps the shared buffer and therefore memory backing it alive. This can have
471 undesirable effects on process memory usage.
472
473 The API and behavior of this function is experimental and will likely change.
474 Known deficiencies include:
475
476 * If asked to use multiple threads, it will always spawn that many threads,
477 even if the input is too small to use them. It should automatically lower
478 the thread count when the extra threads would just add overhead.
479 * The buffer allocation strategy is fixed. There is room to make it dynamic,
480 perhaps even to allow one output buffer per input, facilitating a variation
481 of the API to return a list without the adverse effects of shared memory
482 buffers.
483
409 484 ZstdDecompressor
410 485 ----------------
411 486
@@ -476,6 +551,10 b' This behaves similarly to ``zstd.ZstdCom'
476 551 the decompressor by calling ``write(data)`` and decompressed output is written
477 552 to the output object by calling its ``write(data)`` method.
478 553
554 Calls to ``write()`` will return the number of bytes written to the output
555 object. Not all inputs will result in bytes being written, so return values
556 of ``0`` are possible.
557
479 558 The size of chunks being ``write()`` to the destination can be specified::
480 559
481 560 dctx = zstd.ZstdDecompressor()
@@ -576,64 +655,155 b' Here is how this API should be used::'
576 655 data = dobj.decompress(compressed_chunk_0)
577 656 data = dobj.decompress(compressed_chunk_1)
578 657
579 Choosing an API
580 ---------------
658 Batch Decompression API
659 ^^^^^^^^^^^^^^^^^^^^^^^
660
661 (Experimental. Not yet supported in CFFI bindings.)
662
663 ``multi_decompress_to_buffer()`` performs decompression of multiple
664 frames as a single operation and returns a ``BufferWithSegmentsCollection``
665 containing decompressed data for all inputs.
581 666
582 Various forms of compression and decompression APIs are provided because each
583 are suitable for different use cases.
667 Compressed frames can be passed to the function as a ``BufferWithSegments``,
668 a ``BufferWithSegmentsCollection``, or as a list containing objects that
669 conform to the buffer protocol. For best performance, pass a
670 ``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as
671 minimal input validation will be done for that type. If calling from
672 Python (as opposed to C), constructing one of these instances may add
673 overhead cancelling out the performance overhead of validation for list
674 inputs.
675
676 The decompressed size of each frame must be discoverable. It can either be
677 embedded within the zstd frame (``write_content_size=True`` argument to
678 ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument.
679
680 The ``decompressed_sizes`` argument is an object conforming to the buffer
681 protocol which holds an array of 64-bit unsigned integers in the machine's
682 native format defining the decompressed sizes of each frame. If this argument
683 is passed, it avoids having to scan each frame for its decompressed size.
684 This frame scanning can add noticeable overhead in some scenarios.
584 685
585 The simple/one-shot APIs are useful for small data, when the decompressed
586 data size is known (either recorded in the zstd frame header via
587 ``write_content_size`` or known via an out-of-band mechanism, such as a file
588 size).
686 The ``threads`` argument controls the number of threads to use to perform
687 decompression operations. The default (``0``) or the value ``1`` means to
688 use a single thread. Negative values use the number of logical CPUs in the
689 machine.
690
691 .. note::
692
693 It is possible to pass a ``mmap.mmap()`` instance into this function by
694 wrapping it with a ``BufferWithSegments`` instance (which will define the
695 offsets of frames within the memory mapped region).
696
697 This function is logically equivalent to performing ``dctx.decompress()``
698 on each input frame and returning the result.
589 699
590 A limitation of the simple APIs is that input or output data must fit in memory.
591 And unless using advanced tricks with Python *buffer objects*, both input and
592 output must fit in memory simultaneously.
700 This function exists to perform decompression on multiple frames as fast
701 as possible by having as little overhead as possible. Since decompression is
702 performed as a single operation and since the decompressed output is stored in
703 a single buffer, extra memory allocations, Python objects, and Python function
704 calls are avoided. This is ideal for scenarios where callers need to access
705 decompressed data for multiple frames.
593 706
594 Another limitation is that compression or decompression is performed as a single
595 operation. So if you feed large input, it could take a long time for the
596 function to return.
707 Currently, the implementation always spawns multiple threads when requested,
708 even if the amount of work to do is small. In the future, it will be smarter
709 about avoiding threads and their associated overhead when the amount of
710 work to do is small.
711
712 Content-Only Dictionary Chain Decompression
713 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
714
715 ``decompress_content_dict_chain(frames)`` performs decompression of a list of
716 zstd frames produced using chained *content-only* dictionary compression. Such
717 a list of frames is produced by compressing discrete inputs where each
718 non-initial input is compressed with a *content-only* dictionary consisting
719 of the content of the previous input.
720
721 For example, say you have the following inputs::
597 722
598 The streaming APIs do not have the limitations of the simple API. The cost to
599 this is they are more complex to use than a single function call.
723 inputs = [b'input 1', b'input 2', b'input 3']
724
725 The zstd frame chain consists of:
726
727 1. ``b'input 1'`` compressed in standalone/discrete mode
728 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *content-only* dictionary
729 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *content-only* dictionary
730
731 Each zstd frame **must** have the content size written.
732
733 The following Python code can be used to produce a *content-only dictionary
734 chain*::
600 735
601 The streaming APIs put the caller in control of compression and decompression
602 behavior by allowing them to directly control either the input or output side
603 of the operation.
736 def make_chain(inputs):
737 frames = []
738
739 # First frame is compressed in standalone/discrete mode.
740 zctx = zstd.ZstdCompressor(write_content_size=True)
741 frames.append(zctx.compress(inputs[0]))
604 742
605 With the streaming input APIs, the caller feeds data into the compressor or
606 decompressor as they see fit. Output data will only be written after the caller
607 has explicitly written data.
743 # Subsequent frames use the previous fulltext as a content-only dictionary
744 for i, raw in enumerate(inputs[1:]):
745 dict_data = zstd.ZstdCompressionDict(inputs[i])
746 zctx = zstd.ZstdCompressor(write_content_size=True, dict_data=dict_data)
747 frames.append(zctx.compress(raw))
748
749 return frames
750
751 ``decompress_content_dict_chain()`` returns the uncompressed data of the last
752 element in the input chain.
608 753
609 With the streaming output APIs, the caller consumes output from the compressor
610 or decompressor as they see fit. The compressor or decompressor will only
611 consume data from the source when the caller is ready to receive it.
754 It is possible to implement *content-only dictionary chain* decompression
755 on top of other Python APIs. However, this function will likely be significantly
756 faster, especially for long input chains, as it avoids the overhead of
757 instantiating and passing around intermediate objects between C and Python.
758
759 Multi-Threaded Compression
760 --------------------------
761
762 ``ZstdCompressor`` accepts a ``threads`` argument that controls the number
763 of threads to use for compression. The way this works is that input is split
764 into segments and each segment is fed into a worker pool for compression. Once
765 a segment is compressed, it is flushed/appended to the output.
766
767 The segment size for multi-threaded compression is chosen from the window size
768 of the compressor. This is derived from the ``window_log`` attribute of a
769 ``CompressionParameters`` instance. By default, segment sizes are in the 1+MB
770 range.
612 771
613 One end of the streaming APIs involves a file-like object that must
614 ``write()`` output data or ``read()`` input data. Depending on what the
615 backing storage for these objects is, those operations may not complete quickly.
616 For example, when streaming compressed data to a file, the ``write()`` into
617 a streaming compressor could result in a ``write()`` to the filesystem, which
618 may take a long time to finish due to slow I/O on the filesystem. So, there
619 may be overhead in streaming APIs beyond the compression and decompression
620 operations.
772 If multi-threaded compression is requested and the input is smaller than the
773 configured segment size, only a single compression thread will be used. If the
774 input is smaller than the segment size multiplied by the thread pool size or
775 if data cannot be delivered to the compressor fast enough, not all requested
776 compressor threads may be active simultaneously.
777
778 Compared to non-multi-threaded compression, multi-threaded compression has
779 higher per-operation overhead. This includes extra memory operations,
780 thread creation, lock acquisition, etc.
781
782 Due to the nature of multi-threaded compression using *N* compression
783 *states*, the output from multi-threaded compression will likely be larger
784 than non-multi-threaded compression. The difference is usually small. But
785 there is a CPU/wall time versus size trade off that may warrant investigation.
786
787 Output from multi-threaded compression does not require any special handling
788 on the decompression side. In other words, any zstd decompressor should be able
789 to consume data produced with multi-threaded compression.
621 790
622 791 Dictionary Creation and Management
623 792 ----------------------------------
624 793
625 Zstandard allows *dictionaries* to be used when compressing and
626 decompressing data. The idea is that if you are compressing a lot of similar
627 data, you can precompute common properties of that data (such as recurring
628 byte sequences) to achieve better compression ratios.
629
630 In Python, compression dictionaries are represented as the
631 ``ZstdCompressionDict`` type.
794 Compression dictionaries are represented as the ``ZstdCompressionDict`` type.
632 795
633 796 Instances can be constructed from bytes::
634 797
635 798 dict_data = zstd.ZstdCompressionDict(data)
636 799
800 It is possible to construct a dictionary from *any* data. Unless the
801 data begins with a magic header, the dictionary will be treated as
802 *content-only*. *Content-only* dictionaries allow compression operations
803 that follow to reference raw data within the content. For one use of
804 *content-only* dictionaries, see
805 ``ZstdDecompressor.decompress_content_dict_chain()``.
806
637 807 More interestingly, instances can be created by *training* on sample data::
638 808
639 809 dict_data = zstd.train_dictionary(size, samples)
@@ -673,6 +843,88 b' a ``ZstdCompressionDict`` later) via ``a'
673 843 dict_data = zstd.train_dictionary(size, samples)
674 844 raw_data = dict_data.as_bytes()
675 845
846 The following named arguments to ``train_dictionary`` can also be used
847 to further control dictionary generation.
848
849 selectivity
850 Integer selectivity level. Default is 9. Larger values yield more data in
851 dictionary.
852 level
853 Integer compression level. Default is 6.
854 dict_id
855 Integer dictionary ID for the produced dictionary. Default is 0, which
856 means to use a random value.
857 notifications
858 Controls writing of informational messages to ``stderr``. ``0`` (the
859 default) means to write nothing. ``1`` writes errors. ``2`` writes
860 progression info. ``3`` writes more details. And ``4`` writes all info.
861
862 Cover Dictionaries
863 ^^^^^^^^^^^^^^^^^^
864
865 An alternate dictionary training mechanism named *cover* is also available.
866 More details about this training mechanism are available in the paper
867 *Effective Construction of Relative Lempel-Ziv Dictionaries* (authors:
868 Liao, Petri, Moffat, Wirth).
869
870 To use this mechanism, use ``zstd.train_cover_dictionary()`` instead of
871 ``zstd.train_dictionary()``. The function behaves nearly the same except
872 its arguments are different and the returned dictionary will contain ``k``
873 and ``d`` attributes reflecting the parameters to the cover algorithm.
874
875 .. note::
876
877 The ``k`` and ``d`` attributes are only populated on dictionary
878 instances created by this function. If a ``ZstdCompressionDict`` is
879 constructed from raw bytes data, the ``k`` and ``d`` attributes will
880 be ``0``.
881
882 The segment and dmer size parameters to the cover algorithm can either be
883 specified manually or you can ask ``train_cover_dictionary()`` to try
884 multiple values and pick the best one, where *best* means the smallest
885 compressed data size.
886
887 In manual mode, the ``k`` and ``d`` arguments must be specified or a
888 ``ZstdError`` will be raised.
889
890 In automatic mode (triggered by specifying ``optimize=True``), ``k``
891 and ``d`` are optional. If a value isn't specified, then default values for
892 both are tested. The ``steps`` argument can control the number of steps
893 through ``k`` values. The ``level`` argument defines the compression level
894 that will be used when testing the compressed size. And ``threads`` can
895 specify the number of threads to use for concurrent operation.
896
897 This function takes the following arguments:
898
899 dict_size
900 Target size in bytes of the dictionary to generate.
901 samples
902 A list of bytes holding samples the dictionary will be trained from.
903 k
904 Parameter to cover algorithm defining the segment size. A reasonable range
905 is [16, 2048+].
906 d
907 Parameter to cover algorithm defining the dmer size. A reasonable range is
908 [6, 16]. ``d`` must be less than or equal to ``k``.
909 dict_id
910 Integer dictionary ID for the produced dictionary. Default is 0, which uses
911 a random value.
912 optimize
913 When true, test dictionary generation with multiple parameters.
914 level
915 Integer target compression level when testing compression with
916 ``optimize=True``. Default is 1.
917 steps
918 Number of steps through ``k`` values to perform when ``optimize=True``.
919 Default is 32.
920 threads
921 Number of threads to use when ``optimize=True``. Default is 0, which means
922 to use a single thread. A negative value can be specified to use as many
923 threads as there are detected logical CPUs.
924 notifications
925 Controls writing of informational messages to ``stderr``. See the
926 documentation for ``train_dictionary()`` for more.
927
676 928 Explicit Compression Parameters
677 929 -------------------------------
678 930
@@ -700,19 +952,57 b' You can then configure a compressor to u'
700 952
701 953 cctx = zstd.ZstdCompressor(compression_params=params)
702 954
703 The members of the ``CompressionParameters`` tuple are as follows::
955 The members/attributes of ``CompressionParameters`` instances are as follows::
704 956
705 * 0 - Window log
706 * 1 - Chain log
707 * 2 - Hash log
708 * 3 - Search log
709 * 4 - Search length
710 * 5 - Target length
711 * 6 - Strategy (one of the ``zstd.STRATEGY_`` constants)
957 * window_log
958 * chain_log
959 * hash_log
960 * search_log
961 * search_length
962 * target_length
963 * strategy
964
965 This is the order the arguments are passed to the constructor if not using
966 named arguments.
712 967
713 968 You'll need to read the Zstandard documentation for what these parameters
714 969 do.
715 970
971 Frame Inspection
972 ----------------
973
974 Data emitted from zstd compression is encapsulated in a *frame*. This frame
975 begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
976 the frame in more detail. For more info, see
977 https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
978
979 ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
980 instance and returns a ``FrameParameters`` object describing the frame.
981
982 Depending on which fields are present in the frame and their values, the
983 length of the frame parameters varies. If insufficient bytes are passed
984 in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
985 frame parameters can be parsed, pass in at least 18 bytes.
986
987 ``FrameParameters`` instances have the following attributes:
988
989 content_size
990 Integer size of original, uncompressed content. This will be ``0`` if the
991 original content size isn't written to the frame (controlled with the
992 ``write_content_size`` argument to ``ZstdCompressor``) or if the input
993 content size was ``0``.
994
995 window_size
996 Integer size of maximum back-reference distance in compressed data.
997
998 dict_id
999 Integer of dictionary ID used for compression. ``0`` if no dictionary
1000 ID was used or if the dictionary ID was ``0``.
1001
1002 has_checksum
1003 Bool indicating whether a 4 byte content checksum is stored at the end
1004 of the frame.
1005
716 1006 Misc Functionality
717 1007 ------------------
718 1008
@@ -776,19 +1066,293 b' TARGETLENGTH_MIN'
776 1066 TARGETLENGTH_MAX
777 1067 Maximum value for compression parameter
778 1068 STRATEGY_FAST
779 Compression strategory
1069 Compression strategy
780 1070 STRATEGY_DFAST
781 Compression strategory
1071 Compression strategy
782 1072 STRATEGY_GREEDY
783 Compression strategory
1073 Compression strategy
784 1074 STRATEGY_LAZY
785 Compression strategory
1075 Compression strategy
786 1076 STRATEGY_LAZY2
787 Compression strategory
1077 Compression strategy
788 1078 STRATEGY_BTLAZY2
789 Compression strategory
1079 Compression strategy
790 1080 STRATEGY_BTOPT
791 Compression strategory
1081 Compression strategy
1082
1083 Performance Considerations
1084 --------------------------
1085
1086 The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
1087 persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
1088 or ``ZstdDecompressor`` instance for multiple operations is faster than
1089 instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
1090 operation. The differences are magnified as the size of data decreases. For
1091 example, the difference between *context* reuse and non-reuse for 100,000
1092 100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
1093 whereas 10 1,000,000 byte inputs will be more similar in speed (because the
1094 time spent doing compression dwarfs time spent creating new *contexts*).
1095
1096 Buffer Types
1097 ------------
1098
1099 The API exposes a handful of custom types for interfacing with memory buffers.
1100 The primary goal of these types is to facilitate efficient multi-object
1101 operations.
1102
1103 The essential idea is to have a single memory allocation provide backing
1104 storage for multiple logical objects. This has 2 main advantages: fewer
1105 allocations and optimal memory access patterns. This avoids having to allocate
1106 a Python object for each logical object and furthermore ensures that access of
1107 data for objects can be sequential (read: fast) in memory.
1108
1109 BufferWithSegments
1110 ^^^^^^^^^^^^^^^^^^
1111
1112 The ``BufferWithSegments`` type represents a memory buffer containing N
1113 discrete items of known lengths (segments). It is essentially a fixed size
1114 memory address and an array of 2-tuples of ``(offset, length)`` 64-bit
1115 unsigned native endian integers defining the byte offset and length of each
1116 segment within the buffer.
1117
1118 Instances behave like containers.
1119
1120 ``len()`` returns the number of segments within the instance.
1121
1122 ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an
1123 individual segment within the backing buffer. That returned object references
1124 (not copies) memory. This means that iterating all objects doesn't copy
1125 data within the buffer.
1126
1127 The ``.size`` attribute contains the total size in bytes of the backing
1128 buffer.
1129
1130 Instances conform to the buffer protocol. So a reference to the backing bytes
1131 can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also
1132 be obtained via ``.tobytes()``.
1133
1134 The ``.segments`` attribute exposes the array of ``(offset, length)`` for
1135 segments within the buffer. It is a ``BufferSegments`` type.
1136
1137 BufferSegment
1138 ^^^^^^^^^^^^^
1139
1140 The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``.
1141 It is essentially a reference to N bytes within a ``BufferWithSegments``.
1142
1143 ``len()`` returns the length of the segment in bytes.
1144
1145 ``.offset`` contains the byte offset of this segment within its parent
1146 ``BufferWithSegments`` instance.
1147
1148 The object conforms to the buffer protocol. ``.tobytes()`` can be called to
1149 obtain a ``bytes`` instance with a copy of the backing bytes.
1150
1151 BufferSegments
1152 ^^^^^^^^^^^^^^
1153
1154 This type represents an array of ``(offset, length)`` integers defining segments
1155 within a ``BufferWithSegments``.
1156
1157 The array members are 64-bit unsigned integers using host/native bit order.
1158
1159 Instances conform to the buffer protocol.
1160
1161 BufferWithSegmentsCollection
1162 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1163
1164 The ``BufferWithSegmentsCollection`` type represents a virtual spanning view
1165 of multiple ``BufferWithSegments`` instances.
1166
1167 Instances are constructed from 1 or more ``BufferWithSegments`` instances. The
1168 resulting object behaves like an ordered sequence whose members are the
1169 segments within each ``BufferWithSegments``.
1170
1171 ``len()`` returns the number of segments within all ``BufferWithSegments``
1172 instances.
1173
1174 ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at
1175 that offset as if all ``BufferWithSegments`` instances were a single
1176 entity.
1177
1178 If the object is composed of 2 ``BufferWithSegments`` instances with the
1179 first having 2 segments and the second have 3 segments, then ``b[0]``
1180 and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``,
1181 and ``b[4]`` access segments from the second.
1182
1183 Choosing an API
1184 ===============
1185
1186 There are multiple APIs for performing compression and decompression. This is
1187 because different applications have different needs and the library wants to
1188 facilitate optimal use in as many use cases as possible.
1189
1190 From a high-level, APIs are divided into *one-shot* and *streaming*. See
1191 the ``Concepts`` section for a description of how these are different at
1192 the C layer.
1193
1194 The *one-shot* APIs are useful for small data, where the input or output
1195 size is known. (The size can come from a buffer length, file size, or
1196 stored in the zstd frame header.) A limitation of the *one-shot* APIs is that
1197 input and output must fit in memory simultaneously. For say a 4 GB input,
1198 this is often not feasible.
1199
1200 The *one-shot* APIs also perform all work as a single operation. So, if you
1201 feed it large input, it could take a long time for the function to return.
1202
1203 The streaming APIs do not have the limitations of the simple API. But the
1204 price you pay for this flexibility is that they are more complex than a
1205 single function call.
1206
1207 The streaming APIs put the caller in control of compression and decompression
1208 behavior by allowing them to directly control either the input or output side
1209 of the operation.
1210
1211 With the *streaming input*, *compressor*, and *decompressor* APIs, the caller
1212 has full control over the input to the compression or decompression stream.
1213 They can directly choose when new data is operated on.
1214
1215 With the *streaming output* APIs, the caller has full control over the output
1216 of the compression or decompression stream. It can choose when to receive
1217 new data.
1218
1219 When using the *streaming* APIs that operate on file-like or stream objects,
1220 it is important to consider what happens in that object when I/O is requested.
1221 There is potential for long pauses as data is read or written from the
1222 underlying stream (say from interacting with a filesystem or network). This
1223 could add considerable overhead.
1224
1225 Concepts
1226 ========
1227
1228 It is important to have a basic understanding of how Zstandard works in order
1229 to optimally use this library. In addition, there are some low-level Python
1230 concepts that are worth explaining to aid understanding. This section aims to
1231 provide that knowledge.
1232
1233 Zstandard Frames and Compression Format
1234 ---------------------------------------
1235
1236 Compressed zstandard data almost always exists within a container called a
1237 *frame*. (For the technically curious, see the
1238 `specification <https://github.com/facebook/zstd/blob/3bee41a70eaf343fbcae3637b3f6edbe52f35ed8/doc/zstd_compression_format.md>`_.)
1239
1240 The frame contains a header and optional trailer. The header contains a
1241 magic number to self-identify as a zstd frame and a description of the
1242 compressed data that follows.
1243
1244 Among other things, the frame *optionally* contains the size of the
1245 decompressed data the frame represents, a 32-bit checksum of the
1246 decompressed data (to facilitate verification during decompression),
1247 and the ID of the dictionary used to compress the data.
1248
1249 Storing the original content size in the frame (``write_content_size=True``
1250 to ``ZstdCompressor``) is important for performance in some scenarios. Having
1251 the decompressed size stored there (or storing it elsewhere) allows
1252 decompression to perform a single memory allocation that is exactly sized to
1253 the output. This is faster than continuously growing a memory buffer to hold
1254 output.
1255
1256 Compression and Decompression Contexts
1257 --------------------------------------
1258
1259 In order to perform a compression or decompression operation with the zstd
1260 C API, you need what's called a *context*. A context essentially holds
1261 configuration and state for a compression or decompression operation. For
1262 example, a compression context holds the configured compression level.
1263
1264 Contexts can be reused for multiple operations. Since creating and
1265 destroying contexts is not free, there are performance advantages to
1266 reusing contexts.
1267
1268 The ``ZstdCompressor`` and ``ZstdDecompressor`` types are essentially
1269 wrappers around these contexts in the zstd C API.
1270
1271 One-shot And Streaming Operations
1272 ---------------------------------
1273
1274 A compression or decompression operation can either be performed as a
1275 single *one-shot* operation or as a continuous *streaming* operation.
1276
1277 In one-shot mode (the *simple* APIs provided by the Python interface),
1278 **all** input is handed to the compressor or decompressor as a single buffer
1279 and **all** output is returned as a single buffer.
1280
1281 In streaming mode, input is delivered to the compressor or decompressor as
1282 a series of chunks via multiple function calls. Likewise, output is
1283 obtained in chunks as well.
1284
1285 Streaming operations require an additional *stream* object to be created
1286 to track the operation. These are logical extensions of *context*
1287 instances.
1288
1289 There are advantages and disadvantages to each mode of operation. There
1290 are scenarios where certain modes can't be used. See the
1291 ``Choosing an API`` section for more.
1292
1293 Dictionaries
1294 ------------
1295
1296 A compression *dictionary* is essentially data used to seed the compressor
1297 state so it can achieve better compression. The idea is that if you are
1298 compressing a lot of similar pieces of data (e.g. JSON documents or anything
1299 sharing similar structure), then you can find common patterns across multiple
1300 objects and then leverage those common patterns during compression and
1301 decompression operations to achieve better compression ratios.
1302
1303 Dictionary compression is generally only useful for small inputs - data no
1304 larger than a few kilobytes. The upper bound on this range is highly dependent
1305 on the input data and the dictionary.
1306
1307 Python Buffer Protocol
1308 ----------------------
1309
1310 Many functions in the library operate on objects that implement Python's
1311 `buffer protocol <https://docs.python.org/3.6/c-api/buffer.html>`_.
1312
1313 The *buffer protocol* is an internal implementation detail of a Python
1314 type that allows instances of that type (objects) to be exposed as a raw
1315 pointer (or buffer) in the C API. In other words, it allows objects to be
1316 exposed as an array of bytes.
1317
1318 From the perspective of the C API, objects implementing the *buffer protocol*
1319 all look the same: they are just a pointer to a memory address of a defined
1320 length. This allows the C API to be largely type agnostic when accessing their
1321 data. This allows custom types to be passed in without first converting them
1322 to a specific type.
1323
1324 Many Python types implement the buffer protocol. These include ``bytes``
1325 (``str`` on Python 2), ``bytearray``, ``array.array``, ``io.BytesIO``,
1326 ``mmap.mmap``, and ``memoryview``.
1327
1328 ``python-zstandard`` APIs that accept objects conforming to the buffer
1329 protocol require that the buffer is *C contiguous* and has a single
1330 dimension (``ndim==1``). This is usually the case. An example of where it
1331 is not is a Numpy matrix type.
1332
1333 Requiring Output Sizes for Non-Streaming Decompression APIs
1334 -----------------------------------------------------------
1335
1336 Non-streaming decompression APIs require that either the output size is
1337 explicitly defined (either in the zstd frame header or passed into the
1338 function) or that a max output size is specified. This restriction is for
1339 your safety.
1340
1341 The *one-shot* decompression APIs store the decompressed result in a
1342 single buffer. This means that a buffer needs to be pre-allocated to hold
1343 the result. If the decompressed size is not known, then there is no universal
1344 good default size to use. Any default will fail or will be highly sub-optimal
1345 in some scenarios (it will either be too small or will put stress on the
1346 memory allocator to allocate a too large block).
1347
1348 A *helpful* API may retry decompression with buffers of increasing size.
1349 While useful, there are obvious performance disadvantages, namely redoing
1350 decompression N times until it works. In addition, there is a security
1351 concern. Say the input came from highly compressible data, like 1 GB of the
1352 same byte value. The output size could be several orders of magnitude larger
1353 input size. An input of <100KB could decompress to >1GB. Without a bounds
1354 restriction on the decompressed size, certain inputs could exhaust all system
1355 memory. That's not good and is why the maximum output size is limited.
792 1356
793 1357 Note on Zstandard's *Experimental* API
794 1358 ======================================
@@ -11,45 +11,48 b''
11 11 extern PyObject* ZstdError;
12 12
13 13 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
14 static char *kwlist[] = { "dict_size", "samples", "parameters", NULL };
14 static char* kwlist[] = {
15 "dict_size",
16 "samples",
17 "selectivity",
18 "level",
19 "notifications",
20 "dict_id",
21 NULL
22 };
15 23 size_t capacity;
16 24 PyObject* samples;
17 25 Py_ssize_t samplesLen;
18 PyObject* parameters = NULL;
26 unsigned selectivity = 0;
27 int level = 0;
28 unsigned notifications = 0;
29 unsigned dictID = 0;
19 30 ZDICT_params_t zparams;
20 31 Py_ssize_t sampleIndex;
21 32 Py_ssize_t sampleSize;
22 33 PyObject* sampleItem;
23 34 size_t zresult;
24 void* sampleBuffer;
35 void* sampleBuffer = NULL;
25 36 void* sampleOffset;
26 37 size_t samplesSize = 0;
27 size_t* sampleSizes;
28 void* dict;
29 ZstdCompressionDict* result;
38 size_t* sampleSizes = NULL;
39 void* dict = NULL;
40 ZstdCompressionDict* result = NULL;
30 41
31 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!", kwlist,
42 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IiII:train_dictionary",
43 kwlist,
32 44 &capacity,
33 45 &PyList_Type, &samples,
34 (PyObject*)&DictParametersType, &parameters)) {
46 &selectivity, &level, &notifications, &dictID)) {
35 47 return NULL;
36 48 }
37 49
38 /* Validate parameters first since it is easiest. */
39 zparams.selectivityLevel = 0;
40 zparams.compressionLevel = 0;
41 zparams.notificationLevel = 0;
42 zparams.dictID = 0;
43 zparams.reserved[0] = 0;
44 zparams.reserved[1] = 0;
50 memset(&zparams, 0, sizeof(zparams));
45 51
46 if (parameters) {
47 /* TODO validate data ranges */
48 zparams.selectivityLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 0));
49 zparams.compressionLevel = PyLong_AsLong(PyTuple_GetItem(parameters, 1));
50 zparams.notificationLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 2));
51 zparams.dictID = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 3));
52 }
52 zparams.selectivityLevel = selectivity;
53 zparams.compressionLevel = level;
54 zparams.notificationLevel = notifications;
55 zparams.dictID = dictID;
53 56
54 57 /* Figure out the size of the raw samples */
55 58 samplesLen = PyList_Size(samples);
@@ -57,7 +60,6 b' ZstdCompressionDict* train_dictionary(Py'
57 60 sampleItem = PyList_GetItem(samples, sampleIndex);
58 61 if (!PyBytes_Check(sampleItem)) {
59 62 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
60 /* TODO probably need to perform DECREF here */
61 63 return NULL;
62 64 }
63 65 samplesSize += PyBytes_GET_SIZE(sampleItem);
@@ -68,13 +70,12 b' ZstdCompressionDict* train_dictionary(Py'
68 70 sampleBuffer = PyMem_Malloc(samplesSize);
69 71 if (!sampleBuffer) {
70 72 PyErr_NoMemory();
71 return NULL;
73 goto finally;
72 74 }
73 75 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
74 76 if (!sampleSizes) {
75 PyMem_Free(sampleBuffer);
76 77 PyErr_NoMemory();
77 return NULL;
78 goto finally;
78 79 }
79 80
80 81 sampleOffset = sampleBuffer;
@@ -89,33 +90,168 b' ZstdCompressionDict* train_dictionary(Py'
89 90
90 91 dict = PyMem_Malloc(capacity);
91 92 if (!dict) {
92 PyMem_Free(sampleSizes);
93 PyMem_Free(sampleBuffer);
94 93 PyErr_NoMemory();
95 return NULL;
94 goto finally;
96 95 }
97 96
97 /* TODO consider using dup2() to redirect zstd's stderr writing to a buffer */
98 Py_BEGIN_ALLOW_THREADS
98 99 zresult = ZDICT_trainFromBuffer_advanced(dict, capacity,
99 100 sampleBuffer, sampleSizes, (unsigned int)samplesLen,
100 101 zparams);
102 Py_END_ALLOW_THREADS
101 103 if (ZDICT_isError(zresult)) {
102 104 PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult));
103 105 PyMem_Free(dict);
104 PyMem_Free(sampleSizes);
105 PyMem_Free(sampleBuffer);
106 return NULL;
106 goto finally;
107 107 }
108 108
109 109 result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
110 110 if (!result) {
111 return NULL;
111 goto finally;
112 112 }
113 113
114 114 result->dictData = dict;
115 115 result->dictSize = zresult;
116 result->d = 0;
117 result->k = 0;
118
119 finally:
120 PyMem_Free(sampleBuffer);
121 PyMem_Free(sampleSizes);
122
116 123 return result;
117 124 }
118 125
126 ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
127 static char* kwlist[] = {
128 "dict_size",
129 "samples",
130 "k",
131 "d",
132 "notifications",
133 "dict_id",
134 "level",
135 "optimize",
136 "steps",
137 "threads",
138 NULL
139 };
140
141 size_t capacity;
142 PyObject* samples;
143 unsigned k = 0;
144 unsigned d = 0;
145 unsigned notifications = 0;
146 unsigned dictID = 0;
147 int level = 0;
148 PyObject* optimize = NULL;
149 unsigned steps = 0;
150 int threads = 0;
151 COVER_params_t params;
152 Py_ssize_t samplesLen;
153 Py_ssize_t i;
154 size_t samplesSize = 0;
155 void* sampleBuffer = NULL;
156 size_t* sampleSizes = NULL;
157 void* sampleOffset;
158 Py_ssize_t sampleSize;
159 void* dict = NULL;
160 size_t zresult;
161 ZstdCompressionDict* result = NULL;
162
163 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIiOIi:train_cover_dictionary",
164 kwlist, &capacity, &PyList_Type, &samples,
165 &k, &d, &notifications, &dictID, &level, &optimize, &steps, &threads)) {
166 return NULL;
167 }
168
169 if (threads < 0) {
170 threads = cpu_count();
171 }
172
173 memset(&params, 0, sizeof(params));
174 params.k = k;
175 params.d = d;
176 params.steps = steps;
177 params.nbThreads = threads;
178 params.notificationLevel = notifications;
179 params.dictID = dictID;
180 params.compressionLevel = level;
181
182 /* Figure out total size of input samples. */
183 samplesLen = PyList_Size(samples);
184 for (i = 0; i < samplesLen; i++) {
185 PyObject* sampleItem = PyList_GET_ITEM(samples, i);
186
187 if (!PyBytes_Check(sampleItem)) {
188 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
189 return NULL;
190 }
191 samplesSize += PyBytes_GET_SIZE(sampleItem);
192 }
193
194 sampleBuffer = PyMem_Malloc(samplesSize);
195 if (!sampleBuffer) {
196 PyErr_NoMemory();
197 goto finally;
198 }
199
200 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
201 if (!sampleSizes) {
202 PyErr_NoMemory();
203 goto finally;
204 }
205
206 sampleOffset = sampleBuffer;
207 for (i = 0; i < samplesLen; i++) {
208 PyObject* sampleItem = PyList_GET_ITEM(samples, i);
209 sampleSize = PyBytes_GET_SIZE(sampleItem);
210 sampleSizes[i] = sampleSize;
211 memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
212 sampleOffset = (char*)sampleOffset + sampleSize;
213 }
214
215 dict = PyMem_Malloc(capacity);
216 if (!dict) {
217 PyErr_NoMemory();
218 goto finally;
219 }
220
221 Py_BEGIN_ALLOW_THREADS
222 if (optimize && PyObject_IsTrue(optimize)) {
223 zresult = COVER_optimizeTrainFromBuffer(dict, capacity,
224 sampleBuffer, sampleSizes, (unsigned)samplesLen, &params);
225 }
226 else {
227 zresult = COVER_trainFromBuffer(dict, capacity,
228 sampleBuffer, sampleSizes, (unsigned)samplesLen, params);
229 }
230 Py_END_ALLOW_THREADS
231
232 if (ZDICT_isError(zresult)) {
233 PyMem_Free(dict);
234 PyErr_Format(ZstdError, "cannot train dict: %s", ZDICT_getErrorName(zresult));
235 goto finally;
236 }
237
238 result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
239 if (!result) {
240 PyMem_Free(dict);
241 goto finally;
242 }
243
244 result->dictData = dict;
245 result->dictSize = zresult;
246 result->d = params.d;
247 result->k = params.k;
248
249 finally:
250 PyMem_Free(sampleBuffer);
251 PyMem_Free(sampleSizes);
252
253 return result;
254 }
119 255
120 256 PyDoc_STRVAR(ZstdCompressionDict__doc__,
121 257 "ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
@@ -133,10 +269,11 b' static int ZstdCompressionDict_init(Zstd'
133 269 self->dictSize = 0;
134 270
135 271 #if PY_MAJOR_VERSION >= 3
136 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
272 if (!PyArg_ParseTuple(args, "y#:ZstdCompressionDict",
137 273 #else
138 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
274 if (!PyArg_ParseTuple(args, "s#:ZstdCompressionDict",
139 275 #endif
276 &source, &sourceSize)) {
140 277 return -1;
141 278 }
142 279
@@ -179,6 +316,14 b' static PyMethodDef ZstdCompressionDict_m'
179 316 { NULL, NULL }
180 317 };
181 318
319 static PyMemberDef ZstdCompressionDict_members[] = {
320 { "k", T_UINT, offsetof(ZstdCompressionDict, k), READONLY,
321 "segment size" },
322 { "d", T_UINT, offsetof(ZstdCompressionDict, d), READONLY,
323 "dmer size" },
324 { NULL }
325 };
326
182 327 static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) {
183 328 return self->dictSize;
184 329 }
@@ -223,7 +368,7 b' PyTypeObject ZstdCompressionDictType = {'
223 368 0, /* tp_iter */
224 369 0, /* tp_iternext */
225 370 ZstdCompressionDict_methods, /* tp_methods */
226 0, /* tp_members */
371 ZstdCompressionDict_members, /* tp_members */
227 372 0, /* tp_getset */
228 373 0, /* tp_base */
229 374 0, /* tp_dict */
@@ -25,7 +25,8 b' CompressionParametersObject* get_compres'
25 25 ZSTD_compressionParameters params;
26 26 CompressionParametersObject* result;
27 27
28 if (!PyArg_ParseTuple(args, "i|Kn", &compressionLevel, &sourceSize, &dictSize)) {
28 if (!PyArg_ParseTuple(args, "i|Kn:get_compression_parameters",
29 &compressionLevel, &sourceSize, &dictSize)) {
29 30 return NULL;
30 31 }
31 32
@@ -47,12 +48,108 b' CompressionParametersObject* get_compres'
47 48 return result;
48 49 }
49 50
51 static int CompressionParameters_init(CompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
52 static char* kwlist[] = {
53 "window_log",
54 "chain_log",
55 "hash_log",
56 "search_log",
57 "search_length",
58 "target_length",
59 "strategy",
60 NULL
61 };
62
63 unsigned windowLog;
64 unsigned chainLog;
65 unsigned hashLog;
66 unsigned searchLog;
67 unsigned searchLength;
68 unsigned targetLength;
69 unsigned strategy;
70 ZSTD_compressionParameters params;
71 size_t zresult;
72
73 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "IIIIIII:CompressionParameters",
74 kwlist, &windowLog, &chainLog, &hashLog, &searchLog, &searchLength,
75 &targetLength, &strategy)) {
76 return -1;
77 }
78
79 if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) {
80 PyErr_SetString(PyExc_ValueError, "invalid window log value");
81 return -1;
82 }
83
84 if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) {
85 PyErr_SetString(PyExc_ValueError, "invalid chain log value");
86 return -1;
87 }
88
89 if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) {
90 PyErr_SetString(PyExc_ValueError, "invalid hash log value");
91 return -1;
92 }
93
94 if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) {
95 PyErr_SetString(PyExc_ValueError, "invalid search log value");
96 return -1;
97 }
98
99 if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) {
100 PyErr_SetString(PyExc_ValueError, "invalid search length value");
101 return -1;
102 }
103
104 if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) {
105 PyErr_SetString(PyExc_ValueError, "invalid target length value");
106 return -1;
107 }
108
109 if (strategy < ZSTD_fast || strategy > ZSTD_btopt) {
110 PyErr_SetString(PyExc_ValueError, "invalid strategy value");
111 return -1;
112 }
113
114 self->windowLog = windowLog;
115 self->chainLog = chainLog;
116 self->hashLog = hashLog;
117 self->searchLog = searchLog;
118 self->searchLength = searchLength;
119 self->targetLength = targetLength;
120 self->strategy = strategy;
121
122 ztopy_compression_parameters(self, &params);
123 zresult = ZSTD_checkCParams(params);
124
125 if (ZSTD_isError(zresult)) {
126 PyErr_Format(PyExc_ValueError, "invalid compression parameters: %s",
127 ZSTD_getErrorName(zresult));
128 return -1;
129 }
130
131 return 0;
132 }
133
134 PyDoc_STRVAR(CompressionParameters_estimated_compression_context_size__doc__,
135 "Estimate the size in bytes of a compression context for compression parameters\n"
136 );
137
138 PyObject* CompressionParameters_estimated_compression_context_size(CompressionParametersObject* self) {
139 ZSTD_compressionParameters params;
140
141 ztopy_compression_parameters(self, &params);
142
143 return PyLong_FromSize_t(ZSTD_estimateCCtxSize(params));
144 }
145
50 146 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) {
51 147 CompressionParametersObject* params;
52 148 ZSTD_compressionParameters zparams;
53 149 PyObject* result;
54 150
55 if (!PyArg_ParseTuple(args, "O!", &CompressionParametersType, &params)) {
151 if (!PyArg_ParseTuple(args, "O!:estimate_compression_context_size",
152 &CompressionParametersType, &params)) {
56 153 return NULL;
57 154 }
58 155
@@ -64,113 +161,43 b' PyObject* estimate_compression_context_s'
64 161 PyDoc_STRVAR(CompressionParameters__doc__,
65 162 "CompressionParameters: low-level control over zstd compression");
66 163
67 static PyObject* CompressionParameters_new(PyTypeObject* subtype, PyObject* args, PyObject* kwargs) {
68 CompressionParametersObject* self;
69 unsigned windowLog;
70 unsigned chainLog;
71 unsigned hashLog;
72 unsigned searchLog;
73 unsigned searchLength;
74 unsigned targetLength;
75 unsigned strategy;
76
77 if (!PyArg_ParseTuple(args, "IIIIIII", &windowLog, &chainLog, &hashLog, &searchLog,
78 &searchLength, &targetLength, &strategy)) {
79 return NULL;
80 }
81
82 if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) {
83 PyErr_SetString(PyExc_ValueError, "invalid window log value");
84 return NULL;
85 }
86
87 if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) {
88 PyErr_SetString(PyExc_ValueError, "invalid chain log value");
89 return NULL;
90 }
91
92 if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) {
93 PyErr_SetString(PyExc_ValueError, "invalid hash log value");
94 return NULL;
95 }
96
97 if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) {
98 PyErr_SetString(PyExc_ValueError, "invalid search log value");
99 return NULL;
100 }
101
102 if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) {
103 PyErr_SetString(PyExc_ValueError, "invalid search length value");
104 return NULL;
105 }
106
107 if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) {
108 PyErr_SetString(PyExc_ValueError, "invalid target length value");
109 return NULL;
110 }
111
112 if (strategy < ZSTD_fast || strategy > ZSTD_btopt) {
113 PyErr_SetString(PyExc_ValueError, "invalid strategy value");
114 return NULL;
115 }
116
117 self = (CompressionParametersObject*)subtype->tp_alloc(subtype, 1);
118 if (!self) {
119 return NULL;
120 }
121
122 self->windowLog = windowLog;
123 self->chainLog = chainLog;
124 self->hashLog = hashLog;
125 self->searchLog = searchLog;
126 self->searchLength = searchLength;
127 self->targetLength = targetLength;
128 self->strategy = strategy;
129
130 return (PyObject*)self;
131 }
132
133 164 static void CompressionParameters_dealloc(PyObject* self) {
134 165 PyObject_Del(self);
135 166 }
136 167
137 static Py_ssize_t CompressionParameters_length(PyObject* self) {
138 return 7;
139 }
140
141 static PyObject* CompressionParameters_item(PyObject* o, Py_ssize_t i) {
142 CompressionParametersObject* self = (CompressionParametersObject*)o;
168 static PyMethodDef CompressionParameters_methods[] = {
169 {
170 "estimated_compression_context_size",
171 (PyCFunction)CompressionParameters_estimated_compression_context_size,
172 METH_NOARGS,
173 CompressionParameters_estimated_compression_context_size__doc__
174 },
175 { NULL, NULL }
176 };
143 177
144 switch (i) {
145 case 0:
146 return PyLong_FromLong(self->windowLog);
147 case 1:
148 return PyLong_FromLong(self->chainLog);
149 case 2:
150 return PyLong_FromLong(self->hashLog);
151 case 3:
152 return PyLong_FromLong(self->searchLog);
153 case 4:
154 return PyLong_FromLong(self->searchLength);
155 case 5:
156 return PyLong_FromLong(self->targetLength);
157 case 6:
158 return PyLong_FromLong(self->strategy);
159 default:
160 PyErr_SetString(PyExc_IndexError, "index out of range");
161 return NULL;
162 }
163 }
164
165 static PySequenceMethods CompressionParameters_sq = {
166 CompressionParameters_length, /* sq_length */
167 0, /* sq_concat */
168 0, /* sq_repeat */
169 CompressionParameters_item, /* sq_item */
170 0, /* sq_ass_item */
171 0, /* sq_contains */
172 0, /* sq_inplace_concat */
173 0 /* sq_inplace_repeat */
178 static PyMemberDef CompressionParameters_members[] = {
179 { "window_log", T_UINT,
180 offsetof(CompressionParametersObject, windowLog), READONLY,
181 "window log" },
182 { "chain_log", T_UINT,
183 offsetof(CompressionParametersObject, chainLog), READONLY,
184 "chain log" },
185 { "hash_log", T_UINT,
186 offsetof(CompressionParametersObject, hashLog), READONLY,
187 "hash log" },
188 { "search_log", T_UINT,
189 offsetof(CompressionParametersObject, searchLog), READONLY,
190 "search log" },
191 { "search_length", T_UINT,
192 offsetof(CompressionParametersObject, searchLength), READONLY,
193 "search length" },
194 { "target_length", T_UINT,
195 offsetof(CompressionParametersObject, targetLength), READONLY,
196 "target length" },
197 { "strategy", T_INT,
198 offsetof(CompressionParametersObject, strategy), READONLY,
199 "strategy" },
200 { NULL }
174 201 };
175 202
176 203 PyTypeObject CompressionParametersType = {
@@ -185,7 +212,7 b' PyTypeObject CompressionParametersType ='
185 212 0, /* tp_compare */
186 213 0, /* tp_repr */
187 214 0, /* tp_as_number */
188 &CompressionParameters_sq, /* tp_as_sequence */
215 0, /* tp_as_sequence */
189 216 0, /* tp_as_mapping */
190 217 0, /* tp_hash */
191 218 0, /* tp_call */
@@ -193,7 +220,7 b' PyTypeObject CompressionParametersType ='
193 220 0, /* tp_getattro */
194 221 0, /* tp_setattro */
195 222 0, /* tp_as_buffer */
196 Py_TPFLAGS_DEFAULT, /* tp_flags */
223 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
197 224 CompressionParameters__doc__, /* tp_doc */
198 225 0, /* tp_traverse */
199 226 0, /* tp_clear */
@@ -201,17 +228,17 b' PyTypeObject CompressionParametersType ='
201 228 0, /* tp_weaklistoffset */
202 229 0, /* tp_iter */
203 230 0, /* tp_iternext */
204 0, /* tp_methods */
205 0, /* tp_members */
231 CompressionParameters_methods, /* tp_methods */
232 CompressionParameters_members, /* tp_members */
206 233 0, /* tp_getset */
207 234 0, /* tp_base */
208 235 0, /* tp_dict */
209 236 0, /* tp_descr_get */
210 237 0, /* tp_descr_set */
211 238 0, /* tp_dictoffset */
212 0, /* tp_init */
239 (initproc)CompressionParameters_init, /* tp_init */
213 240 0, /* tp_alloc */
214 CompressionParameters_new, /* tp_new */
241 PyType_GenericNew, /* tp_new */
215 242 };
216 243
217 244 void compressionparams_module_init(PyObject* mod) {
@@ -220,7 +247,7 b' void compressionparams_module_init(PyObj'
220 247 return;
221 248 }
222 249
223 Py_IncRef((PyObject*)&CompressionParametersType);
250 Py_INCREF(&CompressionParametersType);
224 251 PyModule_AddObject(mod, "CompressionParameters",
225 252 (PyObject*)&CompressionParametersType);
226 253 }
@@ -18,11 +18,6 b' static void ZstdCompressionWriter_deallo'
18 18 Py_XDECREF(self->compressor);
19 19 Py_XDECREF(self->writer);
20 20
21 if (self->cstream) {
22 ZSTD_freeCStream(self->cstream);
23 self->cstream = NULL;
24 }
25
26 21 PyObject_Del(self);
27 22 }
28 23
@@ -32,9 +27,15 b' static PyObject* ZstdCompressionWriter_e'
32 27 return NULL;
33 28 }
34 29
35 self->cstream = CStream_from_ZstdCompressor(self->compressor, self->sourceSize);
36 if (!self->cstream) {
37 return NULL;
30 if (self->compressor->mtcctx) {
31 if (init_mtcstream(self->compressor, self->sourceSize)) {
32 return NULL;
33 }
34 }
35 else {
36 if (0 != init_cstream(self->compressor, self->sourceSize)) {
37 return NULL;
38 }
38 39 }
39 40
40 41 self->entered = 1;
@@ -52,14 +53,14 b' static PyObject* ZstdCompressionWriter_e'
52 53 ZSTD_outBuffer output;
53 54 PyObject* res;
54 55
55 if (!PyArg_ParseTuple(args, "OOO", &exc_type, &exc_value, &exc_tb)) {
56 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
56 57 return NULL;
57 58 }
58 59
59 60 self->entered = 0;
60 61
61 if (self->cstream && exc_type == Py_None && exc_value == Py_None &&
62 exc_tb == Py_None) {
62 if ((self->compressor->cstream || self->compressor->mtcctx) && exc_type == Py_None
63 && exc_value == Py_None && exc_tb == Py_None) {
63 64
64 65 output.dst = PyMem_Malloc(self->outSize);
65 66 if (!output.dst) {
@@ -69,7 +70,12 b' static PyObject* ZstdCompressionWriter_e'
69 70 output.pos = 0;
70 71
71 72 while (1) {
72 zresult = ZSTD_endStream(self->cstream, &output);
73 if (self->compressor->mtcctx) {
74 zresult = ZSTDMT_endStream(self->compressor->mtcctx, &output);
75 }
76 else {
77 zresult = ZSTD_endStream(self->compressor->cstream, &output);
78 }
73 79 if (ZSTD_isError(zresult)) {
74 80 PyErr_Format(ZstdError, "error ending compression stream: %s",
75 81 ZSTD_getErrorName(zresult));
@@ -95,21 +101,19 b' static PyObject* ZstdCompressionWriter_e'
95 101 }
96 102
97 103 PyMem_Free(output.dst);
98 ZSTD_freeCStream(self->cstream);
99 self->cstream = NULL;
100 104 }
101 105
102 106 Py_RETURN_FALSE;
103 107 }
104 108
105 109 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
106 if (!self->cstream) {
110 if (!self->compressor->cstream) {
107 111 PyErr_SetString(ZstdError, "cannot determine size of an inactive compressor; "
108 112 "call when a context manager is active");
109 113 return NULL;
110 114 }
111 115
112 return PyLong_FromSize_t(ZSTD_sizeof_CStream(self->cstream));
116 return PyLong_FromSize_t(ZSTD_sizeof_CStream(self->compressor->cstream));
113 117 }
114 118
115 119 static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args) {
@@ -119,11 +123,12 b' static PyObject* ZstdCompressionWriter_w'
119 123 ZSTD_inBuffer input;
120 124 ZSTD_outBuffer output;
121 125 PyObject* res;
126 Py_ssize_t totalWrite = 0;
122 127
123 128 #if PY_MAJOR_VERSION >= 3
124 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
129 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
125 130 #else
126 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
131 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
127 132 #endif
128 133 return NULL;
129 134 }
@@ -146,7 +151,13 b' static PyObject* ZstdCompressionWriter_w'
146 151
147 152 while ((ssize_t)input.pos < sourceSize) {
148 153 Py_BEGIN_ALLOW_THREADS
149 zresult = ZSTD_compressStream(self->cstream, &output, &input);
154 if (self->compressor->mtcctx) {
155 zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
156 &output, &input);
157 }
158 else {
159 zresult = ZSTD_compressStream(self->compressor->cstream, &output, &input);
160 }
150 161 Py_END_ALLOW_THREADS
151 162
152 163 if (ZSTD_isError(zresult)) {
@@ -164,20 +175,21 b' static PyObject* ZstdCompressionWriter_w'
164 175 #endif
165 176 output.dst, output.pos);
166 177 Py_XDECREF(res);
178 totalWrite += output.pos;
167 179 }
168 180 output.pos = 0;
169 181 }
170 182
171 183 PyMem_Free(output.dst);
172 184
173 /* TODO return bytes written */
174 Py_RETURN_NONE;
185 return PyLong_FromSsize_t(totalWrite);
175 186 }
176 187
177 188 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
178 189 size_t zresult;
179 190 ZSTD_outBuffer output;
180 191 PyObject* res;
192 Py_ssize_t totalWrite = 0;
181 193
182 194 if (!self->entered) {
183 195 PyErr_SetString(ZstdError, "flush must be called from an active context manager");
@@ -193,7 +205,12 b' static PyObject* ZstdCompressionWriter_f'
193 205
194 206 while (1) {
195 207 Py_BEGIN_ALLOW_THREADS
196 zresult = ZSTD_flushStream(self->cstream, &output);
208 if (self->compressor->mtcctx) {
209 zresult = ZSTDMT_flushStream(self->compressor->mtcctx, &output);
210 }
211 else {
212 zresult = ZSTD_flushStream(self->compressor->cstream, &output);
213 }
197 214 Py_END_ALLOW_THREADS
198 215
199 216 if (ZSTD_isError(zresult)) {
@@ -215,14 +232,14 b' static PyObject* ZstdCompressionWriter_f'
215 232 #endif
216 233 output.dst, output.pos);
217 234 Py_XDECREF(res);
235 totalWrite += output.pos;
218 236 }
219 237 output.pos = 0;
220 238 }
221 239
222 240 PyMem_Free(output.dst);
223 241
224 /* TODO return bytes written */
225 Py_RETURN_NONE;
242 return PyLong_FromSsize_t(totalWrite);
226 243 }
227 244
228 245 static PyMethodDef ZstdCompressionWriter_methods[] = {
@@ -18,11 +18,6 b' static void ZstdCompressionObj_dealloc(Z'
18 18 PyMem_Free(self->output.dst);
19 19 self->output.dst = NULL;
20 20
21 if (self->cstream) {
22 ZSTD_freeCStream(self->cstream);
23 self->cstream = NULL;
24 }
25
26 21 Py_XDECREF(self->compressor);
27 22
28 23 PyObject_Del(self);
@@ -42,9 +37,9 b' static PyObject* ZstdCompressionObj_comp'
42 37 }
43 38
44 39 #if PY_MAJOR_VERSION >= 3
45 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
40 if (!PyArg_ParseTuple(args, "y#:compress", &source, &sourceSize)) {
46 41 #else
47 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
42 if (!PyArg_ParseTuple(args, "s#:compress", &source, &sourceSize)) {
48 43 #endif
49 44 return NULL;
50 45 }
@@ -55,7 +50,13 b' static PyObject* ZstdCompressionObj_comp'
55 50
56 51 while ((ssize_t)input.pos < sourceSize) {
57 52 Py_BEGIN_ALLOW_THREADS
58 zresult = ZSTD_compressStream(self->cstream, &self->output, &input);
53 if (self->compressor->mtcctx) {
54 zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
55 &self->output, &input);
56 }
57 else {
58 zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, &input);
59 }
59 60 Py_END_ALLOW_THREADS
60 61
61 62 if (ZSTD_isError(zresult)) {
@@ -98,7 +99,7 b' static PyObject* ZstdCompressionObj_flus'
98 99 PyObject* result = NULL;
99 100 Py_ssize_t resultSize = 0;
100 101
101 if (!PyArg_ParseTuple(args, "|i", &flushMode)) {
102 if (!PyArg_ParseTuple(args, "|i:flush", &flushMode)) {
102 103 return NULL;
103 104 }
104 105
@@ -118,7 +119,12 b' static PyObject* ZstdCompressionObj_flus'
118 119 /* The output buffer is of size ZSTD_CStreamOutSize(), which is
119 120 guaranteed to hold a full block. */
120 121 Py_BEGIN_ALLOW_THREADS
121 zresult = ZSTD_flushStream(self->cstream, &self->output);
122 if (self->compressor->mtcctx) {
123 zresult = ZSTDMT_flushStream(self->compressor->mtcctx, &self->output);
124 }
125 else {
126 zresult = ZSTD_flushStream(self->compressor->cstream, &self->output);
127 }
122 128 Py_END_ALLOW_THREADS
123 129
124 130 if (ZSTD_isError(zresult)) {
@@ -150,7 +156,12 b' static PyObject* ZstdCompressionObj_flus'
150 156 self->finished = 1;
151 157
152 158 while (1) {
153 zresult = ZSTD_endStream(self->cstream, &self->output);
159 if (self->compressor->mtcctx) {
160 zresult = ZSTDMT_endStream(self->compressor->mtcctx, &self->output);
161 }
162 else {
163 zresult = ZSTD_endStream(self->compressor->cstream, &self->output);
164 }
154 165 if (ZSTD_isError(zresult)) {
155 166 PyErr_Format(ZstdError, "error ending compression stream: %s",
156 167 ZSTD_getErrorName(zresult));
@@ -182,9 +193,6 b' static PyObject* ZstdCompressionObj_flus'
182 193 }
183 194 }
184 195
185 ZSTD_freeCStream(self->cstream);
186 self->cstream = NULL;
187
188 196 if (result) {
189 197 return result;
190 198 }
This diff has been collapsed as it changes many lines, (979 lines changed) Show them Hide them
@@ -7,16 +7,21 b''
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 #include "pool.h"
10 11
11 12 extern PyObject* ZstdError;
12 13
13 int populate_cdict(ZstdCompressor* compressor, void* dictData, size_t dictSize, ZSTD_parameters* zparams) {
14 int populate_cdict(ZstdCompressor* compressor, ZSTD_parameters* zparams) {
14 15 ZSTD_customMem zmem;
15 assert(!compressor->cdict);
16
17 if (compressor->cdict || !compressor->dict || !compressor->dict->dictData) {
18 return 0;
19 }
20
16 21 Py_BEGIN_ALLOW_THREADS
17 22 memset(&zmem, 0, sizeof(zmem));
18 23 compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData,
19 compressor->dict->dictSize, *zparams, zmem);
24 compressor->dict->dictSize, 1, *zparams, zmem);
20 25 Py_END_ALLOW_THREADS
21 26
22 27 if (!compressor->cdict) {
@@ -28,22 +33,32 b' int populate_cdict(ZstdCompressor* compr'
28 33 }
29 34
30 35 /**
31 * Initialize a zstd CStream from a ZstdCompressor instance.
32 *
33 * Returns a ZSTD_CStream on success or NULL on failure. If NULL, a Python
34 * exception will be set.
35 */
36 ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize) {
37 ZSTD_CStream* cstream;
36 * Ensure the ZSTD_CStream on a ZstdCompressor instance is initialized.
37 *
38 * Returns 0 on success. Other value on failure. Will set a Python exception
39 * on failure.
40 */
41 int init_cstream(ZstdCompressor* compressor, unsigned long long sourceSize) {
38 42 ZSTD_parameters zparams;
39 43 void* dictData = NULL;
40 44 size_t dictSize = 0;
41 45 size_t zresult;
42 46
43 cstream = ZSTD_createCStream();
44 if (!cstream) {
45 PyErr_SetString(ZstdError, "cannot create CStream");
46 return NULL;
47 if (compressor->cstream) {
48 zresult = ZSTD_resetCStream(compressor->cstream, sourceSize);
49 if (ZSTD_isError(zresult)) {
50 PyErr_Format(ZstdError, "could not reset CStream: %s",
51 ZSTD_getErrorName(zresult));
52 return -1;
53 }
54
55 return 0;
56 }
57
58 compressor->cstream = ZSTD_createCStream();
59 if (!compressor->cstream) {
60 PyErr_SetString(ZstdError, "could not create CStream");
61 return -1;
47 62 }
48 63
49 64 if (compressor->dict) {
@@ -63,15 +78,51 b' ZSTD_CStream* CStream_from_ZstdCompresso'
63 78
64 79 zparams.fParams = compressor->fparams;
65 80
66 zresult = ZSTD_initCStream_advanced(cstream, dictData, dictSize, zparams, sourceSize);
81 zresult = ZSTD_initCStream_advanced(compressor->cstream, dictData, dictSize,
82 zparams, sourceSize);
67 83
68 84 if (ZSTD_isError(zresult)) {
69 ZSTD_freeCStream(cstream);
85 ZSTD_freeCStream(compressor->cstream);
86 compressor->cstream = NULL;
70 87 PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
71 return NULL;
88 return -1;
72 89 }
73 90
74 return cstream;
91 return 0;;
92 }
93
94 int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize) {
95 size_t zresult;
96 void* dictData = NULL;
97 size_t dictSize = 0;
98 ZSTD_parameters zparams;
99
100 assert(compressor->mtcctx);
101
102 if (compressor->dict) {
103 dictData = compressor->dict->dictData;
104 dictSize = compressor->dict->dictSize;
105 }
106
107 memset(&zparams, 0, sizeof(zparams));
108 if (compressor->cparams) {
109 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
110 }
111 else {
112 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize);
113 }
114
115 zparams.fParams = compressor->fparams;
116
117 zresult = ZSTDMT_initCStream_advanced(compressor->mtcctx, dictData, dictSize,
118 zparams, sourceSize);
119
120 if (ZSTD_isError(zresult)) {
121 PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
122 return -1;
123 }
124
125 return 0;
75 126 }
76 127
77 128 PyDoc_STRVAR(ZstdCompressor__doc__,
@@ -103,6 +154,11 b' PyDoc_STRVAR(ZstdCompressor__doc__,'
103 154 " Determines whether the dictionary ID will be written into the compressed\n"
104 155 " data. Defaults to True. Only adds content to the compressed data if\n"
105 156 " a dictionary is being used.\n"
157 "threads\n"
158 " Number of threads to use to compress data concurrently. When set,\n"
159 " compression operations are performed on multiple threads. The default\n"
160 " value (0) disables multi-threaded compression. A value of ``-1`` means to\n"
161 " set the number of threads to the number of detected logical CPUs.\n"
106 162 );
107 163
108 164 static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
@@ -113,6 +169,7 b' static int ZstdCompressor_init(ZstdCompr'
113 169 "write_checksum",
114 170 "write_content_size",
115 171 "write_dict_id",
172 "threads",
116 173 NULL
117 174 };
118 175
@@ -122,16 +179,12 b' static int ZstdCompressor_init(ZstdCompr'
122 179 PyObject* writeChecksum = NULL;
123 180 PyObject* writeContentSize = NULL;
124 181 PyObject* writeDictID = NULL;
182 int threads = 0;
125 183
126 self->cctx = NULL;
127 self->dict = NULL;
128 self->cparams = NULL;
129 self->cdict = NULL;
130
131 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO", kwlist,
132 &level, &ZstdCompressionDictType, &dict,
184 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor",
185 kwlist, &level, &ZstdCompressionDictType, &dict,
133 186 &CompressionParametersType, &params,
134 &writeChecksum, &writeContentSize, &writeDictID)) {
187 &writeChecksum, &writeContentSize, &writeDictID, &threads)) {
135 188 return -1;
136 189 }
137 190
@@ -146,12 +199,27 b' static int ZstdCompressor_init(ZstdCompr'
146 199 return -1;
147 200 }
148 201
202 if (threads < 0) {
203 threads = cpu_count();
204 }
205
206 self->threads = threads;
207
149 208 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
150 209 overhead of each compression operation. */
151 self->cctx = ZSTD_createCCtx();
152 if (!self->cctx) {
153 PyErr_NoMemory();
154 return -1;
210 if (threads) {
211 self->mtcctx = ZSTDMT_createCCtx(threads);
212 if (!self->mtcctx) {
213 PyErr_NoMemory();
214 return -1;
215 }
216 }
217 else {
218 self->cctx = ZSTD_createCCtx();
219 if (!self->cctx) {
220 PyErr_NoMemory();
221 return -1;
222 }
155 223 }
156 224
157 225 self->compressionLevel = level;
@@ -182,6 +250,11 b' static int ZstdCompressor_init(ZstdCompr'
182 250 }
183 251
184 252 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
253 if (self->cstream) {
254 ZSTD_freeCStream(self->cstream);
255 self->cstream = NULL;
256 }
257
185 258 Py_XDECREF(self->cparams);
186 259 Py_XDECREF(self->dict);
187 260
@@ -195,6 +268,11 b' static void ZstdCompressor_dealloc(ZstdC'
195 268 self->cctx = NULL;
196 269 }
197 270
271 if (self->mtcctx) {
272 ZSTDMT_freeCCtx(self->mtcctx);
273 self->mtcctx = NULL;
274 }
275
198 276 PyObject_Del(self);
199 277 }
200 278
@@ -229,7 +307,6 b' static PyObject* ZstdCompressor_copy_str'
229 307 Py_ssize_t sourceSize = 0;
230 308 size_t inSize = ZSTD_CStreamInSize();
231 309 size_t outSize = ZSTD_CStreamOutSize();
232 ZSTD_CStream* cstream;
233 310 ZSTD_inBuffer input;
234 311 ZSTD_outBuffer output;
235 312 Py_ssize_t totalRead = 0;
@@ -243,8 +320,8 b' static PyObject* ZstdCompressor_copy_str'
243 320 PyObject* totalReadPy;
244 321 PyObject* totalWritePy;
245 322
246 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk", kwlist, &source, &dest, &sourceSize,
247 &inSize, &outSize)) {
323 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk:copy_stream", kwlist,
324 &source, &dest, &sourceSize, &inSize, &outSize)) {
248 325 return NULL;
249 326 }
250 327
@@ -261,10 +338,17 b' static PyObject* ZstdCompressor_copy_str'
261 338 /* Prevent free on uninitialized memory in finally. */
262 339 output.dst = NULL;
263 340
264 cstream = CStream_from_ZstdCompressor(self, sourceSize);
265 if (!cstream) {
266 res = NULL;
267 goto finally;
341 if (self->mtcctx) {
342 if (init_mtcstream(self, sourceSize)) {
343 res = NULL;
344 goto finally;
345 }
346 }
347 else {
348 if (0 != init_cstream(self, sourceSize)) {
349 res = NULL;
350 goto finally;
351 }
268 352 }
269 353
270 354 output.dst = PyMem_Malloc(outSize);
@@ -300,7 +384,12 b' static PyObject* ZstdCompressor_copy_str'
300 384
301 385 while (input.pos < input.size) {
302 386 Py_BEGIN_ALLOW_THREADS
303 zresult = ZSTD_compressStream(cstream, &output, &input);
387 if (self->mtcctx) {
388 zresult = ZSTDMT_compressStream(self->mtcctx, &output, &input);
389 }
390 else {
391 zresult = ZSTD_compressStream(self->cstream, &output, &input);
392 }
304 393 Py_END_ALLOW_THREADS
305 394
306 395 if (ZSTD_isError(zresult)) {
@@ -325,7 +414,12 b' static PyObject* ZstdCompressor_copy_str'
325 414
326 415 /* We've finished reading. Now flush the compressor stream. */
327 416 while (1) {
328 zresult = ZSTD_endStream(cstream, &output);
417 if (self->mtcctx) {
418 zresult = ZSTDMT_endStream(self->mtcctx, &output);
419 }
420 else {
421 zresult = ZSTD_endStream(self->cstream, &output);
422 }
329 423 if (ZSTD_isError(zresult)) {
330 424 PyErr_Format(ZstdError, "error ending compression stream: %s",
331 425 ZSTD_getErrorName(zresult));
@@ -350,24 +444,17 b' static PyObject* ZstdCompressor_copy_str'
350 444 }
351 445 }
352 446
353 ZSTD_freeCStream(cstream);
354 cstream = NULL;
355
356 447 totalReadPy = PyLong_FromSsize_t(totalRead);
357 448 totalWritePy = PyLong_FromSsize_t(totalWrite);
358 449 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
359 Py_DecRef(totalReadPy);
360 Py_DecRef(totalWritePy);
450 Py_DECREF(totalReadPy);
451 Py_DECREF(totalWritePy);
361 452
362 453 finally:
363 454 if (output.dst) {
364 455 PyMem_Free(output.dst);
365 456 }
366 457
367 if (cstream) {
368 ZSTD_freeCStream(cstream);
369 }
370
371 458 return res;
372 459 }
373 460
@@ -402,14 +489,26 b' static PyObject* ZstdCompressor_compress'
402 489 ZSTD_parameters zparams;
403 490
404 491 #if PY_MAJOR_VERSION >= 3
405 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O",
492 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O:compress",
406 493 #else
407 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O",
494 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O:compress",
408 495 #endif
409 496 kwlist, &source, &sourceSize, &allowEmpty)) {
410 497 return NULL;
411 498 }
412 499
500 if (self->threads && self->dict) {
501 PyErr_SetString(ZstdError,
502 "compress() cannot be used with both dictionaries and multi-threaded compression");
503 return NULL;
504 }
505
506 if (self->threads && self->cparams) {
507 PyErr_SetString(ZstdError,
508 "compress() cannot be used with both compression parameters and multi-threaded compression");
509 return NULL;
510 }
511
413 512 /* Limitation in zstd C API doesn't let decompression side distinguish
414 513 between content size of 0 and unknown content size. This can make round
415 514 tripping via Python difficult. Until this is fixed, require a flag
@@ -456,24 +555,28 b' static PyObject* ZstdCompressor_compress'
456 555 https://github.com/facebook/zstd/issues/358 contains more info. We could
457 556 potentially add an argument somewhere to control this behavior.
458 557 */
459 if (dictData && !self->cdict) {
460 if (populate_cdict(self, dictData, dictSize, &zparams)) {
461 Py_DECREF(output);
462 return NULL;
463 }
558 if (0 != populate_cdict(self, &zparams)) {
559 Py_DECREF(output);
560 return NULL;
464 561 }
465 562
466 563 Py_BEGIN_ALLOW_THREADS
467 /* By avoiding ZSTD_compress(), we don't necessarily write out content
468 size. This means the argument to ZstdCompressor to control frame
469 parameters is honored. */
470 if (self->cdict) {
471 zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize,
472 source, sourceSize, self->cdict);
564 if (self->mtcctx) {
565 zresult = ZSTDMT_compressCCtx(self->mtcctx, dest, destSize,
566 source, sourceSize, self->compressionLevel);
473 567 }
474 568 else {
475 zresult = ZSTD_compress_advanced(self->cctx, dest, destSize,
476 source, sourceSize, dictData, dictSize, zparams);
569 /* By avoiding ZSTD_compress(), we don't necessarily write out content
570 size. This means the argument to ZstdCompressor to control frame
571 parameters is honored. */
572 if (self->cdict) {
573 zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize,
574 source, sourceSize, self->cdict);
575 }
576 else {
577 zresult = ZSTD_compress_advanced(self->cctx, dest, destSize,
578 source, sourceSize, dictData, dictSize, zparams);
579 }
477 580 }
478 581 Py_END_ALLOW_THREADS
479 582
@@ -507,19 +610,28 b' static ZstdCompressionObj* ZstdCompresso'
507 610
508 611 Py_ssize_t inSize = 0;
509 612 size_t outSize = ZSTD_CStreamOutSize();
510 ZstdCompressionObj* result = PyObject_New(ZstdCompressionObj, &ZstdCompressionObjType);
613 ZstdCompressionObj* result = NULL;
614
615 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:compressobj", kwlist, &inSize)) {
616 return NULL;
617 }
618
619 result = (ZstdCompressionObj*)PyObject_CallObject((PyObject*)&ZstdCompressionObjType, NULL);
511 620 if (!result) {
512 621 return NULL;
513 622 }
514 623
515 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &inSize)) {
516 return NULL;
624 if (self->mtcctx) {
625 if (init_mtcstream(self, inSize)) {
626 Py_DECREF(result);
627 return NULL;
628 }
517 629 }
518
519 result->cstream = CStream_from_ZstdCompressor(self, inSize);
520 if (!result->cstream) {
521 Py_DECREF(result);
522 return NULL;
630 else {
631 if (0 != init_cstream(self, inSize)) {
632 Py_DECREF(result);
633 return NULL;
634 }
523 635 }
524 636
525 637 result->output.dst = PyMem_Malloc(outSize);
@@ -529,13 +641,9 b' static ZstdCompressionObj* ZstdCompresso'
529 641 return NULL;
530 642 }
531 643 result->output.size = outSize;
532 result->output.pos = 0;
533
534 644 result->compressor = self;
535 645 Py_INCREF(result->compressor);
536 646
537 result->finished = 0;
538
539 647 return result;
540 648 }
541 649
@@ -574,24 +682,15 b' static ZstdCompressorIterator* ZstdCompr'
574 682 size_t outSize = ZSTD_CStreamOutSize();
575 683 ZstdCompressorIterator* result;
576 684
577 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk", kwlist, &reader, &sourceSize,
578 &inSize, &outSize)) {
685 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk:read_from", kwlist,
686 &reader, &sourceSize, &inSize, &outSize)) {
579 687 return NULL;
580 688 }
581 689
582 result = PyObject_New(ZstdCompressorIterator, &ZstdCompressorIteratorType);
690 result = (ZstdCompressorIterator*)PyObject_CallObject((PyObject*)&ZstdCompressorIteratorType, NULL);
583 691 if (!result) {
584 692 return NULL;
585 693 }
586
587 result->compressor = NULL;
588 result->reader = NULL;
589 result->buffer = NULL;
590 result->cstream = NULL;
591 result->input.src = NULL;
592 result->output.dst = NULL;
593 result->readResult = NULL;
594
595 694 if (PyObject_HasAttrString(reader, "read")) {
596 695 result->reader = reader;
597 696 Py_INCREF(result->reader);
@@ -608,7 +707,6 b' static ZstdCompressorIterator* ZstdCompr'
608 707 goto except;
609 708 }
610 709
611 result->bufferOffset = 0;
612 710 sourceSize = result->buffer->len;
613 711 }
614 712 else {
@@ -621,9 +719,16 b' static ZstdCompressorIterator* ZstdCompr'
621 719 Py_INCREF(result->compressor);
622 720
623 721 result->sourceSize = sourceSize;
624 result->cstream = CStream_from_ZstdCompressor(self, sourceSize);
625 if (!result->cstream) {
626 goto except;
722
723 if (self->mtcctx) {
724 if (init_mtcstream(self, sourceSize)) {
725 goto except;
726 }
727 }
728 else {
729 if (0 != init_cstream(self, sourceSize)) {
730 goto except;
731 }
627 732 }
628 733
629 734 result->inSize = inSize;
@@ -635,26 +740,12 b' static ZstdCompressorIterator* ZstdCompr'
635 740 goto except;
636 741 }
637 742 result->output.size = outSize;
638 result->output.pos = 0;
639
640 result->input.src = NULL;
641 result->input.size = 0;
642 result->input.pos = 0;
643
644 result->finishedInput = 0;
645 result->finishedOutput = 0;
646 743
647 744 goto finally;
648 745
649 746 except:
650 if (result->cstream) {
651 ZSTD_freeCStream(result->cstream);
652 result->cstream = NULL;
653 }
654
655 Py_DecRef((PyObject*)result->compressor);
656 Py_DecRef(result->reader);
657
747 Py_XDECREF(result->compressor);
748 Py_XDECREF(result->reader);
658 749 Py_DECREF(result);
659 750 result = NULL;
660 751
@@ -693,8 +784,8 b' static ZstdCompressionWriter* ZstdCompre'
693 784 Py_ssize_t sourceSize = 0;
694 785 size_t outSize = ZSTD_CStreamOutSize();
695 786
696 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk", kwlist, &writer, &sourceSize,
697 &outSize)) {
787 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk:write_to", kwlist,
788 &writer, &sourceSize, &outSize)) {
698 789 return NULL;
699 790 }
700 791
@@ -703,7 +794,7 b' static ZstdCompressionWriter* ZstdCompre'
703 794 return NULL;
704 795 }
705 796
706 result = PyObject_New(ZstdCompressionWriter, &ZstdCompressionWriterType);
797 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
707 798 if (!result) {
708 799 return NULL;
709 800 }
@@ -715,11 +806,671 b' static ZstdCompressionWriter* ZstdCompre'
715 806 Py_INCREF(result->writer);
716 807
717 808 result->sourceSize = sourceSize;
718
719 809 result->outSize = outSize;
720 810
721 result->entered = 0;
722 result->cstream = NULL;
811 return result;
812 }
813
814 typedef struct {
815 void* sourceData;
816 size_t sourceSize;
817 } DataSource;
818
819 typedef struct {
820 DataSource* sources;
821 Py_ssize_t sourcesSize;
822 unsigned long long totalSourceSize;
823 } DataSources;
824
825 typedef struct {
826 void* dest;
827 Py_ssize_t destSize;
828 BufferSegment* segments;
829 Py_ssize_t segmentsSize;
830 } DestBuffer;
831
832 typedef enum {
833 WorkerError_none = 0,
834 WorkerError_zstd = 1,
835 WorkerError_no_memory = 2,
836 } WorkerError;
837
838 /**
839 * Holds state for an individual worker performing multi_compress_to_buffer work.
840 */
841 typedef struct {
842 /* Used for compression. */
843 ZSTD_CCtx* cctx;
844 ZSTD_CDict* cdict;
845 int cLevel;
846 CompressionParametersObject* cParams;
847 ZSTD_frameParameters fParams;
848
849 /* What to compress. */
850 DataSource* sources;
851 Py_ssize_t sourcesSize;
852 Py_ssize_t startOffset;
853 Py_ssize_t endOffset;
854 unsigned long long totalSourceSize;
855
856 /* Result storage. */
857 DestBuffer* destBuffers;
858 Py_ssize_t destCount;
859
860 /* Error tracking. */
861 WorkerError error;
862 size_t zresult;
863 Py_ssize_t errorOffset;
864 } WorkerState;
865
866 static void compress_worker(WorkerState* state) {
867 Py_ssize_t inputOffset = state->startOffset;
868 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
869 Py_ssize_t currentBufferStartOffset = state->startOffset;
870 size_t zresult;
871 ZSTD_parameters zparams;
872 void* newDest;
873 size_t allocationSize;
874 size_t boundSize;
875 Py_ssize_t destOffset = 0;
876 DataSource* sources = state->sources;
877 DestBuffer* destBuffer;
878
879 assert(!state->destBuffers);
880 assert(0 == state->destCount);
881
882 if (state->cParams) {
883 ztopy_compression_parameters(state->cParams, &zparams.cParams);
884 }
885
886 zparams.fParams = state->fParams;
887
888 /*
889 * The total size of the compressed data is unknown until we actually
890 * compress data. That means we can't pre-allocate the exact size we need.
891 *
892 * There is a cost to every allocation and reallocation. So, it is in our
893 * interest to minimize the number of allocations.
894 *
895 * There is also a cost to too few allocations. If allocations are too
896 * large they may fail. If buffers are shared and all inputs become
897 * irrelevant at different lifetimes, then a reference to one segment
898 * in the buffer will keep the entire buffer alive. This leads to excessive
899 * memory usage.
900 *
901 * Our current strategy is to assume a compression ratio of 16:1 and
902 * allocate buffers of that size, rounded up to the nearest power of 2
903 * (because computers like round numbers). That ratio is greater than what
904 * most inputs achieve. This is by design: we don't want to over-allocate.
905 * But we don't want to under-allocate and lead to too many buffers either.
906 */
907
908 state->destCount = 1;
909
910 state->destBuffers = calloc(1, sizeof(DestBuffer));
911 if (NULL == state->destBuffers) {
912 state->error = WorkerError_no_memory;
913 return;
914 }
915
916 destBuffer = &state->destBuffers[state->destCount - 1];
917
918 /*
919 * Rather than track bounds and grow the segments buffer, allocate space
920 * to hold remaining items then truncate when we're done with it.
921 */
922 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
923 if (NULL == destBuffer->segments) {
924 state->error = WorkerError_no_memory;
925 return;
926 }
927
928 destBuffer->segmentsSize = remainingItems;
929
930 allocationSize = roundpow2(state->totalSourceSize >> 4);
931
932 /* If the maximum size of the output is larger than that, round up. */
933 boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize);
934
935 if (boundSize > allocationSize) {
936 allocationSize = roundpow2(boundSize);
937 }
938
939 destBuffer->dest = malloc(allocationSize);
940 if (NULL == destBuffer->dest) {
941 state->error = WorkerError_no_memory;
942 return;
943 }
944
945 destBuffer->destSize = allocationSize;
946
947 for (inputOffset = state->startOffset; inputOffset <= state->endOffset; inputOffset++) {
948 void* source = sources[inputOffset].sourceData;
949 size_t sourceSize = sources[inputOffset].sourceSize;
950 size_t destAvailable;
951 void* dest;
952
953 destAvailable = destBuffer->destSize - destOffset;
954 boundSize = ZSTD_compressBound(sourceSize);
955
956 /*
957 * Not enough space in current buffer to hold largest compressed output.
958 * So allocate and switch to a new output buffer.
959 */
960 if (boundSize > destAvailable) {
961 /*
962 * The downsizing of the existing buffer is optional. It should be cheap
963 * (unlike growing). So we just do it.
964 */
965 if (destAvailable) {
966 newDest = realloc(destBuffer->dest, destOffset);
967 if (NULL == newDest) {
968 state->error = WorkerError_no_memory;
969 return;
970 }
971
972 destBuffer->dest = newDest;
973 destBuffer->destSize = destOffset;
974 }
975
976 /* Truncate segments buffer. */
977 newDest = realloc(destBuffer->segments,
978 (inputOffset - currentBufferStartOffset + 1) * sizeof(BufferSegment));
979 if (NULL == newDest) {
980 state->error = WorkerError_no_memory;
981 return;
982 }
983
984 destBuffer->segments = newDest;
985 destBuffer->segmentsSize = inputOffset - currentBufferStartOffset;
986
987 /* Grow space for new struct. */
988 /* TODO consider over-allocating so we don't do this every time. */
989 newDest = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
990 if (NULL == newDest) {
991 state->error = WorkerError_no_memory;
992 return;
993 }
994
995 state->destBuffers = newDest;
996 state->destCount++;
997
998 destBuffer = &state->destBuffers[state->destCount - 1];
999
1000 /* Don't take any chances with non-NULL pointers. */
1001 memset(destBuffer, 0, sizeof(DestBuffer));
1002
1003 /**
1004 * We could dynamically update allocation size based on work done so far.
1005 * For now, keep is simple.
1006 */
1007 allocationSize = roundpow2(state->totalSourceSize >> 4);
1008
1009 if (boundSize > allocationSize) {
1010 allocationSize = roundpow2(boundSize);
1011 }
1012
1013 destBuffer->dest = malloc(allocationSize);
1014 if (NULL == destBuffer->dest) {
1015 state->error = WorkerError_no_memory;
1016 return;
1017 }
1018
1019 destBuffer->destSize = allocationSize;
1020 destAvailable = allocationSize;
1021 destOffset = 0;
1022
1023 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1024 if (NULL == destBuffer->segments) {
1025 state->error = WorkerError_no_memory;
1026 return;
1027 }
1028
1029 destBuffer->segmentsSize = remainingItems;
1030 currentBufferStartOffset = inputOffset;
1031 }
1032
1033 dest = (char*)destBuffer->dest + destOffset;
1034
1035 if (state->cdict) {
1036 zresult = ZSTD_compress_usingCDict(state->cctx, dest, destAvailable,
1037 source, sourceSize, state->cdict);
1038 }
1039 else {
1040 if (!state->cParams) {
1041 zparams.cParams = ZSTD_getCParams(state->cLevel, sourceSize, 0);
1042 }
1043
1044 zresult = ZSTD_compress_advanced(state->cctx, dest, destAvailable,
1045 source, sourceSize, NULL, 0, zparams);
1046 }
1047
1048 if (ZSTD_isError(zresult)) {
1049 state->error = WorkerError_zstd;
1050 state->zresult = zresult;
1051 state->errorOffset = inputOffset;
1052 break;
1053 }
1054
1055 destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset;
1056 destBuffer->segments[inputOffset - currentBufferStartOffset].length = zresult;
1057
1058 destOffset += zresult;
1059 remainingItems--;
1060 }
1061
1062 if (destBuffer->destSize > destOffset) {
1063 newDest = realloc(destBuffer->dest, destOffset);
1064 if (NULL == newDest) {
1065 state->error = WorkerError_no_memory;
1066 return;
1067 }
1068
1069 destBuffer->dest = newDest;
1070 destBuffer->destSize = destOffset;
1071 }
1072 }
1073
1074 ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor,
1075 DataSources* sources, unsigned int threadCount) {
1076 ZSTD_parameters zparams;
1077 unsigned long long bytesPerWorker;
1078 POOL_ctx* pool = NULL;
1079 WorkerState* workerStates = NULL;
1080 Py_ssize_t i;
1081 unsigned long long workerBytes = 0;
1082 Py_ssize_t workerStartOffset = 0;
1083 size_t currentThread = 0;
1084 int errored = 0;
1085 Py_ssize_t segmentsCount = 0;
1086 Py_ssize_t segmentIndex;
1087 PyObject* segmentsArg = NULL;
1088 ZstdBufferWithSegments* buffer;
1089 ZstdBufferWithSegmentsCollection* result = NULL;
1090
1091 assert(sources->sourcesSize > 0);
1092 assert(sources->totalSourceSize > 0);
1093 assert(threadCount >= 1);
1094
1095 /* More threads than inputs makes no sense. */
1096 threadCount = sources->sourcesSize < threadCount ? (unsigned int)sources->sourcesSize
1097 : threadCount;
1098
1099 /* TODO lower thread count when input size is too small and threads would add
1100 overhead. */
1101
1102 /*
1103 * When dictionaries are used, parameters are derived from the size of the
1104 * first element.
1105 *
1106 * TODO come up with a better mechanism.
1107 */
1108 memset(&zparams, 0, sizeof(zparams));
1109 if (compressor->cparams) {
1110 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
1111 }
1112 else {
1113 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel,
1114 sources->sources[0].sourceSize,
1115 compressor->dict ? compressor->dict->dictSize : 0);
1116 }
1117
1118 zparams.fParams = compressor->fparams;
1119
1120 if (0 != populate_cdict(compressor, &zparams)) {
1121 return NULL;
1122 }
1123
1124 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1125 if (NULL == workerStates) {
1126 PyErr_NoMemory();
1127 goto finally;
1128 }
1129
1130 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1131
1132 if (threadCount > 1) {
1133 pool = POOL_create(threadCount, 1);
1134 if (NULL == pool) {
1135 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1136 goto finally;
1137 }
1138 }
1139
1140 bytesPerWorker = sources->totalSourceSize / threadCount;
1141
1142 for (i = 0; i < threadCount; i++) {
1143 workerStates[i].cctx = ZSTD_createCCtx();
1144 if (!workerStates[i].cctx) {
1145 PyErr_NoMemory();
1146 goto finally;
1147 }
1148
1149 workerStates[i].cdict = compressor->cdict;
1150 workerStates[i].cLevel = compressor->compressionLevel;
1151 workerStates[i].cParams = compressor->cparams;
1152 workerStates[i].fParams = compressor->fparams;
1153
1154 workerStates[i].sources = sources->sources;
1155 workerStates[i].sourcesSize = sources->sourcesSize;
1156 }
1157
1158 Py_BEGIN_ALLOW_THREADS
1159 for (i = 0; i < sources->sourcesSize; i++) {
1160 workerBytes += sources->sources[i].sourceSize;
1161
1162 /*
1163 * The last worker/thread needs to handle all remaining work. Don't
1164 * trigger it prematurely. Defer to the block outside of the loop
1165 * to run the last worker/thread. But do still process this loop
1166 * so workerBytes is correct.
1167 */
1168 if (currentThread == threadCount - 1) {
1169 continue;
1170 }
1171
1172 if (workerBytes >= bytesPerWorker) {
1173 assert(currentThread < threadCount);
1174 workerStates[currentThread].totalSourceSize = workerBytes;
1175 workerStates[currentThread].startOffset = workerStartOffset;
1176 workerStates[currentThread].endOffset = i;
1177
1178 if (threadCount > 1) {
1179 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1180 }
1181 else {
1182 compress_worker(&workerStates[currentThread]);
1183 }
1184
1185 currentThread++;
1186 workerStartOffset = i + 1;
1187 workerBytes = 0;
1188 }
1189 }
1190
1191 if (workerBytes) {
1192 assert(currentThread < threadCount);
1193 workerStates[currentThread].totalSourceSize = workerBytes;
1194 workerStates[currentThread].startOffset = workerStartOffset;
1195 workerStates[currentThread].endOffset = sources->sourcesSize - 1;
1196
1197 if (threadCount > 1) {
1198 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1199 }
1200 else {
1201 compress_worker(&workerStates[currentThread]);
1202 }
1203 }
1204
1205 if (threadCount > 1) {
1206 POOL_free(pool);
1207 pool = NULL;
1208 }
1209
1210 Py_END_ALLOW_THREADS
1211
1212 for (i = 0; i < threadCount; i++) {
1213 switch (workerStates[i].error) {
1214 case WorkerError_no_memory:
1215 PyErr_NoMemory();
1216 errored = 1;
1217 break;
1218
1219 case WorkerError_zstd:
1220 PyErr_Format(ZstdError, "error compressing item %zd: %s",
1221 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1222 errored = 1;
1223 break;
1224 default:
1225 ;
1226 }
1227
1228 if (errored) {
1229 break;
1230 }
1231
1232 }
1233
1234 if (errored) {
1235 goto finally;
1236 }
1237
1238 segmentsCount = 0;
1239 for (i = 0; i < threadCount; i++) {
1240 WorkerState* state = &workerStates[i];
1241 segmentsCount += state->destCount;
1242 }
1243
1244 segmentsArg = PyTuple_New(segmentsCount);
1245 if (NULL == segmentsArg) {
1246 goto finally;
1247 }
1248
1249 segmentIndex = 0;
1250
1251 for (i = 0; i < threadCount; i++) {
1252 Py_ssize_t j;
1253 WorkerState* state = &workerStates[i];
1254
1255 for (j = 0; j < state->destCount; j++) {
1256 DestBuffer* destBuffer = &state->destBuffers[j];
1257 buffer = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1258 destBuffer->segments, destBuffer->segmentsSize);
1259
1260 if (NULL == buffer) {
1261 goto finally;
1262 }
1263
1264 /* Tell instance to use free() instsead of PyMem_Free(). */
1265 buffer->useFree = 1;
1266
1267 /*
1268 * BufferWithSegments_FromMemory takes ownership of the backing memory.
1269 * Unset it here so it doesn't get freed below.
1270 */
1271 destBuffer->dest = NULL;
1272 destBuffer->segments = NULL;
1273
1274 PyTuple_SET_ITEM(segmentsArg, segmentIndex++, (PyObject*)buffer);
1275 }
1276 }
1277
1278 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1279 (PyObject*)&ZstdBufferWithSegmentsCollectionType, segmentsArg);
1280
1281 finally:
1282 Py_CLEAR(segmentsArg);
1283
1284 if (pool) {
1285 POOL_free(pool);
1286 }
1287
1288 if (workerStates) {
1289 Py_ssize_t j;
1290
1291 for (i = 0; i < threadCount; i++) {
1292 WorkerState state = workerStates[i];
1293
1294 if (state.cctx) {
1295 ZSTD_freeCCtx(state.cctx);
1296 }
1297
1298 /* malloc() is used in worker thread. */
1299
1300 for (j = 0; j < state.destCount; j++) {
1301 if (state.destBuffers) {
1302 free(state.destBuffers[j].dest);
1303 free(state.destBuffers[j].segments);
1304 }
1305 }
1306
1307
1308 free(state.destBuffers);
1309 }
1310
1311 PyMem_Free(workerStates);
1312 }
1313
1314 return result;
1315 }
1316
1317 PyDoc_STRVAR(ZstdCompressor_multi_compress_to_buffer__doc__,
1318 "Compress multiple pieces of data as a single operation\n"
1319 "\n"
1320 "Receives a ``BufferWithSegmentsCollection``, a ``BufferWithSegments``, or\n"
1321 "a list of bytes like objects holding data to compress.\n"
1322 "\n"
1323 "Returns a ``BufferWithSegmentsCollection`` holding compressed data.\n"
1324 "\n"
1325 "This function is optimized to perform multiple compression operations as\n"
1326 "quickly as possible with as little overhead as possible.\n"
1327 );
1328
1329 static ZstdBufferWithSegmentsCollection* ZstdCompressor_multi_compress_to_buffer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
1330 static char* kwlist[] = {
1331 "data",
1332 "threads",
1333 NULL
1334 };
1335
1336 PyObject* data;
1337 int threads = 0;
1338 Py_buffer* dataBuffers = NULL;
1339 DataSources sources;
1340 Py_ssize_t i;
1341 Py_ssize_t sourceCount = 0;
1342 ZstdBufferWithSegmentsCollection* result = NULL;
1343
1344 if (self->mtcctx) {
1345 PyErr_SetString(ZstdError,
1346 "function cannot be called on ZstdCompressor configured for multi-threaded compression");
1347 return NULL;
1348 }
1349
1350 memset(&sources, 0, sizeof(sources));
1351
1352 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist,
1353 &data, &threads)) {
1354 return NULL;
1355 }
1356
1357 if (threads < 0) {
1358 threads = cpu_count();
1359 }
1360
1361 if (threads < 2) {
1362 threads = 1;
1363 }
1364
1365 if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsType)) {
1366 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)data;
1367
1368 sources.sources = PyMem_Malloc(buffer->segmentCount * sizeof(DataSource));
1369 if (NULL == sources.sources) {
1370 PyErr_NoMemory();
1371 goto finally;
1372 }
1373
1374 for (i = 0; i < buffer->segmentCount; i++) {
1375 sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset;
1376 sources.sources[i].sourceSize = buffer->segments[i].length;
1377 sources.totalSourceSize += buffer->segments[i].length;
1378 }
1379
1380 sources.sourcesSize = buffer->segmentCount;
1381 }
1382 else if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsCollectionType)) {
1383 Py_ssize_t j;
1384 Py_ssize_t offset = 0;
1385 ZstdBufferWithSegments* buffer;
1386 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)data;
1387
1388 sourceCount = BufferWithSegmentsCollection_length(collection);
1389
1390 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1391 if (NULL == sources.sources) {
1392 PyErr_NoMemory();
1393 goto finally;
1394 }
1395
1396 for (i = 0; i < collection->bufferCount; i++) {
1397 buffer = collection->buffers[i];
1398
1399 for (j = 0; j < buffer->segmentCount; j++) {
1400 sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset;
1401 sources.sources[offset].sourceSize = buffer->segments[j].length;
1402 sources.totalSourceSize += buffer->segments[j].length;
1403
1404 offset++;
1405 }
1406 }
1407
1408 sources.sourcesSize = sourceCount;
1409 }
1410 else if (PyList_Check(data)) {
1411 sourceCount = PyList_GET_SIZE(data);
1412
1413 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1414 if (NULL == sources.sources) {
1415 PyErr_NoMemory();
1416 goto finally;
1417 }
1418
1419 /*
1420 * It isn't clear whether the address referred to by Py_buffer.buf
1421 * is still valid after PyBuffer_Release. So we hold a reference to all
1422 * Py_buffer instances for the duration of the operation.
1423 */
1424 dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer));
1425 if (NULL == dataBuffers) {
1426 PyErr_NoMemory();
1427 goto finally;
1428 }
1429
1430 memset(dataBuffers, 0, sourceCount * sizeof(Py_buffer));
1431
1432 for (i = 0; i < sourceCount; i++) {
1433 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(data, i),
1434 &dataBuffers[i], PyBUF_CONTIG_RO)) {
1435 PyErr_Clear();
1436 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1437 goto finally;
1438 }
1439
1440 sources.sources[i].sourceData = dataBuffers[i].buf;
1441 sources.sources[i].sourceSize = dataBuffers[i].len;
1442 sources.totalSourceSize += dataBuffers[i].len;
1443 }
1444
1445 sources.sourcesSize = sourceCount;
1446 }
1447 else {
1448 PyErr_SetString(PyExc_TypeError, "argument must be list of BufferWithSegments");
1449 goto finally;
1450 }
1451
1452 if (0 == sources.sourcesSize) {
1453 PyErr_SetString(PyExc_ValueError, "no source elements found");
1454 goto finally;
1455 }
1456
1457 if (0 == sources.totalSourceSize) {
1458 PyErr_SetString(PyExc_ValueError, "source elements are empty");
1459 goto finally;
1460 }
1461
1462 result = compress_from_datasources(self, &sources, threads);
1463
1464 finally:
1465 PyMem_Free(sources.sources);
1466
1467 if (dataBuffers) {
1468 for (i = 0; i < sourceCount; i++) {
1469 PyBuffer_Release(&dataBuffers[i]);
1470 }
1471
1472 PyMem_Free(dataBuffers);
1473 }
723 1474
724 1475 return result;
725 1476 }
@@ -735,6 +1486,8 b' static PyMethodDef ZstdCompressor_method'
735 1486 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_from__doc__ },
736 1487 { "write_to", (PyCFunction)ZstdCompressor_write_to,
737 1488 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_write_to___doc__ },
1489 { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer,
1490 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ },
738 1491 { NULL, NULL }
739 1492 };
740 1493
@@ -27,11 +27,6 b' static void ZstdCompressorIterator_deall'
27 27 self->buffer = NULL;
28 28 }
29 29
30 if (self->cstream) {
31 ZSTD_freeCStream(self->cstream);
32 self->cstream = NULL;
33 }
34
35 30 if (self->output.dst) {
36 31 PyMem_Free(self->output.dst);
37 32 self->output.dst = NULL;
@@ -63,7 +58,14 b' feedcompressor:'
63 58 /* If we have data left in the input, consume it. */
64 59 if (self->input.pos < self->input.size) {
65 60 Py_BEGIN_ALLOW_THREADS
66 zresult = ZSTD_compressStream(self->cstream, &self->output, &self->input);
61 if (self->compressor->mtcctx) {
62 zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
63 &self->output, &self->input);
64 }
65 else {
66 zresult = ZSTD_compressStream(self->compressor->cstream, &self->output,
67 &self->input);
68 }
67 69 Py_END_ALLOW_THREADS
68 70
69 71 /* Release the Python object holding the input buffer. */
@@ -128,7 +130,12 b' feedcompressor:'
128 130
129 131 /* EOF */
130 132 if (0 == readSize) {
131 zresult = ZSTD_endStream(self->cstream, &self->output);
133 if (self->compressor->mtcctx) {
134 zresult = ZSTDMT_endStream(self->compressor->mtcctx, &self->output);
135 }
136 else {
137 zresult = ZSTD_endStream(self->compressor->cstream, &self->output);
138 }
132 139 if (ZSTD_isError(zresult)) {
133 140 PyErr_Format(ZstdError, "error ending compression stream: %s",
134 141 ZSTD_getErrorName(zresult));
@@ -152,7 +159,13 b' feedcompressor:'
152 159 self->input.pos = 0;
153 160
154 161 Py_BEGIN_ALLOW_THREADS
155 zresult = ZSTD_compressStream(self->cstream, &self->output, &self->input);
162 if (self->compressor->mtcctx) {
163 zresult = ZSTDMT_compressStream(self->compressor->mtcctx, &self->output,
164 &self->input);
165 }
166 else {
167 zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, &self->input);
168 }
156 169 Py_END_ALLOW_THREADS
157 170
158 171 /* The input buffer currently points to memory managed by Python
@@ -41,7 +41,7 b' void constants_module_init(PyObject* mod'
41 41 PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR));
42 42 PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR));
43 43 PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE));
44 Py_IncRef(zstdVersion);
44 Py_INCREF(zstdVersion);
45 45 PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion);
46 46
47 47 frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header));
@@ -18,11 +18,6 b' static void ZstdDecompressionWriter_deal'
18 18 Py_XDECREF(self->decompressor);
19 19 Py_XDECREF(self->writer);
20 20
21 if (self->dstream) {
22 ZSTD_freeDStream(self->dstream);
23 self->dstream = NULL;
24 }
25
26 21 PyObject_Del(self);
27 22 }
28 23
@@ -32,8 +27,7 b' static PyObject* ZstdDecompressionWriter'
32 27 return NULL;
33 28 }
34 29
35 self->dstream = DStream_from_ZstdDecompressor(self->decompressor);
36 if (!self->dstream) {
30 if (0 != init_dstream(self->decompressor)) {
37 31 return NULL;
38 32 }
39 33
@@ -46,22 +40,17 b' static PyObject* ZstdDecompressionWriter'
46 40 static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) {
47 41 self->entered = 0;
48 42
49 if (self->dstream) {
50 ZSTD_freeDStream(self->dstream);
51 self->dstream = NULL;
52 }
53
54 43 Py_RETURN_FALSE;
55 44 }
56 45
57 46 static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) {
58 if (!self->dstream) {
47 if (!self->decompressor->dstream) {
59 48 PyErr_SetString(ZstdError, "cannot determine size of inactive decompressor; "
60 49 "call when context manager is active");
61 50 return NULL;
62 51 }
63 52
64 return PyLong_FromSize_t(ZSTD_sizeof_DStream(self->dstream));
53 return PyLong_FromSize_t(ZSTD_sizeof_DStream(self->decompressor->dstream));
65 54 }
66 55
67 56 static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args) {
@@ -71,11 +60,12 b' static PyObject* ZstdDecompressionWriter'
71 60 ZSTD_inBuffer input;
72 61 ZSTD_outBuffer output;
73 62 PyObject* res;
63 Py_ssize_t totalWrite = 0;
74 64
75 65 #if PY_MAJOR_VERSION >= 3
76 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
66 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
77 67 #else
78 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
68 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
79 69 #endif
80 70 return NULL;
81 71 }
@@ -85,6 +75,8 b' static PyObject* ZstdDecompressionWriter'
85 75 return NULL;
86 76 }
87 77
78 assert(self->decompressor->dstream);
79
88 80 output.dst = PyMem_Malloc(self->outSize);
89 81 if (!output.dst) {
90 82 return PyErr_NoMemory();
@@ -98,7 +90,7 b' static PyObject* ZstdDecompressionWriter'
98 90
99 91 while ((ssize_t)input.pos < sourceSize) {
100 92 Py_BEGIN_ALLOW_THREADS
101 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
93 zresult = ZSTD_decompressStream(self->decompressor->dstream, &output, &input);
102 94 Py_END_ALLOW_THREADS
103 95
104 96 if (ZSTD_isError(zresult)) {
@@ -116,15 +108,15 b' static PyObject* ZstdDecompressionWriter'
116 108 #endif
117 109 output.dst, output.pos);
118 110 Py_XDECREF(res);
111 totalWrite += output.pos;
119 112 output.pos = 0;
120 113 }
121 114 }
122 115
123 116 PyMem_Free(output.dst);
124 117
125 /* TODO return bytes written */
126 Py_RETURN_NONE;
127 }
118 return PyLong_FromSsize_t(totalWrite);
119 }
128 120
129 121 static PyMethodDef ZstdDecompressionWriter_methods[] = {
130 122 { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
@@ -15,11 +15,6 b' PyDoc_STRVAR(DecompressionObj__doc__,'
15 15 );
16 16
17 17 static void DecompressionObj_dealloc(ZstdDecompressionObj* self) {
18 if (self->dstream) {
19 ZSTD_freeDStream(self->dstream);
20 self->dstream = NULL;
21 }
22
23 18 Py_XDECREF(self->decompressor);
24 19
25 20 PyObject_Del(self);
@@ -35,15 +30,18 b' static PyObject* DecompressionObj_decomp'
35 30 PyObject* result = NULL;
36 31 Py_ssize_t resultSize = 0;
37 32
33 /* Constructor should ensure stream is populated. */
34 assert(self->decompressor->dstream);
35
38 36 if (self->finished) {
39 37 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
40 38 return NULL;
41 39 }
42 40
43 41 #if PY_MAJOR_VERSION >= 3
44 if (!PyArg_ParseTuple(args, "y#",
42 if (!PyArg_ParseTuple(args, "y#:decompress",
45 43 #else
46 if (!PyArg_ParseTuple(args, "s#",
44 if (!PyArg_ParseTuple(args, "s#:decompress",
47 45 #endif
48 46 &source, &sourceSize)) {
49 47 return NULL;
@@ -64,7 +62,7 b' static PyObject* DecompressionObj_decomp'
64 62 /* Read input until exhausted. */
65 63 while (input.pos < input.size) {
66 64 Py_BEGIN_ALLOW_THREADS
67 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
65 zresult = ZSTD_decompressStream(self->decompressor->dstream, &output, &input);
68 66 Py_END_ALLOW_THREADS
69 67
70 68 if (ZSTD_isError(zresult)) {
@@ -106,8 +104,7 b' static PyObject* DecompressionObj_decomp'
106 104 goto finally;
107 105
108 106 except:
109 Py_DecRef(result);
110 result = NULL;
107 Py_CLEAR(result);
111 108
112 109 finally:
113 110 PyMem_Free(output.dst);
This diff has been collapsed as it changes many lines, (1138 lines changed) Show them Hide them
@@ -7,19 +7,37 b''
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 #include "pool.h"
10 11
11 12 extern PyObject* ZstdError;
12 13
13 ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor) {
14 ZSTD_DStream* dstream;
14 /**
15 * Ensure the ZSTD_DStream on a ZstdDecompressor is initialized and reset.
16 *
17 * This should be called before starting a decompression operation with a
18 * ZSTD_DStream on a ZstdDecompressor.
19 */
20 int init_dstream(ZstdDecompressor* decompressor) {
15 21 void* dictData = NULL;
16 22 size_t dictSize = 0;
17 23 size_t zresult;
18 24
19 dstream = ZSTD_createDStream();
20 if (!dstream) {
25 /* Simple case of dstream already exists. Just reset it. */
26 if (decompressor->dstream) {
27 zresult = ZSTD_resetDStream(decompressor->dstream);
28 if (ZSTD_isError(zresult)) {
29 PyErr_Format(ZstdError, "could not reset DStream: %s",
30 ZSTD_getErrorName(zresult));
31 return -1;
32 }
33
34 return 0;
35 }
36
37 decompressor->dstream = ZSTD_createDStream();
38 if (!decompressor->dstream) {
21 39 PyErr_SetString(ZstdError, "could not create DStream");
22 return NULL;
40 return -1;
23 41 }
24 42
25 43 if (decompressor->dict) {
@@ -28,19 +46,23 b' ZSTD_DStream* DStream_from_ZstdDecompres'
28 46 }
29 47
30 48 if (dictData) {
31 zresult = ZSTD_initDStream_usingDict(dstream, dictData, dictSize);
49 zresult = ZSTD_initDStream_usingDict(decompressor->dstream, dictData, dictSize);
32 50 }
33 51 else {
34 zresult = ZSTD_initDStream(dstream);
52 zresult = ZSTD_initDStream(decompressor->dstream);
35 53 }
36 54
37 55 if (ZSTD_isError(zresult)) {
56 /* Don't leave a reference to an invalid object. */
57 ZSTD_freeDStream(decompressor->dstream);
58 decompressor->dstream = NULL;
59
38 60 PyErr_Format(ZstdError, "could not initialize DStream: %s",
39 61 ZSTD_getErrorName(zresult));
40 return NULL;
62 return -1;
41 63 }
42 64
43 return dstream;
65 return 0;
44 66 }
45 67
46 68 PyDoc_STRVAR(Decompressor__doc__,
@@ -59,23 +81,19 b' static int Decompressor_init(ZstdDecompr'
59 81
60 82 ZstdCompressionDict* dict = NULL;
61 83
62 self->refdctx = NULL;
84 self->dctx = NULL;
63 85 self->dict = NULL;
64 86 self->ddict = NULL;
65 87
66 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!", kwlist,
88 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist,
67 89 &ZstdCompressionDictType, &dict)) {
68 90 return -1;
69 91 }
70 92
71 /* Instead of creating a ZSTD_DCtx for every decompression operation,
72 we create an instance at object creation time and recycle it via
73 ZSTD_copyDCTx() on each use. This means each use is a malloc+memcpy
74 instead of a malloc+init. */
75 93 /* TODO lazily initialize the reference ZSTD_DCtx on first use since
76 94 not instances of ZstdDecompressor will use a ZSTD_DCtx. */
77 self->refdctx = ZSTD_createDCtx();
78 if (!self->refdctx) {
95 self->dctx = ZSTD_createDCtx();
96 if (!self->dctx) {
79 97 PyErr_NoMemory();
80 98 goto except;
81 99 }
@@ -88,26 +106,32 b' static int Decompressor_init(ZstdDecompr'
88 106 return 0;
89 107
90 108 except:
91 if (self->refdctx) {
92 ZSTD_freeDCtx(self->refdctx);
93 self->refdctx = NULL;
109 if (self->dctx) {
110 ZSTD_freeDCtx(self->dctx);
111 self->dctx = NULL;
94 112 }
95 113
96 114 return -1;
97 115 }
98 116
99 117 static void Decompressor_dealloc(ZstdDecompressor* self) {
100 if (self->refdctx) {
101 ZSTD_freeDCtx(self->refdctx);
102 }
103
104 Py_XDECREF(self->dict);
118 Py_CLEAR(self->dict);
105 119
106 120 if (self->ddict) {
107 121 ZSTD_freeDDict(self->ddict);
108 122 self->ddict = NULL;
109 123 }
110 124
125 if (self->dstream) {
126 ZSTD_freeDStream(self->dstream);
127 self->dstream = NULL;
128 }
129
130 if (self->dctx) {
131 ZSTD_freeDCtx(self->dctx);
132 self->dctx = NULL;
133 }
134
111 135 PyObject_Del(self);
112 136 }
113 137
@@ -136,7 +160,6 b' static PyObject* Decompressor_copy_strea'
136 160 PyObject* dest;
137 161 size_t inSize = ZSTD_DStreamInSize();
138 162 size_t outSize = ZSTD_DStreamOutSize();
139 ZSTD_DStream* dstream;
140 163 ZSTD_inBuffer input;
141 164 ZSTD_outBuffer output;
142 165 Py_ssize_t totalRead = 0;
@@ -150,8 +173,8 b' static PyObject* Decompressor_copy_strea'
150 173 PyObject* totalReadPy;
151 174 PyObject* totalWritePy;
152 175
153 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk", kwlist, &source,
154 &dest, &inSize, &outSize)) {
176 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
177 &source, &dest, &inSize, &outSize)) {
155 178 return NULL;
156 179 }
157 180
@@ -168,8 +191,7 b' static PyObject* Decompressor_copy_strea'
168 191 /* Prevent free on uninitialized memory in finally. */
169 192 output.dst = NULL;
170 193
171 dstream = DStream_from_ZstdDecompressor(self);
172 if (!dstream) {
194 if (0 != init_dstream(self)) {
173 195 res = NULL;
174 196 goto finally;
175 197 }
@@ -207,7 +229,7 b' static PyObject* Decompressor_copy_strea'
207 229
208 230 while (input.pos < input.size) {
209 231 Py_BEGIN_ALLOW_THREADS
210 zresult = ZSTD_decompressStream(dstream, &output, &input);
232 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
211 233 Py_END_ALLOW_THREADS
212 234
213 235 if (ZSTD_isError(zresult)) {
@@ -234,24 +256,17 b' static PyObject* Decompressor_copy_strea'
234 256
235 257 /* Source stream is exhausted. Finish up. */
236 258
237 ZSTD_freeDStream(dstream);
238 dstream = NULL;
239
240 259 totalReadPy = PyLong_FromSsize_t(totalRead);
241 260 totalWritePy = PyLong_FromSsize_t(totalWrite);
242 261 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
243 Py_DecRef(totalReadPy);
244 Py_DecRef(totalWritePy);
262 Py_DECREF(totalReadPy);
263 Py_DECREF(totalWritePy);
245 264
246 finally:
265 finally:
247 266 if (output.dst) {
248 267 PyMem_Free(output.dst);
249 268 }
250 269
251 if (dstream) {
252 ZSTD_freeDStream(dstream);
253 }
254
255 270 return res;
256 271 }
257 272
@@ -291,28 +306,19 b' PyObject* Decompressor_decompress(ZstdDe'
291 306 unsigned long long decompressedSize;
292 307 size_t destCapacity;
293 308 PyObject* result = NULL;
294 ZSTD_DCtx* dctx = NULL;
295 309 void* dictData = NULL;
296 310 size_t dictSize = 0;
297 311 size_t zresult;
298 312
299 313 #if PY_MAJOR_VERSION >= 3
300 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n", kwlist,
314 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n:decompress",
301 315 #else
302 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n", kwlist,
316 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n:decompress",
303 317 #endif
304 &source, &sourceSize, &maxOutputSize)) {
318 kwlist, &source, &sourceSize, &maxOutputSize)) {
305 319 return NULL;
306 320 }
307 321
308 dctx = PyMem_Malloc(ZSTD_sizeof_DCtx(self->refdctx));
309 if (!dctx) {
310 PyErr_NoMemory();
311 return NULL;
312 }
313
314 ZSTD_copyDCtx(dctx, self->refdctx);
315
316 322 if (self->dict) {
317 323 dictData = self->dict->dictData;
318 324 dictSize = self->dict->dictSize;
@@ -320,12 +326,12 b' PyObject* Decompressor_decompress(ZstdDe'
320 326
321 327 if (dictData && !self->ddict) {
322 328 Py_BEGIN_ALLOW_THREADS
323 self->ddict = ZSTD_createDDict(dictData, dictSize);
329 self->ddict = ZSTD_createDDict_byReference(dictData, dictSize);
324 330 Py_END_ALLOW_THREADS
325 331
326 332 if (!self->ddict) {
327 333 PyErr_SetString(ZstdError, "could not create decompression dict");
328 goto except;
334 return NULL;
329 335 }
330 336 }
331 337
@@ -335,7 +341,7 b' PyObject* Decompressor_decompress(ZstdDe'
335 341 if (0 == maxOutputSize) {
336 342 PyErr_SetString(ZstdError, "input data invalid or missing content size "
337 343 "in frame header");
338 goto except;
344 return NULL;
339 345 }
340 346 else {
341 347 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
@@ -348,45 +354,39 b' PyObject* Decompressor_decompress(ZstdDe'
348 354 }
349 355
350 356 if (!result) {
351 goto except;
357 return NULL;
352 358 }
353 359
354 360 Py_BEGIN_ALLOW_THREADS
355 361 if (self->ddict) {
356 zresult = ZSTD_decompress_usingDDict(dctx, PyBytes_AsString(result), destCapacity,
362 zresult = ZSTD_decompress_usingDDict(self->dctx,
363 PyBytes_AsString(result), destCapacity,
357 364 source, sourceSize, self->ddict);
358 365 }
359 366 else {
360 zresult = ZSTD_decompressDCtx(dctx, PyBytes_AsString(result), destCapacity, source, sourceSize);
367 zresult = ZSTD_decompressDCtx(self->dctx,
368 PyBytes_AsString(result), destCapacity, source, sourceSize);
361 369 }
362 370 Py_END_ALLOW_THREADS
363 371
364 372 if (ZSTD_isError(zresult)) {
365 373 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
366 goto except;
374 Py_DECREF(result);
375 return NULL;
367 376 }
368 377 else if (decompressedSize && zresult != decompressedSize) {
369 378 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
370 379 zresult, decompressedSize);
371 goto except;
380 Py_DECREF(result);
381 return NULL;
372 382 }
373 383 else if (zresult < destCapacity) {
374 384 if (_PyBytes_Resize(&result, zresult)) {
375 goto except;
385 Py_DECREF(result);
386 return NULL;
376 387 }
377 388 }
378 389
379 goto finally;
380
381 except:
382 Py_DecRef(result);
383 result = NULL;
384
385 finally:
386 if (dctx) {
387 PyMem_FREE(dctx);
388 }
389
390 390 return result;
391 391 }
392 392
@@ -401,22 +401,19 b' PyDoc_STRVAR(Decompressor_decompressobj_'
401 401 );
402 402
403 403 static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self) {
404 ZstdDecompressionObj* result = PyObject_New(ZstdDecompressionObj, &ZstdDecompressionObjType);
404 ZstdDecompressionObj* result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL);
405 405 if (!result) {
406 406 return NULL;
407 407 }
408 408
409 result->dstream = DStream_from_ZstdDecompressor(self);
410 if (!result->dstream) {
411 Py_DecRef((PyObject*)result);
409 if (0 != init_dstream(self)) {
410 Py_DECREF(result);
412 411 return NULL;
413 412 }
414 413
415 414 result->decompressor = self;
416 415 Py_INCREF(result->decompressor);
417 416
418 result->finished = 0;
419
420 417 return result;
421 418 }
422 419
@@ -455,8 +452,8 b' static ZstdDecompressorIterator* Decompr'
455 452 ZstdDecompressorIterator* result;
456 453 size_t skipBytes = 0;
457 454
458 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk", kwlist, &reader,
459 &inSize, &outSize, &skipBytes)) {
455 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_from", kwlist,
456 &reader, &inSize, &outSize, &skipBytes)) {
460 457 return NULL;
461 458 }
462 459
@@ -466,18 +463,11 b' static ZstdDecompressorIterator* Decompr'
466 463 return NULL;
467 464 }
468 465
469 result = PyObject_New(ZstdDecompressorIterator, &ZstdDecompressorIteratorType);
466 result = (ZstdDecompressorIterator*)PyObject_CallObject((PyObject*)&ZstdDecompressorIteratorType, NULL);
470 467 if (!result) {
471 468 return NULL;
472 469 }
473 470
474 result->decompressor = NULL;
475 result->reader = NULL;
476 result->buffer = NULL;
477 result->dstream = NULL;
478 result->input.src = NULL;
479 result->output.dst = NULL;
480
481 471 if (PyObject_HasAttrString(reader, "read")) {
482 472 result->reader = reader;
483 473 Py_INCREF(result->reader);
@@ -494,8 +484,6 b' static ZstdDecompressorIterator* Decompr'
494 484 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
495 485 goto except;
496 486 }
497
498 result->bufferOffset = 0;
499 487 }
500 488 else {
501 489 PyErr_SetString(PyExc_ValueError,
@@ -510,8 +498,7 b' static ZstdDecompressorIterator* Decompr'
510 498 result->outSize = outSize;
511 499 result->skipBytes = skipBytes;
512 500
513 result->dstream = DStream_from_ZstdDecompressor(self);
514 if (!result->dstream) {
501 if (0 != init_dstream(self)) {
515 502 goto except;
516 503 }
517 504
@@ -520,33 +507,18 b' static ZstdDecompressorIterator* Decompr'
520 507 PyErr_NoMemory();
521 508 goto except;
522 509 }
523 result->input.size = 0;
524 result->input.pos = 0;
525
526 result->output.dst = NULL;
527 result->output.size = 0;
528 result->output.pos = 0;
529
530 result->readCount = 0;
531 result->finishedInput = 0;
532 result->finishedOutput = 0;
533 510
534 511 goto finally;
535 512
536 513 except:
537 if (result->reader) {
538 Py_DECREF(result->reader);
539 result->reader = NULL;
540 }
514 Py_CLEAR(result->reader);
541 515
542 516 if (result->buffer) {
543 517 PyBuffer_Release(result->buffer);
544 Py_DECREF(result->buffer);
545 result->buffer = NULL;
518 Py_CLEAR(result->buffer);
546 519 }
547 520
548 Py_DECREF(result);
549 result = NULL;
521 Py_CLEAR(result);
550 522
551 523 finally:
552 524
@@ -577,7 +549,8 b' static ZstdDecompressionWriter* Decompre'
577 549 size_t outSize = ZSTD_DStreamOutSize();
578 550 ZstdDecompressionWriter* result;
579 551
580 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k", kwlist, &writer, &outSize)) {
552 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:write_to", kwlist,
553 &writer, &outSize)) {
581 554 return NULL;
582 555 }
583 556
@@ -586,7 +559,7 b' static ZstdDecompressionWriter* Decompre'
586 559 return NULL;
587 560 }
588 561
589 result = PyObject_New(ZstdDecompressionWriter, &ZstdDecompressionWriterType);
562 result = (ZstdDecompressionWriter*)PyObject_CallObject((PyObject*)&ZstdDecompressionWriterType, NULL);
590 563 if (!result) {
591 564 return NULL;
592 565 }
@@ -599,8 +572,939 b' static ZstdDecompressionWriter* Decompre'
599 572
600 573 result->outSize = outSize;
601 574
602 result->entered = 0;
603 result->dstream = NULL;
575 return result;
576 }
577
578 PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
579 "Decompress a series of chunks using the content dictionary chaining technique\n"
580 );
581
582 static PyObject* Decompressor_decompress_content_dict_chain(PyObject* self, PyObject* args, PyObject* kwargs) {
583 static char* kwlist[] = {
584 "frames",
585 NULL
586 };
587
588 PyObject* chunks;
589 Py_ssize_t chunksLen;
590 Py_ssize_t chunkIndex;
591 char parity = 0;
592 PyObject* chunk;
593 char* chunkData;
594 Py_ssize_t chunkSize;
595 ZSTD_DCtx* dctx = NULL;
596 size_t zresult;
597 ZSTD_frameParams frameParams;
598 void* buffer1 = NULL;
599 size_t buffer1Size = 0;
600 size_t buffer1ContentSize = 0;
601 void* buffer2 = NULL;
602 size_t buffer2Size = 0;
603 size_t buffer2ContentSize = 0;
604 void* destBuffer = NULL;
605 PyObject* result = NULL;
606
607 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
608 kwlist, &PyList_Type, &chunks)) {
609 return NULL;
610 }
611
612 chunksLen = PyList_Size(chunks);
613 if (!chunksLen) {
614 PyErr_SetString(PyExc_ValueError, "empty input chain");
615 return NULL;
616 }
617
618 /* The first chunk should not be using a dictionary. We handle it specially. */
619 chunk = PyList_GetItem(chunks, 0);
620 if (!PyBytes_Check(chunk)) {
621 PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes");
622 return NULL;
623 }
624
625 /* We require that all chunks be zstd frames and that they have content size set. */
626 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
627 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
628 if (ZSTD_isError(zresult)) {
629 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
630 return NULL;
631 }
632 else if (zresult) {
633 PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame");
634 return NULL;
635 }
636
637 if (0 == frameParams.frameContentSize) {
638 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
639 return NULL;
640 }
641
642 dctx = ZSTD_createDCtx();
643 if (!dctx) {
644 PyErr_NoMemory();
645 goto finally;
646 }
647
648 buffer1Size = frameParams.frameContentSize;
649 buffer1 = PyMem_Malloc(buffer1Size);
650 if (!buffer1) {
651 goto finally;
652 }
653
654 Py_BEGIN_ALLOW_THREADS
655 zresult = ZSTD_decompressDCtx(dctx, buffer1, buffer1Size, chunkData, chunkSize);
656 Py_END_ALLOW_THREADS
657 if (ZSTD_isError(zresult)) {
658 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
659 goto finally;
660 }
661
662 buffer1ContentSize = zresult;
663
664 /* Special case of a simple chain. */
665 if (1 == chunksLen) {
666 result = PyBytes_FromStringAndSize(buffer1, buffer1Size);
667 goto finally;
668 }
669
670 /* This should ideally look at next chunk. But this is slightly simpler. */
671 buffer2Size = frameParams.frameContentSize;
672 buffer2 = PyMem_Malloc(buffer2Size);
673 if (!buffer2) {
674 goto finally;
675 }
676
677 /* For each subsequent chunk, use the previous fulltext as a content dictionary.
678 Our strategy is to have 2 buffers. One holds the previous fulltext (to be
679 used as a content dictionary) and the other holds the new fulltext. The
680 buffers grow when needed but never decrease in size. This limits the
681 memory allocator overhead.
682 */
683 for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
684 chunk = PyList_GetItem(chunks, chunkIndex);
685 if (!PyBytes_Check(chunk)) {
686 PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
687 goto finally;
688 }
689
690 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
691 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
692 if (ZSTD_isError(zresult)) {
693 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
694 goto finally;
695 }
696 else if (zresult) {
697 PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
698 goto finally;
699 }
700
701 if (0 == frameParams.frameContentSize) {
702 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
703 goto finally;
704 }
705
706 parity = chunkIndex % 2;
707
708 /* This could definitely be abstracted to reduce code duplication. */
709 if (parity) {
710 /* Resize destination buffer to hold larger content. */
711 if (buffer2Size < frameParams.frameContentSize) {
712 buffer2Size = frameParams.frameContentSize;
713 destBuffer = PyMem_Realloc(buffer2, buffer2Size);
714 if (!destBuffer) {
715 goto finally;
716 }
717 buffer2 = destBuffer;
718 }
719
720 Py_BEGIN_ALLOW_THREADS
721 zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size,
722 chunkData, chunkSize, buffer1, buffer1ContentSize);
723 Py_END_ALLOW_THREADS
724 if (ZSTD_isError(zresult)) {
725 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
726 chunkIndex, ZSTD_getErrorName(zresult));
727 goto finally;
728 }
729 buffer2ContentSize = zresult;
730 }
731 else {
732 if (buffer1Size < frameParams.frameContentSize) {
733 buffer1Size = frameParams.frameContentSize;
734 destBuffer = PyMem_Realloc(buffer1, buffer1Size);
735 if (!destBuffer) {
736 goto finally;
737 }
738 buffer1 = destBuffer;
739 }
740
741 Py_BEGIN_ALLOW_THREADS
742 zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size,
743 chunkData, chunkSize, buffer2, buffer2ContentSize);
744 Py_END_ALLOW_THREADS
745 if (ZSTD_isError(zresult)) {
746 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
747 chunkIndex, ZSTD_getErrorName(zresult));
748 goto finally;
749 }
750 buffer1ContentSize = zresult;
751 }
752 }
753
754 result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
755 parity ? buffer2ContentSize : buffer1ContentSize);
756
757 finally:
758 if (buffer2) {
759 PyMem_Free(buffer2);
760 }
761 if (buffer1) {
762 PyMem_Free(buffer1);
763 }
764
765 if (dctx) {
766 ZSTD_freeDCtx(dctx);
767 }
768
769 return result;
770 }
771
/* Describes one compressed zstd frame to decompress and its expected output size. */
typedef struct {
	/* Pointer to compressed frame data. Not owned by this struct. */
	void* sourceData;
	/* Length of compressed frame data in bytes. */
	size_t sourceSize;
	/* Expected decompressed size. 0 means unknown (resolved later from the
	   frame header by the worker). */
	unsigned long long destSize;
} FramePointer;

/* A collection of frames to decompress plus their aggregate compressed size. */
typedef struct {
	/* Array of frame descriptors. */
	FramePointer* frames;
	/* Number of entries in ``frames``. */
	Py_ssize_t framesSize;
	/* Sum of all frames' sourceSize; used to split work among threads. */
	unsigned long long compressedSize;
} FrameSources;

/* One output memory region produced by a worker, with per-frame segments. */
typedef struct {
	/* Backing memory for decompressed output. malloc()-allocated. */
	void* dest;
	/* Allocated (later trimmed to used) size of ``dest``. */
	Py_ssize_t destSize;
	/* (offset, length) records locating each frame's output within ``dest``. */
	BufferSegment* segments;
	/* Number of entries in ``segments``. */
	Py_ssize_t segmentsSize;
} DestBuffer;

/* Error codes a decompression worker can record in WorkerState.error. */
typedef enum {
	WorkerError_none = 0,
	WorkerError_zstd = 1,
	WorkerError_memory = 2,
	WorkerError_sizeMismatch = 3,
	WorkerError_unknownSize = 4,
} WorkerError;

/* Per-thread state for decompress_worker(). One instance per worker thread. */
typedef struct {
	/* Source records and length */
	FramePointer* framePointers;
	/* Which records to process. */
	Py_ssize_t startOffset;
	Py_ssize_t endOffset;
	unsigned long long totalSourceSize;

	/* Compression state and settings. */
	ZSTD_DCtx* dctx;
	ZSTD_DDict* ddict;
	int requireOutputSizes;

	/* Output storage. */
	DestBuffer* destBuffers;
	Py_ssize_t destCount;

	/* Item that error occurred on. */
	Py_ssize_t errorOffset;
	/* If an error occurred. */
	WorkerError error;
	/* result from zstd decompression operation */
	size_t zresult;
} WorkerState;
823
/*
 * Decompress frames [state->startOffset, state->endOffset] into one or more
 * DestBuffers recorded on ``state``.
 *
 * Runs without the GIL (may execute on a zstd pool thread), so it must not
 * touch any Python APIs. Errors are reported by setting state->error (and
 * errorOffset/zresult); the caller translates them into Python exceptions.
 *
 * Output memory is malloc()/calloc() allocated; ownership is later
 * transferred to BufferWithSegments instances (which then free() it) or
 * freed by the caller's cleanup on error.
 */
static void decompress_worker(WorkerState* state) {
	size_t allocationSize;
	DestBuffer* destBuffer;
	Py_ssize_t frameIndex;
	/* Index of the next segment slot within the current DestBuffer. */
	Py_ssize_t localOffset = 0;
	/* First frame index written into the current DestBuffer. */
	Py_ssize_t currentBufferStartIndex = state->startOffset;
	Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
	void* tmpBuf;
	/* Write position within the current DestBuffer's memory. */
	Py_ssize_t destOffset = 0;
	FramePointer* framePointers = state->framePointers;
	size_t zresult;
	/* NOTE(review): accumulated below but never read afterwards. */
	unsigned long long totalOutputSize = 0;

	assert(NULL == state->destBuffers);
	assert(0 == state->destCount);
	assert(state->endOffset - state->startOffset >= 0);

	/*
	 * We need to allocate a buffer to hold decompressed data. How we do this
	 * depends on what we know about the output. The following scenarios are
	 * possible:
	 *
	 * 1. All structs defining frames declare the output size.
	 * 2. The decompressed size is embedded within the zstd frame.
	 * 3. The decompressed size is not stored anywhere.
	 *
	 * For now, we only support #1 and #2.
	 */

	/* Resolve output segments. */
	for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
		FramePointer* fp = &framePointers[frameIndex];

		if (0 == fp->destSize) {
			/* Fall back to the size recorded in the frame header. */
			fp->destSize = ZSTD_getDecompressedSize(fp->sourceData, fp->sourceSize);
			if (0 == fp->destSize && state->requireOutputSizes) {
				state->error = WorkerError_unknownSize;
				state->errorOffset = frameIndex;
				return;
			}
		}

		totalOutputSize += fp->destSize;
	}

	state->destBuffers = calloc(1, sizeof(DestBuffer));
	if (NULL == state->destBuffers) {
		state->error = WorkerError_memory;
		return;
	}

	state->destCount = 1;

	destBuffer = &state->destBuffers[state->destCount - 1];

	assert(framePointers[state->startOffset].destSize > 0); /* For now. */

	/* Size the first buffer off the compressed input size (rounded up to a
	   power of 2), growing it if the first frame alone needs more. */
	allocationSize = roundpow2(state->totalSourceSize);

	if (framePointers[state->startOffset].destSize > allocationSize) {
		allocationSize = roundpow2(framePointers[state->startOffset].destSize);
	}

	destBuffer->dest = malloc(allocationSize);
	if (NULL == destBuffer->dest) {
		state->error = WorkerError_memory;
		return;
	}

	destBuffer->destSize = allocationSize;

	destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
	if (NULL == destBuffer->segments) {
		/* Caller will free state->dest as part of cleanup. */
		state->error = WorkerError_memory;
		return;
	}

	destBuffer->segmentsSize = remainingItems;

	for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
		const void* source = framePointers[frameIndex].sourceData;
		const size_t sourceSize = framePointers[frameIndex].sourceSize;
		void* dest;
		const size_t decompressedSize = framePointers[frameIndex].destSize;
		size_t destAvailable = destBuffer->destSize - destOffset;

		assert(decompressedSize > 0); /* For now. */

		/*
		 * Not enough space in current buffer. Finish current before and allocate and
		 * switch to a new one.
		 */
		if (decompressedSize > destAvailable) {
			/*
			 * Shrinking the destination buffer is optional. But it should be cheap,
			 * so we just do it.
			 */
			if (destAvailable) {
				tmpBuf = realloc(destBuffer->dest, destOffset);
				if (NULL == tmpBuf) {
					state->error = WorkerError_memory;
					return;
				}

				destBuffer->dest = tmpBuf;
				destBuffer->destSize = destOffset;
			}

			/* Truncate segments buffer. */
			tmpBuf = realloc(destBuffer->segments,
				(frameIndex - currentBufferStartIndex) * sizeof(BufferSegment));
			if (NULL == tmpBuf) {
				state->error = WorkerError_memory;
				return;
			}

			destBuffer->segments = tmpBuf;
			destBuffer->segmentsSize = frameIndex - currentBufferStartIndex;

			/* Grow space for new DestBuffer. */
			tmpBuf = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
			if (NULL == tmpBuf) {
				state->error = WorkerError_memory;
				return;
			}

			state->destBuffers = tmpBuf;
			state->destCount++;

			destBuffer = &state->destBuffers[state->destCount - 1];

			/* Don't take any chances with non-NULL pointers. */
			memset(destBuffer, 0, sizeof(DestBuffer));

			allocationSize = roundpow2(state->totalSourceSize);

			if (decompressedSize > allocationSize) {
				allocationSize = roundpow2(decompressedSize);
			}

			destBuffer->dest = malloc(allocationSize);
			if (NULL == destBuffer->dest) {
				state->error = WorkerError_memory;
				return;
			}

			destBuffer->destSize = allocationSize;
			destAvailable = allocationSize;
			destOffset = 0;
			localOffset = 0;

			/* Over-allocates segments (remainingItems may exceed what this
			   buffer will hold); trimmed on the next rollover or left as-is. */
			destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
			if (NULL == destBuffer->segments) {
				state->error = WorkerError_memory;
				return;
			}

			destBuffer->segmentsSize = remainingItems;
			currentBufferStartIndex = frameIndex;
		}

		dest = (char*)destBuffer->dest + destOffset;

		if (state->ddict) {
			zresult = ZSTD_decompress_usingDDict(state->dctx, dest, decompressedSize,
				source, sourceSize, state->ddict);
		}
		else {
			zresult = ZSTD_decompressDCtx(state->dctx, dest, decompressedSize,
				source, sourceSize);
		}

		if (ZSTD_isError(zresult)) {
			state->error = WorkerError_zstd;
			state->zresult = zresult;
			state->errorOffset = frameIndex;
			return;
		}
		else if (zresult != decompressedSize) {
			/* Declared/expected size did not match what zstd produced. */
			state->error = WorkerError_sizeMismatch;
			state->zresult = zresult;
			state->errorOffset = frameIndex;
			return;
		}

		destBuffer->segments[localOffset].offset = destOffset;
		destBuffer->segments[localOffset].length = decompressedSize;
		destOffset += zresult;
		localOffset++;
		remainingItems--;
	}

	/* Trim the final buffer down to the bytes actually written. */
	if (destBuffer->destSize > destOffset) {
		tmpBuf = realloc(destBuffer->dest, destOffset);
		if (NULL == tmpBuf) {
			state->error = WorkerError_memory;
			return;
		}

		destBuffer->dest = tmpBuf;
		destBuffer->destSize = destOffset;
	}
}
1028
/*
 * Decompress a set of frames, optionally in parallel, returning a
 * ZstdBufferWithSegmentsCollection holding all output.
 *
 * Splits frames among up to ``threadCount`` workers by roughly equal
 * compressed byte counts, runs decompress_worker() for each (via a zstd
 * thread pool when threadCount > 1), then converts each worker's malloc'd
 * DestBuffers into BufferWithSegments instances, transferring memory
 * ownership to them.
 *
 * Returns NULL with a Python exception set on any failure.
 */
ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames,
	unsigned int threadCount) {
	void* dictData = NULL;
	size_t dictSize = 0;
	Py_ssize_t i = 0;
	int errored = 0;
	Py_ssize_t segmentsCount;
	ZstdBufferWithSegments* bws = NULL;
	PyObject* resultArg = NULL;
	Py_ssize_t resultIndex;
	ZstdBufferWithSegmentsCollection* result = NULL;
	FramePointer* framePointers = frames->frames;
	/* Compressed bytes accumulated for the worker currently being filled. */
	unsigned long long workerBytes = 0;
	int currentThread = 0;
	Py_ssize_t workerStartOffset = 0;
	POOL_ctx* pool = NULL;
	WorkerState* workerStates = NULL;
	unsigned long long bytesPerWorker;

	/* Caller should normalize 0 and negative values to 1 or larger. */
	assert(threadCount >= 1);

	/* More threads than inputs makes no sense under any conditions. */
	threadCount = frames->framesSize < threadCount ? (unsigned int)frames->framesSize
												   : threadCount;

	/* TODO lower thread count if input size is too small and threads would just
	   add overhead. */

	if (decompressor->dict) {
		dictData = decompressor->dict->dictData;
		dictSize = decompressor->dict->dictSize;
	}

	/* Lazily create (and cache on the decompressor) a digested dictionary
	   shared by all workers. */
	if (dictData && !decompressor->ddict) {
		Py_BEGIN_ALLOW_THREADS
		decompressor->ddict = ZSTD_createDDict_byReference(dictData, dictSize);
		Py_END_ALLOW_THREADS

		if (!decompressor->ddict) {
			PyErr_SetString(ZstdError, "could not create decompression dict");
			return NULL;
		}
	}

	/* If threadCount==1, we don't start a thread pool. But we do leverage the
	   same API for dispatching work. */
	workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
	if (NULL == workerStates) {
		PyErr_NoMemory();
		goto finally;
	}

	memset(workerStates, 0, threadCount * sizeof(WorkerState));

	if (threadCount > 1) {
		pool = POOL_create(threadCount, 1);
		if (NULL == pool) {
			PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
			goto finally;
		}
	}

	bytesPerWorker = frames->compressedSize / threadCount;

	for (i = 0; i < threadCount; i++) {
		workerStates[i].dctx = ZSTD_createDCtx();
		if (NULL == workerStates[i].dctx) {
			PyErr_NoMemory();
			goto finally;
		}

		/* Seed each worker context from the decompressor's context. */
		ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx);

		workerStates[i].ddict = decompressor->ddict;
		workerStates[i].framePointers = framePointers;
		workerStates[i].requireOutputSizes = 1;
	}

	Py_BEGIN_ALLOW_THREADS
	/* There are many ways to split work among workers.

	   For now, we take a simple approach of splitting work so each worker
	   gets roughly the same number of input bytes. This will result in more
	   starvation than running N>threadCount jobs. But it avoids complications
	   around state tracking, which could involve extra locking.
	*/
	for (i = 0; i < frames->framesSize; i++) {
		workerBytes += frames->frames[i].sourceSize;

		/*
		 * The last worker/thread needs to handle all remaining work. Don't
		 * trigger it prematurely. Defer to the block outside of the loop.
		 * (But still process this loop so workerBytes is correct.)
		 */
		if (currentThread == threadCount - 1) {
			continue;
		}

		if (workerBytes >= bytesPerWorker) {
			workerStates[currentThread].startOffset = workerStartOffset;
			workerStates[currentThread].endOffset = i;
			workerStates[currentThread].totalSourceSize = workerBytes;

			if (threadCount > 1) {
				POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
			}
			else {
				decompress_worker(&workerStates[currentThread]);
			}
			currentThread++;
			workerStartOffset = i + 1;
			workerBytes = 0;
		}
	}

	/* Dispatch whatever is left over to the final worker. */
	if (workerBytes) {
		workerStates[currentThread].startOffset = workerStartOffset;
		workerStates[currentThread].endOffset = frames->framesSize - 1;
		workerStates[currentThread].totalSourceSize = workerBytes;

		if (threadCount > 1) {
			POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
		}
		else {
			decompress_worker(&workerStates[currentThread]);
		}
	}

	/* POOL_free() waits for queued work to complete. */
	if (threadCount > 1) {
		POOL_free(pool);
		pool = NULL;
	}
	Py_END_ALLOW_THREADS

	/* Translate the first worker error (if any) into a Python exception. */
	for (i = 0; i < threadCount; i++) {
		switch (workerStates[i].error) {
		case WorkerError_none:
			break;

		case WorkerError_zstd:
			PyErr_Format(ZstdError, "error decompressing item %zd: %s",
				workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
			errored = 1;
			break;

		case WorkerError_memory:
			PyErr_NoMemory();
			errored = 1;
			break;

		case WorkerError_sizeMismatch:
			PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %llu",
				workerStates[i].errorOffset, workerStates[i].zresult,
				framePointers[workerStates[i].errorOffset].destSize);
			errored = 1;
			break;

		case WorkerError_unknownSize:
			PyErr_Format(PyExc_ValueError, "could not determine decompressed size of item %zd",
				workerStates[i].errorOffset);
			errored = 1;
			break;

		default:
			PyErr_Format(ZstdError, "unhandled error type: %d; this is a bug",
				workerStates[i].error);
			errored = 1;
			break;
		}

		if (errored) {
			break;
		}
	}

	if (errored) {
		goto finally;
	}

	segmentsCount = 0;
	for (i = 0; i < threadCount; i++) {
		segmentsCount += workerStates[i].destCount;
	}

	resultArg = PyTuple_New(segmentsCount);
	if (NULL == resultArg) {
		goto finally;
	}

	resultIndex = 0;

	for (i = 0; i < threadCount; i++) {
		Py_ssize_t bufferIndex;
		WorkerState* state = &workerStates[i];

		for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
			DestBuffer* destBuffer = &state->destBuffers[bufferIndex];

			bws = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
				destBuffer->segments, destBuffer->segmentsSize);
			if (NULL == bws) {
				goto finally;
			}

			/*
			 * Memory for buffer and segments was allocated using malloc() in worker
			 * and the memory is transferred to the BufferWithSegments instance. So
			 * tell instance to use free() and NULL the reference in the state struct
			 * so it isn't freed below.
			 */
			bws->useFree = 1;
			destBuffer->dest = NULL;
			destBuffer->segments = NULL;

			/* PyTuple_SET_ITEM steals the reference to bws. */
			PyTuple_SET_ITEM(resultArg, resultIndex++, (PyObject*)bws);
		}
	}

	result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
		(PyObject*)&ZstdBufferWithSegmentsCollectionType, resultArg);

finally:
	Py_CLEAR(resultArg);

	if (workerStates) {
		for (i = 0; i < threadCount; i++) {
			Py_ssize_t bufferIndex;
			WorkerState* state = &workerStates[i];

			if (state->dctx) {
				ZSTD_freeDCtx(state->dctx);
			}

			for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
				if (state->destBuffers) {
					/*
					 * Will be NULL if memory transferred to a BufferWithSegments.
					 * Otherwise it is left over after an error occurred.
					 */
					free(state->destBuffers[bufferIndex].dest);
					free(state->destBuffers[bufferIndex].segments);
				}
			}

			free(state->destBuffers);
		}

		PyMem_Free(workerStates);
	}

	/* NOTE(review): pool is NULL here on success paths; presumably POOL_free
	   tolerates NULL — confirm against zstd's pool.c. */
	POOL_free(pool);

	return result;
}
1284
1285 PyDoc_STRVAR(Decompressor_multi_decompress_to_buffer__doc__,
1286 "Decompress multiple frames to output buffers\n"
1287 "\n"
1288 "Receives a ``BufferWithSegments``, a ``BufferWithSegmentsCollection`` or a\n"
1289 "list of bytes-like objects. Each item in the passed collection should be a\n"
1290 "compressed zstd frame.\n"
1291 "\n"
1292 "Unless ``decompressed_sizes`` is specified, the content size *must* be\n"
1293 "written into the zstd frame header. If ``decompressed_sizes`` is specified,\n"
1294 "it is an object conforming to the buffer protocol that represents an array\n"
1295 "of 64-bit unsigned integers in the machine's native format. Specifying\n"
1296 "``decompressed_sizes`` avoids a pre-scan of each frame to determine its\n"
1297 "output size.\n"
1298 "\n"
1299 "Returns a ``BufferWithSegmentsCollection`` containing the decompressed\n"
1300 "data. All decompressed data is allocated in a single memory buffer. The\n"
1301 "``BufferWithSegments`` instance tracks which objects are at which offsets\n"
1302 "and their respective lengths.\n"
1303 "\n"
1304 "The ``threads`` argument controls how many threads to use for operations.\n"
1305 "Negative values will use the same number of threads as logical CPUs on the\n"
1306 "machine.\n"
1307 );
1308
1309 static ZstdBufferWithSegmentsCollection* Decompressor_multi_decompress_to_buffer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
1310 static char* kwlist[] = {
1311 "frames",
1312 "decompressed_sizes",
1313 "threads",
1314 NULL
1315 };
1316
1317 PyObject* frames;
1318 Py_buffer frameSizes;
1319 int threads = 0;
1320 Py_ssize_t frameCount;
1321 Py_buffer* frameBuffers = NULL;
1322 FramePointer* framePointers = NULL;
1323 unsigned long long* frameSizesP = NULL;
1324 unsigned long long totalInputSize = 0;
1325 FrameSources frameSources;
1326 ZstdBufferWithSegmentsCollection* result = NULL;
1327 Py_ssize_t i;
1328
1329 memset(&frameSizes, 0, sizeof(frameSizes));
1330
1331 #if PY_MAJOR_VERSION >= 3
1332 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|y*i:multi_decompress_to_buffer",
1333 #else
1334 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s*i:multi_decompress_to_buffer",
1335 #endif
1336 kwlist, &frames, &frameSizes, &threads)) {
1337 return NULL;
1338 }
1339
1340 if (frameSizes.buf) {
1341 if (!PyBuffer_IsContiguous(&frameSizes, 'C') || frameSizes.ndim > 1) {
1342 PyErr_SetString(PyExc_ValueError, "decompressed_sizes buffer should be contiguous and have a single dimension");
1343 goto finally;
1344 }
1345
1346 frameSizesP = (unsigned long long*)frameSizes.buf;
1347 }
1348
1349 if (threads < 0) {
1350 threads = cpu_count();
1351 }
1352
1353 if (threads < 2) {
1354 threads = 1;
1355 }
1356
1357 if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsType)) {
1358 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)frames;
1359 frameCount = buffer->segmentCount;
1360
1361 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1362 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1363 frameCount * sizeof(unsigned long long), frameSizes.len);
1364 goto finally;
1365 }
1366
1367 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1368 if (!framePointers) {
1369 PyErr_NoMemory();
1370 goto finally;
1371 }
1372
1373 for (i = 0; i < frameCount; i++) {
1374 void* sourceData;
1375 unsigned long long sourceSize;
1376 unsigned long long decompressedSize = 0;
1377
1378 if (buffer->segments[i].offset + buffer->segments[i].length > buffer->dataSize) {
1379 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", i);
1380 goto finally;
1381 }
1382
1383 sourceData = (char*)buffer->data + buffer->segments[i].offset;
1384 sourceSize = buffer->segments[i].length;
1385 totalInputSize += sourceSize;
1386
1387 if (frameSizesP) {
1388 decompressedSize = frameSizesP[i];
1389 }
1390
1391 framePointers[i].sourceData = sourceData;
1392 framePointers[i].sourceSize = sourceSize;
1393 framePointers[i].destSize = decompressedSize;
1394 }
1395 }
1396 else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) {
1397 Py_ssize_t offset = 0;
1398 ZstdBufferWithSegments* buffer;
1399 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)frames;
1400
1401 frameCount = BufferWithSegmentsCollection_length(collection);
1402
1403 if (frameSizes.buf && frameSizes.len != frameCount) {
1404 PyErr_Format(PyExc_ValueError,
1405 "decompressed_sizes size mismatch; expected %zd; got %zd",
1406 frameCount * sizeof(unsigned long long), frameSizes.len);
1407 goto finally;
1408 }
1409
1410 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1411 if (NULL == framePointers) {
1412 PyErr_NoMemory();
1413 goto finally;
1414 }
1415
1416 /* Iterate the data structure directly because it is faster. */
1417 for (i = 0; i < collection->bufferCount; i++) {
1418 Py_ssize_t segmentIndex;
1419 buffer = collection->buffers[i];
1420
1421 for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) {
1422 if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) {
1423 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area",
1424 offset);
1425 goto finally;
1426 }
1427
1428 totalInputSize += buffer->segments[segmentIndex].length;
1429
1430 framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset;
1431 framePointers[offset].sourceSize = buffer->segments[segmentIndex].length;
1432 framePointers[offset].destSize = frameSizesP ? frameSizesP[offset] : 0;
1433
1434 offset++;
1435 }
1436 }
1437 }
1438 else if (PyList_Check(frames)) {
1439 frameCount = PyList_GET_SIZE(frames);
1440
1441 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1442 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1443 frameCount * sizeof(unsigned long long), frameSizes.len);
1444 goto finally;
1445 }
1446
1447 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1448 if (!framePointers) {
1449 PyErr_NoMemory();
1450 goto finally;
1451 }
1452
1453 /*
1454 * It is not clear whether Py_buffer.buf is still valid after
1455 * PyBuffer_Release. So, we hold a reference to all Py_buffer instances
1456 * for the duration of the operation.
1457 */
1458 frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer));
1459 if (NULL == frameBuffers) {
1460 PyErr_NoMemory();
1461 goto finally;
1462 }
1463
1464 memset(frameBuffers, 0, frameCount * sizeof(Py_buffer));
1465
1466 /* Do a pass to assemble info about our input buffers and output sizes. */
1467 for (i = 0; i < frameCount; i++) {
1468 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i),
1469 &frameBuffers[i], PyBUF_CONTIG_RO)) {
1470 PyErr_Clear();
1471 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1472 goto finally;
1473 }
1474
1475 totalInputSize += frameBuffers[i].len;
1476
1477 framePointers[i].sourceData = frameBuffers[i].buf;
1478 framePointers[i].sourceSize = frameBuffers[i].len;
1479 framePointers[i].destSize = frameSizesP ? frameSizesP[i] : 0;
1480 }
1481 }
1482 else {
1483 PyErr_SetString(PyExc_TypeError, "argument must be list or BufferWithSegments");
1484 goto finally;
1485 }
1486
1487 /* We now have an array with info about our inputs and outputs. Feed it into
1488 our generic decompression function. */
1489 frameSources.frames = framePointers;
1490 frameSources.framesSize = frameCount;
1491 frameSources.compressedSize = totalInputSize;
1492
1493 result = decompress_from_framesources(self, &frameSources, threads);
1494
1495 finally:
1496 if (frameSizes.buf) {
1497 PyBuffer_Release(&frameSizes);
1498 }
1499 PyMem_Free(framePointers);
1500
1501 if (frameBuffers) {
1502 for (i = 0; i < frameCount; i++) {
1503 PyBuffer_Release(&frameBuffers[i]);
1504 }
1505
1506 PyMem_Free(frameBuffers);
1507 }
604 1508
605 1509 return result;
606 1510 }
@@ -616,6 +1520,10 b' static PyMethodDef Decompressor_methods['
616 1520 Decompressor_read_from__doc__ },
617 1521 { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS,
618 1522 Decompressor_write_to__doc__ },
1523 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
1524 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
1525 { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer,
1526 METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ },
619 1527 { NULL, NULL }
620 1528 };
621 1529
@@ -26,11 +26,6 b' static void ZstdDecompressorIterator_dea'
26 26 self->buffer = NULL;
27 27 }
28 28
29 if (self->dstream) {
30 ZSTD_freeDStream(self->dstream);
31 self->dstream = NULL;
32 }
33
34 29 if (self->input.src) {
35 30 PyMem_Free((void*)self->input.src);
36 31 self->input.src = NULL;
@@ -50,6 +45,8 b' static DecompressorIteratorResult read_d'
50 45 DecompressorIteratorResult result;
51 46 size_t oldInputPos = self->input.pos;
52 47
48 assert(self->decompressor->dstream);
49
53 50 result.chunk = NULL;
54 51
55 52 chunk = PyBytes_FromStringAndSize(NULL, self->outSize);
@@ -63,7 +60,7 b' static DecompressorIteratorResult read_d'
63 60 self->output.pos = 0;
64 61
65 62 Py_BEGIN_ALLOW_THREADS
66 zresult = ZSTD_decompressStream(self->dstream, &self->output, &self->input);
63 zresult = ZSTD_decompressStream(self->decompressor->dstream, &self->output, &self->input);
67 64 Py_END_ALLOW_THREADS
68 65
69 66 /* We're done with the pointer. Nullify to prevent anyone from getting a
@@ -160,7 +157,7 b' read_from_source:'
160 157 PyErr_SetString(PyExc_ValueError,
161 158 "skip_bytes larger than first input chunk; "
162 159 "this scenario is currently unsupported");
163 Py_DecRef(readResult);
160 Py_XDECREF(readResult);
164 161 return NULL;
165 162 }
166 163
@@ -179,7 +176,7 b' read_from_source:'
179 176 else if (!self->readCount) {
180 177 self->finishedInput = 1;
181 178 self->finishedOutput = 1;
182 Py_DecRef(readResult);
179 Py_XDECREF(readResult);
183 180 PyErr_SetString(PyExc_StopIteration, "empty input");
184 181 return NULL;
185 182 }
@@ -188,7 +185,7 b' read_from_source:'
188 185 }
189 186
190 187 /* We've copied the data managed by memory. Discard the Python object. */
191 Py_DecRef(readResult);
188 Py_XDECREF(readResult);
192 189 }
193 190
194 191 result = read_decompressor_iterator(self);
@@ -8,20 +8,27 b''
8 8
9 9 #define PY_SSIZE_T_CLEAN
10 10 #include <Python.h>
11 #include "structmember.h"
11 12
12 13 #define ZSTD_STATIC_LINKING_ONLY
13 14 #define ZDICT_STATIC_LINKING_ONLY
14 15 #include "mem.h"
15 16 #include "zstd.h"
16 17 #include "zdict.h"
18 #include "zstdmt_compress.h"
17 19
18 #define PYTHON_ZSTANDARD_VERSION "0.6.0"
20 #define PYTHON_ZSTANDARD_VERSION "0.8.1"
19 21
20 22 typedef enum {
21 23 compressorobj_flush_finish,
22 24 compressorobj_flush_block,
23 25 } CompressorObj_Flush;
24 26
27 /*
28 Represents a CompressionParameters type.
29
30 This type is basically a wrapper around ZSTD_compressionParameters.
31 */
25 32 typedef struct {
26 33 PyObject_HEAD
27 34 unsigned windowLog;
@@ -35,34 +42,70 b' typedef struct {'
35 42
36 43 extern PyTypeObject CompressionParametersType;
37 44
45 /*
46 Represents a FrameParameters type.
47
48 This type is basically a wrapper around ZSTD_frameParams.
49 */
38 50 typedef struct {
39 51 PyObject_HEAD
40 unsigned selectivityLevel;
41 int compressionLevel;
42 unsigned notificationLevel;
52 unsigned long long frameContentSize;
53 unsigned windowSize;
43 54 unsigned dictID;
44 } DictParametersObject;
55 char checksumFlag;
56 } FrameParametersObject;
57
58 extern PyTypeObject FrameParametersType;
45 59
46 extern PyTypeObject DictParametersType;
60 /*
61 Represents a ZstdCompressionDict type.
47 62
63 Instances hold data used for a zstd compression dictionary.
64 */
48 65 typedef struct {
49 66 PyObject_HEAD
50 67
68 /* Pointer to dictionary data. Owned by self. */
51 69 void* dictData;
70 /* Size of dictionary data. */
52 71 size_t dictSize;
72 /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */
73 unsigned k;
74 /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */
75 unsigned d;
53 76 } ZstdCompressionDict;
54 77
55 78 extern PyTypeObject ZstdCompressionDictType;
56 79
80 /*
81 Represents a ZstdCompressor type.
82 */
57 83 typedef struct {
58 84 PyObject_HEAD
59 85
86 /* Configured compression level. Should be always set. */
60 87 int compressionLevel;
88 /* Number of threads to use for operations. */
89 unsigned int threads;
90 /* Pointer to compression dictionary to use. NULL if not using dictionary
91 compression. */
61 92 ZstdCompressionDict* dict;
93 /* Compression context to use. Populated during object construction. NULL
94 if using multi-threaded compression. */
62 95 ZSTD_CCtx* cctx;
96 /* Multi-threaded compression context to use. Populated during object
97 construction. NULL if not using multi-threaded compression. */
98 ZSTDMT_CCtx* mtcctx;
99 /* Digest compression dictionary. NULL initially. Populated on first use. */
63 100 ZSTD_CDict* cdict;
101 /* Low-level compression parameter control. NULL unless passed to
102 constructor. Takes precedence over `compressionLevel` if defined. */
64 103 CompressionParametersObject* cparams;
104 /* Controls zstd frame options. */
65 105 ZSTD_frameParameters fparams;
106 /* Holds state for streaming compression. Shared across all invocation.
107 Populated on first use. */
108 ZSTD_CStream* cstream;
66 109 } ZstdCompressor;
67 110
68 111 extern PyTypeObject ZstdCompressorType;
@@ -71,7 +114,6 b' typedef struct {'
71 114 PyObject_HEAD
72 115
73 116 ZstdCompressor* compressor;
74 ZSTD_CStream* cstream;
75 117 ZSTD_outBuffer output;
76 118 int finished;
77 119 } ZstdCompressionObj;
@@ -85,7 +127,6 b' typedef struct {'
85 127 PyObject* writer;
86 128 Py_ssize_t sourceSize;
87 129 size_t outSize;
88 ZSTD_CStream* cstream;
89 130 int entered;
90 131 } ZstdCompressionWriter;
91 132
@@ -102,7 +143,6 b' typedef struct {'
102 143 size_t inSize;
103 144 size_t outSize;
104 145
105 ZSTD_CStream* cstream;
106 146 ZSTD_inBuffer input;
107 147 ZSTD_outBuffer output;
108 148 int finishedOutput;
@@ -115,10 +155,11 b' extern PyTypeObject ZstdCompressorIterat'
115 155 typedef struct {
116 156 PyObject_HEAD
117 157
118 ZSTD_DCtx* refdctx;
158 ZSTD_DCtx* dctx;
119 159
120 160 ZstdCompressionDict* dict;
121 161 ZSTD_DDict* ddict;
162 ZSTD_DStream* dstream;
122 163 } ZstdDecompressor;
123 164
124 165 extern PyTypeObject ZstdDecompressorType;
@@ -127,7 +168,6 b' typedef struct {'
127 168 PyObject_HEAD
128 169
129 170 ZstdDecompressor* decompressor;
130 ZSTD_DStream* dstream;
131 171 int finished;
132 172 } ZstdDecompressionObj;
133 173
@@ -139,7 +179,6 b' typedef struct {'
139 179 ZstdDecompressor* decompressor;
140 180 PyObject* writer;
141 181 size_t outSize;
142 ZSTD_DStream* dstream;
143 182 int entered;
144 183 } ZstdDecompressionWriter;
145 184
@@ -155,7 +194,6 b' typedef struct {'
155 194 size_t inSize;
156 195 size_t outSize;
157 196 size_t skipBytes;
158 ZSTD_DStream* dstream;
159 197 ZSTD_inBuffer input;
160 198 ZSTD_outBuffer output;
161 199 Py_ssize_t readCount;
@@ -170,9 +208,78 b' typedef struct {'
170 208 PyObject* chunk;
171 209 } DecompressorIteratorResult;
172 210
211 typedef struct {
212 unsigned long long offset;
213 unsigned long long length;
214 } BufferSegment;
215
216 typedef struct {
217 PyObject_HEAD
218
219 PyObject* parent;
220 BufferSegment* segments;
221 Py_ssize_t segmentCount;
222 } ZstdBufferSegments;
223
224 extern PyTypeObject ZstdBufferSegmentsType;
225
226 typedef struct {
227 PyObject_HEAD
228
229 PyObject* parent;
230 void* data;
231 Py_ssize_t dataSize;
232 unsigned long long offset;
233 } ZstdBufferSegment;
234
235 extern PyTypeObject ZstdBufferSegmentType;
236
237 typedef struct {
238 PyObject_HEAD
239
240 Py_buffer parent;
241 void* data;
242 unsigned long long dataSize;
243 BufferSegment* segments;
244 Py_ssize_t segmentCount;
245 int useFree;
246 } ZstdBufferWithSegments;
247
248 extern PyTypeObject ZstdBufferWithSegmentsType;
249
250 /**
251 * An ordered collection of BufferWithSegments exposed as a squashed collection.
252 *
253 * This type provides a virtual view spanning multiple BufferWithSegments
254 * instances. It allows multiple instances to be "chained" together and
255 * exposed as a single collection. e.g. if there are 2 buffers holding
256 * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer.
257 */
258 typedef struct {
259 PyObject_HEAD
260
261 /* An array of buffers that should be exposed through this instance. */
262 ZstdBufferWithSegments** buffers;
263 /* Number of elements in buffers array. */
264 Py_ssize_t bufferCount;
265 /* Array of first offset in each buffer instance. 0th entry corresponds
266 to number of elements in the 0th buffer. 1st entry corresponds to the
267 sum of elements in 0th and 1st buffers. */
268 Py_ssize_t* firstElements;
269 } ZstdBufferWithSegmentsCollection;
270
271 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
272
173 273 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams);
174 274 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args);
275 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args);
175 276 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args);
176 ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize);
177 ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor);
277 int init_cstream(ZstdCompressor* compressor, unsigned long long sourceSize);
278 int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize);
279 int init_dstream(ZstdDecompressor* decompressor);
178 280 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
281 ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
282 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize);
283 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*);
284 int cpu_count(void);
285 size_t roundpow2(size_t);
@@ -9,6 +9,7 b' from __future__ import absolute_import'
9 9 import cffi
10 10 import distutils.ccompiler
11 11 import os
12 import re
12 13 import subprocess
13 14 import tempfile
14 15
@@ -19,17 +20,28 b" SOURCES = ['zstd/%s' % p for p in ("
19 20 'common/entropy_common.c',
20 21 'common/error_private.c',
21 22 'common/fse_decompress.c',
23 'common/pool.c',
24 'common/threading.c',
22 25 'common/xxhash.c',
23 26 'common/zstd_common.c',
24 27 'compress/fse_compress.c',
25 28 'compress/huf_compress.c',
26 29 'compress/zstd_compress.c',
30 'compress/zstdmt_compress.c',
27 31 'decompress/huf_decompress.c',
28 32 'decompress/zstd_decompress.c',
33 'dictBuilder/cover.c',
29 34 'dictBuilder/divsufsort.c',
30 35 'dictBuilder/zdict.c',
31 36 )]
32 37
38 # Headers whose preprocessed output will be fed into cdef().
39 HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
40 ('zstd.h',),
41 ('compress', 'zstdmt_compress.h'),
42 ('dictBuilder', 'zdict.h'),
43 )]
44
33 45 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
34 46 'zstd',
35 47 'zstd/common',
@@ -53,56 +65,123 b" if compiler.compiler_type == 'unix':"
53 65 args.extend([
54 66 '-E',
55 67 '-DZSTD_STATIC_LINKING_ONLY',
68 '-DZDICT_STATIC_LINKING_ONLY',
56 69 ])
57 70 elif compiler.compiler_type == 'msvc':
58 71 args = [compiler.cc]
59 72 args.extend([
60 73 '/EP',
61 74 '/DZSTD_STATIC_LINKING_ONLY',
75 '/DZDICT_STATIC_LINKING_ONLY',
62 76 ])
63 77 else:
64 78 raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
65 79
66 # zstd.h includes <stddef.h>, which is also included by cffi's boilerplate.
67 # This can lead to duplicate declarations. So we strip this include from the
68 # preprocessor invocation.
69
70 with open(os.path.join(HERE, 'zstd', 'zstd.h'), 'rb') as fh:
71 lines = [l for l in fh if not l.startswith(b'#include <stddef.h>')]
72
73 fd, input_file = tempfile.mkstemp(suffix='.h')
74 os.write(fd, b''.join(lines))
75 os.close(fd)
80 def preprocess(path):
81 with open(path, 'rb') as fh:
82 lines = []
83 for l in fh:
84 # zstd.h includes <stddef.h>, which is also included by cffi's
85 # boilerplate. This can lead to duplicate declarations. So we strip
86 # this include from the preprocessor invocation.
87 #
88 # The same things happens for including zstd.h, so give it the same
89 # treatment.
90 #
91 # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline
92 # #define in zstdmt_compress.h and results in a compiler warning. So drop
93 # the inline #define.
94 if l.startswith((b'#include <stddef.h>',
95 b'#include "zstd.h"',
96 b'#define ZSTD_STATIC_LINKING_ONLY')):
97 continue
76 98
77 args.append(input_file)
99 # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't
100 # important so just filter it out.
101 if l.startswith(b'ZSTDLIB_API'):
102 l = l[len(b'ZSTDLIB_API '):]
103
104 lines.append(l)
105
106 fd, input_file = tempfile.mkstemp(suffix='.h')
107 os.write(fd, b''.join(lines))
108 os.close(fd)
78 109
79 try:
80 process = subprocess.Popen(args, stdout=subprocess.PIPE)
81 output = process.communicate()[0]
82 ret = process.poll()
83 if ret:
84 raise Exception('preprocessor exited with error')
85 finally:
86 os.unlink(input_file)
110 try:
111 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE)
112 output = process.communicate()[0]
113 ret = process.poll()
114 if ret:
115 raise Exception('preprocessor exited with error')
87 116
88 def normalize_output():
117 return output
118 finally:
119 os.unlink(input_file)
120
121
122 def normalize_output(output):
89 123 lines = []
90 124 for line in output.splitlines():
91 125 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
92 126 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
93 127 line = line[len(b'__attribute__ ((visibility ("default"))) '):]
94 128
129 if line.startswith(b'__attribute__((deprecated('):
130 continue
131 elif b'__declspec(deprecated(' in line:
132 continue
133
95 134 lines.append(line)
96 135
97 136 return b'\n'.join(lines)
98 137
138
99 139 ffi = cffi.FFI()
140 # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning
141 # when cffi uses the function. Since we statically link against zstd, even
142 # if we use the deprecated functions it shouldn't be a huge problem.
100 143 ffi.set_source('_zstd_cffi', '''
144 #include "mem.h"
101 145 #define ZSTD_STATIC_LINKING_ONLY
102 146 #include "zstd.h"
147 #define ZDICT_STATIC_LINKING_ONLY
148 #define ZDICT_DISABLE_DEPRECATE_WARNINGS
149 #include "zdict.h"
150 #include "zstdmt_compress.h"
103 151 ''', sources=SOURCES, include_dirs=INCLUDE_DIRS)
104 152
105 ffi.cdef(normalize_output().decode('latin1'))
153 DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
154
155 sources = []
156
157 # Feed normalized preprocessor output for headers into the cdef parser.
158 for header in HEADERS:
159 preprocessed = preprocess(header)
160 sources.append(normalize_output(preprocessed))
161
162 # #define's are effectively erased as part of going through preprocessor.
163 # So perform a manual pass to re-add those to the cdef source.
164 with open(header, 'rb') as fh:
165 for line in fh:
166 line = line.strip()
167 m = DEFINE.match(line)
168 if not m:
169 continue
170
171 if m.group(1) == b'ZSTD_STATIC_LINKING_ONLY':
172 continue
173
174 # The parser doesn't like some constants with complex values.
175 if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'):
176 continue
177
178 # The ... is magic syntax by the cdef parser to resolve the
179 # value at compile time.
180 sources.append(m.group(0) + b' ...')
181
182 cdeflines = b'\n'.join(sources).splitlines()
183 cdeflines = [l for l in cdeflines if l.strip()]
184 ffi.cdef(b'\n'.join(cdeflines).decode('latin1'))
106 185
107 186 if __name__ == '__main__':
108 187 ffi.compile()
@@ -25,10 +25,15 b' if "--legacy" in sys.argv:'
25 25 # facilitate reuse in other projects.
26 26 extensions = [setup_zstd.get_c_extension(SUPPORT_LEGACY, 'zstd')]
27 27
28 install_requires = []
29
28 30 if cffi:
29 31 import make_cffi
30 32 extensions.append(make_cffi.ffi.distutils_extension())
31 33
34 # Need change in 1.8 for ffi.from_buffer() behavior.
35 install_requires.append('cffi>=1.8')
36
32 37 version = None
33 38
34 39 with open('c-ext/python-zstandard.h', 'r') as fh:
@@ -62,8 +67,10 b' setup('
62 67 'Programming Language :: Python :: 3.3',
63 68 'Programming Language :: Python :: 3.4',
64 69 'Programming Language :: Python :: 3.5',
70 'Programming Language :: Python :: 3.6',
65 71 ],
66 72 keywords='zstandard zstd compression',
67 73 ext_modules=extensions,
68 74 test_suite='tests',
75 install_requires=install_requires,
69 76 )
@@ -12,18 +12,23 b" zstd_sources = ['zstd/%s' % p for p in ("
12 12 'common/entropy_common.c',
13 13 'common/error_private.c',
14 14 'common/fse_decompress.c',
15 'common/pool.c',
16 'common/threading.c',
15 17 'common/xxhash.c',
16 18 'common/zstd_common.c',
17 19 'compress/fse_compress.c',
18 20 'compress/huf_compress.c',
19 21 'compress/zstd_compress.c',
22 'compress/zstdmt_compress.c',
20 23 'decompress/huf_decompress.c',
21 24 'decompress/zstd_decompress.c',
25 'dictBuilder/cover.c',
22 26 'dictBuilder/divsufsort.c',
23 27 'dictBuilder/zdict.c',
24 28 )]
25 29
26 30 zstd_sources_legacy = ['zstd/%s' % p for p in (
31 'deprecated/zbuff_common.c',
27 32 'deprecated/zbuff_compress.c',
28 33 'deprecated/zbuff_decompress.c',
29 34 'legacy/zstd_v01.c',
@@ -51,6 +56,7 b' zstd_includes_legacy = ['
51 56
52 57 ext_sources = [
53 58 'zstd.c',
59 'c-ext/bufferutil.c',
54 60 'c-ext/compressiondict.c',
55 61 'c-ext/compressobj.c',
56 62 'c-ext/compressor.c',
@@ -62,7 +68,7 b' ext_sources = ['
62 68 'c-ext/decompressor.c',
63 69 'c-ext/decompressoriterator.c',
64 70 'c-ext/decompressionwriter.c',
65 'c-ext/dictparams.c',
71 'c-ext/frameparams.c',
66 72 ]
67 73
68 74 zstd_depends = [
@@ -84,8 +90,13 b' def get_c_extension(support_legacy=False'
84 90
85 91 depends = [os.path.join(root, p) for p in zstd_depends]
86 92
93 extra_args = ['-DZSTD_MULTITHREAD']
94
95 if support_legacy:
96 extra_args.append('-DZSTD_LEGACY_SUPPORT=1')
97
87 98 # TODO compile with optimizations.
88 99 return Extension(name, sources,
89 100 include_dirs=include_dirs,
90 101 depends=depends,
91 extra_compile_args=["-DZSTD_LEGACY_SUPPORT=1"] if support_legacy else [])
102 extra_compile_args=extra_args)
@@ -1,4 +1,51 b''
1 import inspect
1 2 import io
3 import os
4 import types
5
6
7 def make_cffi(cls):
8 """Decorator to add CFFI versions of each test method."""
9
10 try:
11 import zstd_cffi
12 except ImportError:
13 return cls
14
15 # If CFFI version is available, dynamically construct test methods
16 # that use it.
17
18 for attr in dir(cls):
19 fn = getattr(cls, attr)
20 if not inspect.ismethod(fn) and not inspect.isfunction(fn):
21 continue
22
23 if not fn.__name__.startswith('test_'):
24 continue
25
26 name = '%s_cffi' % fn.__name__
27
28 # Replace the "zstd" symbol with the CFFI module instance. Then copy
29 # the function object and install it in a new attribute.
30 if isinstance(fn, types.FunctionType):
31 globs = dict(fn.__globals__)
32 globs['zstd'] = zstd_cffi
33 new_fn = types.FunctionType(fn.__code__, globs, name,
34 fn.__defaults__, fn.__closure__)
35 new_method = new_fn
36 else:
37 globs = dict(fn.__func__.func_globals)
38 globs['zstd'] = zstd_cffi
39 new_fn = types.FunctionType(fn.__func__.func_code, globs, name,
40 fn.__func__.func_defaults,
41 fn.__func__.func_closure)
42 new_method = types.UnboundMethodType(new_fn, fn.im_self,
43 fn.im_class)
44
45 setattr(cls, name, new_method)
46
47 return cls
48
2 49
3 50 class OpCountingBytesIO(io.BytesIO):
4 51 def __init__(self, *args, **kwargs):
@@ -13,3 +60,29 b' class OpCountingBytesIO(io.BytesIO):'
13 60 def write(self, data):
14 61 self._write_count += 1
15 62 return super(OpCountingBytesIO, self).write(data)
63
64
65 _source_files = []
66
67
68 def random_input_data():
69 """Obtain the raw content of source files.
70
71 This is used for generating "random" data to feed into fuzzing, since it is
72 faster than random content generation.
73 """
74 if _source_files:
75 return _source_files
76
77 for root, dirs, files in os.walk(os.path.dirname(__file__)):
78 dirs[:] = list(sorted(dirs))
79 for f in sorted(files):
80 try:
81 with open(os.path.join(root, f), 'rb') as fh:
82 data = fh.read()
83 if data:
84 _source_files.append(data)
85 except OSError:
86 pass
87
88 return _source_files
@@ -10,7 +10,10 b' except ImportError:'
10 10
11 11 import zstd
12 12
13 from .common import OpCountingBytesIO
13 from .common import (
14 make_cffi,
15 OpCountingBytesIO,
16 )
14 17
15 18
16 19 if sys.version_info[0] >= 3:
@@ -19,6 +22,13 b' else:'
19 22 next = lambda it: it.next()
20 23
21 24
25 def multithreaded_chunk_size(level, source_size=0):
26 params = zstd.get_compression_parameters(level, source_size)
27
28 return 1 << (params.window_log + 2)
29
30
31 @make_cffi
22 32 class TestCompressor(unittest.TestCase):
23 33 def test_level_bounds(self):
24 34 with self.assertRaises(ValueError):
@@ -28,18 +38,35 b' class TestCompressor(unittest.TestCase):'
28 38 zstd.ZstdCompressor(level=23)
29 39
30 40
41 @make_cffi
31 42 class TestCompressor_compress(unittest.TestCase):
32 def test_compress_empty(self):
33 cctx = zstd.ZstdCompressor(level=1)
34 cctx.compress(b'')
43 def test_multithreaded_unsupported(self):
44 samples = []
45 for i in range(128):
46 samples.append(b'foo' * 64)
47 samples.append(b'bar' * 64)
48
49 d = zstd.train_dictionary(8192, samples)
35 50
36 cctx = zstd.ZstdCompressor(level=22)
37 cctx.compress(b'')
51 cctx = zstd.ZstdCompressor(dict_data=d, threads=2)
52
53 with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both dictionaries and multi-threaded compression'):
54 cctx.compress(b'foo')
55
56 params = zstd.get_compression_parameters(3)
57 cctx = zstd.ZstdCompressor(compression_params=params, threads=2)
58 with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both compression parameters and multi-threaded compression'):
59 cctx.compress(b'foo')
38 60
39 61 def test_compress_empty(self):
40 62 cctx = zstd.ZstdCompressor(level=1)
41 self.assertEqual(cctx.compress(b''),
42 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
63 result = cctx.compress(b'')
64 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
65 params = zstd.get_frame_parameters(result)
66 self.assertEqual(params.content_size, 0)
67 self.assertEqual(params.window_size, 524288)
68 self.assertEqual(params.dict_id, 0)
69 self.assertFalse(params.has_checksum, 0)
43 70
44 71 # TODO should be temporary until https://github.com/facebook/zstd/issues/506
45 72 # is fixed.
@@ -59,6 +86,13 b' class TestCompressor_compress(unittest.T'
59 86 self.assertEqual(len(result), 999)
60 87 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
61 88
89 # This matches the test for read_from() below.
90 cctx = zstd.ZstdCompressor(level=1)
91 result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
92 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
93 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
94 b'\x02\x09\x00\x00\x6f')
95
62 96 def test_write_checksum(self):
63 97 cctx = zstd.ZstdCompressor(level=1)
64 98 no_checksum = cctx.compress(b'foobar')
@@ -67,6 +101,12 b' class TestCompressor_compress(unittest.T'
67 101
68 102 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
69 103
104 no_params = zstd.get_frame_parameters(no_checksum)
105 with_params = zstd.get_frame_parameters(with_checksum)
106
107 self.assertFalse(no_params.has_checksum)
108 self.assertTrue(with_params.has_checksum)
109
70 110 def test_write_content_size(self):
71 111 cctx = zstd.ZstdCompressor(level=1)
72 112 no_size = cctx.compress(b'foobar' * 256)
@@ -75,6 +115,11 b' class TestCompressor_compress(unittest.T'
75 115
76 116 self.assertEqual(len(with_size), len(no_size) + 1)
77 117
118 no_params = zstd.get_frame_parameters(no_size)
119 with_params = zstd.get_frame_parameters(with_size)
120 self.assertEqual(no_params.content_size, 0)
121 self.assertEqual(with_params.content_size, 1536)
122
78 123 def test_no_dict_id(self):
79 124 samples = []
80 125 for i in range(128):
@@ -92,6 +137,11 b' class TestCompressor_compress(unittest.T'
92 137
93 138 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
94 139
140 no_params = zstd.get_frame_parameters(no_dict_id)
141 with_params = zstd.get_frame_parameters(with_dict_id)
142 self.assertEqual(no_params.dict_id, 0)
143 self.assertEqual(with_params.dict_id, 1584102229)
144
95 145 def test_compress_dict_multiple(self):
96 146 samples = []
97 147 for i in range(128):
@@ -106,7 +156,23 b' class TestCompressor_compress(unittest.T'
106 156 for i in range(32):
107 157 cctx.compress(b'foo bar foobar foo bar foobar')
108 158
159 def test_multithreaded(self):
160 chunk_size = multithreaded_chunk_size(1)
161 source = b''.join([b'x' * chunk_size, b'y' * chunk_size])
109 162
163 cctx = zstd.ZstdCompressor(level=1, threads=2)
164 compressed = cctx.compress(source)
165
166 params = zstd.get_frame_parameters(compressed)
167 self.assertEqual(params.content_size, chunk_size * 2)
168 self.assertEqual(params.dict_id, 0)
169 self.assertFalse(params.has_checksum)
170
171 dctx = zstd.ZstdDecompressor()
172 self.assertEqual(dctx.decompress(compressed), source)
173
174
175 @make_cffi
110 176 class TestCompressor_compressobj(unittest.TestCase):
111 177 def test_compressobj_empty(self):
112 178 cctx = zstd.ZstdCompressor(level=1)
@@ -127,6 +193,12 b' class TestCompressor_compressobj(unittes'
127 193 self.assertEqual(len(result), 999)
128 194 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
129 195
196 params = zstd.get_frame_parameters(result)
197 self.assertEqual(params.content_size, 0)
198 self.assertEqual(params.window_size, 1048576)
199 self.assertEqual(params.dict_id, 0)
200 self.assertFalse(params.has_checksum)
201
130 202 def test_write_checksum(self):
131 203 cctx = zstd.ZstdCompressor(level=1)
132 204 cobj = cctx.compressobj()
@@ -135,6 +207,15 b' class TestCompressor_compressobj(unittes'
135 207 cobj = cctx.compressobj()
136 208 with_checksum = cobj.compress(b'foobar') + cobj.flush()
137 209
210 no_params = zstd.get_frame_parameters(no_checksum)
211 with_params = zstd.get_frame_parameters(with_checksum)
212 self.assertEqual(no_params.content_size, 0)
213 self.assertEqual(with_params.content_size, 0)
214 self.assertEqual(no_params.dict_id, 0)
215 self.assertEqual(with_params.dict_id, 0)
216 self.assertFalse(no_params.has_checksum)
217 self.assertTrue(with_params.has_checksum)
218
138 219 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
139 220
140 221 def test_write_content_size(self):
@@ -145,6 +226,15 b' class TestCompressor_compressobj(unittes'
145 226 cobj = cctx.compressobj(size=len(b'foobar' * 256))
146 227 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
147 228
229 no_params = zstd.get_frame_parameters(no_size)
230 with_params = zstd.get_frame_parameters(with_size)
231 self.assertEqual(no_params.content_size, 0)
232 self.assertEqual(with_params.content_size, 1536)
233 self.assertEqual(no_params.dict_id, 0)
234 self.assertEqual(with_params.dict_id, 0)
235 self.assertFalse(no_params.has_checksum)
236 self.assertFalse(with_params.has_checksum)
237
148 238 self.assertEqual(len(with_size), len(no_size) + 1)
149 239
150 240 def test_compress_after_finished(self):
@@ -186,7 +276,32 b' class TestCompressor_compressobj(unittes'
186 276 header = trailing[0:3]
187 277 self.assertEqual(header, b'\x01\x00\x00')
188 278
279 def test_multithreaded(self):
280 source = io.BytesIO()
281 source.write(b'a' * 1048576)
282 source.write(b'b' * 1048576)
283 source.write(b'c' * 1048576)
284 source.seek(0)
189 285
286 cctx = zstd.ZstdCompressor(level=1, threads=2)
287 cobj = cctx.compressobj()
288
289 chunks = []
290 while True:
291 d = source.read(8192)
292 if not d:
293 break
294
295 chunks.append(cobj.compress(d))
296
297 chunks.append(cobj.flush())
298
299 compressed = b''.join(chunks)
300
301 self.assertEqual(len(compressed), 295)
302
303
304 @make_cffi
190 305 class TestCompressor_copy_stream(unittest.TestCase):
191 306 def test_no_read(self):
192 307 source = object()
@@ -229,6 +344,12 b' class TestCompressor_copy_stream(unittes'
229 344 self.assertEqual(r, 255 * 16384)
230 345 self.assertEqual(w, 999)
231 346
347 params = zstd.get_frame_parameters(dest.getvalue())
348 self.assertEqual(params.content_size, 0)
349 self.assertEqual(params.window_size, 1048576)
350 self.assertEqual(params.dict_id, 0)
351 self.assertFalse(params.has_checksum)
352
232 353 def test_write_checksum(self):
233 354 source = io.BytesIO(b'foobar')
234 355 no_checksum = io.BytesIO()
@@ -244,6 +365,15 b' class TestCompressor_copy_stream(unittes'
244 365 self.assertEqual(len(with_checksum.getvalue()),
245 366 len(no_checksum.getvalue()) + 4)
246 367
368 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
369 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
370 self.assertEqual(no_params.content_size, 0)
371 self.assertEqual(with_params.content_size, 0)
372 self.assertEqual(no_params.dict_id, 0)
373 self.assertEqual(with_params.dict_id, 0)
374 self.assertFalse(no_params.has_checksum)
375 self.assertTrue(with_params.has_checksum)
376
247 377 def test_write_content_size(self):
248 378 source = io.BytesIO(b'foobar' * 256)
249 379 no_size = io.BytesIO()
@@ -268,6 +398,15 b' class TestCompressor_copy_stream(unittes'
268 398 self.assertEqual(len(with_size.getvalue()),
269 399 len(no_size.getvalue()) + 1)
270 400
401 no_params = zstd.get_frame_parameters(no_size.getvalue())
402 with_params = zstd.get_frame_parameters(with_size.getvalue())
403 self.assertEqual(no_params.content_size, 0)
404 self.assertEqual(with_params.content_size, 1536)
405 self.assertEqual(no_params.dict_id, 0)
406 self.assertEqual(with_params.dict_id, 0)
407 self.assertFalse(no_params.has_checksum)
408 self.assertFalse(with_params.has_checksum)
409
271 410 def test_read_write_size(self):
272 411 source = OpCountingBytesIO(b'foobarfoobar')
273 412 dest = OpCountingBytesIO()
@@ -279,6 +418,36 b' class TestCompressor_copy_stream(unittes'
279 418 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
280 419 self.assertEqual(dest._write_count, len(dest.getvalue()))
281 420
421 def test_multithreaded(self):
422 source = io.BytesIO()
423 source.write(b'a' * 1048576)
424 source.write(b'b' * 1048576)
425 source.write(b'c' * 1048576)
426 source.seek(0)
427
428 dest = io.BytesIO()
429 cctx = zstd.ZstdCompressor(threads=2)
430 r, w = cctx.copy_stream(source, dest)
431 self.assertEqual(r, 3145728)
432 self.assertEqual(w, 295)
433
434 params = zstd.get_frame_parameters(dest.getvalue())
435 self.assertEqual(params.content_size, 0)
436 self.assertEqual(params.dict_id, 0)
437 self.assertFalse(params.has_checksum)
438
439 # Writing content size and checksum works.
440 cctx = zstd.ZstdCompressor(threads=2, write_content_size=True,
441 write_checksum=True)
442 dest = io.BytesIO()
443 source.seek(0)
444 cctx.copy_stream(source, dest, size=len(source.getvalue()))
445
446 params = zstd.get_frame_parameters(dest.getvalue())
447 self.assertEqual(params.content_size, 3145728)
448 self.assertEqual(params.dict_id, 0)
449 self.assertTrue(params.has_checksum)
450
282 451
283 452 def compress(data, level):
284 453 buffer = io.BytesIO()
@@ -288,18 +457,25 b' def compress(data, level):'
288 457 return buffer.getvalue()
289 458
290 459
460 @make_cffi
291 461 class TestCompressor_write_to(unittest.TestCase):
292 462 def test_empty(self):
293 self.assertEqual(compress(b'', 1),
294 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
463 result = compress(b'', 1)
464 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
465
466 params = zstd.get_frame_parameters(result)
467 self.assertEqual(params.content_size, 0)
468 self.assertEqual(params.window_size, 524288)
469 self.assertEqual(params.dict_id, 0)
470 self.assertFalse(params.has_checksum)
295 471
296 472 def test_multiple_compress(self):
297 473 buffer = io.BytesIO()
298 474 cctx = zstd.ZstdCompressor(level=5)
299 475 with cctx.write_to(buffer) as compressor:
300 compressor.write(b'foo')
301 compressor.write(b'bar')
302 compressor.write(b'x' * 8192)
476 self.assertEqual(compressor.write(b'foo'), 0)
477 self.assertEqual(compressor.write(b'bar'), 0)
478 self.assertEqual(compressor.write(b'x' * 8192), 0)
303 479
304 480 result = buffer.getvalue()
305 481 self.assertEqual(result,
@@ -318,11 +494,23 b' class TestCompressor_write_to(unittest.T'
318 494 buffer = io.BytesIO()
319 495 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
320 496 with cctx.write_to(buffer) as compressor:
321 compressor.write(b'foo')
322 compressor.write(b'bar')
323 compressor.write(b'foo' * 16384)
497 self.assertEqual(compressor.write(b'foo'), 0)
498 self.assertEqual(compressor.write(b'bar'), 0)
499 self.assertEqual(compressor.write(b'foo' * 16384), 634)
324 500
325 501 compressed = buffer.getvalue()
502
503 params = zstd.get_frame_parameters(compressed)
504 self.assertEqual(params.content_size, 0)
505 self.assertEqual(params.window_size, 1024)
506 self.assertEqual(params.dict_id, d.dict_id())
507 self.assertFalse(params.has_checksum)
508
509 self.assertEqual(compressed[0:32],
510 b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00'
511 b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54'
512 b'\x00\x00\x18\x6f\x6f\x66\x01\x00')
513
326 514 h = hashlib.sha1(compressed).hexdigest()
327 515 self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92')
328 516
@@ -332,11 +520,18 b' class TestCompressor_write_to(unittest.T'
332 520 buffer = io.BytesIO()
333 521 cctx = zstd.ZstdCompressor(compression_params=params)
334 522 with cctx.write_to(buffer) as compressor:
335 compressor.write(b'foo')
336 compressor.write(b'bar')
337 compressor.write(b'foobar' * 16384)
523 self.assertEqual(compressor.write(b'foo'), 0)
524 self.assertEqual(compressor.write(b'bar'), 0)
525 self.assertEqual(compressor.write(b'foobar' * 16384), 0)
338 526
339 527 compressed = buffer.getvalue()
528
529 params = zstd.get_frame_parameters(compressed)
530 self.assertEqual(params.content_size, 0)
531 self.assertEqual(params.window_size, 1048576)
532 self.assertEqual(params.dict_id, 0)
533 self.assertFalse(params.has_checksum)
534
340 535 h = hashlib.sha1(compressed).hexdigest()
341 536 self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99')
342 537
@@ -344,12 +539,21 b' class TestCompressor_write_to(unittest.T'
344 539 no_checksum = io.BytesIO()
345 540 cctx = zstd.ZstdCompressor(level=1)
346 541 with cctx.write_to(no_checksum) as compressor:
347 compressor.write(b'foobar')
542 self.assertEqual(compressor.write(b'foobar'), 0)
348 543
349 544 with_checksum = io.BytesIO()
350 545 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
351 546 with cctx.write_to(with_checksum) as compressor:
352 compressor.write(b'foobar')
547 self.assertEqual(compressor.write(b'foobar'), 0)
548
549 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
550 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
551 self.assertEqual(no_params.content_size, 0)
552 self.assertEqual(with_params.content_size, 0)
553 self.assertEqual(no_params.dict_id, 0)
554 self.assertEqual(with_params.dict_id, 0)
555 self.assertFalse(no_params.has_checksum)
556 self.assertTrue(with_params.has_checksum)
353 557
354 558 self.assertEqual(len(with_checksum.getvalue()),
355 559 len(no_checksum.getvalue()) + 4)
@@ -358,12 +562,12 b' class TestCompressor_write_to(unittest.T'
358 562 no_size = io.BytesIO()
359 563 cctx = zstd.ZstdCompressor(level=1)
360 564 with cctx.write_to(no_size) as compressor:
361 compressor.write(b'foobar' * 256)
565 self.assertEqual(compressor.write(b'foobar' * 256), 0)
362 566
363 567 with_size = io.BytesIO()
364 568 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
365 569 with cctx.write_to(with_size) as compressor:
366 compressor.write(b'foobar' * 256)
570 self.assertEqual(compressor.write(b'foobar' * 256), 0)
367 571
368 572 # Source size is not known in streaming mode, so header not
369 573 # written.
@@ -373,7 +577,16 b' class TestCompressor_write_to(unittest.T'
373 577 # Declaring size will write the header.
374 578 with_size = io.BytesIO()
375 579 with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor:
376 compressor.write(b'foobar' * 256)
580 self.assertEqual(compressor.write(b'foobar' * 256), 0)
581
582 no_params = zstd.get_frame_parameters(no_size.getvalue())
583 with_params = zstd.get_frame_parameters(with_size.getvalue())
584 self.assertEqual(no_params.content_size, 0)
585 self.assertEqual(with_params.content_size, 1536)
586 self.assertEqual(no_params.dict_id, 0)
587 self.assertEqual(with_params.dict_id, 0)
588 self.assertFalse(no_params.has_checksum)
589 self.assertFalse(with_params.has_checksum)
377 590
378 591 self.assertEqual(len(with_size.getvalue()),
379 592 len(no_size.getvalue()) + 1)
@@ -390,12 +603,21 b' class TestCompressor_write_to(unittest.T'
390 603 with_dict_id = io.BytesIO()
391 604 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
392 605 with cctx.write_to(with_dict_id) as compressor:
393 compressor.write(b'foobarfoobar')
606 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
394 607
395 608 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
396 609 no_dict_id = io.BytesIO()
397 610 with cctx.write_to(no_dict_id) as compressor:
398 compressor.write(b'foobarfoobar')
611 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
612
613 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
614 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
615 self.assertEqual(no_params.content_size, 0)
616 self.assertEqual(with_params.content_size, 0)
617 self.assertEqual(no_params.dict_id, 0)
618 self.assertEqual(with_params.dict_id, d.dict_id())
619 self.assertFalse(no_params.has_checksum)
620 self.assertFalse(with_params.has_checksum)
399 621
400 622 self.assertEqual(len(with_dict_id.getvalue()),
401 623 len(no_dict_id.getvalue()) + 4)
@@ -412,9 +634,9 b' class TestCompressor_write_to(unittest.T'
412 634 cctx = zstd.ZstdCompressor(level=3)
413 635 dest = OpCountingBytesIO()
414 636 with cctx.write_to(dest, write_size=1) as compressor:
415 compressor.write(b'foo')
416 compressor.write(b'bar')
417 compressor.write(b'foobar')
637 self.assertEqual(compressor.write(b'foo'), 0)
638 self.assertEqual(compressor.write(b'bar'), 0)
639 self.assertEqual(compressor.write(b'foobar'), 0)
418 640
419 641 self.assertEqual(len(dest.getvalue()), dest._write_count)
420 642
@@ -422,15 +644,15 b' class TestCompressor_write_to(unittest.T'
422 644 cctx = zstd.ZstdCompressor(level=3)
423 645 dest = OpCountingBytesIO()
424 646 with cctx.write_to(dest) as compressor:
425 compressor.write(b'foo')
647 self.assertEqual(compressor.write(b'foo'), 0)
426 648 self.assertEqual(dest._write_count, 0)
427 compressor.flush()
649 self.assertEqual(compressor.flush(), 12)
428 650 self.assertEqual(dest._write_count, 1)
429 compressor.write(b'bar')
651 self.assertEqual(compressor.write(b'bar'), 0)
430 652 self.assertEqual(dest._write_count, 1)
431 compressor.flush()
653 self.assertEqual(compressor.flush(), 6)
432 654 self.assertEqual(dest._write_count, 2)
433 compressor.write(b'baz')
655 self.assertEqual(compressor.write(b'baz'), 0)
434 656
435 657 self.assertEqual(dest._write_count, 3)
436 658
@@ -438,10 +660,10 b' class TestCompressor_write_to(unittest.T'
438 660 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
439 661 dest = OpCountingBytesIO()
440 662 with cctx.write_to(dest) as compressor:
441 compressor.write(b'foobar' * 8192)
663 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
442 664 count = dest._write_count
443 665 offset = dest.tell()
444 compressor.flush()
666 self.assertEqual(compressor.flush(), 23)
445 667 self.assertGreater(dest._write_count, count)
446 668 self.assertGreater(dest.tell(), offset)
447 669 offset = dest.tell()
@@ -455,19 +677,33 b' class TestCompressor_write_to(unittest.T'
455 677 header = trailing[0:3]
456 678 self.assertEqual(header, b'\x01\x00\x00')
457 679
680 def test_multithreaded(self):
681 dest = io.BytesIO()
682 cctx = zstd.ZstdCompressor(threads=2)
683 with cctx.write_to(dest) as compressor:
684 compressor.write(b'a' * 1048576)
685 compressor.write(b'b' * 1048576)
686 compressor.write(b'c' * 1048576)
458 687
688 self.assertEqual(len(dest.getvalue()), 295)
689
690
691 @make_cffi
459 692 class TestCompressor_read_from(unittest.TestCase):
460 693 def test_type_validation(self):
461 694 cctx = zstd.ZstdCompressor()
462 695
463 696 # Object with read() works.
464 cctx.read_from(io.BytesIO())
697 for chunk in cctx.read_from(io.BytesIO()):
698 pass
465 699
466 700 # Buffer protocol works.
467 cctx.read_from(b'foobar')
701 for chunk in cctx.read_from(b'foobar'):
702 pass
468 703
469 704 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
470 cctx.read_from(True)
705 for chunk in cctx.read_from(True):
706 pass
471 707
472 708 def test_read_empty(self):
473 709 cctx = zstd.ZstdCompressor(level=1)
@@ -521,6 +757,12 b' class TestCompressor_read_from(unittest.'
521 757 # We should get the same output as the one-shot compression mechanism.
522 758 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
523 759
760 params = zstd.get_frame_parameters(b''.join(chunks))
761 self.assertEqual(params.content_size, 0)
762 self.assertEqual(params.window_size, 262144)
763 self.assertEqual(params.dict_id, 0)
764 self.assertFalse(params.has_checksum)
765
524 766 # Now check the buffer protocol.
525 767 it = cctx.read_from(source.getvalue())
526 768 chunks = list(it)
@@ -534,3 +776,130 b' class TestCompressor_read_from(unittest.'
534 776 self.assertEqual(len(chunk), 1)
535 777
536 778 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
779
780 def test_multithreaded(self):
781 source = io.BytesIO()
782 source.write(b'a' * 1048576)
783 source.write(b'b' * 1048576)
784 source.write(b'c' * 1048576)
785 source.seek(0)
786
787 cctx = zstd.ZstdCompressor(threads=2)
788
789 compressed = b''.join(cctx.read_from(source))
790 self.assertEqual(len(compressed), 295)
791
792
793 class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
794 def test_multithreaded_unsupported(self):
795 cctx = zstd.ZstdCompressor(threads=2)
796
797 with self.assertRaisesRegexp(zstd.ZstdError, 'function cannot be called on ZstdCompressor configured for multi-threaded compression'):
798 cctx.multi_compress_to_buffer([b'foo'])
799
800 def test_invalid_inputs(self):
801 cctx = zstd.ZstdCompressor()
802
803 with self.assertRaises(TypeError):
804 cctx.multi_compress_to_buffer(True)
805
806 with self.assertRaises(TypeError):
807 cctx.multi_compress_to_buffer((1, 2))
808
809 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
810 cctx.multi_compress_to_buffer([u'foo'])
811
812 def test_empty_input(self):
813 cctx = zstd.ZstdCompressor()
814
815 with self.assertRaisesRegexp(ValueError, 'no source elements found'):
816 cctx.multi_compress_to_buffer([])
817
818 with self.assertRaisesRegexp(ValueError, 'source elements are empty'):
819 cctx.multi_compress_to_buffer([b'', b'', b''])
820
821 def test_list_input(self):
822 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
823
824 original = [b'foo' * 12, b'bar' * 6]
825 frames = [cctx.compress(c) for c in original]
826 b = cctx.multi_compress_to_buffer(original)
827
828 self.assertIsInstance(b, zstd.BufferWithSegmentsCollection)
829
830 self.assertEqual(len(b), 2)
831 self.assertEqual(b.size(), 44)
832
833 self.assertEqual(b[0].tobytes(), frames[0])
834 self.assertEqual(b[1].tobytes(), frames[1])
835
836 def test_buffer_with_segments_input(self):
837 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
838
839 original = [b'foo' * 4, b'bar' * 6]
840 frames = [cctx.compress(c) for c in original]
841
842 offsets = struct.pack('=QQQQ', 0, len(original[0]),
843 len(original[0]), len(original[1]))
844 segments = zstd.BufferWithSegments(b''.join(original), offsets)
845
846 result = cctx.multi_compress_to_buffer(segments)
847
848 self.assertEqual(len(result), 2)
849 self.assertEqual(result.size(), 47)
850
851 self.assertEqual(result[0].tobytes(), frames[0])
852 self.assertEqual(result[1].tobytes(), frames[1])
853
854 def test_buffer_with_segments_collection_input(self):
855 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
856
857 original = [
858 b'foo1',
859 b'foo2' * 2,
860 b'foo3' * 3,
861 b'foo4' * 4,
862 b'foo5' * 5,
863 ]
864
865 frames = [cctx.compress(c) for c in original]
866
867 b = b''.join([original[0], original[1]])
868 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
869 0, len(original[0]),
870 len(original[0]), len(original[1])))
871 b = b''.join([original[2], original[3], original[4]])
872 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
873 0, len(original[2]),
874 len(original[2]), len(original[3]),
875 len(original[2]) + len(original[3]), len(original[4])))
876
877 c = zstd.BufferWithSegmentsCollection(b1, b2)
878
879 result = cctx.multi_compress_to_buffer(c)
880
881 self.assertEqual(len(result), len(frames))
882
883 for i, frame in enumerate(frames):
884 self.assertEqual(result[i].tobytes(), frame)
885
886 def test_multiple_threads(self):
887 # threads argument will cause multi-threaded ZSTD APIs to be used, which will
888 # make output different.
889 refcctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
890 reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)]
891
892 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
893
894 frames = []
895 frames.extend(b'x' * 64 for i in range(256))
896 frames.extend(b'y' * 64 for i in range(256))
897
898 result = cctx.multi_compress_to_buffer(frames, threads=-1)
899
900 self.assertEqual(len(result), 512)
901 for i in range(512):
902 if i < 256:
903 self.assertEqual(result[i].tobytes(), reference[0])
904 else:
905 self.assertEqual(result[i].tobytes(), reference[1])
@@ -1,18 +1,16 b''
1 import io
2
3 1 try:
4 2 import unittest2 as unittest
5 3 except ImportError:
6 4 import unittest
7 5
8 try:
9 import hypothesis
10 import hypothesis.strategies as strategies
11 except ImportError:
12 hypothesis = None
13
14 6 import zstd
15 7
8 from . common import (
9 make_cffi,
10 )
11
12
13 @make_cffi
16 14 class TestCompressionParameters(unittest.TestCase):
17 15 def test_init_bad_arg_type(self):
18 16 with self.assertRaises(TypeError):
@@ -26,7 +24,7 b' class TestCompressionParameters(unittest'
26 24 zstd.CHAINLOG_MIN,
27 25 zstd.HASHLOG_MIN,
28 26 zstd.SEARCHLOG_MIN,
29 zstd.SEARCHLENGTH_MIN,
27 zstd.SEARCHLENGTH_MIN + 1,
30 28 zstd.TARGETLENGTH_MIN,
31 29 zstd.STRATEGY_FAST)
32 30
@@ -34,7 +32,7 b' class TestCompressionParameters(unittest'
34 32 zstd.CHAINLOG_MAX,
35 33 zstd.HASHLOG_MAX,
36 34 zstd.SEARCHLOG_MAX,
37 zstd.SEARCHLENGTH_MAX,
35 zstd.SEARCHLENGTH_MAX - 1,
38 36 zstd.TARGETLENGTH_MAX,
39 37 zstd.STRATEGY_BTOPT)
40 38
@@ -42,66 +40,84 b' class TestCompressionParameters(unittest'
42 40 p = zstd.get_compression_parameters(1)
43 41 self.assertIsInstance(p, zstd.CompressionParameters)
44 42
45 self.assertEqual(p[0], 19)
43 self.assertEqual(p.window_log, 19)
44
45 def test_members(self):
46 p = zstd.CompressionParameters(10, 6, 7, 4, 5, 8, 1)
47 self.assertEqual(p.window_log, 10)
48 self.assertEqual(p.chain_log, 6)
49 self.assertEqual(p.hash_log, 7)
50 self.assertEqual(p.search_log, 4)
51 self.assertEqual(p.search_length, 5)
52 self.assertEqual(p.target_length, 8)
53 self.assertEqual(p.strategy, 1)
54
55 def test_estimated_compression_context_size(self):
56 p = zstd.CompressionParameters(20, 16, 17, 1, 5, 16, zstd.STRATEGY_DFAST)
57
58 # 32-bit has slightly different values from 64-bit.
59 self.assertAlmostEqual(p.estimated_compression_context_size(), 1287076,
60 delta=110)
61
46 62
47 if hypothesis:
48 s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
49 max_value=zstd.WINDOWLOG_MAX)
50 s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN,
51 max_value=zstd.CHAINLOG_MAX)
52 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN,
53 max_value=zstd.HASHLOG_MAX)
54 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
55 max_value=zstd.SEARCHLOG_MAX)
56 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
57 max_value=zstd.SEARCHLENGTH_MAX)
58 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
59 max_value=zstd.TARGETLENGTH_MAX)
60 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
61 zstd.STRATEGY_DFAST,
62 zstd.STRATEGY_GREEDY,
63 zstd.STRATEGY_LAZY,
64 zstd.STRATEGY_LAZY2,
65 zstd.STRATEGY_BTLAZY2,
66 zstd.STRATEGY_BTOPT))
63 @make_cffi
64 class TestFrameParameters(unittest.TestCase):
65 def test_invalid_type(self):
66 with self.assertRaises(TypeError):
67 zstd.get_frame_parameters(None)
68
69 with self.assertRaises(TypeError):
70 zstd.get_frame_parameters(u'foobarbaz')
71
72 def test_invalid_input_sizes(self):
73 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
74 zstd.get_frame_parameters(b'')
75
76 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
77 zstd.get_frame_parameters(zstd.FRAME_HEADER)
78
79 def test_invalid_frame(self):
80 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
81 zstd.get_frame_parameters(b'foobarbaz')
67 82
68 class TestCompressionParametersHypothesis(unittest.TestCase):
69 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
70 s_searchlength, s_targetlength, s_strategy)
71 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
72 searchlength, targetlength, strategy):
73 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
74 searchlog, searchlength,
75 targetlength, strategy)
76 self.assertEqual(tuple(p),
77 (windowlog, chainlog, hashlog, searchlog,
78 searchlength, targetlength, strategy))
83 def test_attributes(self):
84 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
85 self.assertEqual(params.content_size, 0)
86 self.assertEqual(params.window_size, 1024)
87 self.assertEqual(params.dict_id, 0)
88 self.assertFalse(params.has_checksum)
89
90 # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte.
91 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
92 self.assertEqual(params.content_size, 0)
93 self.assertEqual(params.window_size, 1024)
94 self.assertEqual(params.dict_id, 255)
95 self.assertFalse(params.has_checksum)
96
97 # Lowest 3rd bit indicates if checksum is present.
98 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
99 self.assertEqual(params.content_size, 0)
100 self.assertEqual(params.window_size, 1024)
101 self.assertEqual(params.dict_id, 0)
102 self.assertTrue(params.has_checksum)
79 103
80 # Verify we can instantiate a compressor with the supplied values.
81 # ZSTD_checkCParams moves the goal posts on us from what's advertised
82 # in the constants. So move along with them.
83 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
84 searchlength += 1
85 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
86 searchlog, searchlength,
87 targetlength, strategy)
88 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
89 searchlength -= 1
90 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
91 searchlog, searchlength,
92 targetlength, strategy)
104 # Upper 2 bits indicate content size.
105 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00')
106 self.assertEqual(params.content_size, 511)
107 self.assertEqual(params.window_size, 1024)
108 self.assertEqual(params.dict_id, 0)
109 self.assertFalse(params.has_checksum)
93 110
94 cctx = zstd.ZstdCompressor(compression_params=p)
95 with cctx.write_to(io.BytesIO()):
96 pass
111 # Window descriptor is 2nd byte after frame header.
112 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
113 self.assertEqual(params.content_size, 0)
114 self.assertEqual(params.window_size, 262144)
115 self.assertEqual(params.dict_id, 0)
116 self.assertFalse(params.has_checksum)
97 117
98 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
99 s_searchlength, s_targetlength, s_strategy)
100 def test_estimate_compression_context_size(self, windowlog, chainlog,
101 hashlog, searchlog,
102 searchlength, targetlength,
103 strategy):
104 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
105 searchlog, searchlength,
106 targetlength, strategy)
107 size = zstd.estimate_compression_context_size(p)
118 # Set multiple things.
119 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00')
120 self.assertEqual(params.content_size, 272)
121 self.assertEqual(params.window_size, 262144)
122 self.assertEqual(params.dict_id, 15)
123 self.assertTrue(params.has_checksum)
@@ -10,7 +10,10 b' except ImportError:'
10 10
11 11 import zstd
12 12
13 from .common import OpCountingBytesIO
13 from .common import (
14 make_cffi,
15 OpCountingBytesIO,
16 )
14 17
15 18
16 19 if sys.version_info[0] >= 3:
@@ -19,6 +22,7 b' else:'
19 22 next = lambda it: it.next()
20 23
21 24
25 @make_cffi
22 26 class TestDecompressor_decompress(unittest.TestCase):
23 27 def test_empty_input(self):
24 28 dctx = zstd.ZstdDecompressor()
@@ -45,7 +49,7 b' class TestDecompressor_decompress(unitte'
45 49 compressed = cctx.compress(b'foobar')
46 50
47 51 dctx = zstd.ZstdDecompressor()
48 decompressed = dctx.decompress(compressed)
52 decompressed = dctx.decompress(compressed)
49 53 self.assertEqual(decompressed, b'foobar')
50 54
51 55 def test_max_output_size(self):
@@ -119,6 +123,7 b' class TestDecompressor_decompress(unitte'
119 123 self.assertEqual(decompressed, sources[i])
120 124
121 125
126 @make_cffi
122 127 class TestDecompressor_copy_stream(unittest.TestCase):
123 128 def test_no_read(self):
124 129 source = object()
@@ -180,6 +185,7 b' class TestDecompressor_copy_stream(unitt'
180 185 self.assertEqual(dest._write_count, len(dest.getvalue()))
181 186
182 187
188 @make_cffi
183 189 class TestDecompressor_decompressobj(unittest.TestCase):
184 190 def test_simple(self):
185 191 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
@@ -207,6 +213,7 b' def decompress_via_writer(data):'
207 213 return buffer.getvalue()
208 214
209 215
216 @make_cffi
210 217 class TestDecompressor_write_to(unittest.TestCase):
211 218 def test_empty_roundtrip(self):
212 219 cctx = zstd.ZstdCompressor()
@@ -256,14 +263,14 b' class TestDecompressor_write_to(unittest'
256 263 buffer = io.BytesIO()
257 264 cctx = zstd.ZstdCompressor(dict_data=d)
258 265 with cctx.write_to(buffer) as compressor:
259 compressor.write(orig)
266 self.assertEqual(compressor.write(orig), 1544)
260 267
261 268 compressed = buffer.getvalue()
262 269 buffer = io.BytesIO()
263 270
264 271 dctx = zstd.ZstdDecompressor(dict_data=d)
265 272 with dctx.write_to(buffer) as decompressor:
266 decompressor.write(compressed)
273 self.assertEqual(decompressor.write(compressed), len(orig))
267 274
268 275 self.assertEqual(buffer.getvalue(), orig)
269 276
@@ -286,11 +293,11 b' class TestDecompressor_write_to(unittest'
286 293 c = s.pack(c)
287 294 decompressor.write(c)
288 295
289
290 296 self.assertEqual(dest.getvalue(), b'foobarfoobar')
291 297 self.assertEqual(dest._write_count, len(dest.getvalue()))
292 298
293 299
300 @make_cffi
294 301 class TestDecompressor_read_from(unittest.TestCase):
295 302 def test_type_validation(self):
296 303 dctx = zstd.ZstdDecompressor()
@@ -302,7 +309,7 b' class TestDecompressor_read_from(unittes'
302 309 dctx.read_from(b'foobar')
303 310
304 311 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
305 dctx.read_from(True)
312 b''.join(dctx.read_from(True))
306 313
307 314 def test_empty_input(self):
308 315 dctx = zstd.ZstdDecompressor()
@@ -351,7 +358,7 b' class TestDecompressor_read_from(unittes'
351 358 dctx = zstd.ZstdDecompressor()
352 359
353 360 with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
354 dctx.read_from(b'', skip_bytes=1, read_size=1)
361 b''.join(dctx.read_from(b'', skip_bytes=1, read_size=1))
355 362
356 363 with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
357 364 b''.join(dctx.read_from(b'foobar', skip_bytes=10))
@@ -476,3 +483,259 b' class TestDecompressor_read_from(unittes'
476 483 self.assertEqual(len(chunk), 1)
477 484
478 485 self.assertEqual(source._read_count, len(source.getvalue()))
486
487
488 @make_cffi
489 class TestDecompressor_content_dict_chain(unittest.TestCase):
490 def test_bad_inputs_simple(self):
491 dctx = zstd.ZstdDecompressor()
492
493 with self.assertRaises(TypeError):
494 dctx.decompress_content_dict_chain(b'foo')
495
496 with self.assertRaises(TypeError):
497 dctx.decompress_content_dict_chain((b'foo', b'bar'))
498
499 with self.assertRaisesRegexp(ValueError, 'empty input chain'):
500 dctx.decompress_content_dict_chain([])
501
502 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
503 dctx.decompress_content_dict_chain([u'foo'])
504
505 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
506 dctx.decompress_content_dict_chain([True])
507
508 with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'):
509 dctx.decompress_content_dict_chain([zstd.FRAME_HEADER])
510
511 with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'):
512 dctx.decompress_content_dict_chain([b'foo' * 8])
513
514 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
515
516 with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'):
517 dctx.decompress_content_dict_chain([no_size])
518
519 # Corrupt first frame.
520 frame = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
521 frame = frame[0:12] + frame[15:]
522 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 0'):
523 dctx.decompress_content_dict_chain([frame])
524
525 def test_bad_subsequent_input(self):
526 initial = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
527
528 dctx = zstd.ZstdDecompressor()
529
530 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
531 dctx.decompress_content_dict_chain([initial, u'foo'])
532
533 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
534 dctx.decompress_content_dict_chain([initial, None])
535
536 with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'):
537 dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER])
538
539 with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'):
540 dctx.decompress_content_dict_chain([initial, b'foo' * 8])
541
542 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
543
544 with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'):
545 dctx.decompress_content_dict_chain([initial, no_size])
546
547 # Corrupt second frame.
548 cctx = zstd.ZstdCompressor(write_content_size=True, dict_data=zstd.ZstdCompressionDict(b'foo' * 64))
549 frame = cctx.compress(b'bar' * 64)
550 frame = frame[0:12] + frame[15:]
551
552 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 1'):
553 dctx.decompress_content_dict_chain([initial, frame])
554
555 def test_simple(self):
556 original = [
557 b'foo' * 64,
558 b'foobar' * 64,
559 b'baz' * 64,
560 b'foobaz' * 64,
561 b'foobarbaz' * 64,
562 ]
563
564 chunks = []
565 chunks.append(zstd.ZstdCompressor(write_content_size=True).compress(original[0]))
566 for i, chunk in enumerate(original[1:]):
567 d = zstd.ZstdCompressionDict(original[i])
568 cctx = zstd.ZstdCompressor(dict_data=d, write_content_size=True)
569 chunks.append(cctx.compress(chunk))
570
571 for i in range(1, len(original)):
572 chain = chunks[0:i]
573 expected = original[i - 1]
574 dctx = zstd.ZstdDecompressor()
575 decompressed = dctx.decompress_content_dict_chain(chain)
576 self.assertEqual(decompressed, expected)
577
578
579 # TODO enable for CFFI
580 class TestDecompressor_multi_decompress_to_buffer(unittest.TestCase):
581 def test_invalid_inputs(self):
582 dctx = zstd.ZstdDecompressor()
583
584 with self.assertRaises(TypeError):
585 dctx.multi_decompress_to_buffer(True)
586
587 with self.assertRaises(TypeError):
588 dctx.multi_decompress_to_buffer((1, 2))
589
590 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
591 dctx.multi_decompress_to_buffer([u'foo'])
592
593 with self.assertRaisesRegexp(ValueError, 'could not determine decompressed size of item 0'):
594 dctx.multi_decompress_to_buffer([b'foobarbaz'])
595
596 def test_list_input(self):
597 cctx = zstd.ZstdCompressor(write_content_size=True)
598
599 original = [b'foo' * 4, b'bar' * 6]
600 frames = [cctx.compress(d) for d in original]
601
602 dctx = zstd.ZstdDecompressor()
603 result = dctx.multi_decompress_to_buffer(frames)
604
605 self.assertEqual(len(result), len(frames))
606 self.assertEqual(result.size(), sum(map(len, original)))
607
608 for i, data in enumerate(original):
609 self.assertEqual(result[i].tobytes(), data)
610
611 self.assertEqual(result[0].offset, 0)
612 self.assertEqual(len(result[0]), 12)
613 self.assertEqual(result[1].offset, 12)
614 self.assertEqual(len(result[1]), 18)
615
616 def test_list_input_frame_sizes(self):
617 cctx = zstd.ZstdCompressor(write_content_size=False)
618
619 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
620 frames = [cctx.compress(d) for d in original]
621 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
622
623 dctx = zstd.ZstdDecompressor()
624 result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
625
626 self.assertEqual(len(result), len(frames))
627 self.assertEqual(result.size(), sum(map(len, original)))
628
629 for i, data in enumerate(original):
630 self.assertEqual(result[i].tobytes(), data)
631
632 def test_buffer_with_segments_input(self):
633 cctx = zstd.ZstdCompressor(write_content_size=True)
634
635 original = [b'foo' * 4, b'bar' * 6]
636 frames = [cctx.compress(d) for d in original]
637
638 dctx = zstd.ZstdDecompressor()
639
640 segments = struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]), len(frames[1]))
641 b = zstd.BufferWithSegments(b''.join(frames), segments)
642
643 result = dctx.multi_decompress_to_buffer(b)
644
645 self.assertEqual(len(result), len(frames))
646 self.assertEqual(result[0].offset, 0)
647 self.assertEqual(len(result[0]), 12)
648 self.assertEqual(result[1].offset, 12)
649 self.assertEqual(len(result[1]), 18)
650
651 def test_buffer_with_segments_sizes(self):
652 cctx = zstd.ZstdCompressor(write_content_size=False)
653 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
654 frames = [cctx.compress(d) for d in original]
655 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
656
657 segments = struct.pack('=QQQQQQ', 0, len(frames[0]),
658 len(frames[0]), len(frames[1]),
659 len(frames[0]) + len(frames[1]), len(frames[2]))
660 b = zstd.BufferWithSegments(b''.join(frames), segments)
661
662 dctx = zstd.ZstdDecompressor()
663 result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes)
664
665 self.assertEqual(len(result), len(frames))
666 self.assertEqual(result.size(), sum(map(len, original)))
667
668 for i, data in enumerate(original):
669 self.assertEqual(result[i].tobytes(), data)
670
671 def test_buffer_with_segments_collection_input(self):
672 cctx = zstd.ZstdCompressor(write_content_size=True)
673
674 original = [
675 b'foo0' * 2,
676 b'foo1' * 3,
677 b'foo2' * 4,
678 b'foo3' * 5,
679 b'foo4' * 6,
680 ]
681
682 frames = cctx.multi_compress_to_buffer(original)
683
684 # Check round trip.
685 dctx = zstd.ZstdDecompressor()
686 decompressed = dctx.multi_decompress_to_buffer(frames, threads=3)
687
688 self.assertEqual(len(decompressed), len(original))
689
690 for i, data in enumerate(original):
691 self.assertEqual(data, decompressed[i].tobytes())
692
693 # And a manual mode.
694 b = b''.join([frames[0].tobytes(), frames[1].tobytes()])
695 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
696 0, len(frames[0]),
697 len(frames[0]), len(frames[1])))
698
699 b = b''.join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()])
700 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
701 0, len(frames[2]),
702 len(frames[2]), len(frames[3]),
703 len(frames[2]) + len(frames[3]), len(frames[4])))
704
705 c = zstd.BufferWithSegmentsCollection(b1, b2)
706
707 dctx = zstd.ZstdDecompressor()
708 decompressed = dctx.multi_decompress_to_buffer(c)
709
710 self.assertEqual(len(decompressed), 5)
711 for i in range(5):
712 self.assertEqual(decompressed[i].tobytes(), original[i])
713
714 def test_multiple_threads(self):
715 cctx = zstd.ZstdCompressor(write_content_size=True)
716
717 frames = []
718 frames.extend(cctx.compress(b'x' * 64) for i in range(256))
719 frames.extend(cctx.compress(b'y' * 64) for i in range(256))
720
721 dctx = zstd.ZstdDecompressor()
722 result = dctx.multi_decompress_to_buffer(frames, threads=-1)
723
724 self.assertEqual(len(result), len(frames))
725 self.assertEqual(result.size(), 2 * 64 * 256)
726 self.assertEqual(result[0].tobytes(), b'x' * 64)
727 self.assertEqual(result[256].tobytes(), b'y' * 64)
728
729 def test_item_failure(self):
730 cctx = zstd.ZstdCompressor(write_content_size=True)
731 frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)]
732
733 frames[1] = frames[1] + b'extra'
734
735 dctx = zstd.ZstdDecompressor()
736
737 with self.assertRaisesRegexp(zstd.ZstdError, 'error decompressing item 1: Src size incorrect'):
738 dctx.multi_decompress_to_buffer(frames)
739
740 with self.assertRaisesRegexp(zstd.ZstdError, 'error decompressing item 1: Src size incorrect'):
741 dctx.multi_decompress_to_buffer(frames, threads=2)
@@ -5,7 +5,12 b' except ImportError:'
5 5
6 6 import zstd
7 7
8 from . common import (
9 make_cffi,
10 )
8 11
12
13 @make_cffi
9 14 class TestSizes(unittest.TestCase):
10 15 def test_decompression_size(self):
11 16 size = zstd.estimate_decompression_context_size()
@@ -7,9 +7,15 b' except ImportError:'
7 7
8 8 import zstd
9 9
10 from . common import (
11 make_cffi,
12 )
13
14
15 @make_cffi
10 16 class TestModuleAttributes(unittest.TestCase):
11 17 def test_version(self):
12 self.assertEqual(zstd.ZSTD_VERSION, (1, 1, 2))
18 self.assertEqual(zstd.ZSTD_VERSION, (1, 1, 3))
13 19
14 20 def test_constants(self):
15 21 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
@@ -45,4 +51,4 b' class TestModuleAttributes(unittest.Test'
45 51 )
46 52
47 53 for a in attrs:
48 self.assertTrue(hasattr(zstd, a))
54 self.assertTrue(hasattr(zstd, a), a)
@@ -7,6 +7,9 b' except ImportError:'
7 7
8 8 import zstd
9 9
10 from . common import (
11 make_cffi,
12 )
10 13
11 14 if sys.version_info[0] >= 3:
12 15 int_type = int
@@ -14,6 +17,7 b' else:'
14 17 int_type = long
15 18
16 19
20 @make_cffi
17 21 class TestTrainDictionary(unittest.TestCase):
18 22 def test_no_args(self):
19 23 with self.assertRaises(TypeError):
@@ -44,3 +48,63 b' class TestTrainDictionary(unittest.TestC'
44 48
45 49 data = d.as_bytes()
46 50 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
51
52 def test_set_dict_id(self):
53 samples = []
54 for i in range(128):
55 samples.append(b'foo' * 64)
56 samples.append(b'foobar' * 64)
57
58 d = zstd.train_dictionary(8192, samples, dict_id=42)
59 self.assertEqual(d.dict_id(), 42)
60
61
62 @make_cffi
63 class TestTrainCoverDictionary(unittest.TestCase):
64 def test_no_args(self):
65 with self.assertRaises(TypeError):
66 zstd.train_cover_dictionary()
67
68 def test_bad_args(self):
69 with self.assertRaises(TypeError):
70 zstd.train_cover_dictionary(8192, u'foo')
71
72 with self.assertRaises(ValueError):
73 zstd.train_cover_dictionary(8192, [u'foo'])
74
75 def test_basic(self):
76 samples = []
77 for i in range(128):
78 samples.append(b'foo' * 64)
79 samples.append(b'foobar' * 64)
80
81 d = zstd.train_cover_dictionary(8192, samples, k=64, d=16)
82 self.assertIsInstance(d.dict_id(), int_type)
83
84 data = d.as_bytes()
85 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
86
87 self.assertEqual(d.k, 64)
88 self.assertEqual(d.d, 16)
89
90 def test_set_dict_id(self):
91 samples = []
92 for i in range(128):
93 samples.append(b'foo' * 64)
94 samples.append(b'foobar' * 64)
95
96 d = zstd.train_cover_dictionary(8192, samples, k=64, d=16,
97 dict_id=42)
98 self.assertEqual(d.dict_id(), 42)
99
100 def test_optimize(self):
101 samples = []
102 for i in range(128):
103 samples.append(b'foo' * 64)
104 samples.append(b'foobar' * 64)
105
106 d = zstd.train_cover_dictionary(8192, samples, optimize=True,
107 threads=-1, steps=1, d=16)
108
109 self.assertEqual(d.k, 16)
110 self.assertEqual(d.d, 16)
@@ -8,6 +8,14 b''
8 8
9 9 /* A Python C extension for Zstandard. */
10 10
11 #if defined(_WIN32)
12 #define WIN32_LEAN_AND_MEAN
13 #include <Windows.h>
14 #elif defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
15 #include <sys/types.h>
16 #include <sys/sysctl.h>
17 #endif
18
11 19 #include "python-zstandard.h"
12 20
13 21 PyObject *ZstdError;
@@ -34,6 +42,11 b' PyDoc_STRVAR(get_compression_parameters_'
34 42 "Obtains a ``CompressionParameters`` instance from a compression level and\n"
35 43 "optional input size and dictionary size");
36 44
45 PyDoc_STRVAR(get_frame_parameters__doc__,
46 "get_frame_parameters(data)\n"
47 "\n"
48 "Obtains a ``FrameParameters`` instance by parsing data.\n");
49
37 50 PyDoc_STRVAR(train_dictionary__doc__,
38 51 "train_dictionary(dict_size, samples)\n"
39 52 "\n"
@@ -44,25 +57,42 b' PyDoc_STRVAR(train_dictionary__doc__,'
44 57 "\n"
45 58 "The raw dictionary content will be returned\n");
46 59
60 PyDoc_STRVAR(train_cover_dictionary__doc__,
61 "train_cover_dictionary(dict_size, samples, k=None, d=None, notifications=0, dict_id=0, level=0)\n"
62 "\n"
63 "Train a dictionary from sample data using the COVER algorithm.\n"
64 "\n"
65 "This behaves like ``train_dictionary()`` except a different algorithm is\n"
66 "used to create the dictionary. The algorithm has 2 parameters: ``k`` and\n"
67 "``d``. These control the *segment size* and *dmer size*. A reasonable range\n"
68 "for ``k`` is ``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n"
69 "``d`` must be less than or equal to ``k``.\n"
70 );
71
47 72 static char zstd_doc[] = "Interface to zstandard";
48 73
49 74 static PyMethodDef zstd_methods[] = {
75 /* TODO remove since it is a method on CompressionParameters. */
50 76 { "estimate_compression_context_size", (PyCFunction)estimate_compression_context_size,
51 77 METH_VARARGS, estimate_compression_context_size__doc__ },
52 78 { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
53 79 METH_NOARGS, estimate_decompression_context_size__doc__ },
54 80 { "get_compression_parameters", (PyCFunction)get_compression_parameters,
55 81 METH_VARARGS, get_compression_parameters__doc__ },
82 { "get_frame_parameters", (PyCFunction)get_frame_parameters,
83 METH_VARARGS, get_frame_parameters__doc__ },
56 84 { "train_dictionary", (PyCFunction)train_dictionary,
57 85 METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
86 { "train_cover_dictionary", (PyCFunction)train_cover_dictionary,
87 METH_VARARGS | METH_KEYWORDS, train_cover_dictionary__doc__ },
58 88 { NULL, NULL }
59 89 };
60 90
91 void bufferutil_module_init(PyObject* mod);
61 92 void compressobj_module_init(PyObject* mod);
62 93 void compressor_module_init(PyObject* mod);
63 94 void compressionparams_module_init(PyObject* mod);
64 95 void constants_module_init(PyObject* mod);
65 void dictparams_module_init(PyObject* mod);
66 96 void compressiondict_module_init(PyObject* mod);
67 97 void compressionwriter_module_init(PyObject* mod);
68 98 void compressoriterator_module_init(PyObject* mod);
@@ -70,6 +100,7 b' void decompressor_module_init(PyObject* '
70 100 void decompressobj_module_init(PyObject* mod);
71 101 void decompressionwriter_module_init(PyObject* mod);
72 102 void decompressoriterator_module_init(PyObject* mod);
103 void frameparams_module_init(PyObject* mod);
73 104
74 105 void zstd_module_init(PyObject* m) {
75 106 /* python-zstandard relies on unstable zstd C API features. This means
@@ -87,13 +118,13 b' void zstd_module_init(PyObject* m) {'
87 118 We detect this mismatch here and refuse to load the module if this
88 119 scenario is detected.
89 120 */
90 if (ZSTD_VERSION_NUMBER != 10102 || ZSTD_versionNumber() != 10102) {
121 if (ZSTD_VERSION_NUMBER != 10103 || ZSTD_versionNumber() != 10103) {
91 122 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
92 123 return;
93 124 }
94 125
126 bufferutil_module_init(m);
95 127 compressionparams_module_init(m);
96 dictparams_module_init(m);
97 128 compressiondict_module_init(m);
98 129 compressobj_module_init(m);
99 130 compressor_module_init(m);
@@ -104,6 +135,7 b' void zstd_module_init(PyObject* m) {'
104 135 decompressobj_module_init(m);
105 136 decompressionwriter_module_init(m);
106 137 decompressoriterator_module_init(m);
138 frameparams_module_init(m);
107 139 }
108 140
109 141 #if PY_MAJOR_VERSION >= 3
@@ -134,3 +166,48 b' PyMODINIT_FUNC initzstd(void) {'
134 166 }
135 167 }
136 168 #endif
169
170 /* Attempt to resolve the number of CPUs in the system. */
171 int cpu_count() {
172 int count = 0;
173
174 #if defined(_WIN32)
175 SYSTEM_INFO si;
176 si.dwNumberOfProcessors = 0;
177 GetSystemInfo(&si);
178 count = si.dwNumberOfProcessors;
179 #elif defined(__APPLE__)
180 int num;
181 size_t size = sizeof(int);
182
183 if (0 == sysctlbyname("hw.logicalcpu", &num, &size, NULL, 0)) {
184 count = num;
185 }
186 #elif defined(__linux__)
187 count = sysconf(_SC_NPROCESSORS_ONLN);
188 #elif defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
189 int mib[2];
190 size_t len = sizeof(count);
191 mib[0] = CTL_HW;
192 mib[1] = HW_NCPU;
193 if (0 != sysctl(mib, 2, &count, &len, NULL, 0)) {
194 count = 0;
195 }
196 #elif defined(__hpux)
197 count = mpctl(MPC_GETNUMSPUS, NULL, NULL);
198 #endif
199
200 return count;
201 }
202
203 size_t roundpow2(size_t i) {
204 i--;
205 i |= i >> 1;
206 i |= i >> 2;
207 i |= i >> 4;
208 i |= i >> 8;
209 i |= i >> 16;
210 i++;
211
212 return i;
213 }
@@ -39,7 +39,7 b' extern "C" {'
39 39 #endif
40 40
41 41 /* code only tested on 32 and 64 bits systems */
42 #define MEM_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; }
42 #define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
43 43 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
44 44
45 45
@@ -43,10 +43,6 b' ZSTD_ErrorCode ZSTD_getErrorCode(size_t '
43 43 * provides error code string from enum */
44 44 const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorName(code); }
45 45
46 /* --- ZBUFF Error Management (deprecated) --- */
47 unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
48 const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
49
50 46
51 47 /*=**************************************************************
52 48 * Custom allocator
@@ -18,6 +18,20 b' extern "C" {'
18 18 #include <stddef.h> /* size_t */
19 19
20 20
21 /* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
22 #if defined(__GNUC__) && (__GNUC__ >= 4)
23 # define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
24 #else
25 # define ZSTDERRORLIB_VISIBILITY
26 #endif
27 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
28 # define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
29 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
30 # define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
31 #else
32 # define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
33 #endif
34
21 35 /*-****************************************
22 36 * error codes list
23 37 ******************************************/
@@ -49,8 +63,8 b' typedef enum {'
49 63 /*! ZSTD_getErrorCode() :
50 64 convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
51 65 which can be used to compare directly with enum list published into "error_public.h" */
52 ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
53 const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
66 ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
67 ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
54 68
55 69
56 70 #if defined (__cplusplus)
@@ -267,4 +267,13 b' MEM_STATIC U32 ZSTD_highbit32(U32 val)'
267 267 }
268 268
269 269
270 /* hidden functions */
271
272 /* ZSTD_invalidateRepCodes() :
273 * ensures next compression will not use repcodes from previous block.
274 * Note : only works with regular variant;
275 * do not use with extDict variant ! */
276 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);
277
278
270 279 #endif /* ZSTD_CCOMMON_H_MODULE */
@@ -51,8 +51,7 b' static void ZSTD_resetSeqStore(seqStore_'
51 51 /*-*************************************
52 52 * Context memory management
53 53 ***************************************/
54 struct ZSTD_CCtx_s
55 {
54 struct ZSTD_CCtx_s {
56 55 const BYTE* nextSrc; /* next block here to continue on current prefix */
57 56 const BYTE* base; /* All regular indexes relative to this position */
58 57 const BYTE* dictBase; /* extDict indexes relative to this position */
@@ -61,10 +60,11 b' struct ZSTD_CCtx_s'
61 60 U32 nextToUpdate; /* index from which to continue dictionary update */
62 61 U32 nextToUpdate3; /* index from which to continue dictionary update */
63 62 U32 hashLog3; /* dispatch table : larger == faster, more memory */
64 U32 loadedDictEnd;
63 U32 loadedDictEnd; /* index of end of dictionary */
64 U32 forceWindow; /* force back-references to respect limit of 1<<wLog, even for dictionary */
65 65 ZSTD_compressionStage_e stage;
66 66 U32 rep[ZSTD_REP_NUM];
67 U32 savedRep[ZSTD_REP_NUM];
67 U32 repToConfirm[ZSTD_REP_NUM];
68 68 U32 dictID;
69 69 ZSTD_parameters params;
70 70 void* workSpace;
@@ -101,7 +101,7 b' ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD'
101 101 cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
102 102 if (!cctx) return NULL;
103 103 memset(cctx, 0, sizeof(ZSTD_CCtx));
104 memcpy(&(cctx->customMem), &customMem, sizeof(customMem));
104 cctx->customMem = customMem;
105 105 return cctx;
106 106 }
107 107
@@ -119,6 +119,15 b' size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx*'
119 119 return sizeof(*cctx) + cctx->workSpaceSize;
120 120 }
121 121
122 size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value)
123 {
124 switch(param)
125 {
126 case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
127 default: return ERROR(parameter_unknown);
128 }
129 }
130
122 131 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */
123 132 {
124 133 return &(ctx->seqStore);
@@ -318,6 +327,14 b' static size_t ZSTD_resetCCtx_advanced (Z'
318 327 }
319 328 }
320 329
330 /* ZSTD_invalidateRepCodes() :
331 * ensures next compression will not use repcodes from previous block.
332 * Note : only works with regular variant;
333 * do not use with extDict variant ! */
334 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
335 int i;
336 for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = 0;
337 }
321 338
322 339 /*! ZSTD_copyCCtx() :
323 340 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
@@ -735,12 +752,19 b' size_t ZSTD_compressSequences(ZSTD_CCtx*'
735 752 if ((size_t)(op-ostart) >= maxCSize) return 0; }
736 753
737 754 /* confirm repcodes */
738 { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->savedRep[i]; }
755 { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
739 756
740 757 return op - ostart;
741 758 }
742 759
743 760
761 #if 0 /* for debug */
762 # define STORESEQ_DEBUG
763 #include <stdio.h> /* fprintf */
764 U32 g_startDebug = 0;
765 const BYTE* g_start = NULL;
766 #endif
767
744 768 /*! ZSTD_storeSeq() :
745 769 Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
746 770 `offsetCode` : distance to match, or 0 == repCode.
@@ -748,13 +772,14 b' size_t ZSTD_compressSequences(ZSTD_CCtx*'
748 772 */
749 773 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
750 774 {
751 #if 0 /* for debug */
752 static const BYTE* g_start = NULL;
753 const U32 pos = (U32)((const BYTE*)literals - g_start);
754 if (g_start==NULL) g_start = (const BYTE*)literals;
755 //if ((pos > 1) && (pos < 50000))
756 printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
757 pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
775 #ifdef STORESEQ_DEBUG
776 if (g_startDebug) {
777 const U32 pos = (U32)((const BYTE*)literals - g_start);
778 if (g_start==NULL) g_start = (const BYTE*)literals;
779 if ((pos > 1895000) && (pos < 1895300))
780 fprintf(stderr, "Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
781 pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
782 }
758 783 #endif
759 784 /* copy Literals */
760 785 ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
@@ -1004,8 +1029,8 b' void ZSTD_compressBlock_fast_generic(ZST'
1004 1029 } } }
1005 1030
1006 1031 /* save reps for next block */
1007 cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
1008 cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
1032 cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
1033 cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
1009 1034
1010 1035 /* Last Literals */
1011 1036 { size_t const lastLLSize = iend - anchor;
@@ -1119,7 +1144,7 b' static void ZSTD_compressBlock_fast_extD'
1119 1144 } } }
1120 1145
1121 1146 /* save reps for next block */
1122 ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
1147 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
1123 1148
1124 1149 /* Last Literals */
1125 1150 { size_t const lastLLSize = iend - anchor;
@@ -1273,8 +1298,8 b' void ZSTD_compressBlock_doubleFast_gener'
1273 1298 } } }
1274 1299
1275 1300 /* save reps for next block */
1276 cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
1277 cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
1301 cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
1302 cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
1278 1303
1279 1304 /* Last Literals */
1280 1305 { size_t const lastLLSize = iend - anchor;
@@ -1423,7 +1448,7 b' static void ZSTD_compressBlock_doubleFas'
1423 1448 } } }
1424 1449
1425 1450 /* save reps for next block */
1426 ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
1451 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
1427 1452
1428 1453 /* Last Literals */
1429 1454 { size_t const lastLLSize = iend - anchor;
@@ -1955,8 +1980,8 b' void ZSTD_compressBlock_lazy_generic(ZST'
1955 1980 } }
1956 1981
1957 1982 /* Save reps for next block */
1958 ctx->savedRep[0] = offset_1 ? offset_1 : savedOffset;
1959 ctx->savedRep[1] = offset_2 ? offset_2 : savedOffset;
1983 ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
1984 ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
1960 1985
1961 1986 /* Last Literals */
1962 1987 { size_t const lastLLSize = iend - anchor;
@@ -2150,7 +2175,7 b' void ZSTD_compressBlock_lazy_extDict_gen'
2150 2175 } }
2151 2176
2152 2177 /* Save reps for next block */
2153 ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
2178 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
2154 2179
2155 2180 /* Last Literals */
2156 2181 { size_t const lastLLSize = iend - anchor;
@@ -2409,12 +2434,14 b' static size_t ZSTD_compressContinue_inte'
2409 2434
2410 2435 cctx->nextSrc = ip + srcSize;
2411 2436
2412 { size_t const cSize = frame ?
2437 if (srcSize) {
2438 size_t const cSize = frame ?
2413 2439 ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
2414 2440 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
2415 2441 if (ZSTD_isError(cSize)) return cSize;
2416 2442 return cSize + fhSize;
2417 }
2443 } else
2444 return fhSize;
2418 2445 }
2419 2446
2420 2447
@@ -2450,7 +2477,7 b' static size_t ZSTD_loadDictionaryContent'
2450 2477 zc->dictBase = zc->base;
2451 2478 zc->base += ip - zc->nextSrc;
2452 2479 zc->nextToUpdate = zc->dictLimit;
2453 zc->loadedDictEnd = (U32)(iend - zc->base);
2480 zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base);
2454 2481
2455 2482 zc->nextSrc = iend;
2456 2483 if (srcSize <= HASH_READ_SIZE) return 0;
@@ -2557,9 +2584,9 b' static size_t ZSTD_loadDictEntropyStats('
2557 2584 }
2558 2585
2559 2586 if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
2560 cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
2561 cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
2562 cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
2587 cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] == 0 || cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
2588 cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] == 0 || cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
2589 cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] == 0 || cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
2563 2590 dictPtr += 12;
2564 2591
2565 2592 { U32 offcodeMax = MaxOff;
@@ -2594,7 +2621,6 b' static size_t ZSTD_compress_insertDictio'
2594 2621 }
2595 2622 }
2596 2623
2597
2598 2624 /*! ZSTD_compressBegin_internal() :
2599 2625 * @return : 0, or an error code */
2600 2626 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
@@ -2626,9 +2652,9 b' size_t ZSTD_compressBegin_usingDict(ZSTD'
2626 2652 }
2627 2653
2628 2654
2629 size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
2655 size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
2630 2656 {
2631 return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel);
2657 return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
2632 2658 }
2633 2659
2634 2660
@@ -2733,7 +2759,8 b' size_t ZSTD_compress(void* dst, size_t d'
2733 2759 /* ===== Dictionary API ===== */
2734 2760
2735 2761 struct ZSTD_CDict_s {
2736 void* dictContent;
2762 void* dictBuffer;
2763 const void* dictContent;
2737 2764 size_t dictContentSize;
2738 2765 ZSTD_CCtx* refContext;
2739 2766 }; /* typedef'd tp ZSTD_CDict within "zstd.h" */
@@ -2741,39 +2768,45 b' struct ZSTD_CDict_s {'
2741 2768 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
2742 2769 {
2743 2770 if (cdict==NULL) return 0; /* support sizeof on NULL */
2744 return ZSTD_sizeof_CCtx(cdict->refContext) + cdict->dictContentSize;
2771 return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
2745 2772 }
2746 2773
2747 ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_parameters params, ZSTD_customMem customMem)
2774 ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, unsigned byReference,
2775 ZSTD_parameters params, ZSTD_customMem customMem)
2748 2776 {
2749 2777 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
2750 2778 if (!customMem.customAlloc || !customMem.customFree) return NULL;
2751 2779
2752 2780 { ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
2753 void* const dictContent = ZSTD_malloc(dictSize, customMem);
2754 2781 ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
2755 2782
2756 if (!dictContent || !cdict || !cctx) {
2757 ZSTD_free(dictContent, customMem);
2783 if (!cdict || !cctx) {
2758 2784 ZSTD_free(cdict, customMem);
2759 2785 ZSTD_free(cctx, customMem);
2760 2786 return NULL;
2761 2787 }
2762 2788
2763 if (dictSize) {
2764 memcpy(dictContent, dict, dictSize);
2789 if ((byReference) || (!dictBuffer) || (!dictSize)) {
2790 cdict->dictBuffer = NULL;
2791 cdict->dictContent = dictBuffer;
2792 } else {
2793 void* const internalBuffer = ZSTD_malloc(dictSize, customMem);
2794 if (!internalBuffer) { ZSTD_free(cctx, customMem); ZSTD_free(cdict, customMem); return NULL; }
2795 memcpy(internalBuffer, dictBuffer, dictSize);
2796 cdict->dictBuffer = internalBuffer;
2797 cdict->dictContent = internalBuffer;
2765 2798 }
2766 { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, dictContent, dictSize, params, 0);
2799
2800 { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
2767 2801 if (ZSTD_isError(errorCode)) {
2768 ZSTD_free(dictContent, customMem);
2802 ZSTD_free(cdict->dictBuffer, customMem);
2803 ZSTD_free(cctx, customMem);
2769 2804 ZSTD_free(cdict, customMem);
2770 ZSTD_free(cctx, customMem);
2771 2805 return NULL;
2772 2806 } }
2773 2807
2774 cdict->dictContent = dictContent;
2808 cdict->refContext = cctx;
2775 2809 cdict->dictContentSize = dictSize;
2776 cdict->refContext = cctx;
2777 2810 return cdict;
2778 2811 }
2779 2812 }
@@ -2783,7 +2816,15 b' ZSTD_CDict* ZSTD_createCDict(const void*'
2783 2816 ZSTD_customMem const allocator = { NULL, NULL, NULL };
2784 2817 ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
2785 2818 params.fParams.contentSizeFlag = 1;
2786 return ZSTD_createCDict_advanced(dict, dictSize, params, allocator);
2819 return ZSTD_createCDict_advanced(dict, dictSize, 0, params, allocator);
2820 }
2821
2822 ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
2823 {
2824 ZSTD_customMem const allocator = { NULL, NULL, NULL };
2825 ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
2826 params.fParams.contentSizeFlag = 1;
2827 return ZSTD_createCDict_advanced(dict, dictSize, 1, params, allocator);
2787 2828 }
2788 2829
2789 2830 size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
@@ -2791,7 +2832,7 b' size_t ZSTD_freeCDict(ZSTD_CDict* cdict)'
2791 2832 if (cdict==NULL) return 0; /* support free on NULL */
2792 2833 { ZSTD_customMem const cMem = cdict->refContext->customMem;
2793 2834 ZSTD_freeCCtx(cdict->refContext);
2794 ZSTD_free(cdict->dictContent, cMem);
2835 ZSTD_free(cdict->dictBuffer, cMem);
2795 2836 ZSTD_free(cdict, cMem);
2796 2837 return 0;
2797 2838 }
@@ -2801,7 +2842,7 b' static ZSTD_parameters ZSTD_getParamsFro'
2801 2842 return ZSTD_getParamsFromCCtx(cdict->refContext);
2802 2843 }
2803 2844
2804 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, U64 pledgedSrcSize)
2845 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize)
2805 2846 {
2806 2847 if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
2807 2848 else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize));
@@ -2900,7 +2941,7 b' size_t ZSTD_CStreamOutSize(void) { retur'
2900 2941
2901 2942 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2902 2943 {
2903 if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once */
2944 if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */
2904 2945
2905 2946 if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize))
2906 2947 else CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
@@ -2937,9 +2978,9 b' size_t ZSTD_initCStream_advanced(ZSTD_CS'
2937 2978 if (zcs->outBuff == NULL) return ERROR(memory_allocation);
2938 2979 }
2939 2980
2940 if (dict) {
2981 if (dict && dictSize >= 8) {
2941 2982 ZSTD_freeCDict(zcs->cdictLocal);
2942 zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, params, zcs->customMem);
2983 zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0, params, zcs->customMem);
2943 2984 if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
2944 2985 zcs->cdict = zcs->cdictLocal;
2945 2986 } else zcs->cdict = NULL;
@@ -2956,6 +2997,7 b' size_t ZSTD_initCStream_usingCDict(ZSTD_'
2956 2997 ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
2957 2998 size_t const initError = ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0);
2958 2999 zcs->cdict = cdict;
3000 zcs->cctx->dictID = params.fParams.noDictIDFlag ? 0 : cdict->refContext->dictID;
2959 3001 return initError;
2960 3002 }
2961 3003
@@ -2967,7 +3009,8 b' size_t ZSTD_initCStream_usingDict(ZSTD_C'
2967 3009
2968 3010 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize)
2969 3011 {
2970 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
3012 ZSTD_parameters params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
3013 if (pledgedSrcSize) params.fParams.contentSizeFlag = 1;
2971 3014 return ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize);
2972 3015 }
2973 3016
@@ -38,7 +38,7 b' MEM_STATIC void ZSTD_rescaleFreqs(seqSto'
38 38
39 39 ssPtr->cachedLiterals = NULL;
40 40 ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
41 ssPtr->staticPrices = 0;
41 ssPtr->staticPrices = 0;
42 42
43 43 if (ssPtr->litLengthSum == 0) {
44 44 if (srcSize <= 1024) ssPtr->staticPrices = 1;
@@ -56,7 +56,7 b' MEM_STATIC void ZSTD_rescaleFreqs(seqSto'
56 56
57 57 for (u=0; u<=MaxLit; u++) {
58 58 ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
59 ssPtr->litSum += ssPtr->litFreq[u];
59 ssPtr->litSum += ssPtr->litFreq[u];
60 60 }
61 61 for (u=0; u<=MaxLL; u++)
62 62 ssPtr->litLengthFreq[u] = 1;
@@ -634,7 +634,7 b' void ZSTD_compressBlock_opt_generic(ZSTD'
634 634 } } /* for (cur=0; cur < last_pos; ) */
635 635
636 636 /* Save reps for next block */
637 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
637 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
638 638
639 639 /* Last Literals */
640 640 { size_t const lastLLSize = iend - anchor;
@@ -825,7 +825,7 b' void ZSTD_compressBlock_opt_extDict_gene'
825 825
826 826 match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
827 827
828 if (match_num > 0 && matches[match_num-1].len > sufficient_len) {
828 if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
829 829 best_mlen = matches[match_num-1].len;
830 830 best_off = matches[match_num-1].off;
831 831 last_pos = cur + 1;
@@ -835,7 +835,7 b' void ZSTD_compressBlock_opt_extDict_gene'
835 835 /* set prices using matches at position = cur */
836 836 for (u = 0; u < match_num; u++) {
837 837 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
838 best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur;
838 best_mlen = matches[u].len;
839 839
840 840 while (mlen <= best_mlen) {
841 841 if (opt[cur].mlen == 1) {
@@ -907,7 +907,7 b' void ZSTD_compressBlock_opt_extDict_gene'
907 907 } } /* for (cur=0; cur < last_pos; ) */
908 908
909 909 /* Save reps for next block */
910 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
910 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
911 911
912 912 /* Last Literals */
913 913 { size_t lastLLSize = iend - anchor;
@@ -1444,7 +1444,7 b' size_t ZSTD_decompress_usingDict(ZSTD_DC'
1444 1444 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
1445 1445 if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize);
1446 1446 #endif
1447 ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
1447 CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
1448 1448 ZSTD_checkContinuity(dctx, dst);
1449 1449 return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
1450 1450 }
@@ -1671,9 +1671,9 b' static size_t ZSTD_loadEntropy(ZSTD_DCtx'
1671 1671 }
1672 1672
1673 1673 if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
1674 dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
1675 dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
1676 dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
1674 dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] == 0 || dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
1675 dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] == 0 || dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
1676 dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] == 0 || dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
1677 1677 dictPtr += 12;
1678 1678
1679 1679 dctx->litEntropy = dctx->fseEntropy = 1;
@@ -1713,39 +1713,44 b' size_t ZSTD_decompressBegin_usingDict(ZS'
1713 1713 /* ====== ZSTD_DDict ====== */
1714 1714
1715 1715 struct ZSTD_DDict_s {
1716 void* dict;
1716 void* dictBuffer;
1717 const void* dictContent;
1717 1718 size_t dictSize;
1718 1719 ZSTD_DCtx* refContext;
1719 1720 }; /* typedef'd to ZSTD_DDict within "zstd.h" */
1720 1721
1721 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_customMem customMem)
1722 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem)
1722 1723 {
1723 1724 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
1724 1725 if (!customMem.customAlloc || !customMem.customFree) return NULL;
1725 1726
1726 1727 { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
1727 void* const dictContent = ZSTD_malloc(dictSize, customMem);
1728 1728 ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(customMem);
1729 1729
1730 if (!dictContent || !ddict || !dctx) {
1731 ZSTD_free(dictContent, customMem);
1730 if (!ddict || !dctx) {
1732 1731 ZSTD_free(ddict, customMem);
1733 1732 ZSTD_free(dctx, customMem);
1734 1733 return NULL;
1735 1734 }
1736 1735
1737 if (dictSize) {
1738 memcpy(dictContent, dict, dictSize);
1736 if ((byReference) || (!dict) || (!dictSize)) {
1737 ddict->dictBuffer = NULL;
1738 ddict->dictContent = dict;
1739 } else {
1740 void* const internalBuffer = ZSTD_malloc(dictSize, customMem);
1741 if (!internalBuffer) { ZSTD_free(dctx, customMem); ZSTD_free(ddict, customMem); return NULL; }
1742 memcpy(internalBuffer, dict, dictSize);
1743 ddict->dictBuffer = internalBuffer;
1744 ddict->dictContent = internalBuffer;
1739 1745 }
1740 { size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, dictContent, dictSize);
1746 { size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, ddict->dictContent, dictSize);
1741 1747 if (ZSTD_isError(errorCode)) {
1742 ZSTD_free(dictContent, customMem);
1748 ZSTD_free(ddict->dictBuffer, customMem);
1743 1749 ZSTD_free(ddict, customMem);
1744 1750 ZSTD_free(dctx, customMem);
1745 1751 return NULL;
1746 1752 } }
1747 1753
1748 ddict->dict = dictContent;
1749 1754 ddict->dictSize = dictSize;
1750 1755 ddict->refContext = dctx;
1751 1756 return ddict;
@@ -1758,15 +1763,27 b' ZSTD_DDict* ZSTD_createDDict_advanced(co'
1758 1763 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
1759 1764 {
1760 1765 ZSTD_customMem const allocator = { NULL, NULL, NULL };
1761 return ZSTD_createDDict_advanced(dict, dictSize, allocator);
1766 return ZSTD_createDDict_advanced(dict, dictSize, 0, allocator);
1762 1767 }
1763 1768
1769
1770 /*! ZSTD_createDDict_byReference() :
1771 * Create a digested dictionary, ready to start decompression operation without startup delay.
1772 * Dictionary content is simply referenced, and therefore stays in dictBuffer.
1773 * It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */
1774 ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
1775 {
1776 ZSTD_customMem const allocator = { NULL, NULL, NULL };
1777 return ZSTD_createDDict_advanced(dictBuffer, dictSize, 1, allocator);
1778 }
1779
1780
1764 1781 size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
1765 1782 {
1766 1783 if (ddict==NULL) return 0; /* support free on NULL */
1767 1784 { ZSTD_customMem const cMem = ddict->refContext->customMem;
1768 1785 ZSTD_freeDCtx(ddict->refContext);
1769 ZSTD_free(ddict->dict, cMem);
1786 ZSTD_free(ddict->dictBuffer, cMem);
1770 1787 ZSTD_free(ddict, cMem);
1771 1788 return 0;
1772 1789 }
@@ -1775,7 +1792,7 b' size_t ZSTD_freeDDict(ZSTD_DDict* ddict)'
1775 1792 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
1776 1793 {
1777 1794 if (ddict==NULL) return 0; /* support sizeof on NULL */
1778 return sizeof(*ddict) + sizeof(ddict->refContext) + ddict->dictSize;
1795 return sizeof(*ddict) + ZSTD_sizeof_DCtx(ddict->refContext) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
1779 1796 }
1780 1797
1781 1798 /*! ZSTD_getDictID_fromDict() :
@@ -1796,7 +1813,7 b' unsigned ZSTD_getDictID_fromDict(const v'
1796 1813 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
1797 1814 {
1798 1815 if (ddict==NULL) return 0;
1799 return ZSTD_getDictID_fromDict(ddict->dict, ddict->dictSize);
1816 return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
1800 1817 }
1801 1818
1802 1819 /*! ZSTD_getDictID_fromFrame() :
@@ -1827,7 +1844,7 b' size_t ZSTD_decompress_usingDDict(ZSTD_D'
1827 1844 const ZSTD_DDict* ddict)
1828 1845 {
1829 1846 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
1830 if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dict, ddict->dictSize);
1847 if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dictContent, ddict->dictSize);
1831 1848 #endif
1832 1849 ZSTD_refDCtx(dctx, ddict->refContext);
1833 1850 ZSTD_checkContinuity(dctx, dst);
@@ -1919,7 +1936,7 b' size_t ZSTD_initDStream_usingDict(ZSTD_D'
1919 1936 zds->stage = zdss_loadHeader;
1920 1937 zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
1921 1938 ZSTD_freeDDict(zds->ddictLocal);
1922 if (dict) {
1939 if (dict && dictSize >= 8) {
1923 1940 zds->ddictLocal = ZSTD_createDDict(dict, dictSize);
1924 1941 if (zds->ddictLocal == NULL) return ERROR(memory_allocation);
1925 1942 } else zds->ddictLocal = NULL;
@@ -1956,7 +1973,7 b' size_t ZSTD_setDStreamParameter(ZSTD_DSt'
1956 1973 switch(paramType)
1957 1974 {
1958 1975 default : return ERROR(parameter_unknown);
1959 case ZSTDdsp_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break;
1976 case DStream_p_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break;
1960 1977 }
1961 1978 return 0;
1962 1979 }
@@ -2007,7 +2024,7 b' size_t ZSTD_decompressStream(ZSTD_DStrea'
2007 2024 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
2008 2025 { U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
2009 2026 if (legacyVersion) {
2010 const void* const dict = zds->ddict ? zds->ddict->dict : NULL;
2027 const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL;
2011 2028 size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0;
2012 2029 CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion,
2013 2030 dict, dictSize));
@@ -36,12 +36,11 b''
36 36 #include <time.h> /* clock */
37 37
38 38 #include "mem.h" /* read */
39 #include "error_private.h"
40 39 #include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */
41 40 #define HUF_STATIC_LINKING_ONLY
42 #include "huf.h"
41 #include "huf.h" /* HUF_buildCTable, HUF_writeCTable */
43 42 #include "zstd_internal.h" /* includes zstd.h */
44 #include "xxhash.h"
43 #include "xxhash.h" /* XXH64 */
45 44 #include "divsufsort.h"
46 45 #ifndef ZDICT_STATIC_LINKING_ONLY
47 46 # define ZDICT_STATIC_LINKING_ONLY
@@ -61,7 +60,7 b''
61 60 #define NOISELENGTH 32
62 61
63 62 #define MINRATIO 4
64 static const int g_compressionLevel_default = 5;
63 static const int g_compressionLevel_default = 6;
65 64 static const U32 g_selectivity_default = 9;
66 65 static const size_t g_provision_entropySize = 200;
67 66 static const size_t g_min_fast_dictContent = 192;
@@ -307,13 +306,13 b' static dictItem ZDICT_analyzePos('
307 306 } while (length >=MINMATCHLENGTH);
308 307
309 308 /* look backward */
310 length = MINMATCHLENGTH;
311 while ((length >= MINMATCHLENGTH) & (start > 0)) {
312 length = ZDICT_count(b + pos, b + suffix[start - 1]);
313 if (length >= LLIMIT) length = LLIMIT - 1;
314 lengthList[length]++;
315 if (length >= MINMATCHLENGTH) start--;
316 }
309 length = MINMATCHLENGTH;
310 while ((length >= MINMATCHLENGTH) & (start > 0)) {
311 length = ZDICT_count(b + pos, b + suffix[start - 1]);
312 if (length >= LLIMIT) length = LLIMIT - 1;
313 lengthList[length]++;
314 if (length >= MINMATCHLENGTH) start--;
315 }
317 316
318 317 /* largest useful length */
319 318 memset(cumulLength, 0, sizeof(cumulLength));
@@ -570,7 +569,7 b' static void ZDICT_countEStats(EStats_res'
570 569 if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
571 570 }
572 571 cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
573 if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
572 if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
574 573
575 574 if (cSize) { /* if == 0; block is not compressible */
576 575 const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
@@ -825,6 +824,55 b' static size_t ZDICT_analyzeEntropy(void*'
825 824 }
826 825
827 826
827
828 size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
829 const void* customDictContent, size_t dictContentSize,
830 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
831 ZDICT_params_t params)
832 {
833 size_t hSize;
834 #define HBUFFSIZE 256
835 BYTE header[HBUFFSIZE];
836 int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
837 U32 const notificationLevel = params.notificationLevel;
838
839 /* check conditions */
840 if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
841 if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
842 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
843
844 /* dictionary header */
845 MEM_writeLE32(header, ZSTD_DICT_MAGIC);
846 { U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
847 U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
848 U32 const dictID = params.dictID ? params.dictID : compliantID;
849 MEM_writeLE32(header+4, dictID);
850 }
851 hSize = 8;
852
853 /* entropy tables */
854 DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
855 DISPLAYLEVEL(2, "statistics ... \n");
856 { size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize,
857 compressionLevel,
858 samplesBuffer, samplesSizes, nbSamples,
859 customDictContent, dictContentSize,
860 notificationLevel);
861 if (ZDICT_isError(eSize)) return eSize;
862 hSize += eSize;
863 }
864
865 /* copy elements in final buffer ; note : src and dst buffer can overlap */
866 if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
867 { size_t const dictSize = hSize + dictContentSize;
868 char* dictEnd = (char*)dictBuffer + dictSize;
869 memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
870 memcpy(dictBuffer, header, hSize);
871 return dictSize;
872 }
873 }
874
875
828 876 size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
829 877 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
830 878 ZDICT_params_t params)
@@ -19,15 +19,18 b' extern "C" {'
19 19 #include <stddef.h> /* size_t */
20 20
21 21
22 /*====== Export for Windows ======*/
23 /*!
24 * ZSTD_DLL_EXPORT :
25 * Enable exporting of functions when building a Windows DLL
26 */
27 #if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
28 # define ZDICTLIB_API __declspec(dllexport)
22 /* ===== ZDICTLIB_API : control library symbols visibility ===== */
23 #if defined(__GNUC__) && (__GNUC__ >= 4)
24 # define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
29 25 #else
30 # define ZDICTLIB_API
26 # define ZDICTLIB_VISIBILITY
27 #endif
28 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
29 # define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
30 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
31 # define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
32 #else
33 # define ZDICTLIB_API ZDICTLIB_VISIBILITY
31 34 #endif
32 35
33 36
@@ -79,27 +82,114 b' typedef struct {'
79 82 or an error code, which can be tested by ZDICT_isError().
80 83 note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
81 84 */
82 size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
85 ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
86 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
87 ZDICT_params_t parameters);
88
89 /*! COVER_params_t :
90 For all values 0 means default.
91 kMin and d are the only required parameters.
92 */
93 typedef struct {
94 unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
95 unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
96 unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
97
98 unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
99 unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
100 unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
101 int compressionLevel; /* 0 means default; target a specific zstd compression level */
102 } COVER_params_t;
103
104
105 /*! COVER_trainFromBuffer() :
106 Train a dictionary from an array of samples using the COVER algorithm.
107 Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
108 supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
109 The resulting dictionary will be saved into `dictBuffer`.
110 @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
111 or an error code, which can be tested with ZDICT_isError().
112 Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
113 Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
114 It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
115 In general, it's recommended to provide a few thousands samples, but this can vary a lot.
116 It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
117 */
118 ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
119 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
120 COVER_params_t parameters);
121
122 /*! COVER_optimizeTrainFromBuffer() :
123 The same requirements as above hold for all the parameters except `parameters`.
124 This function tries many parameter combinations and picks the best parameters.
125 `*parameters` is filled with the best parameters found, and the dictionary
126 constructed with those parameters is stored in `dictBuffer`.
127
128 All of the parameters d, k, steps are optional.
129 If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
130 if steps is zero it defaults to its default value.
131 If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
132
133 @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
134 or an error code, which can be tested with ZDICT_isError().
135 On success `*parameters` contains the parameters selected.
136 Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
137 */
138 ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
139 const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
140 COVER_params_t *parameters);
141
142 /*! ZDICT_finalizeDictionary() :
143
144 Given a custom content as a basis for dictionary, and a set of samples,
145 finalize dictionary by adding headers and statistics.
146
147 Samples must be stored concatenated in a flat buffer `samplesBuffer`,
148 supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
149
150 dictContentSize must be > ZDICT_CONTENTSIZE_MIN bytes.
151 maxDictSize must be >= dictContentSize, and must be > ZDICT_DICTSIZE_MIN bytes.
152
153 @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
154 or an error code, which can be tested by ZDICT_isError().
155 note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
156 note 2 : dictBuffer and customDictContent can overlap
157 */
158 #define ZDICT_CONTENTSIZE_MIN 256
159 #define ZDICT_DICTSIZE_MIN 512
160 ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
161 const void* customDictContent, size_t dictContentSize,
83 162 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
84 163 ZDICT_params_t parameters);
85 164
86 165
87 /*! ZDICT_addEntropyTablesFromBuffer() :
88
89 Given a content-only dictionary (built using any 3rd party algorithm),
90 add entropy tables computed from an array of samples.
91 Samples must be stored concatenated in a flat buffer `samplesBuffer`,
92 supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
93 166
94 The input dictionary content must be stored *at the end* of `dictBuffer`.
95 Its size is `dictContentSize`.
96 The resulting dictionary with added entropy tables will be *written back to `dictBuffer`*,
97 starting from its beginning.
98 @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`).
99 */
167 /* Deprecation warnings */
168 /* It is generally possible to disable deprecation warnings from compiler,
169 for example with -Wno-deprecated-declarations for gcc
170 or _CRT_SECURE_NO_WARNINGS in Visual.
171 Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
172 #ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
173 # define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */
174 #else
175 # define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
176 # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
177 # define ZDICT_DEPRECATED(message) ZDICTLIB_API [[deprecated(message)]]
178 # elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
179 # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
180 # elif (ZDICT_GCC_VERSION >= 301)
181 # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
182 # elif defined(_MSC_VER)
183 # define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
184 # else
185 # pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
186 # define ZDICT_DEPRECATED(message) ZDICTLIB_API
187 # endif
188 #endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
189
190 ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
100 191 size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
101 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
102
192 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
103 193
104 194
105 195 #endif /* ZDICT_STATIC_LINKING_ONLY */
@@ -20,13 +20,16 b' extern "C" {'
20 20
21 21 /* ===== ZSTDLIB_API : control library symbols visibility ===== */
22 22 #if defined(__GNUC__) && (__GNUC__ >= 4)
23 # define ZSTDLIB_API __attribute__ ((visibility ("default")))
24 #elif defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
25 # define ZSTDLIB_API __declspec(dllexport)
23 # define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default")))
24 #else
25 # define ZSTDLIB_VISIBILITY
26 #endif
27 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
28 # define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY
26 29 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
27 # define ZSTDLIB_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
30 # define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
28 31 #else
29 # define ZSTDLIB_API
32 # define ZSTDLIB_API ZSTDLIB_VISIBILITY
30 33 #endif
31 34
32 35
@@ -53,7 +56,7 b' extern "C" {'
53 56 /*------ Version ------*/
54 57 #define ZSTD_VERSION_MAJOR 1
55 58 #define ZSTD_VERSION_MINOR 1
56 #define ZSTD_VERSION_RELEASE 2
59 #define ZSTD_VERSION_RELEASE 3
57 60
58 61 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
59 62 #define ZSTD_QUOTE(str) #str
@@ -170,8 +173,8 b' typedef struct ZSTD_CDict_s ZSTD_CDict;'
170 173 * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
171 174 * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
172 175 * ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only.
173 * `dict` can be released after ZSTD_CDict creation. */
174 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel);
176 * `dictBuffer` can be released after ZSTD_CDict creation, as its content is copied within CDict */
177 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel);
175 178
176 179 /*! ZSTD_freeCDict() :
177 180 * Function frees memory allocated by ZSTD_createCDict(). */
@@ -191,8 +194,8 b' typedef struct ZSTD_DDict_s ZSTD_DDict;'
191 194
192 195 /*! ZSTD_createDDict() :
193 196 * Create a digested dictionary, ready to start decompression operation without startup delay.
194 * `dict` can be released after creation. */
195 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize);
197 * dictBuffer can be released after DDict creation, as its content is copied inside DDict */
198 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
196 199
197 200 /*! ZSTD_freeDDict() :
198 201 * Function frees memory allocated with ZSTD_createDDict() */
@@ -325,7 +328,7 b' ZSTDLIB_API size_t ZSTD_DStreamOutSize(v'
325 328 * ***************************************************************************************/
326 329
327 330 /* --- Constants ---*/
328 #define ZSTD_MAGICNUMBER 0xFD2FB528 /* v0.8 */
331 #define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */
329 332 #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U
330 333
331 334 #define ZSTD_WINDOWLOG_MAX_32 25
@@ -345,8 +348,9 b' ZSTDLIB_API size_t ZSTD_DStreamOutSize(v'
345 348 #define ZSTD_TARGETLENGTH_MAX 999
346 349
347 350 #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */
351 #define ZSTD_FRAMEHEADERSIZE_MIN 6
348 352 static const size_t ZSTD_frameHeaderSize_prefix = 5;
349 static const size_t ZSTD_frameHeaderSize_min = 6;
353 static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN;
350 354 static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
351 355 static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */
352 356
@@ -365,9 +369,9 b' typedef struct {'
365 369 } ZSTD_compressionParameters;
366 370
367 371 typedef struct {
368 unsigned contentSizeFlag; /**< 1: content size will be in frame header (if known). */
369 unsigned checksumFlag; /**< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
370 unsigned noDictIDFlag; /**< 1: no dict ID will be saved into frame header (if dictionary compression) */
372 unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */
373 unsigned checksumFlag; /**< 1: generate a 32-bits checksum at end of frame, for error detection */
374 unsigned noDictIDFlag; /**< 1: no dictID will be saved into frame header (if dictionary compression) */
371 375 } ZSTD_frameParameters;
372 376
373 377 typedef struct {
@@ -397,9 +401,23 b' ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_a'
397 401 * Gives the amount of memory used by a given ZSTD_CCtx */
398 402 ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
399 403
404 typedef enum {
405 ZSTD_p_forceWindow /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0)*/
406 } ZSTD_CCtxParameter;
407 /*! ZSTD_setCCtxParameter() :
408 * Set advanced parameters, selected through enum ZSTD_CCtxParameter
409 * @result : 0, or an error code (which can be tested with ZSTD_isError()) */
410 ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value);
411
412 /*! ZSTD_createCDict_byReference() :
413 * Create a digested dictionary for compression
414 * Dictionary content is simply referenced, and therefore stays in dictBuffer.
415 * It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */
416 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
417
400 418 /*! ZSTD_createCDict_advanced() :
401 419 * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
402 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
420 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference,
403 421 ZSTD_parameters params, ZSTD_customMem customMem);
404 422
405 423 /*! ZSTD_sizeof_CDict() :
@@ -455,6 +473,15 b' ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_a'
455 473 * Gives the amount of memory used by a given ZSTD_DCtx */
456 474 ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
457 475
476 /*! ZSTD_createDDict_byReference() :
477 * Create a digested dictionary, ready to start decompression operation without startup delay.
478 * Dictionary content is simply referenced, and therefore stays in dictBuffer.
479 * It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */
480 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
481
482 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
483 unsigned byReference, ZSTD_customMem customMem);
484
458 485 /*! ZSTD_sizeof_DDict() :
459 486 * Gives the amount of memory used by a given ZSTD_DDict */
460 487 ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
@@ -463,13 +490,13 b' ZSTDLIB_API size_t ZSTD_sizeof_DDict(con'
463 490 * Provides the dictID stored within dictionary.
464 491 * if @return == 0, the dictionary is not conformant with Zstandard specification.
465 492 * It can still be loaded, but as a content-only dictionary. */
466 unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
493 ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
467 494
468 495 /*! ZSTD_getDictID_fromDDict() :
469 496 * Provides the dictID of the dictionary loaded into `ddict`.
470 497 * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
471 498 * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
472 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
499 ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
473 500
474 501 /*! ZSTD_getDictID_fromFrame() :
475 502 * Provides the dictID required to decompressed the frame stored within `src`.
@@ -481,7 +508,7 b' unsigned ZSTD_getDictID_fromDDict(const '
481 508 * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
482 509 * - This is not a Zstandard frame.
483 510 * When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */
484 unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
511 ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
485 512
486 513
487 514 /********************************************************************
@@ -491,7 +518,7 b' unsigned ZSTD_getDictID_fromFrame(const '
491 518 /*===== Advanced Streaming compression functions =====*/
492 519 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
493 520 ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct */
494 ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
521 ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
495 522 ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
496 523 ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */
497 524 ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */
@@ -500,9 +527,9 b' ZSTDLIB_API size_t ZSTD_sizeof_CStream(c'
500 527
501 528
502 529 /*===== Advanced Streaming decompression functions =====*/
503 typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e;
530 typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
504 531 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
505 ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
532 ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
506 533 ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
507 534 ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */
508 535 ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */
@@ -542,10 +569,10 b' ZSTDLIB_API size_t ZSTD_sizeof_DStream(c'
542 569 In which case, it will "discard" the relevant memory section from its history.
543 570
544 571 Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
545 It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame,
546 Without last block mark, frames will be considered unfinished (broken) by decoders.
572 It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
573 Without last block mark, frames will be considered unfinished (corrupted) by decoders.
547 574
548 You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame.
575 `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame.
549 576 */
550 577
551 578 /*===== Buffer-less streaming compression functions =====*/
@@ -553,6 +580,7 b' ZSTDLIB_API size_t ZSTD_compressBegin(ZS'
553 580 ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
554 581 ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
555 582 ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
583 ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize);
556 584 ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
557 585 ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
558 586
This diff has been collapsed as it changes many lines, (1233 lines changed) Show them Hide them
@@ -8,145 +8,1250 b''
8 8
9 9 from __future__ import absolute_import, unicode_literals
10 10
11 import io
11 import os
12 import sys
12 13
13 14 from _zstd_cffi import (
14 15 ffi,
15 16 lib,
16 17 )
17 18
19 if sys.version_info[0] == 2:
20 bytes_type = str
21 int_type = long
22 else:
23 bytes_type = bytes
24 int_type = int
18 25
19 _CSTREAM_IN_SIZE = lib.ZSTD_CStreamInSize()
20 _CSTREAM_OUT_SIZE = lib.ZSTD_CStreamOutSize()
26
27 COMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_CStreamInSize()
28 COMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_CStreamOutSize()
29 DECOMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_DStreamInSize()
30 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_DStreamOutSize()
31
32 new_nonzero = ffi.new_allocator(should_clear_after_alloc=False)
33
34
35 MAX_COMPRESSION_LEVEL = lib.ZSTD_maxCLevel()
36 MAGIC_NUMBER = lib.ZSTD_MAGICNUMBER
37 FRAME_HEADER = b'\x28\xb5\x2f\xfd'
38 ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE)
39
40 WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN
41 WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX
42 CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN
43 CHAINLOG_MAX = lib.ZSTD_CHAINLOG_MAX
44 HASHLOG_MIN = lib.ZSTD_HASHLOG_MIN
45 HASHLOG_MAX = lib.ZSTD_HASHLOG_MAX
46 HASHLOG3_MAX = lib.ZSTD_HASHLOG3_MAX
47 SEARCHLOG_MIN = lib.ZSTD_SEARCHLOG_MIN
48 SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX
49 SEARCHLENGTH_MIN = lib.ZSTD_SEARCHLENGTH_MIN
50 SEARCHLENGTH_MAX = lib.ZSTD_SEARCHLENGTH_MAX
51 TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN
52 TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX
53
54 STRATEGY_FAST = lib.ZSTD_fast
55 STRATEGY_DFAST = lib.ZSTD_dfast
56 STRATEGY_GREEDY = lib.ZSTD_greedy
57 STRATEGY_LAZY = lib.ZSTD_lazy
58 STRATEGY_LAZY2 = lib.ZSTD_lazy2
59 STRATEGY_BTLAZY2 = lib.ZSTD_btlazy2
60 STRATEGY_BTOPT = lib.ZSTD_btopt
61
62 COMPRESSOBJ_FLUSH_FINISH = 0
63 COMPRESSOBJ_FLUSH_BLOCK = 1
64
65
66 def _cpu_count():
67 # os.cpu_count() was introducd in Python 3.4.
68 try:
69 return os.cpu_count() or 0
70 except AttributeError:
71 pass
72
73 # Linux.
74 try:
75 if sys.version_info[0] == 2:
76 return os.sysconf(b'SC_NPROCESSORS_ONLN')
77 else:
78 return os.sysconf(u'SC_NPROCESSORS_ONLN')
79 except (AttributeError, ValueError):
80 pass
81
82 # TODO implement on other platforms.
83 return 0
84
85
86 class ZstdError(Exception):
87 pass
21 88
22 89
23 class _ZstdCompressionWriter(object):
24 def __init__(self, cstream, writer):
25 self._cstream = cstream
90 class CompressionParameters(object):
91 def __init__(self, window_log, chain_log, hash_log, search_log,
92 search_length, target_length, strategy):
93 if window_log < WINDOWLOG_MIN or window_log > WINDOWLOG_MAX:
94 raise ValueError('invalid window log value')
95
96 if chain_log < CHAINLOG_MIN or chain_log > CHAINLOG_MAX:
97 raise ValueError('invalid chain log value')
98
99 if hash_log < HASHLOG_MIN or hash_log > HASHLOG_MAX:
100 raise ValueError('invalid hash log value')
101
102 if search_log < SEARCHLOG_MIN or search_log > SEARCHLOG_MAX:
103 raise ValueError('invalid search log value')
104
105 if search_length < SEARCHLENGTH_MIN or search_length > SEARCHLENGTH_MAX:
106 raise ValueError('invalid search length value')
107
108 if target_length < TARGETLENGTH_MIN or target_length > TARGETLENGTH_MAX:
109 raise ValueError('invalid target length value')
110
111 if strategy < STRATEGY_FAST or strategy > STRATEGY_BTOPT:
112 raise ValueError('invalid strategy value')
113
114 self.window_log = window_log
115 self.chain_log = chain_log
116 self.hash_log = hash_log
117 self.search_log = search_log
118 self.search_length = search_length
119 self.target_length = target_length
120 self.strategy = strategy
121
122 zresult = lib.ZSTD_checkCParams(self.as_compression_parameters())
123 if lib.ZSTD_isError(zresult):
124 raise ValueError('invalid compression parameters: %s',
125 ffi.string(lib.ZSTD_getErrorName(zresult)))
126
127 def estimated_compression_context_size(self):
128 return lib.ZSTD_estimateCCtxSize(self.as_compression_parameters())
129
130 def as_compression_parameters(self):
131 p = ffi.new('ZSTD_compressionParameters *')[0]
132 p.windowLog = self.window_log
133 p.chainLog = self.chain_log
134 p.hashLog = self.hash_log
135 p.searchLog = self.search_log
136 p.searchLength = self.search_length
137 p.targetLength = self.target_length
138 p.strategy = self.strategy
139
140 return p
141
142 def get_compression_parameters(level, source_size=0, dict_size=0):
143 params = lib.ZSTD_getCParams(level, source_size, dict_size)
144 return CompressionParameters(window_log=params.windowLog,
145 chain_log=params.chainLog,
146 hash_log=params.hashLog,
147 search_log=params.searchLog,
148 search_length=params.searchLength,
149 target_length=params.targetLength,
150 strategy=params.strategy)
151
152
153 def estimate_compression_context_size(params):
154 if not isinstance(params, CompressionParameters):
155 raise ValueError('argument must be a CompressionParameters')
156
157 cparams = params.as_compression_parameters()
158 return lib.ZSTD_estimateCCtxSize(cparams)
159
160
161 def estimate_decompression_context_size():
162 return lib.ZSTD_estimateDCtxSize()
163
164
165 class ZstdCompressionWriter(object):
166 def __init__(self, compressor, writer, source_size, write_size):
167 self._compressor = compressor
26 168 self._writer = writer
169 self._source_size = source_size
170 self._write_size = write_size
171 self._entered = False
172 self._mtcctx = compressor._cctx if compressor._multithreaded else None
27 173
28 174 def __enter__(self):
175 if self._entered:
176 raise ZstdError('cannot __enter__ multiple times')
177
178 if self._mtcctx:
179 self._compressor._init_mtcstream(self._source_size)
180 else:
181 self._compressor._ensure_cstream(self._source_size)
182 self._entered = True
29 183 return self
30 184
31 185 def __exit__(self, exc_type, exc_value, exc_tb):
186 self._entered = False
187
32 188 if not exc_type and not exc_value and not exc_tb:
33 189 out_buffer = ffi.new('ZSTD_outBuffer *')
34 out_buffer.dst = ffi.new('char[]', _CSTREAM_OUT_SIZE)
35 out_buffer.size = _CSTREAM_OUT_SIZE
190 dst_buffer = ffi.new('char[]', self._write_size)
191 out_buffer.dst = dst_buffer
192 out_buffer.size = self._write_size
36 193 out_buffer.pos = 0
37 194
38 195 while True:
39 res = lib.ZSTD_endStream(self._cstream, out_buffer)
40 if lib.ZSTD_isError(res):
41 raise Exception('error ending compression stream: %s' % lib.ZSTD_getErrorName)
196 if self._mtcctx:
197 zresult = lib.ZSTDMT_endStream(self._mtcctx, out_buffer)
198 else:
199 zresult = lib.ZSTD_endStream(self._compressor._cstream, out_buffer)
200 if lib.ZSTD_isError(zresult):
201 raise ZstdError('error ending compression stream: %s' %
202 ffi.string(lib.ZSTD_getErrorName(zresult)))
42 203
43 204 if out_buffer.pos:
44 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
205 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
45 206 out_buffer.pos = 0
46 207
47 if res == 0:
208 if zresult == 0:
48 209 break
49 210
211 self._compressor = None
212
50 213 return False
51 214
215 def memory_size(self):
216 if not self._entered:
217 raise ZstdError('cannot determine size of an inactive compressor; '
218 'call when a context manager is active')
219
220 return lib.ZSTD_sizeof_CStream(self._compressor._cstream)
221
52 222 def write(self, data):
223 if not self._entered:
224 raise ZstdError('write() must be called from an active context '
225 'manager')
226
227 total_write = 0
228
229 data_buffer = ffi.from_buffer(data)
230
231 in_buffer = ffi.new('ZSTD_inBuffer *')
232 in_buffer.src = data_buffer
233 in_buffer.size = len(data_buffer)
234 in_buffer.pos = 0
235
53 236 out_buffer = ffi.new('ZSTD_outBuffer *')
54 out_buffer.dst = ffi.new('char[]', _CSTREAM_OUT_SIZE)
55 out_buffer.size = _CSTREAM_OUT_SIZE
237 dst_buffer = ffi.new('char[]', self._write_size)
238 out_buffer.dst = dst_buffer
239 out_buffer.size = self._write_size
240 out_buffer.pos = 0
241
242 while in_buffer.pos < in_buffer.size:
243 if self._mtcctx:
244 zresult = lib.ZSTDMT_compressStream(self._mtcctx, out_buffer,
245 in_buffer)
246 else:
247 zresult = lib.ZSTD_compressStream(self._compressor._cstream, out_buffer,
248 in_buffer)
249 if lib.ZSTD_isError(zresult):
250 raise ZstdError('zstd compress error: %s' %
251 ffi.string(lib.ZSTD_getErrorName(zresult)))
252
253 if out_buffer.pos:
254 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
255 total_write += out_buffer.pos
256 out_buffer.pos = 0
257
258 return total_write
259
260 def flush(self):
261 if not self._entered:
262 raise ZstdError('flush must be called from an active context manager')
263
264 total_write = 0
265
266 out_buffer = ffi.new('ZSTD_outBuffer *')
267 dst_buffer = ffi.new('char[]', self._write_size)
268 out_buffer.dst = dst_buffer
269 out_buffer.size = self._write_size
56 270 out_buffer.pos = 0
57 271
58 # TODO can we reuse existing memory?
59 in_buffer = ffi.new('ZSTD_inBuffer *')
60 in_buffer.src = ffi.new('char[]', data)
61 in_buffer.size = len(data)
62 in_buffer.pos = 0
63 while in_buffer.pos < in_buffer.size:
64 res = lib.ZSTD_compressStream(self._cstream, out_buffer, in_buffer)
65 if lib.ZSTD_isError(res):
66 raise Exception('zstd compress error: %s' % lib.ZSTD_getErrorName(res))
272 while True:
273 if self._mtcctx:
274 zresult = lib.ZSTDMT_flushStream(self._mtcctx, out_buffer)
275 else:
276 zresult = lib.ZSTD_flushStream(self._compressor._cstream, out_buffer)
277 if lib.ZSTD_isError(zresult):
278 raise ZstdError('zstd compress error: %s' %
279 ffi.string(lib.ZSTD_getErrorName(zresult)))
280
281 if not out_buffer.pos:
282 break
283
284 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
285 total_write += out_buffer.pos
286 out_buffer.pos = 0
287
288 return total_write
289
290
291 class ZstdCompressionObj(object):
292 def compress(self, data):
293 if self._finished:
294 raise ZstdError('cannot call compress() after compressor finished')
295
296 data_buffer = ffi.from_buffer(data)
297 source = ffi.new('ZSTD_inBuffer *')
298 source.src = data_buffer
299 source.size = len(data_buffer)
300 source.pos = 0
301
302 chunks = []
303
304 while source.pos < len(data):
305 if self._mtcctx:
306 zresult = lib.ZSTDMT_compressStream(self._mtcctx,
307 self._out, source)
308 else:
309 zresult = lib.ZSTD_compressStream(self._compressor._cstream, self._out,
310 source)
311 if lib.ZSTD_isError(zresult):
312 raise ZstdError('zstd compress error: %s' %
313 ffi.string(lib.ZSTD_getErrorName(zresult)))
314
315 if self._out.pos:
316 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
317 self._out.pos = 0
318
319 return b''.join(chunks)
67 320
68 if out_buffer.pos:
69 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
70 out_buffer.pos = 0
321 def flush(self, flush_mode=COMPRESSOBJ_FLUSH_FINISH):
322 if flush_mode not in (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK):
323 raise ValueError('flush mode not recognized')
324
325 if self._finished:
326 raise ZstdError('compressor object already finished')
327
328 assert self._out.pos == 0
329
330 if flush_mode == COMPRESSOBJ_FLUSH_BLOCK:
331 if self._mtcctx:
332 zresult = lib.ZSTDMT_flushStream(self._mtcctx, self._out)
333 else:
334 zresult = lib.ZSTD_flushStream(self._compressor._cstream, self._out)
335 if lib.ZSTD_isError(zresult):
336 raise ZstdError('zstd compress error: %s' %
337 ffi.string(lib.ZSTD_getErrorName(zresult)))
338
339 # Output buffer is guaranteed to hold full block.
340 assert zresult == 0
341
342 if self._out.pos:
343 result = ffi.buffer(self._out.dst, self._out.pos)[:]
344 self._out.pos = 0
345 return result
346 else:
347 return b''
348
349 assert flush_mode == COMPRESSOBJ_FLUSH_FINISH
350 self._finished = True
351
352 chunks = []
353
354 while True:
355 if self._mtcctx:
356 zresult = lib.ZSTDMT_endStream(self._mtcctx, self._out)
357 else:
358 zresult = lib.ZSTD_endStream(self._compressor._cstream, self._out)
359 if lib.ZSTD_isError(zresult):
360 raise ZstdError('error ending compression stream: %s' %
361 ffi.string(lib.ZSTD_getErroName(zresult)))
362
363 if self._out.pos:
364 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
365 self._out.pos = 0
366
367 if not zresult:
368 break
369
370 return b''.join(chunks)
71 371
72 372
73 373 class ZstdCompressor(object):
74 def __init__(self, level=3, dict_data=None, compression_params=None):
75 if dict_data:
76 raise Exception('dict_data not yet supported')
77 if compression_params:
78 raise Exception('compression_params not yet supported')
374 def __init__(self, level=3, dict_data=None, compression_params=None,
375 write_checksum=False, write_content_size=False,
376 write_dict_id=True, threads=0):
377 if level < 1:
378 raise ValueError('level must be greater than 0')
379 elif level > lib.ZSTD_maxCLevel():
380 raise ValueError('level must be less than %d' % lib.ZSTD_maxCLevel())
381
382 if threads < 0:
383 threads = _cpu_count()
79 384
80 385 self._compression_level = level
386 self._dict_data = dict_data
387 self._cparams = compression_params
388 self._fparams = ffi.new('ZSTD_frameParameters *')[0]
389 self._fparams.checksumFlag = write_checksum
390 self._fparams.contentSizeFlag = write_content_size
391 self._fparams.noDictIDFlag = not write_dict_id
81 392
82 def compress(self, data):
83 # Just use the stream API for now.
84 output = io.BytesIO()
85 with self.write_to(output) as compressor:
86 compressor.write(data)
87 return output.getvalue()
393 if threads:
394 cctx = lib.ZSTDMT_createCCtx(threads)
395 if cctx == ffi.NULL:
396 raise MemoryError()
397
398 self._cctx = ffi.gc(cctx, lib.ZSTDMT_freeCCtx)
399 self._multithreaded = True
400 else:
401 cctx = lib.ZSTD_createCCtx()
402 if cctx == ffi.NULL:
403 raise MemoryError()
404
405 self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx)
406 self._multithreaded = False
407
408 self._cstream = None
409
410 def compress(self, data, allow_empty=False):
411 if len(data) == 0 and self._fparams.contentSizeFlag and not allow_empty:
412 raise ValueError('cannot write empty inputs when writing content sizes')
413
414 if self._multithreaded and self._dict_data:
415 raise ZstdError('compress() cannot be used with both dictionaries and multi-threaded compression')
416
417 if self._multithreaded and self._cparams:
418 raise ZstdError('compress() cannot be used with both compression parameters and multi-threaded compression')
419
420 # TODO use a CDict for performance.
421 dict_data = ffi.NULL
422 dict_size = 0
423
424 if self._dict_data:
425 dict_data = self._dict_data.as_bytes()
426 dict_size = len(self._dict_data)
427
428 params = ffi.new('ZSTD_parameters *')[0]
429 if self._cparams:
430 params.cParams = self._cparams.as_compression_parameters()
431 else:
432 params.cParams = lib.ZSTD_getCParams(self._compression_level, len(data),
433 dict_size)
434 params.fParams = self._fparams
435
436 dest_size = lib.ZSTD_compressBound(len(data))
437 out = new_nonzero('char[]', dest_size)
88 438
89 def copy_stream(self, ifh, ofh):
90 cstream = self._get_cstream()
439 if self._multithreaded:
440 zresult = lib.ZSTDMT_compressCCtx(self._cctx,
441 ffi.addressof(out), dest_size,
442 data, len(data),
443 self._compression_level)
444 else:
445 zresult = lib.ZSTD_compress_advanced(self._cctx,
446 ffi.addressof(out), dest_size,
447 data, len(data),
448 dict_data, dict_size,
449 params)
450
451 if lib.ZSTD_isError(zresult):
452 raise ZstdError('cannot compress: %s' %
453 ffi.string(lib.ZSTD_getErrorName(zresult)))
454
455 return ffi.buffer(out, zresult)[:]
456
457 def compressobj(self, size=0):
458 if self._multithreaded:
459 self._init_mtcstream(size)
460 else:
461 self._ensure_cstream(size)
462
463 cobj = ZstdCompressionObj()
464 cobj._out = ffi.new('ZSTD_outBuffer *')
465 cobj._dst_buffer = ffi.new('char[]', COMPRESSION_RECOMMENDED_OUTPUT_SIZE)
466 cobj._out.dst = cobj._dst_buffer
467 cobj._out.size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE
468 cobj._out.pos = 0
469 cobj._compressor = self
470 cobj._finished = False
471
472 if self._multithreaded:
473 cobj._mtcctx = self._cctx
474 else:
475 cobj._mtcctx = None
476
477 return cobj
478
479 def copy_stream(self, ifh, ofh, size=0,
480 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
481 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
482
483 if not hasattr(ifh, 'read'):
484 raise ValueError('first argument must have a read() method')
485 if not hasattr(ofh, 'write'):
486 raise ValueError('second argument must have a write() method')
487
488 mt = self._multithreaded
489 if mt:
490 self._init_mtcstream(size)
491 else:
492 self._ensure_cstream(size)
91 493
92 494 in_buffer = ffi.new('ZSTD_inBuffer *')
93 495 out_buffer = ffi.new('ZSTD_outBuffer *')
94 496
95 out_buffer.dst = ffi.new('char[]', _CSTREAM_OUT_SIZE)
96 out_buffer.size = _CSTREAM_OUT_SIZE
497 dst_buffer = ffi.new('char[]', write_size)
498 out_buffer.dst = dst_buffer
499 out_buffer.size = write_size
97 500 out_buffer.pos = 0
98 501
99 502 total_read, total_write = 0, 0
100 503
101 504 while True:
102 data = ifh.read(_CSTREAM_IN_SIZE)
505 data = ifh.read(read_size)
103 506 if not data:
104 507 break
105 508
106 total_read += len(data)
107
108 in_buffer.src = ffi.new('char[]', data)
109 in_buffer.size = len(data)
509 data_buffer = ffi.from_buffer(data)
510 total_read += len(data_buffer)
511 in_buffer.src = data_buffer
512 in_buffer.size = len(data_buffer)
110 513 in_buffer.pos = 0
111 514
112 515 while in_buffer.pos < in_buffer.size:
113 res = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer)
114 if lib.ZSTD_isError(res):
115 raise Exception('zstd compress error: %s' %
116 lib.ZSTD_getErrorName(res))
516 if mt:
517 zresult = lib.ZSTDMT_compressStream(self._cctx, out_buffer, in_buffer)
518 else:
519 zresult = lib.ZSTD_compressStream(self._cstream,
520 out_buffer, in_buffer)
521 if lib.ZSTD_isError(zresult):
522 raise ZstdError('zstd compress error: %s' %
523 ffi.string(lib.ZSTD_getErrorName(zresult)))
117 524
118 525 if out_buffer.pos:
119 526 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
120 total_write = out_buffer.pos
527 total_write += out_buffer.pos
121 528 out_buffer.pos = 0
122 529
123 530 # We've finished reading. Flush the compressor.
124 531 while True:
125 res = lib.ZSTD_endStream(cstream, out_buffer)
126 if lib.ZSTD_isError(res):
127 raise Exception('error ending compression stream: %s' %
128 lib.ZSTD_getErrorName(res))
532 if mt:
533 zresult = lib.ZSTDMT_endStream(self._cctx, out_buffer)
534 else:
535 zresult = lib.ZSTD_endStream(self._cstream, out_buffer)
536 if lib.ZSTD_isError(zresult):
537 raise ZstdError('error ending compression stream: %s' %
538 ffi.string(lib.ZSTD_getErrorName(zresult)))
129 539
130 540 if out_buffer.pos:
131 541 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
132 542 total_write += out_buffer.pos
133 543 out_buffer.pos = 0
134 544
135 if res == 0:
545 if zresult == 0:
136 546 break
137 547
138 548 return total_read, total_write
139 549
140 def write_to(self, writer):
141 return _ZstdCompressionWriter(self._get_cstream(), writer)
550 def write_to(self, writer, size=0,
551 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
552
553 if not hasattr(writer, 'write'):
554 raise ValueError('must pass an object with a write() method')
555
556 return ZstdCompressionWriter(self, writer, size, write_size)
557
558 def read_from(self, reader, size=0,
559 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
560 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
561 if hasattr(reader, 'read'):
562 have_read = True
563 elif hasattr(reader, '__getitem__'):
564 have_read = False
565 buffer_offset = 0
566 size = len(reader)
567 else:
568 raise ValueError('must pass an object with a read() method or '
569 'conforms to buffer protocol')
570
571 if self._multithreaded:
572 self._init_mtcstream(size)
573 else:
574 self._ensure_cstream(size)
575
576 in_buffer = ffi.new('ZSTD_inBuffer *')
577 out_buffer = ffi.new('ZSTD_outBuffer *')
578
579 in_buffer.src = ffi.NULL
580 in_buffer.size = 0
581 in_buffer.pos = 0
582
583 dst_buffer = ffi.new('char[]', write_size)
584 out_buffer.dst = dst_buffer
585 out_buffer.size = write_size
586 out_buffer.pos = 0
587
588 while True:
589 # We should never have output data sitting around after a previous
590 # iteration.
591 assert out_buffer.pos == 0
592
593 # Collect input data.
594 if have_read:
595 read_result = reader.read(read_size)
596 else:
597 remaining = len(reader) - buffer_offset
598 slice_size = min(remaining, read_size)
599 read_result = reader[buffer_offset:buffer_offset + slice_size]
600 buffer_offset += slice_size
601
602 # No new input data. Break out of the read loop.
603 if not read_result:
604 break
142 605
143 def _get_cstream(self):
606 # Feed all read data into the compressor and emit output until
607 # exhausted.
608 read_buffer = ffi.from_buffer(read_result)
609 in_buffer.src = read_buffer
610 in_buffer.size = len(read_buffer)
611 in_buffer.pos = 0
612
613 while in_buffer.pos < in_buffer.size:
614 if self._multithreaded:
615 zresult = lib.ZSTDMT_compressStream(self._cctx, out_buffer, in_buffer)
616 else:
617 zresult = lib.ZSTD_compressStream(self._cstream, out_buffer, in_buffer)
618 if lib.ZSTD_isError(zresult):
619 raise ZstdError('zstd compress error: %s' %
620 ffi.string(lib.ZSTD_getErrorName(zresult)))
621
622 if out_buffer.pos:
623 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
624 out_buffer.pos = 0
625 yield data
626
627 assert out_buffer.pos == 0
628
629 # And repeat the loop to collect more data.
630 continue
631
632 # If we get here, input is exhausted. End the stream and emit what
633 # remains.
634 while True:
635 assert out_buffer.pos == 0
636 if self._multithreaded:
637 zresult = lib.ZSTDMT_endStream(self._cctx, out_buffer)
638 else:
639 zresult = lib.ZSTD_endStream(self._cstream, out_buffer)
640 if lib.ZSTD_isError(zresult):
641 raise ZstdError('error ending compression stream: %s' %
642 ffi.string(lib.ZSTD_getErrorName(zresult)))
643
644 if out_buffer.pos:
645 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
646 out_buffer.pos = 0
647 yield data
648
649 if zresult == 0:
650 break
651
652 def _ensure_cstream(self, size):
653 if self._cstream:
654 zresult = lib.ZSTD_resetCStream(self._cstream, size)
655 if lib.ZSTD_isError(zresult):
656 raise ZstdError('could not reset CStream: %s' %
657 ffi.string(lib.ZSTD_getErrorName(zresult)))
658
659 return
660
144 661 cstream = lib.ZSTD_createCStream()
662 if cstream == ffi.NULL:
663 raise MemoryError()
664
145 665 cstream = ffi.gc(cstream, lib.ZSTD_freeCStream)
146 666
147 res = lib.ZSTD_initCStream(cstream, self._compression_level)
148 if lib.ZSTD_isError(res):
667 dict_data = ffi.NULL
668 dict_size = 0
669 if self._dict_data:
670 dict_data = self._dict_data.as_bytes()
671 dict_size = len(self._dict_data)
672
673 zparams = ffi.new('ZSTD_parameters *')[0]
674 if self._cparams:
675 zparams.cParams = self._cparams.as_compression_parameters()
676 else:
677 zparams.cParams = lib.ZSTD_getCParams(self._compression_level,
678 size, dict_size)
679 zparams.fParams = self._fparams
680
681 zresult = lib.ZSTD_initCStream_advanced(cstream, dict_data, dict_size,
682 zparams, size)
683 if lib.ZSTD_isError(zresult):
149 684 raise Exception('cannot init CStream: %s' %
150 lib.ZSTD_getErrorName(res))
685 ffi.string(lib.ZSTD_getErrorName(zresult)))
686
687 self._cstream = cstream
688
689 def _init_mtcstream(self, size):
690 assert self._multithreaded
691
692 dict_data = ffi.NULL
693 dict_size = 0
694 if self._dict_data:
695 dict_data = self._dict_data.as_bytes()
696 dict_size = len(self._dict_data)
697
698 zparams = ffi.new('ZSTD_parameters *')[0]
699 if self._cparams:
700 zparams.cParams = self._cparams.as_compression_parameters()
701 else:
702 zparams.cParams = lib.ZSTD_getCParams(self._compression_level,
703 size, dict_size)
704
705 zparams.fParams = self._fparams
706
707 zresult = lib.ZSTDMT_initCStream_advanced(self._cctx, dict_data, dict_size,
708 zparams, size)
709
710 if lib.ZSTD_isError(zresult):
711 raise ZstdError('cannot init CStream: %s' %
712 ffi.string(lib.ZSTD_getErrorName(zresult)))
713
714
715 class FrameParameters(object):
716 def __init__(self, fparams):
717 self.content_size = fparams.frameContentSize
718 self.window_size = fparams.windowSize
719 self.dict_id = fparams.dictID
720 self.has_checksum = bool(fparams.checksumFlag)
721
722
723 def get_frame_parameters(data):
724 if not isinstance(data, bytes_type):
725 raise TypeError('argument must be bytes')
726
727 params = ffi.new('ZSTD_frameParams *')
728
729 zresult = lib.ZSTD_getFrameParams(params, data, len(data))
730 if lib.ZSTD_isError(zresult):
731 raise ZstdError('cannot get frame parameters: %s' %
732 ffi.string(lib.ZSTD_getErrorName(zresult)))
733
734 if zresult:
735 raise ZstdError('not enough data for frame parameters; need %d bytes' %
736 zresult)
737
738 return FrameParameters(params[0])
739
740
741 class ZstdCompressionDict(object):
742 def __init__(self, data, k=0, d=0):
743 assert isinstance(data, bytes_type)
744 self._data = data
745 self.k = k
746 self.d = d
747
748 def __len__(self):
749 return len(self._data)
750
751 def dict_id(self):
752 return int_type(lib.ZDICT_getDictID(self._data, len(self._data)))
753
754 def as_bytes(self):
755 return self._data
756
757
758 def train_dictionary(dict_size, samples, selectivity=0, level=0,
759 notifications=0, dict_id=0):
760 if not isinstance(samples, list):
761 raise TypeError('samples must be a list')
762
763 total_size = sum(map(len, samples))
764
765 samples_buffer = new_nonzero('char[]', total_size)
766 sample_sizes = new_nonzero('size_t[]', len(samples))
767
768 offset = 0
769 for i, sample in enumerate(samples):
770 if not isinstance(sample, bytes_type):
771 raise ValueError('samples must be bytes')
772
773 l = len(sample)
774 ffi.memmove(samples_buffer + offset, sample, l)
775 offset += l
776 sample_sizes[i] = l
777
778 dict_data = new_nonzero('char[]', dict_size)
779
780 dparams = ffi.new('ZDICT_params_t *')[0]
781 dparams.selectivityLevel = selectivity
782 dparams.compressionLevel = level
783 dparams.notificationLevel = notifications
784 dparams.dictID = dict_id
785
786 zresult = lib.ZDICT_trainFromBuffer_advanced(
787 ffi.addressof(dict_data), dict_size,
788 ffi.addressof(samples_buffer),
789 ffi.addressof(sample_sizes, 0), len(samples),
790 dparams)
791
792 if lib.ZDICT_isError(zresult):
793 raise ZstdError('Cannot train dict: %s' %
794 ffi.string(lib.ZDICT_getErrorName(zresult)))
795
796 return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:])
797
798
799 def train_cover_dictionary(dict_size, samples, k=0, d=0,
800 notifications=0, dict_id=0, level=0, optimize=False,
801 steps=0, threads=0):
802 if not isinstance(samples, list):
803 raise TypeError('samples must be a list')
804
805 if threads < 0:
806 threads = _cpu_count()
807
808 total_size = sum(map(len, samples))
809
810 samples_buffer = new_nonzero('char[]', total_size)
811 sample_sizes = new_nonzero('size_t[]', len(samples))
812
813 offset = 0
814 for i, sample in enumerate(samples):
815 if not isinstance(sample, bytes_type):
816 raise ValueError('samples must be bytes')
817
818 l = len(sample)
819 ffi.memmove(samples_buffer + offset, sample, l)
820 offset += l
821 sample_sizes[i] = l
822
823 dict_data = new_nonzero('char[]', dict_size)
824
825 dparams = ffi.new('COVER_params_t *')[0]
826 dparams.k = k
827 dparams.d = d
828 dparams.steps = steps
829 dparams.nbThreads = threads
830 dparams.notificationLevel = notifications
831 dparams.dictID = dict_id
832 dparams.compressionLevel = level
833
834 if optimize:
835 zresult = lib.COVER_optimizeTrainFromBuffer(
836 ffi.addressof(dict_data), dict_size,
837 ffi.addressof(samples_buffer),
838 ffi.addressof(sample_sizes, 0), len(samples),
839 ffi.addressof(dparams))
840 else:
841 zresult = lib.COVER_trainFromBuffer(
842 ffi.addressof(dict_data), dict_size,
843 ffi.addressof(samples_buffer),
844 ffi.addressof(sample_sizes, 0), len(samples),
845 dparams)
846
847 if lib.ZDICT_isError(zresult):
848 raise ZstdError('cannot train dict: %s' %
849 ffi.string(lib.ZDICT_getErrorName(zresult)))
850
851 return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:],
852 k=dparams.k, d=dparams.d)
853
854
855 class ZstdDecompressionObj(object):
856 def __init__(self, decompressor):
857 self._decompressor = decompressor
858 self._finished = False
859
860 def decompress(self, data):
861 if self._finished:
862 raise ZstdError('cannot use a decompressobj multiple times')
863
864 assert(self._decompressor._dstream)
865
866 in_buffer = ffi.new('ZSTD_inBuffer *')
867 out_buffer = ffi.new('ZSTD_outBuffer *')
868
869 data_buffer = ffi.from_buffer(data)
870 in_buffer.src = data_buffer
871 in_buffer.size = len(data_buffer)
872 in_buffer.pos = 0
873
874 dst_buffer = ffi.new('char[]', DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
875 out_buffer.dst = dst_buffer
876 out_buffer.size = len(dst_buffer)
877 out_buffer.pos = 0
878
879 chunks = []
880
881 while in_buffer.pos < in_buffer.size:
882 zresult = lib.ZSTD_decompressStream(self._decompressor._dstream,
883 out_buffer, in_buffer)
884 if lib.ZSTD_isError(zresult):
885 raise ZstdError('zstd decompressor error: %s' %
886 ffi.string(lib.ZSTD_getErrorName(zresult)))
887
888 if zresult == 0:
889 self._finished = True
890 self._decompressor = None
891
892 if out_buffer.pos:
893 chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
894 out_buffer.pos = 0
895
896 return b''.join(chunks)
897
898
899 class ZstdDecompressionWriter(object):
900 def __init__(self, decompressor, writer, write_size):
901 self._decompressor = decompressor
902 self._writer = writer
903 self._write_size = write_size
904 self._entered = False
905
906 def __enter__(self):
907 if self._entered:
908 raise ZstdError('cannot __enter__ multiple times')
909
910 self._decompressor._ensure_dstream()
911 self._entered = True
912
913 return self
914
915 def __exit__(self, exc_type, exc_value, exc_tb):
916 self._entered = False
917
918 def memory_size(self):
919 if not self._decompressor._dstream:
920 raise ZstdError('cannot determine size of inactive decompressor '
921 'call when context manager is active')
922
923 return lib.ZSTD_sizeof_DStream(self._decompressor._dstream)
924
925 def write(self, data):
926 if not self._entered:
927 raise ZstdError('write must be called from an active context manager')
928
929 total_write = 0
930
931 in_buffer = ffi.new('ZSTD_inBuffer *')
932 out_buffer = ffi.new('ZSTD_outBuffer *')
933
934 data_buffer = ffi.from_buffer(data)
935 in_buffer.src = data_buffer
936 in_buffer.size = len(data_buffer)
937 in_buffer.pos = 0
938
939 dst_buffer = ffi.new('char[]', self._write_size)
940 out_buffer.dst = dst_buffer
941 out_buffer.size = len(dst_buffer)
942 out_buffer.pos = 0
943
944 dstream = self._decompressor._dstream
945
946 while in_buffer.pos < in_buffer.size:
947 zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer)
948 if lib.ZSTD_isError(zresult):
949 raise ZstdError('zstd decompress error: %s' %
950 ffi.string(lib.ZSTD_getErrorName(zresult)))
951
952 if out_buffer.pos:
953 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
954 total_write += out_buffer.pos
955 out_buffer.pos = 0
956
957 return total_write
958
959
960 class ZstdDecompressor(object):
961 def __init__(self, dict_data=None):
962 self._dict_data = dict_data
963
964 dctx = lib.ZSTD_createDCtx()
965 if dctx == ffi.NULL:
966 raise MemoryError()
967
968 self._refdctx = ffi.gc(dctx, lib.ZSTD_freeDCtx)
969 self._dstream = None
151 970
152 return cstream
971 @property
972 def _ddict(self):
973 if self._dict_data:
974 dict_data = self._dict_data.as_bytes()
975 dict_size = len(self._dict_data)
976
977 ddict = lib.ZSTD_createDDict(dict_data, dict_size)
978 if ddict == ffi.NULL:
979 raise ZstdError('could not create decompression dict')
980 else:
981 ddict = None
982
983 self.__dict__['_ddict'] = ddict
984 return ddict
985
986 def decompress(self, data, max_output_size=0):
987 data_buffer = ffi.from_buffer(data)
988
989 orig_dctx = new_nonzero('char[]', lib.ZSTD_sizeof_DCtx(self._refdctx))
990 dctx = ffi.cast('ZSTD_DCtx *', orig_dctx)
991 lib.ZSTD_copyDCtx(dctx, self._refdctx)
992
993 ddict = self._ddict
994
995 output_size = lib.ZSTD_getDecompressedSize(data_buffer, len(data_buffer))
996 if output_size:
997 result_buffer = ffi.new('char[]', output_size)
998 result_size = output_size
999 else:
1000 if not max_output_size:
1001 raise ZstdError('input data invalid or missing content size '
1002 'in frame header')
1003
1004 result_buffer = ffi.new('char[]', max_output_size)
1005 result_size = max_output_size
1006
1007 if ddict:
1008 zresult = lib.ZSTD_decompress_usingDDict(dctx,
1009 result_buffer, result_size,
1010 data_buffer, len(data_buffer),
1011 ddict)
1012 else:
1013 zresult = lib.ZSTD_decompressDCtx(dctx,
1014 result_buffer, result_size,
1015 data_buffer, len(data_buffer))
1016 if lib.ZSTD_isError(zresult):
1017 raise ZstdError('decompression error: %s' %
1018 ffi.string(lib.ZSTD_getErrorName(zresult)))
1019 elif output_size and zresult != output_size:
1020 raise ZstdError('decompression error: decompressed %d bytes; expected %d' %
1021 (zresult, output_size))
1022
1023 return ffi.buffer(result_buffer, zresult)[:]
1024
1025 def decompressobj(self):
1026 self._ensure_dstream()
1027 return ZstdDecompressionObj(self)
1028
1029 def read_from(self, reader, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
1030 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
1031 skip_bytes=0):
1032 if skip_bytes >= read_size:
1033 raise ValueError('skip_bytes must be smaller than read_size')
1034
1035 if hasattr(reader, 'read'):
1036 have_read = True
1037 elif hasattr(reader, '__getitem__'):
1038 have_read = False
1039 buffer_offset = 0
1040 size = len(reader)
1041 else:
1042 raise ValueError('must pass an object with a read() method or '
1043 'conforms to buffer protocol')
1044
1045 if skip_bytes:
1046 if have_read:
1047 reader.read(skip_bytes)
1048 else:
1049 if skip_bytes > size:
1050 raise ValueError('skip_bytes larger than first input chunk')
1051
1052 buffer_offset = skip_bytes
1053
1054 self._ensure_dstream()
1055
1056 in_buffer = ffi.new('ZSTD_inBuffer *')
1057 out_buffer = ffi.new('ZSTD_outBuffer *')
1058
1059 dst_buffer = ffi.new('char[]', write_size)
1060 out_buffer.dst = dst_buffer
1061 out_buffer.size = len(dst_buffer)
1062 out_buffer.pos = 0
1063
1064 while True:
1065 assert out_buffer.pos == 0
1066
1067 if have_read:
1068 read_result = reader.read(read_size)
1069 else:
1070 remaining = size - buffer_offset
1071 slice_size = min(remaining, read_size)
1072 read_result = reader[buffer_offset:buffer_offset + slice_size]
1073 buffer_offset += slice_size
1074
1075 # No new input. Break out of read loop.
1076 if not read_result:
1077 break
1078
1079 # Feed all read data into decompressor and emit output until
1080 # exhausted.
1081 read_buffer = ffi.from_buffer(read_result)
1082 in_buffer.src = read_buffer
1083 in_buffer.size = len(read_buffer)
1084 in_buffer.pos = 0
1085
1086 while in_buffer.pos < in_buffer.size:
1087 assert out_buffer.pos == 0
1088
1089 zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
1090 if lib.ZSTD_isError(zresult):
1091 raise ZstdError('zstd decompress error: %s' %
1092 ffi.string(lib.ZSTD_getErrorName(zresult)))
1093
1094 if out_buffer.pos:
1095 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1096 out_buffer.pos = 0
1097 yield data
1098
1099 if zresult == 0:
1100 return
1101
1102 # Repeat loop to collect more input data.
1103 continue
1104
1105 # If we get here, input is exhausted.
1106
1107 def write_to(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1108 if not hasattr(writer, 'write'):
1109 raise ValueError('must pass an object with a write() method')
1110
1111 return ZstdDecompressionWriter(self, writer, write_size)
1112
1113 def copy_stream(self, ifh, ofh,
1114 read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
1115 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1116 if not hasattr(ifh, 'read'):
1117 raise ValueError('first argument must have a read() method')
1118 if not hasattr(ofh, 'write'):
1119 raise ValueError('second argument must have a write() method')
1120
1121 self._ensure_dstream()
1122
1123 in_buffer = ffi.new('ZSTD_inBuffer *')
1124 out_buffer = ffi.new('ZSTD_outBuffer *')
1125
1126 dst_buffer = ffi.new('char[]', write_size)
1127 out_buffer.dst = dst_buffer
1128 out_buffer.size = write_size
1129 out_buffer.pos = 0
1130
1131 total_read, total_write = 0, 0
1132
1133 # Read all available input.
1134 while True:
1135 data = ifh.read(read_size)
1136 if not data:
1137 break
1138
1139 data_buffer = ffi.from_buffer(data)
1140 total_read += len(data_buffer)
1141 in_buffer.src = data_buffer
1142 in_buffer.size = len(data_buffer)
1143 in_buffer.pos = 0
1144
1145 # Flush all read data to output.
1146 while in_buffer.pos < in_buffer.size:
1147 zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
1148 if lib.ZSTD_isError(zresult):
1149 raise ZstdError('zstd decompressor error: %s' %
1150 ffi.string(lib.ZSTD_getErrorName(zresult)))
1151
1152 if out_buffer.pos:
1153 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
1154 total_write += out_buffer.pos
1155 out_buffer.pos = 0
1156
1157 # Continue loop to keep reading.
1158
1159 return total_read, total_write
1160
1161 def decompress_content_dict_chain(self, frames):
1162 if not isinstance(frames, list):
1163 raise TypeError('argument must be a list')
1164
1165 if not frames:
1166 raise ValueError('empty input chain')
1167
1168 # First chunk should not be using a dictionary. We handle it specially.
1169 chunk = frames[0]
1170 if not isinstance(chunk, bytes_type):
1171 raise ValueError('chunk 0 must be bytes')
1172
1173 # All chunks should be zstd frames and should have content size set.
1174 chunk_buffer = ffi.from_buffer(chunk)
1175 params = ffi.new('ZSTD_frameParams *')
1176 zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer))
1177 if lib.ZSTD_isError(zresult):
1178 raise ValueError('chunk 0 is not a valid zstd frame')
1179 elif zresult:
1180 raise ValueError('chunk 0 is too small to contain a zstd frame')
1181
1182 if not params.frameContentSize:
1183 raise ValueError('chunk 0 missing content size in frame')
1184
1185 dctx = lib.ZSTD_createDCtx()
1186 if dctx == ffi.NULL:
1187 raise MemoryError()
1188
1189 dctx = ffi.gc(dctx, lib.ZSTD_freeDCtx)
1190
1191 last_buffer = ffi.new('char[]', params.frameContentSize)
1192
1193 zresult = lib.ZSTD_decompressDCtx(dctx, last_buffer, len(last_buffer),
1194 chunk_buffer, len(chunk_buffer))
1195 if lib.ZSTD_isError(zresult):
1196 raise ZstdError('could not decompress chunk 0: %s' %
1197 ffi.string(lib.ZSTD_getErrorName(zresult)))
1198
1199 # Special case of chain length of 1
1200 if len(frames) == 1:
1201 return ffi.buffer(last_buffer, len(last_buffer))[:]
1202
1203 i = 1
1204 while i < len(frames):
1205 chunk = frames[i]
1206 if not isinstance(chunk, bytes_type):
1207 raise ValueError('chunk %d must be bytes' % i)
1208
1209 chunk_buffer = ffi.from_buffer(chunk)
1210 zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer))
1211 if lib.ZSTD_isError(zresult):
1212 raise ValueError('chunk %d is not a valid zstd frame' % i)
1213 elif zresult:
1214 raise ValueError('chunk %d is too small to contain a zstd frame' % i)
1215
1216 if not params.frameContentSize:
1217 raise ValueError('chunk %d missing content size in frame' % i)
1218
1219 dest_buffer = ffi.new('char[]', params.frameContentSize)
1220
1221 zresult = lib.ZSTD_decompress_usingDict(dctx, dest_buffer, len(dest_buffer),
1222 chunk_buffer, len(chunk_buffer),
1223 last_buffer, len(last_buffer))
1224 if lib.ZSTD_isError(zresult):
1225 raise ZstdError('could not decompress chunk %d' % i)
1226
1227 last_buffer = dest_buffer
1228 i += 1
1229
1230 return ffi.buffer(last_buffer, len(last_buffer))[:]
1231
1232 def _ensure_dstream(self):
1233 if self._dstream:
1234 zresult = lib.ZSTD_resetDStream(self._dstream)
1235 if lib.ZSTD_isError(zresult):
1236 raise ZstdError('could not reset DStream: %s' %
1237 ffi.string(lib.ZSTD_getErrorName(zresult)))
1238
1239 return
1240
1241 self._dstream = lib.ZSTD_createDStream()
1242 if self._dstream == ffi.NULL:
1243 raise MemoryError()
1244
1245 self._dstream = ffi.gc(self._dstream, lib.ZSTD_freeDStream)
1246
1247 if self._dict_data:
1248 zresult = lib.ZSTD_initDStream_usingDict(self._dstream,
1249 self._dict_data.as_bytes(),
1250 len(self._dict_data))
1251 else:
1252 zresult = lib.ZSTD_initDStream(self._dstream)
1253
1254 if lib.ZSTD_isError(zresult):
1255 self._dstream = None
1256 raise ZstdError('could not initialize DStream: %s' %
1257 ffi.string(lib.ZSTD_getErrorName(zresult)))
@@ -9,15 +9,15 b' import sys'
9 9 from mercurial import (
10 10 node,
11 11 revlog,
12 scmutil,
13 12 transaction,
14 13 util,
14 vfs as vfsmod,
15 15 )
16 16
17 17 for fp in (sys.stdin, sys.stdout, sys.stderr):
18 18 util.setbinary(fp)
19 19
20 opener = scmutil.opener('.', False)
20 opener = vfsmod.vfs('.', False)
21 21 tr = transaction.transaction(sys.stderr.write, opener, {'store': opener},
22 22 "undump.journal")
23 23 while True:
@@ -19,6 +19,8 b''
19 19 editor = notepad
20 20 ; show changed files and be a bit more verbose if True
21 21 ; verbose = True
22 ; colorize commands output
23 ; color = auto
22 24
23 25 ; username data to appear in commits
24 26 ; it usually takes the form: Joe User <joe.user@host.com>
@@ -40,7 +42,6 b' editor = notepad'
40 42 ;bugzilla =
41 43 ;children =
42 44 ;churn =
43 ;color =
44 45 ;convert =
45 46 ;eol =
46 47 ;extdiff =
@@ -15,6 +15,8 b''
15 15 <DirectoryRef Id="INSTALLDIR">
16 16 <Directory Id="helpdir" Name="help" FileSource="$(var.SourceDir)">
17 17 <Component Id="help.root" Guid="$(var.help.root.guid)" Win64='$(var.IsX64)'>
18 <File Name="bundlespec.txt" />
19 <File Name="color.txt" />
18 20 <File Name="config.txt" KeyPath="yes" />
19 21 <File Name="dates.txt" />
20 22 <File Name="diffs.txt" />
@@ -25,6 +27,7 b''
25 27 <File Name="hgignore.txt" />
26 28 <File Name="hgweb.txt" />
27 29 <File Name="merge-tools.txt" />
30 <File Name="pager.txt" />
28 31 <File Name="patterns.txt" />
29 32 <File Name="phases.txt" />
30 33 <File Name="revisions.txt" />
@@ -37,6 +40,7 b''
37 40 <Directory Id="help.internaldir" Name="internals">
38 41 <Component Id="help.internals" Guid="$(var.help.internals.guid)" Win64='$(var.IsX64)'>
39 42 <File Id="internals.bundles.txt" Name="bundles.txt" KeyPath="yes" />
43 <File Id="internals.censor.txt" Name="censor.txt" KeyPath="yes" />
40 44 <File Id="internals.changegroups.txt" Name="changegroups.txt" />
41 45 <File Id="internals.requirements.txt" Name="requirements.txt" />
42 46 <File Id="internals.revlogs.txt" Name="revlogs.txt" />
@@ -32,6 +32,7 b''
32 32 <File Name="map-cmdline.changelog" KeyPath="yes" />
33 33 <File Name="map-cmdline.compact" />
34 34 <File Name="map-cmdline.default" />
35 <File Name="map-cmdline.show" />
35 36 <File Name="map-cmdline.bisect" />
36 37 <File Name="map-cmdline.xml" />
37 38 <File Name="map-cmdline.status" />
@@ -225,6 +226,7 b''
225 226 <File Id="static.coal.file.png" Name="coal-file.png" />
226 227 <File Id="static.coal.folder.png" Name="coal-folder.png" />
227 228 <File Id="static.excanvas.js" Name="excanvas.js" />
229 <File Id="static.followlines.js" Name="followlines.js" />
228 230 <File Id="static.mercurial.js" Name="mercurial.js" />
229 231 <File Id="static.hgicon.png" Name="hgicon.png" />
230 232 <File Id="static.hglogo.png" Name="hglogo.png" />
@@ -4,7 +4,7 b''
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 """Check for unrecorded moves at commit time (EXPERIMENTAL)
7 """check for unrecorded moves at commit time (EXPERIMENTAL)
8 8
9 9 This extension checks at commit/amend time if any of the committed files
10 10 comes from an unrecorded mv.
@@ -15,14 +15,16 b' the Mercurial template mechanism.'
15 15 The bug references can optionally include an update for Bugzilla of the
16 16 hours spent working on the bug. Bugs can also be marked fixed.
17 17
18 Three basic modes of access to Bugzilla are provided:
18 Four basic modes of access to Bugzilla are provided:
19
20 1. Access via the Bugzilla REST-API. Requires bugzilla 5.0 or later.
19 21
20 1. Access via the Bugzilla XMLRPC interface. Requires Bugzilla 3.4 or later.
22 2. Access via the Bugzilla XMLRPC interface. Requires Bugzilla 3.4 or later.
21 23
22 2. Check data via the Bugzilla XMLRPC interface and submit bug change
24 3. Check data via the Bugzilla XMLRPC interface and submit bug change
23 25 via email to Bugzilla email interface. Requires Bugzilla 3.4 or later.
24 26
25 3. Writing directly to the Bugzilla database. Only Bugzilla installations
27 4. Writing directly to the Bugzilla database. Only Bugzilla installations
26 28 using MySQL are supported. Requires Python MySQLdb.
27 29
28 30 Writing directly to the database is susceptible to schema changes, and
@@ -50,11 +52,16 b' user, the email associated with the Bugz'
50 52 Bugzilla is used instead as the source of the comment. Marking bugs fixed
51 53 works on all supported Bugzilla versions.
52 54
55 Access via the REST-API needs either a Bugzilla username and password
56 or an apikey specified in the configuration. Comments are made under
57 the given username or the user assoicated with the apikey in Bugzilla.
58
53 59 Configuration items common to all access modes:
54 60
55 61 bugzilla.version
56 62 The access type to use. Values recognized are:
57 63
64 :``restapi``: Bugzilla REST-API, Bugzilla 5.0 and later.
58 65 :``xmlrpc``: Bugzilla XMLRPC interface.
59 66 :``xmlrpc+email``: Bugzilla XMLRPC and email interfaces.
60 67 :``3.0``: MySQL access, Bugzilla 3.0 and later.
@@ -135,7 +142,7 b' The ``[usermap]`` section is used to spe'
135 142 committer email to Bugzilla user email. See also ``bugzilla.usermap``.
136 143 Contains entries of the form ``committer = Bugzilla user``.
137 144
138 XMLRPC access mode configuration:
145 XMLRPC and REST-API access mode configuration:
139 146
140 147 bugzilla.bzurl
141 148 The base URL for the Bugzilla installation.
@@ -148,6 +155,13 b' bugzilla.user'
148 155 bugzilla.password
149 156 The password for Bugzilla login.
150 157
158 REST-API access mode uses the options listed above as well as:
159
160 bugzilla.apikey
161 An apikey generated on the Bugzilla instance for api access.
162 Using an apikey removes the need to store the user and password
163 options.
164
151 165 XMLRPC+email access mode uses the XMLRPC access mode configuration items,
152 166 and also:
153 167
@@ -279,6 +293,7 b' All the above add a comment to the Bugzi'
279 293
280 294 from __future__ import absolute_import
281 295
296 import json
282 297 import re
283 298 import time
284 299
@@ -288,10 +303,10 b' from mercurial import ('
288 303 cmdutil,
289 304 error,
290 305 mail,
306 url,
291 307 util,
292 308 )
293 309
294 urlparse = util.urlparse
295 310 xmlrpclib = util.xmlrpclib
296 311
297 312 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
@@ -641,7 +656,7 b' class bzxmlrpc(bzaccess):'
641 656 self.bztoken = login.get('token', '')
642 657
643 658 def transport(self, uri):
644 if urlparse.urlparse(uri, "http")[0] == "https":
659 if util.urlreq.urlparse(uri, "http")[0] == "https":
645 660 return cookiesafetransport()
646 661 else:
647 662 return cookietransport()
@@ -773,6 +788,136 b' class bzxmlrpcemail(bzxmlrpc):'
773 788 cmds.append(self.makecommandline("resolution", self.fixresolution))
774 789 self.send_bug_modify_email(bugid, cmds, text, committer)
775 790
791 class NotFound(LookupError):
792 pass
793
794 class bzrestapi(bzaccess):
795 """Read and write bugzilla data using the REST API available since
796 Bugzilla 5.0.
797 """
798 def __init__(self, ui):
799 bzaccess.__init__(self, ui)
800 bz = self.ui.config('bugzilla', 'bzurl',
801 'http://localhost/bugzilla/')
802 self.bzroot = '/'.join([bz, 'rest'])
803 self.apikey = self.ui.config('bugzilla', 'apikey', '')
804 self.user = self.ui.config('bugzilla', 'user', 'bugs')
805 self.passwd = self.ui.config('bugzilla', 'password')
806 self.fixstatus = self.ui.config('bugzilla', 'fixstatus', 'RESOLVED')
807 self.fixresolution = self.ui.config('bugzilla', 'fixresolution',
808 'FIXED')
809
810 def apiurl(self, targets, include_fields=None):
811 url = '/'.join([self.bzroot] + [str(t) for t in targets])
812 qv = {}
813 if self.apikey:
814 qv['api_key'] = self.apikey
815 elif self.user and self.passwd:
816 qv['login'] = self.user
817 qv['password'] = self.passwd
818 if include_fields:
819 qv['include_fields'] = include_fields
820 if qv:
821 url = '%s?%s' % (url, util.urlreq.urlencode(qv))
822 return url
823
824 def _fetch(self, burl):
825 try:
826 resp = url.open(self.ui, burl)
827 return json.loads(resp.read())
828 except util.urlerr.httperror as inst:
829 if inst.code == 401:
830 raise error.Abort(_('authorization failed'))
831 if inst.code == 404:
832 raise NotFound()
833 else:
834 raise
835
836 def _submit(self, burl, data, method='POST'):
837 data = json.dumps(data)
838 if method == 'PUT':
839 class putrequest(util.urlreq.request):
840 def get_method(self):
841 return 'PUT'
842 request_type = putrequest
843 else:
844 request_type = util.urlreq.request
845 req = request_type(burl, data,
846 {'Content-Type': 'application/json'})
847 try:
848 resp = url.opener(self.ui).open(req)
849 return json.loads(resp.read())
850 except util.urlerr.httperror as inst:
851 if inst.code == 401:
852 raise error.Abort(_('authorization failed'))
853 if inst.code == 404:
854 raise NotFound()
855 else:
856 raise
857
858 def filter_real_bug_ids(self, bugs):
859 '''remove bug IDs that do not exist in Bugzilla from bugs.'''
860 badbugs = set()
861 for bugid in bugs:
862 burl = self.apiurl(('bug', bugid), include_fields='status')
863 try:
864 self._fetch(burl)
865 except NotFound:
866 badbugs.add(bugid)
867 for bugid in badbugs:
868 del bugs[bugid]
869
870 def filter_cset_known_bug_ids(self, node, bugs):
871 '''remove bug IDs where node occurs in comment text from bugs.'''
872 sn = short(node)
873 for bugid in bugs.keys():
874 burl = self.apiurl(('bug', bugid, 'comment'), include_fields='text')
875 result = self._fetch(burl)
876 comments = result['bugs'][str(bugid)]['comments']
877 if any(sn in c['text'] for c in comments):
878 self.ui.status(_('bug %d already knows about changeset %s\n') %
879 (bugid, sn))
880 del bugs[bugid]
881
882 def updatebug(self, bugid, newstate, text, committer):
883 '''update the specified bug. Add comment text and set new states.
884
885 If possible add the comment as being from the committer of
886 the changeset. Otherwise use the default Bugzilla user.
887 '''
888 bugmod = {}
889 if 'hours' in newstate:
890 bugmod['work_time'] = newstate['hours']
891 if 'fix' in newstate:
892 bugmod['status'] = self.fixstatus
893 bugmod['resolution'] = self.fixresolution
894 if bugmod:
895 # if we have to change the bugs state do it here
896 bugmod['comment'] = {
897 'comment': text,
898 'is_private': False,
899 'is_markdown': False,
900 }
901 burl = self.apiurl(('bug', bugid))
902 self._submit(burl, bugmod, method='PUT')
903 self.ui.debug('updated bug %s\n' % bugid)
904 else:
905 burl = self.apiurl(('bug', bugid, 'comment'))
906 self._submit(burl, {
907 'comment': text,
908 'is_private': False,
909 'is_markdown': False,
910 })
911 self.ui.debug('added comment to bug %s\n' % bugid)
912
913 def notify(self, bugs, committer):
914 '''Force sending of Bugzilla notification emails.
915
916 Only required if the access method does not trigger notification
917 emails automatically.
918 '''
919 pass
920
776 921 class bugzilla(object):
777 922 # supported versions of bugzilla. different versions have
778 923 # different schemas.
@@ -781,7 +926,8 b' class bugzilla(object):'
781 926 '2.18': bzmysql_2_18,
782 927 '3.0': bzmysql_3_0,
783 928 'xmlrpc': bzxmlrpc,
784 'xmlrpc+email': bzxmlrpcemail
929 'xmlrpc+email': bzxmlrpcemail,
930 'restapi': bzrestapi,
785 931 }
786 932
787 933 _default_bug_re = (r'bugs?\s*,?\s*(?:#|nos?\.?|num(?:ber)?s?)?\s*'
@@ -177,7 +177,7 b' def capabilities(orig, repo, proto):'
177 177 # Only advertise if a manifest exists. This does add some I/O to requests.
178 178 # But this should be cheaper than a wasted network round trip due to
179 179 # missing file.
180 if repo.opener.exists('clonebundles.manifest'):
180 if repo.vfs.exists('clonebundles.manifest'):
181 181 caps.append('clonebundles')
182 182
183 183 return caps
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file copied from hgext/color.py to mercurial/help/color.txt
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file copied from mercurial/revset.py to mercurial/revsetlang.py
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file copied from mercurial/revset.py to mercurial/smartset.py
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file copied from mercurial/repair.py to mercurial/upgrade.py
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file copied from mercurial/scmutil.py to mercurial/vfs.py
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file copied from tests/test-check-module-imports.t to tests/test-imports-checker.t
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file copied from tests/test-pager.t to tests/test-pager-legacy.t
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now