zstd: vendor python-zstandard 0.8.0...
Gregory Szorc
r31796:e0dc4053 default
@@ -0,0 +1,770
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8
9 #include "python-zstandard.h"
10
11 extern PyObject* ZstdError;
12
13 PyDoc_STRVAR(BufferWithSegments__doc__,
14 "BufferWithSegments - A memory buffer holding known sub-segments.\n"
15 "\n"
16 "This type represents a contiguous chunk of memory containing N discrete\n"
17 "items within sub-segments of that memory.\n"
18 "\n"
19 "Segments within the buffer are stored as an array of\n"
20 "``(offset, length)`` pairs, where each element is an unsigned 64-bit\n"
21 "integer using the host/native bit order representation.\n"
22 "\n"
23 "The type exists to facilitate operations against N>1 items without the\n"
24 "overhead of Python object creation and management.\n"
25 );
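/*
 * A minimal usage sketch from Python, mirroring the unit tests added later in
 * this changeset (assumes the standard ``struct`` module is used to pack the
 * ``(offset, length)`` pairs described above):
 *
 *   import struct
 *   import zstd
 *
 *   ss = struct.Struct('=QQ')  # native byte order, unsigned 64-bit pairs
 *   b = zstd.BufferWithSegments(b'foofoox', ss.pack(0, 3) + ss.pack(3, 4))
 *   assert len(b) == 2
 *   assert b[1].tobytes() == b'foox'
 */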
26
27 static void BufferWithSegments_dealloc(ZstdBufferWithSegments* self) {
28 /* Backing memory is either canonically owned by a Py_buffer or by us. */
29 if (self->parent.buf) {
30 PyBuffer_Release(&self->parent);
31 }
32 else if (self->useFree) {
33 free(self->data);
34 }
35 else {
36 PyMem_Free(self->data);
37 }
38
39 self->data = NULL;
40
41 if (self->useFree) {
42 free(self->segments);
43 }
44 else {
45 PyMem_Free(self->segments);
46 }
47
48 self->segments = NULL;
49
50 PyObject_Del(self);
51 }
52
53 static int BufferWithSegments_init(ZstdBufferWithSegments* self, PyObject* args, PyObject* kwargs) {
54 static char* kwlist[] = {
55 "data",
56 "segments",
57 NULL
58 };
59
60 Py_buffer segments;
61 Py_ssize_t segmentCount;
62 Py_ssize_t i;
63
64 memset(&self->parent, 0, sizeof(self->parent));
65
66 #if PY_MAJOR_VERSION >= 3
67 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*y*:BufferWithSegments",
68 #else
69 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*s*:BufferWithSegments",
70 #endif
71 kwlist, &self->parent, &segments)) {
72 return -1;
73 }
74
75 if (!PyBuffer_IsContiguous(&self->parent, 'C') || self->parent.ndim > 1) {
76 PyErr_SetString(PyExc_ValueError, "data buffer should be contiguous and have a single dimension");
77 goto except;
78 }
79
80 if (!PyBuffer_IsContiguous(&segments, 'C') || segments.ndim > 1) {
81 PyErr_SetString(PyExc_ValueError, "segments buffer should be contiguous and have a single dimension");
82 goto except;
83 }
84
85 if (segments.len % sizeof(BufferSegment)) {
86 PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of %lu",
87 sizeof(BufferSegment));
88 goto except;
89 }
90
91 segmentCount = segments.len / sizeof(BufferSegment);
92
93 /* Validate segments data, as blindly trusting it could lead to arbitrary
94 memory access. */
95 for (i = 0; i < segmentCount; i++) {
96 BufferSegment* segment = &((BufferSegment*)(segments.buf))[i];
97
98 if (segment->offset + segment->length > (unsigned long long)self->parent.len) {
99 PyErr_SetString(PyExc_ValueError, "offset within segments array references memory outside buffer");
100 goto except;
102 }
103 }
104
105 /* Make a copy of the segments data. It is cheap to do so and is a guard
106 against caller changing offsets, which has security implications. */
107 self->segments = PyMem_Malloc(segments.len);
108 if (!self->segments) {
109 PyErr_NoMemory();
110 goto except;
111 }
112
113 memcpy(self->segments, segments.buf, segments.len);
114 PyBuffer_Release(&segments);
115
116 self->data = self->parent.buf;
117 self->dataSize = self->parent.len;
118 self->segmentCount = segmentCount;
119
120 return 0;
121
122 except:
123 PyBuffer_Release(&self->parent);
124 PyBuffer_Release(&segments);
125 return -1;
126 }
127
128 /**
129 * Construct a BufferWithSegments from existing memory and offsets.
130 *
131 * Ownership of the backing memory and BufferSegments will be transferred to
132 * the created object and freed when the BufferWithSegments is destroyed.
133 */
134 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize,
135 BufferSegment* segments, Py_ssize_t segmentsSize) {
136 ZstdBufferWithSegments* result = NULL;
137 Py_ssize_t i;
138
139 if (NULL == data) {
140 PyErr_SetString(PyExc_ValueError, "data is NULL");
141 return NULL;
142 }
143
144 if (NULL == segments) {
145 PyErr_SetString(PyExc_ValueError, "segments is NULL");
146 return NULL;
147 }
148
149 for (i = 0; i < segmentsSize; i++) {
150 BufferSegment* segment = &segments[i];
151
152 if (segment->offset + segment->length > dataSize) {
153 PyErr_SetString(PyExc_ValueError, "offset in segments overflows buffer size");
154 return NULL;
155 }
156 }
157
158 result = PyObject_New(ZstdBufferWithSegments, &ZstdBufferWithSegmentsType);
159 if (NULL == result) {
160 return NULL;
161 }
162
163 result->useFree = 0;
164
165 memset(&result->parent, 0, sizeof(result->parent));
166 result->data = data;
167 result->dataSize = dataSize;
168 result->segments = segments;
169 result->segmentCount = segmentsSize;
170
171 return result;
172 }
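/*
 * A minimal calling sketch (an assumption based on the ownership comment
 * above and on the dealloc path, which releases this memory with
 * PyMem_Free() when ``useFree`` is 0; error handling omitted):
 *
 *   char* data = PyMem_Malloc(7);
 *   BufferSegment* segs = PyMem_Malloc(2 * sizeof(BufferSegment));
 *   memcpy(data, "foofoox", 7);
 *   segs[0].offset = 0; segs[0].length = 3;
 *   segs[1].offset = 3; segs[1].length = 4;
 *   ZstdBufferWithSegments* b = BufferWithSegments_FromMemory(data, 7, segs, 2);
 *   // On success, b owns data and segs and frees them when deallocated.
 */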
173
174 static Py_ssize_t BufferWithSegments_length(ZstdBufferWithSegments* self) {
175 return self->segmentCount;
176 }
177
178 static ZstdBufferSegment* BufferWithSegments_item(ZstdBufferWithSegments* self, Py_ssize_t i) {
179 ZstdBufferSegment* result = NULL;
180
181 if (i < 0) {
182 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
183 return NULL;
184 }
185
186 if (i >= self->segmentCount) {
187 PyErr_Format(PyExc_IndexError, "offset must be less than %zd", self->segmentCount);
188 return NULL;
189 }
190
191 result = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL);
192 if (NULL == result) {
193 return NULL;
194 }
195
196 result->parent = (PyObject*)self;
197 Py_INCREF(self);
198
199 result->data = (char*)self->data + self->segments[i].offset;
200 result->dataSize = self->segments[i].length;
201 result->offset = self->segments[i].offset;
202
203 return result;
204 }
205
206 #if PY_MAJOR_VERSION >= 3
207 static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) {
208 return PyBuffer_FillInfo(view, (PyObject*)self, self->data, self->dataSize, 1, flags);
209 }
210 #else
211 static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) {
212 if (segment != 0) {
213 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
214 return -1;
215 }
216
217 *ptrptr = self->data;
218 return self->dataSize;
219 }
220
221 static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) {
222 if (len) {
223 *len = 1;
224 }
225
226 return 1;
227 }
228 #endif
229
230 PyDoc_STRVAR(BufferWithSegments_tobytes__doc__,
231 "Obtain a bytes instance for this buffer.\n"
232 );
233
234 static PyObject* BufferWithSegments_tobytes(ZstdBufferWithSegments* self) {
235 return PyBytes_FromStringAndSize(self->data, self->dataSize);
236 }
237
238 PyDoc_STRVAR(BufferWithSegments_segments__doc__,
239 "Obtain a BufferSegments describing segments in this sintance.\n"
240 );
241
242 static ZstdBufferSegments* BufferWithSegments_segments(ZstdBufferWithSegments* self) {
243 ZstdBufferSegments* result = (ZstdBufferSegments*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentsType, NULL);
244 if (NULL == result) {
245 return NULL;
246 }
247
248 result->parent = (PyObject*)self;
249 Py_INCREF(self);
250 result->segments = self->segments;
251 result->segmentCount = self->segmentCount;
252
253 return result;
254 }
255
256 static PySequenceMethods BufferWithSegments_sq = {
257 (lenfunc)BufferWithSegments_length, /* sq_length */
258 0, /* sq_concat */
259 0, /* sq_repeat */
260 (ssizeargfunc)BufferWithSegments_item, /* sq_item */
261 0, /* sq_ass_item */
262 0, /* sq_contains */
263 0, /* sq_inplace_concat */
264 0 /* sq_inplace_repeat */
265 };
266
267 static PyBufferProcs BufferWithSegments_as_buffer = {
268 #if PY_MAJOR_VERSION >= 3
269 (getbufferproc)BufferWithSegments_getbuffer, /* bf_getbuffer */
270 0 /* bf_releasebuffer */
271 #else
272 (readbufferproc)BufferWithSegments_getreadbuffer, /* bf_getreadbuffer */
273 0, /* bf_getwritebuffer */
274 (segcountproc)BufferWithSegments_getsegcount, /* bf_getsegcount */
275 0 /* bf_getcharbuffer */
276 #endif
277 };
278
279 static PyMethodDef BufferWithSegments_methods[] = {
280 { "segments", (PyCFunction)BufferWithSegments_segments,
281 METH_NOARGS, BufferWithSegments_segments__doc__ },
282 { "tobytes", (PyCFunction)BufferWithSegments_tobytes,
283 METH_NOARGS, BufferWithSegments_tobytes__doc__ },
284 { NULL, NULL }
285 };
286
287 static PyMemberDef BufferWithSegments_members[] = {
288 { "size", T_ULONGLONG, offsetof(ZstdBufferWithSegments, dataSize),
289 READONLY, "total size of the buffer in bytes" },
290 { NULL }
291 };
292
293 PyTypeObject ZstdBufferWithSegmentsType = {
294 PyVarObject_HEAD_INIT(NULL, 0)
295 "zstd.BufferWithSegments", /* tp_name */
296 sizeof(ZstdBufferWithSegments),/* tp_basicsize */
297 0, /* tp_itemsize */
298 (destructor)BufferWithSegments_dealloc, /* tp_dealloc */
299 0, /* tp_print */
300 0, /* tp_getattr */
301 0, /* tp_setattr */
302 0, /* tp_compare */
303 0, /* tp_repr */
304 0, /* tp_as_number */
305 &BufferWithSegments_sq, /* tp_as_sequence */
306 0, /* tp_as_mapping */
307 0, /* tp_hash */
308 0, /* tp_call */
309 0, /* tp_str */
310 0, /* tp_getattro */
311 0, /* tp_setattro */
312 &BufferWithSegments_as_buffer, /* tp_as_buffer */
313 Py_TPFLAGS_DEFAULT, /* tp_flags */
314 BufferWithSegments__doc__, /* tp_doc */
315 0, /* tp_traverse */
316 0, /* tp_clear */
317 0, /* tp_richcompare */
318 0, /* tp_weaklistoffset */
319 0, /* tp_iter */
320 0, /* tp_iternext */
321 BufferWithSegments_methods, /* tp_methods */
322 BufferWithSegments_members, /* tp_members */
323 0, /* tp_getset */
324 0, /* tp_base */
325 0, /* tp_dict */
326 0, /* tp_descr_get */
327 0, /* tp_descr_set */
328 0, /* tp_dictoffset */
329 (initproc)BufferWithSegments_init, /* tp_init */
330 0, /* tp_alloc */
331 PyType_GenericNew, /* tp_new */
332 };
333
334 PyDoc_STRVAR(BufferSegments__doc__,
335 "BufferSegments - Represents segments/offsets within a BufferWithSegments\n"
336 );
337
338 static void BufferSegments_dealloc(ZstdBufferSegments* self) {
339 Py_CLEAR(self->parent);
340 PyObject_Del(self);
341 }
342
343 #if PY_MAJOR_VERSION >= 3
344 static int BufferSegments_getbuffer(ZstdBufferSegments* self, Py_buffer* view, int flags) {
345 return PyBuffer_FillInfo(view, (PyObject*)self,
346 (void*)self->segments, self->segmentCount * sizeof(BufferSegment),
347 1, flags);
348 }
349 #else
350 static Py_ssize_t BufferSegments_getreadbuffer(ZstdBufferSegments* self, Py_ssize_t segment, void **ptrptr) {
351 if (segment != 0) {
352 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
353 return -1;
354 }
355
356 *ptrptr = (void*)self->segments;
357 return self->segmentCount * sizeof(BufferSegment);
358 }
359
360 static Py_ssize_t BufferSegments_getsegcount(ZstdBufferSegments* self, Py_ssize_t* len) {
361 if (len) {
362 *len = 1;
363 }
364
365 return 1;
366 }
367 #endif
368
369 static PyBufferProcs BufferSegments_as_buffer = {
370 #if PY_MAJOR_VERSION >= 3
371 (getbufferproc)BufferSegments_getbuffer,
372 0
373 #else
374 (readbufferproc)BufferSegments_getreadbuffer,
375 0,
376 (segcountproc)BufferSegments_getsegcount,
377 0
378 #endif
379 };
380
381 PyTypeObject ZstdBufferSegmentsType = {
382 PyVarObject_HEAD_INIT(NULL, 0)
383 "zstd.BufferSegments", /* tp_name */
384 sizeof(ZstdBufferSegments),/* tp_basicsize */
385 0, /* tp_itemsize */
386 (destructor)BufferSegments_dealloc, /* tp_dealloc */
387 0, /* tp_print */
388 0, /* tp_getattr */
389 0, /* tp_setattr */
390 0, /* tp_compare */
391 0, /* tp_repr */
392 0, /* tp_as_number */
393 0, /* tp_as_sequence */
394 0, /* tp_as_mapping */
395 0, /* tp_hash */
396 0, /* tp_call */
397 0, /* tp_str */
398 0, /* tp_getattro */
399 0, /* tp_setattro */
400 &BufferSegments_as_buffer, /* tp_as_buffer */
401 Py_TPFLAGS_DEFAULT, /* tp_flags */
402 BufferSegments__doc__, /* tp_doc */
403 0, /* tp_traverse */
404 0, /* tp_clear */
405 0, /* tp_richcompare */
406 0, /* tp_weaklistoffset */
407 0, /* tp_iter */
408 0, /* tp_iternext */
409 0, /* tp_methods */
410 0, /* tp_members */
411 0, /* tp_getset */
412 0, /* tp_base */
413 0, /* tp_dict */
414 0, /* tp_descr_get */
415 0, /* tp_descr_set */
416 0, /* tp_dictoffset */
417 0, /* tp_init */
418 0, /* tp_alloc */
419 PyType_GenericNew, /* tp_new */
420 };
421
422 PyDoc_STRVAR(BufferSegment__doc__,
423 "BufferSegment - Represents a segment within a BufferWithSegments\n"
424 );
425
426 static void BufferSegment_dealloc(ZstdBufferSegment* self) {
427 Py_CLEAR(self->parent);
428 PyObject_Del(self);
429 }
430
431 static Py_ssize_t BufferSegment_length(ZstdBufferSegment* self) {
432 return self->dataSize;
433 }
434
435 #if PY_MAJOR_VERSION >= 3
436 static int BufferSegment_getbuffer(ZstdBufferSegment* self, Py_buffer* view, int flags) {
437 return PyBuffer_FillInfo(view, (PyObject*)self,
438 self->data, self->dataSize, 1, flags);
439 }
440 #else
441 static Py_ssize_t BufferSegment_getreadbuffer(ZstdBufferSegment* self, Py_ssize_t segment, void **ptrptr) {
442 if (segment != 0) {
443 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
444 return -1;
445 }
446
447 *ptrptr = self->data;
448 return self->dataSize;
449 }
450
451 static Py_ssize_t BufferSegment_getsegcount(ZstdBufferSegment* self, Py_ssize_t* len) {
452 if (len) {
453 *len = 1;
454 }
455
456 return 1;
457 }
458 #endif
459
460 PyDoc_STRVAR(BufferSegment_tobytes__doc__,
461 "Obtain a bytes instance for this segment.\n"
462 );
463
464 static PyObject* BufferSegment_tobytes(ZstdBufferSegment* self) {
465 return PyBytes_FromStringAndSize(self->data, self->dataSize);
466 }
467
468 static PySequenceMethods BufferSegment_sq = {
469 (lenfunc)BufferSegment_length, /* sq_length */
470 0, /* sq_concat */
471 0, /* sq_repeat */
472 0, /* sq_item */
473 0, /* sq_ass_item */
474 0, /* sq_contains */
475 0, /* sq_inplace_concat */
476 0 /* sq_inplace_repeat */
477 };
478
479 static PyBufferProcs BufferSegment_as_buffer = {
480 #if PY_MAJOR_VERSION >= 3
481 (getbufferproc)BufferSegment_getbuffer,
482 0
483 #else
484 (readbufferproc)BufferSegment_getreadbuffer,
485 0,
486 (segcountproc)BufferSegment_getsegcount,
487 0
488 #endif
489 };
490
491 static PyMethodDef BufferSegment_methods[] = {
492 { "tobytes", (PyCFunction)BufferSegment_tobytes,
493 METH_NOARGS, BufferSegment_tobytes__doc__ },
494 { NULL, NULL }
495 };
496
497 static PyMemberDef BufferSegment_members[] = {
498 { "offset", T_ULONGLONG, offsetof(ZstdBufferSegment, offset), READONLY,
499 "offset of segment within parent buffer" },
500 { NULL }
501 };
502
503 PyTypeObject ZstdBufferSegmentType = {
504 PyVarObject_HEAD_INIT(NULL, 0)
505 "zstd.BufferSegment", /* tp_name */
506 sizeof(ZstdBufferSegment),/* tp_basicsize */
507 0, /* tp_itemsize */
508 (destructor)BufferSegment_dealloc, /* tp_dealloc */
509 0, /* tp_print */
510 0, /* tp_getattr */
511 0, /* tp_setattr */
512 0, /* tp_compare */
513 0, /* tp_repr */
514 0, /* tp_as_number */
515 &BufferSegment_sq, /* tp_as_sequence */
516 0, /* tp_as_mapping */
517 0, /* tp_hash */
518 0, /* tp_call */
519 0, /* tp_str */
520 0, /* tp_getattro */
521 0, /* tp_setattro */
522 &BufferSegment_as_buffer, /* tp_as_buffer */
523 Py_TPFLAGS_DEFAULT, /* tp_flags */
524 BufferSegment__doc__, /* tp_doc */
525 0, /* tp_traverse */
526 0, /* tp_clear */
527 0, /* tp_richcompare */
528 0, /* tp_weaklistoffset */
529 0, /* tp_iter */
530 0, /* tp_iternext */
531 BufferSegment_methods, /* tp_methods */
532 BufferSegment_members, /* tp_members */
533 0, /* tp_getset */
534 0, /* tp_base */
535 0, /* tp_dict */
536 0, /* tp_descr_get */
537 0, /* tp_descr_set */
538 0, /* tp_dictoffset */
539 0, /* tp_init */
540 0, /* tp_alloc */
541 PyType_GenericNew, /* tp_new */
542 };
543
544 PyDoc_STRVAR(BufferWithSegmentsCollection__doc__,
545 "Represents a collection of BufferWithSegments.\n"
546 );
547
548 static void BufferWithSegmentsCollection_dealloc(ZstdBufferWithSegmentsCollection* self) {
549 Py_ssize_t i;
550
551 if (self->firstElements) {
552 PyMem_Free(self->firstElements);
553 self->firstElements = NULL;
554 }
555
556 if (self->buffers) {
557 for (i = 0; i < self->bufferCount; i++) {
558 Py_CLEAR(self->buffers[i]);
559 }
560
561 PyMem_Free(self->buffers);
562 self->buffers = NULL;
563 }
564
565 PyObject_Del(self);
566 }
567
568 static int BufferWithSegmentsCollection_init(ZstdBufferWithSegmentsCollection* self, PyObject* args) {
569 Py_ssize_t size;
570 Py_ssize_t i;
571 Py_ssize_t offset = 0;
572
573 size = PyTuple_Size(args);
574 if (-1 == size) {
575 return -1;
576 }
577
578 if (0 == size) {
579 PyErr_SetString(PyExc_ValueError, "must pass at least 1 argument");
580 return -1;
581 }
582
583 for (i = 0; i < size; i++) {
584 PyObject* item = PyTuple_GET_ITEM(args, i);
585 if (!PyObject_TypeCheck(item, &ZstdBufferWithSegmentsType)) {
586 PyErr_SetString(PyExc_TypeError, "arguments must be BufferWithSegments instances");
587 return -1;
588 }
589
590 if (0 == ((ZstdBufferWithSegments*)item)->segmentCount ||
591 0 == ((ZstdBufferWithSegments*)item)->dataSize) {
592 PyErr_SetString(PyExc_ValueError, "ZstdBufferWithSegments cannot be empty");
593 return -1;
594 }
595 }
596
597 self->buffers = PyMem_Malloc(size * sizeof(ZstdBufferWithSegments*));
598 if (NULL == self->buffers) {
599 PyErr_NoMemory();
600 return -1;
601 }
602
603 self->firstElements = PyMem_Malloc(size * sizeof(Py_ssize_t));
604 if (NULL == self->firstElements) {
605 PyMem_Free(self->buffers);
606 self->buffers = NULL;
607 PyErr_NoMemory();
608 return -1;
609 }
610
611 self->bufferCount = size;
612
613 for (i = 0; i < size; i++) {
614 ZstdBufferWithSegments* item = (ZstdBufferWithSegments*)PyTuple_GET_ITEM(args, i);
615
616 self->buffers[i] = item;
617 Py_INCREF(item);
618
619 if (i > 0) {
620 self->firstElements[i - 1] = offset;
621 }
622
623 offset += item->segmentCount;
624 }
625
626 self->firstElements[size - 1] = offset;
627
628 return 0;
629 }
630
631 static PyObject* BufferWithSegmentsCollection_size(ZstdBufferWithSegmentsCollection* self) {
632 Py_ssize_t i;
633 Py_ssize_t j;
634 unsigned long long size = 0;
635
636 for (i = 0; i < self->bufferCount; i++) {
637 for (j = 0; j < self->buffers[i]->segmentCount; j++) {
638 size += self->buffers[i]->segments[j].length;
639 }
640 }
641
642 return PyLong_FromUnsignedLongLong(size);
643 }
644
645 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection* self) {
646 return self->firstElements[self->bufferCount - 1];
647 }
648
649 static ZstdBufferSegment* BufferWithSegmentsCollection_item(ZstdBufferWithSegmentsCollection* self, Py_ssize_t i) {
650 Py_ssize_t bufferOffset;
651
652 if (i < 0) {
653 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
654 return NULL;
655 }
656
657 if (i >= BufferWithSegmentsCollection_length(self)) {
658 PyErr_Format(PyExc_IndexError, "offset must be less than %zd",
659 BufferWithSegmentsCollection_length(self));
660 return NULL;
661 }
662
663 for (bufferOffset = 0; bufferOffset < self->bufferCount; bufferOffset++) {
664 Py_ssize_t offset = 0;
665
666 if (i < self->firstElements[bufferOffset]) {
667 if (bufferOffset > 0) {
668 offset = self->firstElements[bufferOffset - 1];
669 }
670
671 return BufferWithSegments_item(self->buffers[bufferOffset], i - offset);
672 }
673 }
674
675 PyErr_SetString(ZstdError, "error resolving segment; this should not happen");
676 return NULL;
677 }
678
679 static PySequenceMethods BufferWithSegmentsCollection_sq = {
680 (lenfunc)BufferWithSegmentsCollection_length, /* sq_length */
681 0, /* sq_concat */
682 0, /* sq_repeat */
683 (ssizeargfunc)BufferWithSegmentsCollection_item, /* sq_item */
684 0, /* sq_ass_item */
685 0, /* sq_contains */
686 0, /* sq_inplace_concat */
687 0 /* sq_inplace_repeat */
688 };
689
690 static PyMethodDef BufferWithSegmentsCollection_methods[] = {
691 { "size", (PyCFunction)BufferWithSegmentsCollection_size,
692 METH_NOARGS, PyDoc_STR("total size in bytes of all segments") },
693 { NULL, NULL }
694 };
695
696 PyTypeObject ZstdBufferWithSegmentsCollectionType = {
697 PyVarObject_HEAD_INIT(NULL, 0)
698 "zstd.BufferWithSegmentsCollection", /* tp_name */
699 sizeof(ZstdBufferWithSegmentsCollection),/* tp_basicsize */
700 0, /* tp_itemsize */
701 (destructor)BufferWithSegmentsCollection_dealloc, /* tp_dealloc */
702 0, /* tp_print */
703 0, /* tp_getattr */
704 0, /* tp_setattr */
705 0, /* tp_compare */
706 0, /* tp_repr */
707 0, /* tp_as_number */
708 &BufferWithSegmentsCollection_sq, /* tp_as_sequence */
709 0, /* tp_as_mapping */
710 0, /* tp_hash */
711 0, /* tp_call */
712 0, /* tp_str */
713 0, /* tp_getattro */
714 0, /* tp_setattro */
715 0, /* tp_as_buffer */
716 Py_TPFLAGS_DEFAULT, /* tp_flags */
717 BufferWithSegmentsCollection__doc__, /* tp_doc */
718 0, /* tp_traverse */
719 0, /* tp_clear */
720 0, /* tp_richcompare */
721 0, /* tp_weaklistoffset */
722 /* TODO implement iterator for performance. */
723 0, /* tp_iter */
724 0, /* tp_iternext */
725 BufferWithSegmentsCollection_methods, /* tp_methods */
726 0, /* tp_members */
727 0, /* tp_getset */
728 0, /* tp_base */
729 0, /* tp_dict */
730 0, /* tp_descr_get */
731 0, /* tp_descr_set */
732 0, /* tp_dictoffset */
733 (initproc)BufferWithSegmentsCollection_init, /* tp_init */
734 0, /* tp_alloc */
735 PyType_GenericNew, /* tp_new */
736 };
737
738 void bufferutil_module_init(PyObject* mod) {
739 Py_TYPE(&ZstdBufferWithSegmentsType) = &PyType_Type;
740 if (PyType_Ready(&ZstdBufferWithSegmentsType) < 0) {
741 return;
742 }
743
744 Py_INCREF(&ZstdBufferWithSegmentsType);
745 PyModule_AddObject(mod, "BufferWithSegments", (PyObject*)&ZstdBufferWithSegmentsType);
746
747 Py_TYPE(&ZstdBufferSegmentsType) = &PyType_Type;
748 if (PyType_Ready(&ZstdBufferSegmentsType) < 0) {
749 return;
750 }
751
752 Py_INCREF(&ZstdBufferSegmentsType);
753 PyModule_AddObject(mod, "BufferSegments", (PyObject*)&ZstdBufferSegmentsType);
754
755 Py_TYPE(&ZstdBufferSegmentType) = &PyType_Type;
756 if (PyType_Ready(&ZstdBufferSegmentType) < 0) {
757 return;
758 }
759
760 Py_INCREF(&ZstdBufferSegmentType);
761 PyModule_AddObject(mod, "BufferSegment", (PyObject*)&ZstdBufferSegmentType);
762
763 Py_TYPE(&ZstdBufferWithSegmentsCollectionType) = &PyType_Type;
764 if (PyType_Ready(&ZstdBufferWithSegmentsCollectionType) < 0) {
765 return;
766 }
767
768 Py_INCREF(&ZstdBufferWithSegmentsCollectionType);
769 PyModule_AddObject(mod, "BufferWithSegmentsCollection", (PyObject*)&ZstdBufferWithSegmentsCollectionType);
770 }
@@ -0,0 +1,112
1 import struct
2
3 try:
4 import unittest2 as unittest
5 except ImportError:
6 import unittest
7
8 import zstd
9
10 ss = struct.Struct('=QQ')
11
12
13 class TestBufferWithSegments(unittest.TestCase):
14 def test_arguments(self):
15 with self.assertRaises(TypeError):
16 zstd.BufferWithSegments()
17
18 with self.assertRaises(TypeError):
19 zstd.BufferWithSegments(b'foo')
20
21 # Segments data should be a multiple of 16.
22 with self.assertRaisesRegexp(ValueError, 'segments array size is not a multiple of 16'):
23 zstd.BufferWithSegments(b'foo', b'\x00\x00')
24
25 def test_invalid_offset(self):
26 with self.assertRaisesRegexp(ValueError, 'offset within segments array references memory'):
27 zstd.BufferWithSegments(b'foo', ss.pack(0, 4))
28
29 def test_invalid_getitem(self):
30 b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
31
32 with self.assertRaisesRegexp(IndexError, 'offset must be non-negative'):
33 test = b[-10]
34
35 with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'):
36 test = b[1]
37
38 with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'):
39 test = b[2]
40
41 def test_single(self):
42 b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
43 self.assertEqual(len(b), 1)
44 self.assertEqual(b.size, 3)
45 self.assertEqual(b.tobytes(), b'foo')
46
47 self.assertEqual(len(b[0]), 3)
48 self.assertEqual(b[0].offset, 0)
49 self.assertEqual(b[0].tobytes(), b'foo')
50
51 def test_multiple(self):
52 b = zstd.BufferWithSegments(b'foofooxfooxy', b''.join([ss.pack(0, 3),
53 ss.pack(3, 4),
54 ss.pack(7, 5)]))
55 self.assertEqual(len(b), 3)
56 self.assertEqual(b.size, 12)
57 self.assertEqual(b.tobytes(), b'foofooxfooxy')
58
59 self.assertEqual(b[0].tobytes(), b'foo')
60 self.assertEqual(b[1].tobytes(), b'foox')
61 self.assertEqual(b[2].tobytes(), b'fooxy')
62
63
64 class TestBufferWithSegmentsCollection(unittest.TestCase):
65 def test_empty_constructor(self):
66 with self.assertRaisesRegexp(ValueError, 'must pass at least 1 argument'):
67 zstd.BufferWithSegmentsCollection()
68
69 def test_argument_validation(self):
70 with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
71 zstd.BufferWithSegmentsCollection(None)
72
73 with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
74 zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'foo', ss.pack(0, 3)),
75 None)
76
77 with self.assertRaisesRegexp(ValueError, 'ZstdBufferWithSegments cannot be empty'):
78 zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b''))
79
80 def test_length(self):
81 b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
82 b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
83 ss.pack(3, 3)]))
84
85 c = zstd.BufferWithSegmentsCollection(b1)
86 self.assertEqual(len(c), 1)
87 self.assertEqual(c.size(), 3)
88
89 c = zstd.BufferWithSegmentsCollection(b2)
90 self.assertEqual(len(c), 2)
91 self.assertEqual(c.size(), 6)
92
93 c = zstd.BufferWithSegmentsCollection(b1, b2)
94 self.assertEqual(len(c), 3)
95 self.assertEqual(c.size(), 9)
96
97 def test_getitem(self):
98 b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
99 b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
100 ss.pack(3, 3)]))
101
102 c = zstd.BufferWithSegmentsCollection(b1, b2)
103
104 with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'):
105 c[3]
106
107 with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'):
108 c[4]
109
110 self.assertEqual(c[0].tobytes(), b'foo')
111 self.assertEqual(c[1].tobytes(), b'bar')
112 self.assertEqual(c[2].tobytes(), b'baz')
@@ -0,0 +1,143
1 import io
2 import os
3
4 try:
5 import unittest2 as unittest
6 except ImportError:
7 import unittest
8
9 try:
10 import hypothesis
11 import hypothesis.strategies as strategies
12 except ImportError:
13 raise unittest.SkipTest('hypothesis not available')
14
15 import zstd
16
17 from . common import (
18 make_cffi,
19 random_input_data,
20 )
21
22
23 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
24 @make_cffi
25 class TestCompressor_write_to_fuzzing(unittest.TestCase):
26 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
27 level=strategies.integers(min_value=1, max_value=5),
28 write_size=strategies.integers(min_value=1, max_value=1048576))
29 def test_write_size_variance(self, original, level, write_size):
30 refctx = zstd.ZstdCompressor(level=level)
31 ref_frame = refctx.compress(original)
32
33 cctx = zstd.ZstdCompressor(level=level)
34 b = io.BytesIO()
35 with cctx.write_to(b, size=len(original), write_size=write_size) as compressor:
36 compressor.write(original)
37
38 self.assertEqual(b.getvalue(), ref_frame)
39
40
41 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
42 @make_cffi
43 class TestCompressor_copy_stream_fuzzing(unittest.TestCase):
44 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
45 level=strategies.integers(min_value=1, max_value=5),
46 read_size=strategies.integers(min_value=1, max_value=1048576),
47 write_size=strategies.integers(min_value=1, max_value=1048576))
48 def test_read_write_size_variance(self, original, level, read_size, write_size):
49 refctx = zstd.ZstdCompressor(level=level)
50 ref_frame = refctx.compress(original)
51
52 cctx = zstd.ZstdCompressor(level=level)
53 source = io.BytesIO(original)
54 dest = io.BytesIO()
55
56 cctx.copy_stream(source, dest, size=len(original), read_size=read_size,
57 write_size=write_size)
58
59 self.assertEqual(dest.getvalue(), ref_frame)
60
61
62 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
63 @make_cffi
64 class TestCompressor_compressobj_fuzzing(unittest.TestCase):
65 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
66 level=strategies.integers(min_value=1, max_value=5),
67 chunk_sizes=strategies.streaming(
68 strategies.integers(min_value=1, max_value=4096)))
69 def test_random_input_sizes(self, original, level, chunk_sizes):
70 chunk_sizes = iter(chunk_sizes)
71
72 refctx = zstd.ZstdCompressor(level=level)
73 ref_frame = refctx.compress(original)
74
75 cctx = zstd.ZstdCompressor(level=level)
76 cobj = cctx.compressobj(size=len(original))
77
78 chunks = []
79 i = 0
80 while True:
81 chunk_size = next(chunk_sizes)
82 source = original[i:i + chunk_size]
83 if not source:
84 break
85
86 chunks.append(cobj.compress(source))
87 i += chunk_size
88
89 chunks.append(cobj.flush())
90
91 self.assertEqual(b''.join(chunks), ref_frame)
92
93
94 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
95 @make_cffi
96 class TestCompressor_read_from_fuzzing(unittest.TestCase):
97 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
98 level=strategies.integers(min_value=1, max_value=5),
99 read_size=strategies.integers(min_value=1, max_value=4096),
100 write_size=strategies.integers(min_value=1, max_value=4096))
101 def test_read_write_size_variance(self, original, level, read_size, write_size):
102 refcctx = zstd.ZstdCompressor(level=level)
103 ref_frame = refcctx.compress(original)
104
105 source = io.BytesIO(original)
106
107 cctx = zstd.ZstdCompressor(level=level)
108 chunks = list(cctx.read_from(source, size=len(original), read_size=read_size,
109 write_size=write_size))
110
111 self.assertEqual(b''.join(chunks), ref_frame)
112
113
114 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
115 class TestCompressor_multi_compress_to_buffer_fuzzing(unittest.TestCase):
116 @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()),
117 min_size=1, max_size=1024),
118 threads=strategies.integers(min_value=1, max_value=8),
119 use_dict=strategies.booleans())
120 def test_data_equivalence(self, original, threads, use_dict):
121 kwargs = {}
122
123 # Use a content dictionary because it is cheap to create.
124 if use_dict:
125 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
126
127 cctx = zstd.ZstdCompressor(level=1,
128 write_content_size=True,
129 write_checksum=True,
130 **kwargs)
131
132 result = cctx.multi_compress_to_buffer(original, threads=-1)
133
134 self.assertEqual(len(result), len(original))
135
136 # The frame produced via the batch APIs may not be bit identical to that
137 # produced by compress() because compression parameters are adjusted
138 # from the first input in batch mode. So the only thing we can do is
139 # verify the decompressed data matches the input.
140 dctx = zstd.ZstdDecompressor(**kwargs)
141
142 for i, frame in enumerate(result):
143 self.assertEqual(dctx.decompress(frame), original[i])
@@ -0,0 +1,79
1 import io
2 import os
3
4 try:
5 import unittest2 as unittest
6 except ImportError:
7 import unittest
8
9 try:
10 import hypothesis
11 import hypothesis.strategies as strategies
12 except ImportError:
13 raise unittest.SkipTest('hypothesis not available')
14
15 import zstd
16
17 from .common import (
18 make_cffi,
19 )
20
21
22 s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
23 max_value=zstd.WINDOWLOG_MAX)
24 s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN,
25 max_value=zstd.CHAINLOG_MAX)
26 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN,
27 max_value=zstd.HASHLOG_MAX)
28 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
29 max_value=zstd.SEARCHLOG_MAX)
30 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
31 max_value=zstd.SEARCHLENGTH_MAX)
32 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
33 max_value=zstd.TARGETLENGTH_MAX)
34 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
35 zstd.STRATEGY_DFAST,
36 zstd.STRATEGY_GREEDY,
37 zstd.STRATEGY_LAZY,
38 zstd.STRATEGY_LAZY2,
39 zstd.STRATEGY_BTLAZY2,
40 zstd.STRATEGY_BTOPT))
41
42
43 @make_cffi
44 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
45 class TestCompressionParametersHypothesis(unittest.TestCase):
46 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
47 s_searchlength, s_targetlength, s_strategy)
48 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
49 searchlength, targetlength, strategy):
50 # ZSTD_checkCParams moves the goal posts on us from what's advertised
51 # in the constants. So move along with them.
52 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
53 searchlength += 1
54 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
55 searchlength -= 1
56
57 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
58 searchlog, searchlength,
59 targetlength, strategy)
60
61 cctx = zstd.ZstdCompressor(compression_params=p)
62 with cctx.write_to(io.BytesIO()):
63 pass
64
65 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
66 s_searchlength, s_targetlength, s_strategy)
67 def test_estimate_compression_context_size(self, windowlog, chainlog,
68 hashlog, searchlog,
69 searchlength, targetlength,
70 strategy):
71 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
72 searchlength += 1
73 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
74 searchlength -= 1
75
76 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
77 searchlog, searchlength,
78 targetlength, strategy)
79 size = zstd.estimate_compression_context_size(p)
@@ -0,0 +1,151
1 import io
2 import os
3
4 try:
5 import unittest2 as unittest
6 except ImportError:
7 import unittest
8
9 try:
10 import hypothesis
11 import hypothesis.strategies as strategies
12 except ImportError:
13 raise unittest.SkipTest('hypothesis not available')
14
15 import zstd
16
17 from . common import (
18 make_cffi,
19 random_input_data,
20 )
21
22
23 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
24 @make_cffi
25 class TestDecompressor_write_to_fuzzing(unittest.TestCase):
26 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
27 level=strategies.integers(min_value=1, max_value=5),
28 write_size=strategies.integers(min_value=1, max_value=8192),
29 input_sizes=strategies.streaming(
30 strategies.integers(min_value=1, max_value=4096)))
31 def test_write_size_variance(self, original, level, write_size, input_sizes):
32 input_sizes = iter(input_sizes)
33
34 cctx = zstd.ZstdCompressor(level=level)
35 frame = cctx.compress(original)
36
37 dctx = zstd.ZstdDecompressor()
38 source = io.BytesIO(frame)
39 dest = io.BytesIO()
40
41 with dctx.write_to(dest, write_size=write_size) as decompressor:
42 while True:
43 chunk = source.read(next(input_sizes))
44 if not chunk:
45 break
46
47 decompressor.write(chunk)
48
49 self.assertEqual(dest.getvalue(), original)
50
51
52 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
53 @make_cffi
54 class TestDecompressor_copy_stream_fuzzing(unittest.TestCase):
55 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
56 level=strategies.integers(min_value=1, max_value=5),
57 read_size=strategies.integers(min_value=1, max_value=8192),
58 write_size=strategies.integers(min_value=1, max_value=8192))
59 def test_read_write_size_variance(self, original, level, read_size, write_size):
60 cctx = zstd.ZstdCompressor(level=level)
61 frame = cctx.compress(original)
62
63 source = io.BytesIO(frame)
64 dest = io.BytesIO()
65
66 dctx = zstd.ZstdDecompressor()
67 dctx.copy_stream(source, dest, read_size=read_size, write_size=write_size)
68
69 self.assertEqual(dest.getvalue(), original)
70
71
72 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
73 @make_cffi
74 class TestDecompressor_decompressobj_fuzzing(unittest.TestCase):
75 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
76 level=strategies.integers(min_value=1, max_value=5),
77 chunk_sizes=strategies.streaming(
78 strategies.integers(min_value=1, max_value=4096)))
79 def test_random_input_sizes(self, original, level, chunk_sizes):
80 chunk_sizes = iter(chunk_sizes)
81
82 cctx = zstd.ZstdCompressor(level=level)
83 frame = cctx.compress(original)
84
85 source = io.BytesIO(frame)
86
87 dctx = zstd.ZstdDecompressor()
88 dobj = dctx.decompressobj()
89
90 chunks = []
91 while True:
92 chunk = source.read(next(chunk_sizes))
93 if not chunk:
94 break
95
96 chunks.append(dobj.decompress(chunk))
97
98 self.assertEqual(b''.join(chunks), original)
99
100
101 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
102 @make_cffi
103 class TestDecompressor_read_from_fuzzing(unittest.TestCase):
104 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
105 level=strategies.integers(min_value=1, max_value=5),
106 read_size=strategies.integers(min_value=1, max_value=4096),
107 write_size=strategies.integers(min_value=1, max_value=4096))
108 def test_read_write_size_variance(self, original, level, read_size, write_size):
109 cctx = zstd.ZstdCompressor(level=level)
110 frame = cctx.compress(original)
111
112 source = io.BytesIO(frame)
113
114 dctx = zstd.ZstdDecompressor()
115 chunks = list(dctx.read_from(source, read_size=read_size, write_size=write_size))
116
117 self.assertEqual(b''.join(chunks), original)
118
119
120 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
121 class TestDecompressor_multi_decompress_to_buffer_fuzzing(unittest.TestCase):
122 @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()),
123 min_size=1, max_size=1024),
124 threads=strategies.integers(min_value=1, max_value=8),
125 use_dict=strategies.booleans())
126 def test_data_equivalence(self, original, threads, use_dict):
127 kwargs = {}
128 if use_dict:
129 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
130
131 cctx = zstd.ZstdCompressor(level=1,
132 write_content_size=True,
133 write_checksum=True,
134 **kwargs)
135
136 frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1)
137
138 dctx = zstd.ZstdDecompressor(**kwargs)
139
140 result = dctx.multi_decompress_to_buffer(frames_buffer)
141
142 self.assertEqual(len(result), len(original))
143 for i, frame in enumerate(result):
144 self.assertEqual(frame.tobytes(), original[i])
145
146 frames_list = [f.tobytes() for f in frames_buffer]
147 result = dctx.multi_decompress_to_buffer(frames_list)
148
149 self.assertEqual(len(result), len(original))
150 for i, frame in enumerate(result):
151 self.assertEqual(frame.tobytes(), original[i])
@@ -1,117 +1,145
1 Version History
1 Version History
2 ===============
2 ===============
3
3
4 0.8.0 (released 2017-03-08)
5 ---------------------------
6
7 * CompressionParameters now has an estimated_compression_context_size() method.
8 zstd.estimate_compression_context_size() is now deprecated and slated for
9 removal.
10 * Implemented a lot of fuzzing tests.
11 * CompressionParameters instances now perform extra validation by calling
12 ZSTD_checkCParams() at construction time.
13 * multi_compress_to_buffer() API for compressing multiple inputs as a
14 single operation, as efficiently as possible (example after this list).
15 * ZSTD_CStream instances are now used across multiple operations on
16 ZstdCompressor instances, resulting in much better performance for
17 APIs that do streaming.
18 * ZSTD_DStream instances are now used across multiple operations on
19 ZstdDecompressor instances, resulting in much better performance for
20 APIs that do streaming.
21 * train_dictionary() now releases the GIL.
22 * Support for training dictionaries using the COVER algorithm.
23 * multi_decompress_to_buffer() API for decompressing multiple frames as a
24 single operation, as efficiently as possible.
25 * Support for multi-threaded compression.
26 * Disable deprecation warnings when compiling CFFI module.
27 * Fixed memory leak in train_dictionary().
28 * Removed DictParameters type.
29 * train_dictionary() now accepts keyword arguments instead of a
30 DictParameters instance to control dictionary generation.
31
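For illustration, a sketch combining the two batch APIs above, adapted from
the fuzzing tests added in this changeset (argument names follow those
tests)::

    import zstd

    inputs = [b'foo' * 100, b'bar' * 100]

    cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
    frames = cctx.multi_compress_to_buffer(inputs, threads=-1)

    dctx = zstd.ZstdDecompressor()
    result = dctx.multi_decompress_to_buffer(frames)

    assert [r.tobytes() for r in result] == inputs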
4 0.7.0 (released 2017-02-07)
32 0.7.0 (released 2017-02-07)
5 ---------------------------
33 ---------------------------
6
34
7 * Added zstd.get_frame_parameters() to obtain info about a zstd frame.
35 * Added zstd.get_frame_parameters() to obtain info about a zstd frame.
8 * Added ZstdDecompressor.decompress_content_dict_chain() for efficient
36 * Added ZstdDecompressor.decompress_content_dict_chain() for efficient
9 decompression of *content-only dictionary chains*.
37 decompression of *content-only dictionary chains*.
10 * CFFI module fully implemented; all tests run against both C extension and
38 * CFFI module fully implemented; all tests run against both C extension and
11 CFFI implementation.
39 CFFI implementation.
12 * Vendored version of zstd updated to 1.1.3.
40 * Vendored version of zstd updated to 1.1.3.
13 * ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference()
41 * ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference()
14 to avoid extra memory allocation of dict data.
42 to avoid extra memory allocation of dict data.
15 * Add function names to error messages (by using ":name" in PyArg_Parse*
43 * Add function names to error messages (by using ":name" in PyArg_Parse*
16 functions).
44 functions).
17 * Reuse decompression context across operations. Previously, we created a
45 * Reuse decompression context across operations. Previously, we created a
18 new ZSTD_DCtx for each decompress(). This was measured to slow down
46 new ZSTD_DCtx for each decompress(). This was measured to slow down
19 decompression by 40-200MB/s. The API guarantees say ZstdDecompressor
47 decompression by 40-200MB/s. The API guarantees say ZstdDecompressor
20 is not thread safe. So we reuse the ZSTD_DCtx across operations and make
48 is not thread safe. So we reuse the ZSTD_DCtx across operations and make
21 things faster in the process.
49 things faster in the process.
22 * ZstdCompressor.write_to()'s compress() and flush() methods now return number
50 * ZstdCompressor.write_to()'s compress() and flush() methods now return number
23 of bytes written.
51 of bytes written.
24 * ZstdDecompressor.write_to()'s write() method now returns the number of bytes
52 * ZstdDecompressor.write_to()'s write() method now returns the number of bytes
25 written to the underlying output object.
53 written to the underlying output object.
26 * CompressionParameters instances now expose their values as attributes.
54 * CompressionParameters instances now expose their values as attributes.
27 * CompressionParameters instances no longer are subscriptable nor behave
55 * CompressionParameters instances no longer are subscriptable nor behave
28 as tuples (backwards incompatible). Use attributes to obtain values.
56 as tuples (backwards incompatible). Use attributes to obtain values.
29 * DictParameters instances now expose their values as attributes.
57 * DictParameters instances now expose their values as attributes.
30
58
31 0.6.0 (released 2017-01-14)
59 0.6.0 (released 2017-01-14)
32 ---------------------------
60 ---------------------------
33
61
34 * Support for legacy zstd protocols (build time opt in feature).
62 * Support for legacy zstd protocols (build time opt in feature).
35 * Automation improvements to test against Python 3.6, latest versions
63 * Automation improvements to test against Python 3.6, latest versions
36 of Tox, more deterministic AppVeyor behavior.
64 of Tox, more deterministic AppVeyor behavior.
37 * CFFI "parser" improved to use a compiler preprocessor instead of rewriting
65 * CFFI "parser" improved to use a compiler preprocessor instead of rewriting
38 source code manually.
66 source code manually.
39 * Vendored version of zstd updated to 1.1.2.
67 * Vendored version of zstd updated to 1.1.2.
40 * Documentation improvements.
68 * Documentation improvements.
41 * Introduce a bench.py script for performing (crude) benchmarks.
69 * Introduce a bench.py script for performing (crude) benchmarks.
42 * ZSTD_CCtx instances are now reused across multiple compress() operations.
70 * ZSTD_CCtx instances are now reused across multiple compress() operations.
43 * ZstdCompressor.write_to() now has a flush() method.
71 * ZstdCompressor.write_to() now has a flush() method.
44 * ZstdCompressor.compressobj()'s flush() method now accepts an argument to
72 * ZstdCompressor.compressobj()'s flush() method now accepts an argument to
45 flush a block (as opposed to ending the stream).
73 flush a block (as opposed to ending the stream).
46 * Disallow compress(b'') when writing content sizes by default (issue #11).
74 * Disallow compress(b'') when writing content sizes by default (issue #11).
47
75
48 0.5.2 (released 2016-11-12)
76 0.5.2 (released 2016-11-12)
49 ---------------------------
77 ---------------------------
50
78
51 * more packaging fixes for source distribution
79 * more packaging fixes for source distribution
52
80
53 0.5.1 (released 2016-11-12)
81 0.5.1 (released 2016-11-12)
54 ---------------------------
82 ---------------------------
55
83
56 * setup_zstd.py is included in the source distribution
84 * setup_zstd.py is included in the source distribution
57
85
58 0.5.0 (released 2016-11-10)
86 0.5.0 (released 2016-11-10)
59 ---------------------------
87 ---------------------------
60
88
61 * Vendored version of zstd updated to 1.1.1.
89 * Vendored version of zstd updated to 1.1.1.
62 * Continuous integration for Python 3.6 and 3.7
90 * Continuous integration for Python 3.6 and 3.7
63 * Continuous integration for Conda
91 * Continuous integration for Conda
64 * Added compression and decompression APIs providing similar interfaces
92 * Added compression and decompression APIs providing similar interfaces
65 to the standard library ``zlib`` and ``bz2`` modules. This allows
93 to the standard library ``zlib`` and ``bz2`` modules. This allows
66 coding to a common interface.
94 coding to a common interface.
67 * ``zstd.__version__`` is now defined.
95 * ``zstd.__version__`` is now defined.
68 * ``read_from()`` on various APIs now accepts objects implementing the buffer
96 * ``read_from()`` on various APIs now accepts objects implementing the buffer
69 protocol.
97 protocol.
70 * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers
98 * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers
71 to pass in an existing buffer with a header without having to create a
99 to pass in an existing buffer with a header without having to create a
72 slice or a new object.
100 slice or a new object.
73 * Implemented ``ZstdCompressionDict.as_bytes()``.
101 * Implemented ``ZstdCompressionDict.as_bytes()``.
74 * Python's memory allocator is now used instead of ``malloc()``.
102 * Python's memory allocator is now used instead of ``malloc()``.
75 * Low-level zstd data structures are reused in more instances, cutting down
103 * Low-level zstd data structures are reused in more instances, cutting down
76 on overhead for certain operations.
104 on overhead for certain operations.
77 * ``distutils`` boilerplate for obtaining an ``Extension`` instance
105 * ``distutils`` boilerplate for obtaining an ``Extension`` instance
78 has now been refactored into a standalone ``setup_zstd.py`` file. This
106 has now been refactored into a standalone ``setup_zstd.py`` file. This
79 allows other projects with ``setup.py`` files to reuse the
107 allows other projects with ``setup.py`` files to reuse the
80 ``distutils`` code for this project without copying code.
108 ``distutils`` code for this project without copying code.
81 * The monolithic ``zstd.c`` file has been split into a header file defining
109 * The monolithic ``zstd.c`` file has been split into a header file defining
82 types and separate ``.c`` source files for the implementation.
110 types and separate ``.c`` source files for the implementation.
83
111
84 History of the Project
112 History of the Project
85 ======================
113 ======================
86
114
87 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a
115 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a
88 Python extension for use by the Mercurial project. A very hacky prototype
116 Python extension for use by the Mercurial project. A very hacky prototype
89 is sent to the mercurial-devel list for RFC.
117 is sent to the mercurial-devel list for RFC.
90
118
91 2016-09-03 - Most functionality from Zstandard C API implemented. Source
119 2016-09-03 - Most functionality from Zstandard C API implemented. Source
92 code published on https://github.com/indygreg/python-zstandard. Travis-CI
120 code published on https://github.com/indygreg/python-zstandard. Travis-CI
93 automation configured. 0.0.1 release on PyPI.
121 automation configured. 0.0.1 release on PyPI.
94
122
95 2016-09-05 - After the API was rounded out a bit and support for Python
123 2016-09-05 - After the API was rounded out a bit and support for Python
96 2.6 and 2.7 was added, version 0.1 was released to PyPI.
124 2.6 and 2.7 was added, version 0.1 was released to PyPI.
97
125
98 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2
126 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2
99 was released to PyPI.
127 was released to PyPI.
100
128
101 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor
129 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor
102 now accepts arguments controlling frame parameters. The source size can now
130 now accepts arguments controlling frame parameters. The source size can now
103 be declared when performing streaming compression. ZstdDecompressor.decompress()
131 be declared when performing streaming compression. ZstdDecompressor.decompress()
104 is implemented. Compression dictionaries are now cached when using the simple
132 is implemented. Compression dictionaries are now cached when using the simple
105 compression and decompression APIs. Memory size APIs added.
133 compression and decompression APIs. Memory size APIs added.
106 ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been
134 ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been
107 implemented. This rounds out the major compression/decompression APIs planned
135 implemented. This rounds out the major compression/decompression APIs planned
108 by the author.
136 by the author.
109
137
110 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully
138 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully
111 decoding a zstd frame (issue #2).
139 decoding a zstd frame (issue #2).
112
140
113 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and
141 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and
114 write buffer sizes, and a few bug fixes involving failure to read/write
142 write buffer sizes, and a few bug fixes involving failure to read/write
115 all data when buffer sizes were too small to hold remaining data.
143 all data when buffer sizes were too small to hold remaining data.
116
144
117 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements.
145 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements.
@@ -1,943 +1,1393
1 ================
1 ================
2 python-zstandard
2 python-zstandard
3 ================
3 ================
4
4
5 This project provides Python bindings for interfacing with the
5 This project provides Python bindings for interfacing with the
6 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
6 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
7 and CFFI interface are provided.
7 and CFFI interface are provided.
8
8
9 The primary goal of the project is to provide a rich interface to the
9 The primary goal of the project is to provide a rich interface to the
10 underlying C API through a Pythonic interface while not sacrificing
10 underlying C API through a Pythonic interface while not sacrificing
11 performance. This means exposing most of the features and flexibility
11 performance. This means exposing most of the features and flexibility
12 of the C API while not sacrificing usability or safety that Python provides.
12 of the C API while not sacrificing usability or safety that Python provides.
13
13
14 The canonical home for this project is
14 The canonical home for this project is
15 https://github.com/indygreg/python-zstandard.
15 https://github.com/indygreg/python-zstandard.
16
16
17 | |ci-status| |win-ci-status|
17 | |ci-status| |win-ci-status|
18
18
19 State of Project
19 State of Project
20 ================
20 ================
21
21
22 The project is officially in beta state. The author is reasonably satisfied
22 The project is officially in beta state. The author is reasonably satisfied
23 with the current API and that functionality works as advertised. There
23 that functionality works as advertised. **There will be some backwards
24 may be some backwards incompatible changes before 1.0. Though the author
24 incompatible changes before 1.0, probably in the 0.9 release.** This may
25 does not intend to make any major changes to the Python API.
25 involve renaming the main module from *zstd* to *zstandard* and renaming
26 various types and methods. Pin the package version to prevent unwanted
27 breakage when this change occurs!
26
28
27 This project is vendored and distributed with Mercurial 4.1, where it is
29 This project is vendored and distributed with Mercurial 4.1, where it is
28 used in a production capacity.
30 used in a production capacity.
29
31
30 There is continuous integration for Python versions 2.6, 2.7, and 3.3+
32 There is continuous integration for Python versions 2.6, 2.7, and 3.3+
31 on Linux x86_64 and Windows x86 and x86_64. The author is reasonably
33 on Linux x86_64 and Windows x86 and x86_64. The author is reasonably
32 confident the extension is stable and works as advertised on these
34 confident the extension is stable and works as advertised on these
33 platforms.
35 platforms.
34
36
37 The CFFI bindings are mostly feature complete. Where a feature is implemented
38 in CFFI, unit tests run against both C extension and CFFI implementation to
39 ensure behavior parity.
40
35 Expected Changes
41 Expected Changes
36 ----------------
42 ----------------
37
43
38 The author is reasonably confident in the current state of what's
44 The author is reasonably confident in the current state of what's
39 implemented on the ``ZstdCompressor`` and ``ZstdDecompressor`` types.
45 implemented on the ``ZstdCompressor`` and ``ZstdDecompressor`` types.
40 Those APIs likely won't change significantly. Some low-level behavior
46 Those APIs likely won't change significantly. Some low-level behavior
41 (such as naming and types expected by arguments) may change.
47 (such as naming and types expected by arguments) may change.
42
48
43 There will likely be arguments added to control the input and output
49 There will likely be arguments added to control the input and output
44 buffer sizes (currently, certain operations read and write in chunk
50 buffer sizes (currently, certain operations read and write in chunk
45 sizes using zstd's preferred defaults).
51 sizes using zstd's preferred defaults).
46
52
47 There should be an API that accepts an object that conforms to the buffer
53 There should be an API that accepts an object that conforms to the buffer
48 interface and returns an iterator over compressed or decompressed output.
54 interface and returns an iterator over compressed or decompressed output.
49
55
56 There should be an API that exposes an ``io.RawIOBase`` interface to
57 compressor and decompressor streams, like how ``gzip.GzipFile`` from
58 the standard library works (issue 13).
59
50 The author is on the fence as to whether to support the extremely
60 The author is on the fence as to whether to support the extremely
51 low level compression and decompression APIs. It could be useful to
61 low level compression and decompression APIs. It could be useful to
52 support compression without the framing headers. But the author doesn't
62 support compression without the framing headers. But the author doesn't
53 believe it a high priority at this time.
63 believe it a high priority at this time.
54
64
55 The CFFI bindings are feature complete and all tests run against both
65 There will likely be a refactoring of the module names. Currently,
56 the C extension and CFFI bindings to ensure behavior parity.
66 ``zstd`` is a C extension and ``zstd_cffi`` is the CFFI interface.
67 This means that all code for the C extension must be implemented in
68 C. ``zstd`` may be converted to a Python module so code can be reused
69 between CFFI and C and so not all code in the C extension has to be C.
57
70
58 Requirements
71 Requirements
59 ============
72 ============
60
73
61 This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, 3.5, and
74 This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, 3.5, and
62 3.6 on common platforms (Linux, Windows, and OS X). Only x86_64 is
75 3.6 on common platforms (Linux, Windows, and OS X). Only x86_64 is
63 currently well-tested as an architecture.
76 currently well-tested as an architecture.
64
77
65 Installing
78 Installing
66 ==========
79 ==========
67
80
68 This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard.
81 This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard.
69 So, to install this package::
82 So, to install this package::
70
83
71 $ pip install zstandard
84 $ pip install zstandard
72
85
73 Binary wheels are made available for some platforms. If you need to
86 Binary wheels are made available for some platforms. If you need to
74 install from a source distribution, all you should need is a working C
87 install from a source distribution, all you should need is a working C
75 compiler and the Python development headers/libraries. On many Linux
88 compiler and the Python development headers/libraries. On many Linux
76 distributions, you can install a ``python-dev`` or ``python-devel``
89 distributions, you can install a ``python-dev`` or ``python-devel``
77 package to provide these dependencies.
90 package to provide these dependencies.
78
91
79 Packages are also uploaded to Anaconda Cloud at
92 Packages are also uploaded to Anaconda Cloud at
80 https://anaconda.org/indygreg/zstandard. See that URL for how to install
93 https://anaconda.org/indygreg/zstandard. See that URL for how to install
81 this package with ``conda``.
94 this package with ``conda``.
82
95
83 Performance
96 Performance
84 ===========
97 ===========
85
98
86 Very crude and non-scientific benchmarking (most benchmarks fall in this
99 Very crude and non-scientific benchmarking (most benchmarks fall in this
87 category because proper benchmarking is hard) show that the Python bindings
100 category because proper benchmarking is hard) show that the Python bindings
88 perform within 10% of the native C implementation.
101 perform within 10% of the native C implementation.
89
102
90 The following table compares the performance of compressing and decompressing
103 The following table compares the performance of compressing and decompressing
91 a 1.1 GB tar file comprised of the files in a Firefox source checkout. Values
104 a 1.1 GB tar file comprised of the files in a Firefox source checkout. Values
92 obtained with the ``zstd`` program are on the left. The remaining columns detail
105 obtained with the ``zstd`` program are on the left. The remaining columns detail
93 performance of various compression APIs in the Python bindings.
106 performance of various compression APIs in the Python bindings.
94
107
95 +-------+-----------------+-----------------+-----------------+---------------+
108 +-------+-----------------+-----------------+-----------------+---------------+
96 | Level | Native | Simple | Stream In | Stream Out |
109 | Level | Native | Simple | Stream In | Stream Out |
97 | | Comp / Decomp | Comp / Decomp | Comp / Decomp | Comp |
110 | | Comp / Decomp | Comp / Decomp | Comp / Decomp | Comp |
98 +=======+=================+=================+=================+===============+
111 +=======+=================+=================+=================+===============+
99 | 1 | 490 / 1338 MB/s | 458 / 1266 MB/s | 407 / 1156 MB/s | 405 MB/s |
112 | 1 | 490 / 1338 MB/s | 458 / 1266 MB/s | 407 / 1156 MB/s | 405 MB/s |
100 +-------+-----------------+-----------------+-----------------+---------------+
113 +-------+-----------------+-----------------+-----------------+---------------+
101 | 2 | 412 / 1288 MB/s | 381 / 1203 MB/s | 345 / 1128 MB/s | 349 MB/s |
114 | 2 | 412 / 1288 MB/s | 381 / 1203 MB/s | 345 / 1128 MB/s | 349 MB/s |
102 +-------+-----------------+-----------------+-----------------+---------------+
115 +-------+-----------------+-----------------+-----------------+---------------+
103 | 3 | 342 / 1312 MB/s | 319 / 1182 MB/s | 285 / 1165 MB/s | 287 MB/s |
116 | 3 | 342 / 1312 MB/s | 319 / 1182 MB/s | 285 / 1165 MB/s | 287 MB/s |
104 +-------+-----------------+-----------------+-----------------+---------------+
117 +-------+-----------------+-----------------+-----------------+---------------+
105 | 11 | 64 / 1506 MB/s | 66 / 1436 MB/s | 56 / 1342 MB/s | 57 MB/s |
118 | 11 | 64 / 1506 MB/s | 66 / 1436 MB/s | 56 / 1342 MB/s | 57 MB/s |
106 +-------+-----------------+-----------------+-----------------+---------------+
119 +-------+-----------------+-----------------+-----------------+---------------+
107
120
108 Again, these are very unscientific. But it shows that Python is capable of
121 Again, these are very unscientific. But it shows that Python is capable of
109 compressing at several hundred MB/s and decompressing at over 1 GB/s.
122 compressing at several hundred MB/s and decompressing at over 1 GB/s.
110
123
111 Comparison to Other Python Bindings
124 Comparison to Other Python Bindings
112 ===================================
125 ===================================
113
126
114 https://pypi.python.org/pypi/zstd is an alternate Python binding to
127 https://pypi.python.org/pypi/zstd is an alternate Python binding to
115 Zstandard. At the time this was written, the latest release of that
128 Zstandard. At the time this was written, the latest release of that
116 package (1.1.2) only exposed the simple APIs for compression and decompression.
129 package (1.1.2) only exposed the simple APIs for compression and decompression.
117 This package exposes much more of the zstd API, including streaming and
130 This package exposes much more of the zstd API, including streaming and
118 dictionary compression. This package also has CFFI support.
131 dictionary compression. This package also has CFFI support.
119
132
120 Bundling of Zstandard Source Code
133 Bundling of Zstandard Source Code
121 =================================
134 =================================
122
135
123 The source repository for this project contains a vendored copy of the
136 The source repository for this project contains a vendored copy of the
124 Zstandard source code. This is done for a few reasons.
137 Zstandard source code. This is done for a few reasons.
125
138
126 First, Zstandard is relatively new and not yet widely available as a system
139 First, Zstandard is relatively new and not yet widely available as a system
127 package. Providing a copy of the source code enables the Python C extension
140 package. Providing a copy of the source code enables the Python C extension
128 to be compiled without requiring the user to obtain the Zstandard source code
141 to be compiled without requiring the user to obtain the Zstandard source code
129 separately.
142 separately.
130
143
131 Second, Zstandard has both a stable *public* API and an *experimental* API.
144 Second, Zstandard has both a stable *public* API and an *experimental* API.
132 The *experimental* API is actually quite useful (contains functionality for
145 The *experimental* API is actually quite useful (contains functionality for
133 training dictionaries for example), so it is something we wish to expose to
146 training dictionaries for example), so it is something we wish to expose to
134 Python. However, the *experimental* API is only available via static linking.
147 Python. However, the *experimental* API is only available via static linking.
135 Furthermore, the *experimental* API can change at any time. So, control over
148 Furthermore, the *experimental* API can change at any time. So, control over
136 the exact version of the Zstandard library linked against is important to
149 the exact version of the Zstandard library linked against is important to
137 ensure known behavior.
150 ensure known behavior.
138
151
139 Instructions for Building and Testing
152 Instructions for Building and Testing
140 =====================================
153 =====================================
141
154
142 Once you have the source code, the extension can be built via setup.py::
155 Once you have the source code, the extension can be built via setup.py::
143
156
144 $ python setup.py build_ext
157 $ python setup.py build_ext
145
158
146 We recommend testing with ``nose``::
159 We recommend testing with ``nose``::
147
160
148 $ nosetests
161 $ nosetests
149
162
150 A Tox configuration is present to test against multiple Python versions::
163 A Tox configuration is present to test against multiple Python versions::
151
164
152 $ tox
165 $ tox
153
166
154 Tests use the ``hypothesis`` Python package to perform fuzzing. If you
167 Tests use the ``hypothesis`` Python package to perform fuzzing. If you
155 don't have it, those tests won't run.
168 don't have it, those tests won't run. Since the fuzzing tests take longer
169 to execute than normal tests, you'll need to opt in to running them by
170 setting the ``ZSTD_SLOW_TESTS`` environment variable. This is set
171 automatically when using ``tox``.
156
172
157 There is also an experimental CFFI module. You need the ``cffi`` Python
173 The ``cffi`` Python package needs to be installed in order to build the CFFI
158 package installed to build and test that.
174 bindings. If it isn't present, the CFFI bindings won't be built.
159
175
160 To create a virtualenv with all development dependencies, do something
176 To create a virtualenv with all development dependencies, do something
161 like the following::
177 like the following::
162
178
163 # Python 2
179 # Python 2
164 $ virtualenv venv
180 $ virtualenv venv
165
181
166 # Python 3
182 # Python 3
167 $ python3 -m venv venv
183 $ python3 -m venv venv
168
184
169 $ source venv/bin/activate
185 $ source venv/bin/activate
170 $ pip install cffi hypothesis nose tox
186 $ pip install cffi hypothesis nose tox
171
187
172 API
188 API
173 ===
189 ===
174
190
175 The compiled C extension provides a ``zstd`` Python module. This module
191 The compiled C extension provides a ``zstd`` Python module. The CFFI
176 exposes the following interfaces.
192 bindings provide a ``zstd_cffi`` module. Both provide an identical API
193 interface. The types, functions, and attributes exposed by these modules
194 are documented in the sections below.
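
Because the two modules expose the same interface, a program can use whichever
backend imports successfully. The following is a minimal sketch (not an
official helper; only the ``zstd`` and ``zstd_cffi`` module names come from
this document)::

   # Prefer the C extension; fall back to the CFFI bindings if the
   # extension is not available. The rest of the program does not need
   # to care which module was imported.
   try:
       import zstd
   except ImportError:
       import zstd_cffi as zstd

   cctx = zstd.ZstdCompressor()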

.. note::

   The documentation in this section makes references to various zstd
   concepts and functionality. The ``Concepts`` section below explains
   these concepts in more detail.

ZstdCompressor
--------------

The ``ZstdCompressor`` class provides an interface for performing
compression operations.

Each instance is associated with parameters that control compression
behavior. These come from the following named arguments (all optional):

level
   Integer compression level. Valid values are between 1 and 22.
dict_data
   Compression dictionary to use.

   Note: When using dictionary data and ``compress()`` is called multiple
   times, the ``CompressionParameters`` derived from an integer compression
   ``level`` and the first compressed data's size will be reused for all
   subsequent operations. This may not be desirable if source data size
   varies significantly.
compression_params
   A ``CompressionParameters`` instance (overrides the ``level`` value).
write_checksum
   Whether a 4 byte checksum should be written with the compressed data.
   Defaults to False. If True, the decompressor can verify that decompressed
   data matches the original input data.
write_content_size
   Whether the size of the uncompressed data will be written into the
   header of compressed data. Defaults to False. The data will only be
   written if the compressor knows the size of the input data. This is
   likely not true for streaming compression.
write_dict_id
   Whether to write the dictionary ID into the compressed data.
   Defaults to True. The dictionary ID is only written if a dictionary
   is being used.
threads
   Enables and sets the number of threads to use for multi-threaded compression
   operations. Defaults to 0, which means to use single-threaded compression.
   Negative values will resolve to the number of logical CPUs in the system.
   Read below for more info on multi-threaded compression. This argument only
   controls thread count for operations that operate on individual pieces of
   data. APIs that spawn multiple threads for working on multiple pieces of
   data have their own ``threads`` argument.
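
For example, a compressor that embeds checksums and content sizes and uses
multi-threaded compression could be constructed as follows (the specific
values are arbitrary illustrations, not recommendations)::

   cctx = zstd.ZstdCompressor(
       level=10,                 # compression level between 1 and 22
       write_checksum=True,      # emit a 4 byte checksum with the data
       write_content_size=True,  # record the input size when it is known
       threads=-1,               # negative resolves to logical CPU count
   )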

Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
instances can be called from multiple Python threads simultaneously. In other
words, assume instances are not thread safe unless stated otherwise.

Simple API
^^^^^^^^^^

``compress(data)`` compresses and returns data as a one-shot operation.::

   cctx = zstd.ZstdCompressor()
   compressed = cctx.compress(b'data to compress')

The ``data`` argument can be any object that implements the *buffer protocol*.

Unless ``compression_params`` or ``dict_data`` are passed to the
``ZstdCompressor``, each invocation of ``compress()`` will calculate the
optimal compression parameters for the configured compression ``level`` and
input data size (some parameters are fine-tuned for small input sizes).

If a compression dictionary is being used, the compression parameters
determined from the first input's size will be reused for subsequent
operations.

There is currently a deficiency in zstd's C APIs that makes it difficult
to round trip empty inputs when ``write_content_size=True``. Attempting
this will raise a ``ValueError`` unless ``allow_empty=True`` is passed
to ``compress()``.
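
To make the ``allow_empty`` behavior concrete, here is a short sketch of the
behavior described above::

   cctx = zstd.ZstdCompressor(write_content_size=True)

   # cctx.compress(b'') would raise ValueError because an empty input
   # cannot be round tripped when the content size is being written.

   # Opting in explicitly allows the empty input to be compressed.
   compressed = cctx.compress(b'', allow_empty=True)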

Streaming Input API
^^^^^^^^^^^^^^^^^^^

``write_to(fh)`` (which behaves as a context manager) allows you to *stream*
data into a compressor.::

   cctx = zstd.ZstdCompressor(level=10)
   with cctx.write_to(fh) as compressor:
       compressor.write(b'chunk 0')
       compressor.write(b'chunk 1')
       ...

The argument to ``write_to()`` must have a ``write(data)`` method. As
compressed data is available, ``write()`` will be called with the compressed
data as its argument. Many common Python types implement ``write()``, including
open file handles and ``io.BytesIO``.

``write_to()`` returns an object representing a streaming compressor instance.
It **must** be used as a context manager. That object's ``write(data)`` method
is used to feed data into the compressor.

A ``flush()`` method can be called to evict whatever data remains within the
compressor's internal state into the output object. This may result in 0 or
more ``write()`` calls to the output object.

Both ``write()`` and ``flush()`` return the number of bytes written to the
object's ``write()``. In many cases, small inputs do not accumulate enough
data to cause a write and ``write()`` will return ``0``.

If the size of the data being fed to this streaming compressor is known,
you can declare it before compression begins::

   cctx = zstd.ZstdCompressor()
   with cctx.write_to(fh, size=data_len) as compressor:
       compressor.write(chunk0)
       compressor.write(chunk1)
       ...

Declaring the size of the source data allows compression parameters to
be tuned. And if ``write_content_size`` is used, it also results in the
content size being written into the frame header of the output data.

The size of chunks being ``write()`` to the destination can be specified::

   cctx = zstd.ZstdCompressor()
   with cctx.write_to(fh, write_size=32768) as compressor:
       ...

To see how much memory is being used by the streaming compressor::

   cctx = zstd.ZstdCompressor()
   with cctx.write_to(fh) as compressor:
       ...
       byte_size = compressor.memory_size()

Streaming Output API
^^^^^^^^^^^^^^^^^^^^

``read_from(reader)`` provides a mechanism to stream data out of a compressor
as an iterator of data chunks.::

   cctx = zstd.ZstdCompressor()
   for chunk in cctx.read_from(fh):
       # Do something with emitted data.

``read_from()`` accepts an object that has a ``read(size)`` method or conforms
to the buffer protocol. (``bytes`` and ``memoryview`` are 2 common types that
provide the buffer protocol.)

Uncompressed data is fetched from the source either by calling ``read(size)``
or by fetching a slice of data from the object directly (in the case where
the buffer protocol is being used). The returned iterator consists of chunks
of compressed data.

If reading from the source via ``read()``, ``read()`` will be called until
it raises or returns an empty bytes (``b''``). It is perfectly valid for
the source to deliver fewer bytes than were requested by ``read(size)``.

Like ``write_to()``, ``read_from()`` also accepts a ``size`` argument
declaring the size of the input stream::

   cctx = zstd.ZstdCompressor()
   for chunk in cctx.read_from(fh, size=some_int):
       pass

You can also control the size that data is ``read()`` from the source and
the ideal size of output chunks::

   cctx = zstd.ZstdCompressor()
   for chunk in cctx.read_from(fh, read_size=16384, write_size=8192):
       pass

Unlike ``write_to()``, ``read_from()`` does not give direct control over the
sizes of chunks fed into the compressor. Instead, chunk sizes will be whatever
the object being read from delivers. These will often be of a uniform size.

Stream Copying API
^^^^^^^^^^^^^^^^^^

``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while
compressing it.::

   cctx = zstd.ZstdCompressor()
   cctx.copy_stream(ifh, ofh)

For example, say you wish to compress a file::

   cctx = zstd.ZstdCompressor()
   with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
       cctx.copy_stream(ifh, ofh)

It is also possible to declare the size of the source stream::

   cctx = zstd.ZstdCompressor()
   cctx.copy_stream(ifh, ofh, size=len_of_input)

You can also specify how large the chunks that are ``read()`` and ``write()``
from and to the streams should be::

   cctx = zstd.ZstdCompressor()
   cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384)

The stream copier returns a 2-tuple of bytes read and written::

   cctx = zstd.ZstdCompressor()
   read_count, write_count = cctx.copy_stream(ifh, ofh)

Compressor API
^^^^^^^^^^^^^^

``compressobj()`` returns an object that exposes ``compress(data)`` and
``flush()`` methods. Each returns compressed data or an empty bytes.

The purpose of ``compressobj()`` is to provide an API-compatible interface
with ``zlib.compressobj`` and ``bz2.BZ2Compressor``. This allows callers to
swap in different compressor objects while using the same API.

``flush()`` accepts an optional argument indicating how to end the stream.
``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream.
Once this type of flush is performed, ``compress()`` and ``flush()`` can
no longer be called. This type of flush **must** be called to end the
compression context. If not called, returned data may be incomplete.

A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a
zstd block. Flushes of this type can be performed multiple times. The next
call to ``compress()`` will begin a new zstd block.

Here is how this API should be used::

   cctx = zstd.ZstdCompressor()
   cobj = cctx.compressobj()
   data = cobj.compress(b'raw input 0')
   data = cobj.compress(b'raw input 1')
   data = cobj.flush()

Or to flush blocks::

   cctx = zstd.ZstdCompressor()
   cobj = cctx.compressobj()
   data = cobj.compress(b'chunk in first block')
   data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
   data = cobj.compress(b'chunk in second block')
   data = cobj.flush()

For best performance results, keep input chunks under 256KB. This avoids
extra allocations for a large output object.

It is possible to declare the input size of the data that will be fed into
the compressor::

   cctx = zstd.ZstdCompressor()
   cobj = cctx.compressobj(size=6)
   data = cobj.compress(b'foobar')
   data = cobj.flush()

Batch Compression API
^^^^^^^^^^^^^^^^^^^^^

(Experimental. Not yet supported in CFFI bindings.)

``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple
inputs as a single operation.

Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a
``BufferWithSegments``, or a list containing byte like objects. Each element of
the container will be compressed individually using the configured parameters
on the ``ZstdCompressor`` instance.

The ``threads`` argument controls how many threads to use for compression. The
default is ``0`` which means to use a single thread. Negative values use the
number of logical CPUs in the machine.

The function returns a ``BufferWithSegmentsCollection``. This type represents
N discrete memory allocations, each holding 1 or more compressed frames.
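
As an illustrative sketch (the inputs are arbitrary, and the indexing and
``tobytes()`` usage assume ``BufferWithSegmentsCollection`` behavior that is
not documented in this section)::

   cctx = zstd.ZstdCompressor(level=3)

   # Arbitrary example inputs; any list of byte like objects works.
   inputs = [b'foo' * 100, b'bar' * 100, b'baz' * 100]

   # Compress every input in a single call using 4 worker threads.
   collection = cctx.multi_compress_to_buffer(inputs, threads=4)

   # Assumption: the collection can be indexed and each segment copied
   # out with tobytes(); segment i holds the compressed frame for
   # inputs[i].
   first_frame = collection[0].tobytes()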

Output data is written to shared memory buffers. This means that unlike
regular Python objects, a reference to *any* object within the collection
keeps the shared buffer and therefore memory backing it alive. This can have
undesirable effects on process memory usage.

The API and behavior of this function are experimental and will likely change.
Known deficiencies include:

* If asked to use multiple threads, it will always spawn that many threads,
  even if the input is too small to use them. It should automatically lower
  the thread count when the extra threads would just add overhead.
* The buffer allocation strategy is fixed. There is room to make it dynamic,
  perhaps even to allow one output buffer per input, facilitating a variation
  of the API to return a list without the adverse effects of shared memory
  buffers.

ZstdDecompressor
----------------

The ``ZstdDecompressor`` class provides an interface for performing
decompression.

Each instance is associated with parameters that control decompression. These
come from the following named arguments (all optional):

dict_data
   Compression dictionary to use.

The interface of this class is very similar to ``ZstdCompressor`` (by design).

Unless specified otherwise, assume that no two methods of ``ZstdDecompressor``
instances can be called from multiple Python threads simultaneously. In other
words, assume instances are not thread safe unless stated otherwise.

Simple API
^^^^^^^^^^

``decompress(data)`` can be used to decompress an entire compressed zstd
frame in a single operation.::

   dctx = zstd.ZstdDecompressor()
   decompressed = dctx.decompress(data)

By default, ``decompress(data)`` will only work on data written with the content
size encoded in its header. This can be achieved by creating a
``ZstdCompressor`` with ``write_content_size=True``. If compressed data without
an embedded content size is seen, ``zstd.ZstdError`` will be raised.

If the compressed data doesn't have its content size embedded within it,
decompression can be attempted by specifying the ``max_output_size``
argument.::

   dctx = zstd.ZstdDecompressor()
   uncompressed = dctx.decompress(data, max_output_size=1048576)

Ideally, ``max_output_size`` will be identical to the decompressed output
size.

If ``max_output_size`` is too small to hold the decompressed data,
``zstd.ZstdError`` will be raised.

If ``max_output_size`` is larger than the decompressed data, the allocated
output buffer will be resized to only use the space required.

Please note that an allocation of the requested ``max_output_size`` will be
performed every time the method is called. Setting to a very large value could
result in a lot of work for the memory allocator and may result in
``MemoryError`` being raised if the allocation fails.

If the exact size of decompressed data is unknown, it is **strongly**
recommended to use a streaming API.

Streaming Input API
^^^^^^^^^^^^^^^^^^^

``write_to(fh)`` can be used to incrementally send compressed data to a
decompressor.::

   dctx = zstd.ZstdDecompressor()
   with dctx.write_to(fh) as decompressor:
       decompressor.write(compressed_data)

This behaves similarly to ``zstd.ZstdCompressor``: compressed data is written to
the decompressor by calling ``write(data)`` and decompressed output is written
to the output object by calling its ``write(data)`` method.

Calls to ``write()`` will return the number of bytes written to the output
object. Not all inputs will result in bytes being written, so return values
of ``0`` are possible.

The size of chunks being ``write()`` to the destination can be specified::

   dctx = zstd.ZstdDecompressor()
   with dctx.write_to(fh, write_size=16384) as decompressor:
       pass

You can see how much memory is being used by the decompressor::

   dctx = zstd.ZstdDecompressor()
   with dctx.write_to(fh) as decompressor:
       byte_size = decompressor.memory_size()

Streaming Output API
^^^^^^^^^^^^^^^^^^^^

``read_from(fh)`` provides a mechanism to stream decompressed data out of a
compressed source as an iterator of data chunks.::

   dctx = zstd.ZstdDecompressor()
   for chunk in dctx.read_from(fh):
       # Do something with original data.

``read_from()`` accepts a) an object with a ``read(size)`` method that will
return compressed bytes or b) an object conforming to the buffer protocol that
can expose its data as a contiguous range of bytes. The ``bytes`` and
``memoryview`` types expose this buffer protocol.

``read_from()`` returns an iterator whose elements are chunks of the
decompressed data.

The size of requested ``read()`` from the source can be specified::

   dctx = zstd.ZstdDecompressor()
   for chunk in dctx.read_from(fh, read_size=16384):
       pass

It is also possible to skip leading bytes in the input data::

   dctx = zstd.ZstdDecompressor()
   for chunk in dctx.read_from(fh, skip_bytes=1):
       pass

Skipping leading bytes is useful if the source data contains extra
*header* data but you want to avoid the overhead of making a buffer copy
or allocating a new ``memoryview`` object in order to decompress the data.

Similarly to ``ZstdCompressor.read_from()``, the consumer of the iterator
controls when data is decompressed. If the iterator isn't consumed,
decompression is put on hold.

When ``read_from()`` is passed an object conforming to the buffer protocol,
the behavior may seem similar to what occurs when the simple decompression
API is used. However, this API works when the decompressed size is unknown.
Furthermore, if feeding large inputs, the decompressor will work in chunks
instead of performing a single operation.

Stream Copying API
^^^^^^^^^^^^^^^^^^

``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while
performing decompression.::

   dctx = zstd.ZstdDecompressor()
   dctx.copy_stream(ifh, ofh)

e.g. to decompress a file to another file::

   dctx = zstd.ZstdDecompressor()
   with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
       dctx.copy_stream(ifh, ofh)

The size of chunks being ``read()`` and ``write()`` from and to the streams
can be specified::

   dctx = zstd.ZstdDecompressor()
   dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384)

Decompressor API
^^^^^^^^^^^^^^^^

``decompressobj()`` returns an object that exposes a ``decompress(data)``
method. Compressed data chunks are fed into ``decompress(data)`` and
uncompressed output (or an empty bytes) is returned. Output from subsequent
calls needs to be concatenated to reassemble the full decompressed byte
sequence.

The purpose of ``decompressobj()`` is to provide an API-compatible interface
with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers
to swap in different decompressor objects while using the same API.

Each object is single use: once an input frame is decoded, ``decompress()``
can no longer be called.

Here is how this API should be used::

   dctx = zstd.ZstdDecompressor()
   dobj = dctx.decompressobj()
   data = dobj.decompress(compressed_chunk_0)
   data = dobj.decompress(compressed_chunk_1)

Batch Decompression API
^^^^^^^^^^^^^^^^^^^^^^^

(Experimental. Not yet supported in CFFI bindings.)

``multi_decompress_to_buffer()`` performs decompression of multiple
frames as a single operation and returns a ``BufferWithSegmentsCollection``
containing decompressed data for all inputs.

Compressed frames can be passed to the function as a ``BufferWithSegments``,
a ``BufferWithSegmentsCollection``, or as a list containing objects that
conform to the buffer protocol. For best performance, pass a
``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as
minimal input validation will be done for those types. If calling from
Python (as opposed to C), constructing one of these instances may add
overhead that cancels out the performance benefit of the reduced validation
relative to list inputs.

The decompressed size of each frame must be discoverable. It can either be
embedded within the zstd frame (``write_content_size=True`` argument to
``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument.

The ``decompressed_sizes`` argument is an object conforming to the buffer
protocol which holds an array of 64-bit unsigned integers in the machine's
native format defining the decompressed sizes of each frame. If this argument
is passed, it avoids having to scan each frame for its decompressed size.
This frame scanning can add noticeable overhead in some scenarios.
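
For illustration, here is one possible way to supply ``decompressed_sizes``
(the frame and size variables are placeholders; packing with ``struct`` is
just one way to produce the required native-format ``uint64`` array)::

   import struct

   # Placeholders: compressed zstd frames and their known original sizes.
   frames = [frame0, frame1, frame2]
   sizes = [size0, size1, size2]

   # Pack the sizes as native-format unsigned 64-bit integers.
   decompressed_sizes = struct.pack('=%dQ' % len(sizes), *sizes)

   dctx = zstd.ZstdDecompressor()
   result = dctx.multi_decompress_to_buffer(
       frames, decompressed_sizes=decompressed_sizes)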

The ``threads`` argument controls the number of threads to use to perform
decompression operations. The default (``0``) or the value ``1`` means to
use a single thread. Negative values use the number of logical CPUs in the
machine.

.. note::

   It is possible to pass a ``mmap.mmap()`` instance into this function by
   wrapping it with a ``BufferWithSegments`` instance (which will define the
   offsets of frames within the memory mapped region).

This function is logically equivalent to performing ``dctx.decompress()``
on each input frame and returning the result.

This function exists to perform decompression on multiple frames as fast
as possible by having as little overhead as possible. Since decompression is
performed as a single operation and since the decompressed output is stored in
a single buffer, extra memory allocations, Python objects, and Python function
calls are avoided. This is ideal for scenarios where callers need to access
decompressed data for multiple frames.

Currently, the implementation always spawns multiple threads when requested,
even if the amount of work to do is small. In the future, it will be smarter
about avoiding threads and their associated overhead when the amount of
work to do is small.

Content-Only Dictionary Chain Decompression
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

``decompress_content_dict_chain(frames)`` performs decompression of a list of
zstd frames produced using chained *content-only* dictionary compression. Such
a list of frames is produced by compressing discrete inputs where each
non-initial input is compressed with a *content-only* dictionary consisting
of the content of the previous input.

For example, say you have the following inputs::

   inputs = [b'input 1', b'input 2', b'input 3']

The zstd frame chain consists of:

1. ``b'input 1'`` compressed in standalone/discrete mode
2. ``b'input 2'`` compressed using ``b'input 1'`` as a *content-only* dictionary
3. ``b'input 3'`` compressed using ``b'input 2'`` as a *content-only* dictionary

Each zstd frame **must** have the content size written.

The following Python code can be used to produce a *content-only dictionary
chain*::

   def make_chain(inputs):
       frames = []

       # First frame is compressed in standalone/discrete mode.
       zctx = zstd.ZstdCompressor(write_content_size=True)
       frames.append(zctx.compress(inputs[0]))

       # Subsequent frames use the previous fulltext as a content-only dictionary
       for i, raw in enumerate(inputs[1:]):
           dict_data = zstd.ZstdCompressionDict(inputs[i])
           zctx = zstd.ZstdCompressor(write_content_size=True, dict_data=dict_data)
           frames.append(zctx.compress(raw))

       return frames

``decompress_content_dict_chain()`` returns the uncompressed data of the last
element in the input chain.
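
Continuing the sketch above, decompressing the chain could look like this
(``make_chain`` and ``inputs`` come from the preceding example)::

   frames = make_chain(inputs)

   dctx = zstd.ZstdDecompressor()
   # Returns the fulltext of the final input, here b'input 3'.
   last = dctx.decompress_content_dict_chain(frames)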
629
753
630 It is possible to implement *content-only dictionary chain* decompression
754 It is possible to implement *content-only dictionary chain* decompression
631 on top of other Python APIs. However, this function will likely be significantly
755 on top of other Python APIs. However, this function will likely be significantly
632 faster, especially for long input chains, as it avoids the overhead of
756 faster, especially for long input chains, as it avoids the overhead of
633 instantiating and passing around intermediate objects between C and Python.
757 instantiating and passing around intermediate objects between C and Python.
634
758
635 Choosing an API
759 Multi-Threaded Compression
636 ---------------
760 --------------------------
637
638 Various forms of compression and decompression APIs are provided because each
639 are suitable for different use cases.
640
761
762 ``ZstdCompressor`` accepts a ``threads`` argument that controls the number
763 of threads to use for compression. The way this works is that input is split
764 into segments and each segment is fed into a worker pool for compression. Once
765 a segment is compressed, it is flushed/appended to the output.
766
767 The segment size for multi-threaded compression is chosen from the window size
768 of the compressor. This is derived from the ``window_log`` attribute of a
769 ``CompressionParameters`` instance. By default, segment sizes are in the 1+MB
770 range.
771
772 If multi-threaded compression is requested and the input is smaller than the
773 configured segment size, only a single compression thread will be used. If the
774 input is smaller than the segment size multiplied by the thread pool size or
775 if data cannot be delivered to the compressor fast enough, not all requested
776 compressor threads may be active simultaneously.
777
778 Compared to non-multi-threaded compression, multi-threaded compression has
779 higher per-operation overhead. This includes extra memory operations,
780 thread creation, lock acquisition, etc.
781
782 Due to the nature of multi-threaded compression using *N* compression
783 *states*, the output from multi-threaded compression will likely be larger
784 than non-multi-threaded compression. The difference is usually small. But
785 there is a CPU/wall time versus size trade off that may warrant investigation.
786
787 Output from multi-threaded compression does not require any special handling
788 on the decompression side. In other words, any zstd decompressor should be able
789 to consume data produced with multi-threaded compression.
790
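As a rough illustration of the above, a multi-threaded compressor is obtained by
passing ``threads`` to ``ZstdCompressor`` (a minimal sketch; the thread count and
input are arbitrary, and which compression APIs honor ``threads`` can vary by
version)::

   import zstd

   # Compress a large payload using 4 worker threads.
   cctx = zstd.ZstdCompressor(level=3, threads=4)
   compressed = cctx.compress(b'x' * (64 * 1048576))
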
678 Dictionary Creation and Management
791 Dictionary Creation and Management
679 ----------------------------------
792 ----------------------------------
680
793
794 Compression dictionaries are represented as the ``ZstdCompressionDict`` type.
795
689 Instances can be constructed from bytes::
796 Instances can be constructed from bytes::
690
797
691 dict_data = zstd.ZstdCompressionDict(data)
798 dict_data = zstd.ZstdCompressionDict(data)
692
799
693 It is possible to construct a dictionary from *any* data. Unless the
800 It is possible to construct a dictionary from *any* data. Unless the
694 data begins with a magic header, the dictionary will be treated as
801 data begins with a magic header, the dictionary will be treated as
695 *content-only*. *Content-only* dictionaries allow compression operations
802 *content-only*. *Content-only* dictionaries allow compression operations
696 that follow to reference raw data within the content. For one use of
803 that follow to reference raw data within the content. For one use of
697 *content-only* dictionaries, see
804 *content-only* dictionaries, see
698 ``ZstdDecompressor.decompress_content_dict_chain()``.
805 ``ZstdDecompressor.decompress_content_dict_chain()``.
699
806
700 More interestingly, instances can be created by *training* on sample data::
807 More interestingly, instances can be created by *training* on sample data::
701
808
702 dict_data = zstd.train_dictionary(size, samples)
809 dict_data = zstd.train_dictionary(size, samples)
703
810
704 This takes a list of bytes instances and creates and returns a
811 This takes a list of bytes instances and creates and returns a
705 ``ZstdCompressionDict``.
812 ``ZstdCompressionDict``.
706
813
707 You can see how many bytes are in the dictionary by calling ``len()``::
814 You can see how many bytes are in the dictionary by calling ``len()``::
708
815
709 dict_data = zstd.train_dictionary(size, samples)
816 dict_data = zstd.train_dictionary(size, samples)
710 dict_size = len(dict_data) # will not be larger than ``size``
817 dict_size = len(dict_data) # will not be larger than ``size``
711
818
712 Once you have a dictionary, you can pass it to the objects performing
819 Once you have a dictionary, you can pass it to the objects performing
713 compression and decompression::
820 compression and decompression::
714
821
715 dict_data = zstd.train_dictionary(16384, samples)
822 dict_data = zstd.train_dictionary(16384, samples)
716
823
717 cctx = zstd.ZstdCompressor(dict_data=dict_data)
824 cctx = zstd.ZstdCompressor(dict_data=dict_data)
718 for source_data in input_data:
825 for source_data in input_data:
719 compressed = cctx.compress(source_data)
826 compressed = cctx.compress(source_data)
720 # Do something with compressed data.
827 # Do something with compressed data.
721
828
722 dctx = zstd.ZstdDecompressor(dict_data=dict_data)
829 dctx = zstd.ZstdDecompressor(dict_data=dict_data)
723 for compressed_data in input_data:
830 for compressed_data in input_data:
724 buffer = io.BytesIO()
831 buffer = io.BytesIO()
725 with dctx.write_to(buffer) as decompressor:
832 with dctx.write_to(buffer) as decompressor:
726 decompressor.write(compressed_data)
833 decompressor.write(compressed_data)
727 # Do something with raw data in ``buffer``.
834 # Do something with raw data in ``buffer``.
728
835
729 Dictionaries have unique integer IDs. You can retrieve this ID via::
836 Dictionaries have unique integer IDs. You can retrieve this ID via::
730
837
731 dict_id = zstd.dictionary_id(dict_data)
838 dict_id = zstd.dictionary_id(dict_data)
732
839
733 You can obtain the raw data in the dict (useful for persisting and constructing
840 You can obtain the raw data in the dict (useful for persisting and constructing
734 a ``ZstdCompressionDict`` later) via ``as_bytes()``::
841 a ``ZstdCompressionDict`` later) via ``as_bytes()``::
735
842
736 dict_data = zstd.train_dictionary(size, samples)
843 dict_data = zstd.train_dictionary(size, samples)
737 raw_data = dict_data.as_bytes()
844 raw_data = dict_data.as_bytes()
738
845
846 The following named arguments to ``train_dictionary`` can also be used
847 to further control dictionary generation (an example follows the list).
848
849 selectivity
850 Integer selectivity level. Default is 9. Larger values yield more data in
851 the dictionary.
852 level
853 Integer compression level. Default is 6.
854 dict_id
855 Integer dictionary ID for the produced dictionary. Default is 0, which
856 means to use a random value.
857 notifications
858 Controls writing of informational messages to ``stderr``. ``0`` (the
859 default) means to write nothing. ``1`` writes errors. ``2`` writes
860 progression info. ``3`` writes more details. And ``4`` writes all info.
861
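For instance, a training call that sets these arguments explicitly might look
like the following (a sketch; the values shown are illustrative only)::

   dict_data = zstd.train_dictionary(16384, samples,
                                     selectivity=9,
                                     level=6,
                                     dict_id=0,
                                     notifications=1)
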
862 Cover Dictionaries
863 ^^^^^^^^^^^^^^^^^^
864
865 An alternate dictionary training mechanism named *cover* is also available.
866 More details about this training mechanism are available in the paper
867 *Effective Construction of Relative Lempel-Ziv Dictionaries* (authors:
868 Liao, Petri, Moffat, Wirth).
869
870 To use this mechanism, use ``zstd.train_cover_dictionary()`` instead of
871 ``zstd.train_dictionary()``. The function behaves nearly the same except
872 its arguments are different and the returned dictionary will contain ``k``
873 and ``d`` attributes reflecting the parameters to the cover algorithm.
874
875 .. note::
876
877 The ``k`` and ``d`` attributes are only populated on dictionary
878 instances created by this function. If a ``ZstdCompressionDict`` is
879 constructed from raw bytes data, the ``k`` and ``d`` attributes will
880 be ``0``.
881
882 The segment and dmer size parameters to the cover algorithm can either be
883 specified manually or you can ask ``train_cover_dictionary()`` to try
884 multiple values and pick the best one, where *best* means the smallest
885 compressed data size.
886
887 In manual mode, the ``k`` and ``d`` arguments must be specified or a
888 ``ZstdError`` will be raised.
889
890 In automatic mode (triggered by specifying ``optimize=True``), ``k``
891 and ``d`` are optional. If a value isn't specified, then default values for
892 both are tested. The ``steps`` argument can control the number of steps
893 through ``k`` values. The ``level`` argument defines the compression level
894 that will be used when testing the compressed size. And ``threads`` can
895 specify the number of threads to use for concurrent operation.
896
897 This function takes the following arguments (an example follows the list):
898
899 dict_size
900 Target size in bytes of the dictionary to generate.
901 samples
902 A list of bytes holding samples the dictionary will be trained from.
903 k
904 Parameter to cover algorithm defining the segment size. A reasonable range
905 is [16, 2048+].
906 d
907 Parameter to cover algorithm defining the dmer size. A reasonable range is
908 [6, 16]. ``d`` must be less than or equal to ``k``.
909 dict_id
910 Integer dictionary ID for the produced dictionary. Default is 0, which uses
911 a random value.
912 optimize
913 When true, test dictionary generation with multiple parameters.
914 level
915 Integer target compression level when testing compression with
916 ``optimize=True``. Default is 1.
917 steps
918 Number of steps through ``k`` values to perform when ``optimize=True``.
919 Default is 32.
920 threads
921 Number of threads to use when ``optimize=True``. Default is 0, which means
922 to use a single thread. A negative value can be specified to use as many
923 threads as there are detected logical CPUs.
924 notifications
925 Controls writing of informational messages to ``stderr``. See the
926 documentation for ``train_dictionary()`` for more.
927
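Putting this together, cover training might look like the following (a sketch;
the parameter values are illustrative only)::

   # Manual mode: k and d must be specified.
   dict_data = zstd.train_cover_dictionary(16384, samples, k=64, d=8)

   # Automatic mode: search for good k/d values.
   dict_data = zstd.train_cover_dictionary(16384, samples, optimize=True,
                                           level=1, steps=32, threads=-1)
   print(dict_data.k, dict_data.d)
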
739 Explicit Compression Parameters
928 Explicit Compression Parameters
740 -------------------------------
929 -------------------------------
741
930
742 Zstandard's integer compression levels along with the input size and dictionary
931 Zstandard's integer compression levels along with the input size and dictionary
743 size are converted into a data structure defining multiple parameters to tune
932 size are converted into a data structure defining multiple parameters to tune
744 behavior of the compression algorithm. It is possible to define this
933 behavior of the compression algorithm. It is possible to define this
745 data structure explicitly to have lower-level control over compression behavior.
934 data structure explicitly to have lower-level control over compression behavior.
746
935
747 The ``zstd.CompressionParameters`` type represents this data structure.
936 The ``zstd.CompressionParameters`` type represents this data structure.
748 You can see how Zstandard converts compression levels to this data structure
937 You can see how Zstandard converts compression levels to this data structure
749 by calling ``zstd.get_compression_parameters()``. e.g.::
938 by calling ``zstd.get_compression_parameters()``. e.g.::
750
939
751 params = zstd.get_compression_parameters(5)
940 params = zstd.get_compression_parameters(5)
752
941
753 This function also accepts the uncompressed data size and dictionary size
942 This function also accepts the uncompressed data size and dictionary size
754 to adjust parameters::
943 to adjust parameters::
755
944
756 params = zstd.get_compression_parameters(3, source_size=len(data), dict_size=len(dict_data))
945 params = zstd.get_compression_parameters(3, source_size=len(data), dict_size=len(dict_data))
757
946
758 You can also construct compression parameters from their low-level components::
947 You can also construct compression parameters from their low-level components::
759
948
760 params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST)
949 params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST)
761
950
762 You can then configure a compressor to use the custom parameters::
951 You can then configure a compressor to use the custom parameters::
763
952
764 cctx = zstd.ZstdCompressor(compression_params=params)
953 cctx = zstd.ZstdCompressor(compression_params=params)
765
954
766 The members/attributes of ``CompressionParameters`` instances are as follows::
955 The members/attributes of ``CompressionParameters`` instances are as follows::
767
956
768 * window_log
957 * window_log
769 * chain_log
958 * chain_log
770 * hash_log
959 * hash_log
771 * search_log
960 * search_log
772 * search_length
961 * search_length
773 * target_length
962 * target_length
774 * strategy
963 * strategy
775
964
776 This is the order the arguments are passed to the constructor if not using
965 This is the order the arguments are passed to the constructor if not using
777 named arguments.
966 named arguments.
778
967
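For example, the same parameters as above can be spelled with named arguments
(a sketch using the attribute names listed above)::

   params = zstd.CompressionParameters(window_log=20,
                                       chain_log=6,
                                       hash_log=12,
                                       search_log=5,
                                       search_length=4,
                                       target_length=10,
                                       strategy=zstd.STRATEGY_FAST)
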
779 You'll need to read the Zstandard documentation for what these parameters
968 You'll need to read the Zstandard documentation for what these parameters
780 do.
969 do.
781
970
782 Frame Inspection
971 Frame Inspection
783 ----------------
972 ----------------
784
973
785 Data emitted from zstd compression is encapsulated in a *frame*. This frame
974 Data emitted from zstd compression is encapsulated in a *frame*. This frame
786 begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
975 begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
787 the frame in more detail. For more info, see
976 the frame in more detail. For more info, see
788 https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
977 https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
789
978
790 ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
979 ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
791 instance and returns a ``FrameParameters`` object describing the frame.
980 instance and returns a ``FrameParameters`` object describing the frame.
792
981
793 Depending on which fields are present in the frame and their values, the
982 Depending on which fields are present in the frame and their values, the
794 length of the frame parameters varies. If insufficient bytes are passed
983 length of the frame parameters varies. If insufficient bytes are passed
795 in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
984 in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
796 frame parameters can be parsed, pass in at least 18 bytes.
985 frame parameters can be parsed, pass in at least 18 bytes.
797
986
798 ``FrameParameters`` instances have the following attributes:
987 ``FrameParameters`` instances have the following attributes:
799
988
800 content_size
989 content_size
801 Integer size of original, uncompressed content. This will be ``0`` if the
990 Integer size of original, uncompressed content. This will be ``0`` if the
802 original content size isn't written to the frame (controlled with the
991 original content size isn't written to the frame (controlled with the
803 ``write_content_size`` argument to ``ZstdCompressor``) or if the input
992 ``write_content_size`` argument to ``ZstdCompressor``) or if the input
804 content size was ``0``.
993 content size was ``0``.
805
994
806 window_size
995 window_size
807 Integer size of maximum back-reference distance in compressed data.
996 Integer size of maximum back-reference distance in compressed data.
808
997
809 dict_id
998 dict_id
810 Integer of dictionary ID used for compression. ``0`` if no dictionary
999 Integer of dictionary ID used for compression. ``0`` if no dictionary
811 ID was used or if the dictionary ID was ``0``.
1000 ID was used or if the dictionary ID was ``0``.
812
1001
813 has_checksum
1002 has_checksum
814 Bool indicating whether a 4 byte content checksum is stored at the end
1003 Bool indicating whether a 4 byte content checksum is stored at the end
815 of the frame.
1004 of the frame.
816
1005
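For example, inspecting a frame produced elsewhere might look like this (a
sketch; ``frame_data`` is assumed to hold at least the first 18 bytes of a
zstd frame)::

   params = zstd.get_frame_parameters(frame_data[:18])

   print(params.content_size)   # 0 if not recorded in the frame
   print(params.window_size)
   print(params.dict_id)
   print(params.has_checksum)
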
817 Misc Functionality
1006 Misc Functionality
818 ------------------
1007 ------------------
819
1008
820 estimate_compression_context_size(CompressionParameters)
1009 estimate_compression_context_size(CompressionParameters)
821 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1010 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
822
1011
823 Given a ``CompressionParameters`` struct, estimate the memory size required
1012 Given a ``CompressionParameters`` struct, estimate the memory size required
824 to perform compression.
1013 to perform compression.
825
1014
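A sketch of how this might be used with parameters derived from a compression
level::

   params = zstd.get_compression_parameters(3)
   size = zstd.estimate_compression_context_size(params)
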
826 estimate_decompression_context_size()
1015 estimate_decompression_context_size()
827 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1016 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
828
1017
829 Estimate the memory size requirements for a decompressor instance.
1018 Estimate the memory size requirements for a decompressor instance.
830
1019
831 Constants
1020 Constants
832 ---------
1021 ---------
833
1022
834 The following module constants/attributes are exposed:
1023 The following module constants/attributes are exposed:
835
1024
836 ZSTD_VERSION
1025 ZSTD_VERSION
837 This module attribute exposes a 3-tuple of the Zstandard version. e.g.
1026 This module attribute exposes a 3-tuple of the Zstandard version. e.g.
838 ``(1, 0, 0)``
1027 ``(1, 0, 0)``
839 MAX_COMPRESSION_LEVEL
1028 MAX_COMPRESSION_LEVEL
840 Integer max compression level accepted by compression functions
1029 Integer max compression level accepted by compression functions
841 COMPRESSION_RECOMMENDED_INPUT_SIZE
1030 COMPRESSION_RECOMMENDED_INPUT_SIZE
842 Recommended chunk size to feed to compressor functions
1031 Recommended chunk size to feed to compressor functions
843 COMPRESSION_RECOMMENDED_OUTPUT_SIZE
1032 COMPRESSION_RECOMMENDED_OUTPUT_SIZE
844 Recommended chunk size for compression output
1033 Recommended chunk size for compression output
845 DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1034 DECOMPRESSION_RECOMMENDED_INPUT_SIZE
846 Recommended chunk size to feed into decompressor functions
1035 Recommended chunk size to feed into decompressor functions
847 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
1036 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
848 Recommended chunk size for decompression output
1037 Recommended chunk size for decompression output
849
1038
850 FRAME_HEADER
1039 FRAME_HEADER
851 bytes containing header of the Zstandard frame
1040 bytes containing header of the Zstandard frame
852 MAGIC_NUMBER
1041 MAGIC_NUMBER
853 Frame header as an integer
1042 Frame header as an integer
854
1043
855 WINDOWLOG_MIN
1044 WINDOWLOG_MIN
856 Minimum value for compression parameter
1045 Minimum value for compression parameter
857 WINDOWLOG_MAX
1046 WINDOWLOG_MAX
858 Maximum value for compression parameter
1047 Maximum value for compression parameter
859 CHAINLOG_MIN
1048 CHAINLOG_MIN
860 Minimum value for compression parameter
1049 Minimum value for compression parameter
861 CHAINLOG_MAX
1050 CHAINLOG_MAX
862 Maximum value for compression parameter
1051 Maximum value for compression parameter
863 HASHLOG_MIN
1052 HASHLOG_MIN
864 Minimum value for compression parameter
1053 Minimum value for compression parameter
865 HASHLOG_MAX
1054 HASHLOG_MAX
866 Maximum value for compression parameter
1055 Maximum value for compression parameter
867 SEARCHLOG_MIN
1056 SEARCHLOG_MIN
868 Minimum value for compression parameter
1057 Minimum value for compression parameter
869 SEARCHLOG_MAX
1058 SEARCHLOG_MAX
870 Maximum value for compression parameter
1059 Maximum value for compression parameter
871 SEARCHLENGTH_MIN
1060 SEARCHLENGTH_MIN
872 Minimum value for compression parameter
1061 Minimum value for compression parameter
873 SEARCHLENGTH_MAX
1062 SEARCHLENGTH_MAX
874 Maximum value for compression parameter
1063 Maximum value for compression parameter
875 TARGETLENGTH_MIN
1064 TARGETLENGTH_MIN
876 Minimum value for compression parameter
1065 Minimum value for compression parameter
877 TARGETLENGTH_MAX
1066 TARGETLENGTH_MAX
878 Maximum value for compression parameter
1067 Maximum value for compression parameter
879 STRATEGY_FAST
1068 STRATEGY_FAST
880 Compression strategy
1069 Compression strategy
881 STRATEGY_DFAST
1070 STRATEGY_DFAST
882 Compression strategy
1071 Compression strategy
883 STRATEGY_GREEDY
1072 STRATEGY_GREEDY
884 Compression strategy
1073 Compression strategy
885 STRATEGY_LAZY
1074 STRATEGY_LAZY
886 Compression strategy
1075 Compression strategy
887 STRATEGY_LAZY2
1076 STRATEGY_LAZY2
888 Compression strategy
1077 Compression strategy
889 STRATEGY_BTLAZY2
1078 STRATEGY_BTLAZY2
890 Compression strategy
1079 Compression strategy
891 STRATEGY_BTOPT
1080 STRATEGY_BTOPT
892 Compression strategy
1081 Compression strategy
893
1082
894 Performance Considerations
1083 Performance Considerations
895 --------------------------
1084 --------------------------
896
1085
897 The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
1086 The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
898 persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
1087 persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
899 or ``ZstdDecompressor`` instance for multiple operations is faster than
1088 or ``ZstdDecompressor`` instance for multiple operations is faster than
900 instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
1089 instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
901 operation. The differences are magnified as the size of data decreases. For
1090 operation. The differences are magnified as the size of data decreases. For
902 example, the difference between *context* reuse and non-reuse for 100,000
1091 example, the difference between *context* reuse and non-reuse for 100,000
903 100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
1092 100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
904 whereas 10 1,000,000 byte inputs will be more similar in speed (because the
1093 whereas 10 1,000,000 byte inputs will be more similar in speed (because the
905 time spent doing compression dwarfs time spent creating new *contexts*).
1094 time spent doing compression dwarfs time spent creating new *contexts*).
906
1095
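A sketch of the reuse pattern described above (``chunks`` is assumed to be an
iterable of ``bytes``)::

   cctx = zstd.ZstdCompressor(level=3)

   # One context, many operations: avoids paying context setup per chunk.
   compressed_chunks = [cctx.compress(chunk) for chunk in chunks]
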
1096 Buffer Types
1097 ------------
1098
1099 The API exposes a handful of custom types for interfacing with memory buffers.
1100 The primary goal of these types is to facilitate efficient multi-object
1101 operations.
1102
1103 The essential idea is to have a single memory allocation provide backing
1104 storage for multiple logical objects. This has 2 main advantages: fewer
1105 allocations and optimal memory access patterns. This avoids having to allocate
1106 a Python object for each logical object and furthermore ensures that access of
1107 data for objects can be sequential (read: fast) in memory.
1108
1109 BufferWithSegments
1110 ^^^^^^^^^^^^^^^^^^
1111
1112 The ``BufferWithSegments`` type represents a memory buffer containing N
1113 discrete items of known lengths (segments). It is essentially a fixed size
1114 memory address and an array of 2-tuples of ``(offset, length)`` 64-bit
1115 unsigned native endian integers defining the byte offset and length of each
1116 segment within the buffer.
1117
1118 Instances behave like containers.
1119
1120 ``len()`` returns the number of segments within the instance.
1121
1122 ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an
1123 individual segment within the backing buffer. That returned object references
1124 (not copies) memory. This means that iterating all objects doesn't copy
1125 data within the buffer.
1126
1127 The ``.size`` attribute contains the total size in bytes of the backing
1128 buffer.
1129
1130 Instances conform to the buffer protocol. So a reference to the backing bytes
1131 can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also
1132 be obtained via ``.tobytes()``.
1133
1134 The ``.segments`` attribute exposes the array of ``(offset, length)`` for
1135 segments within the buffer. It is a ``BufferSegments`` type.
1136
1137 BufferSegment
1138 ^^^^^^^^^^^^^
1139
1140 The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``.
1141 It is essentially a reference to N bytes within a ``BufferWithSegments``.
1142
1143 ``len()`` returns the length of the segment in bytes.
1144
1145 ``.offset`` contains the byte offset of this segment within its parent
1146 ``BufferWithSegments`` instance.
1147
1148 The object conforms to the buffer protocol. ``.tobytes()`` can be called to
1149 obtain a ``bytes`` instance with a copy of the backing bytes.
1150
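A sketch of interacting with these two types (``buf`` is assumed to be a
``BufferWithSegments`` obtained from an API that returns one)::

   print(len(buf))          # number of segments
   print(buf.size)          # total bytes in the backing buffer

   segment = buf[0]         # BufferSegment; references, does not copy
   print(segment.offset, len(segment))

   raw = segment.tobytes()  # copy of just this segment's bytes
   view = memoryview(buf)   # zero-copy view of the whole backing buffer
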
1151 BufferSegments
1152 ^^^^^^^^^^^^^^
1153
1154 This type represents an array of ``(offset, length)`` integers defining segments
1155 within a ``BufferWithSegments``.
1156
1157 The array members are 64-bit unsigned integers using host/native bit order.
1158
1159 Instances conform to the buffer protocol.
1160
1161 BufferWithSegmentsCollection
1162 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1163
1164 The ``BufferWithSegmentsCollection`` type represents a virtual spanning view
1165 of multiple ``BufferWithSegments`` instances.
1166
1167 Instances are constructed from 1 or more ``BufferWithSegments`` instances. The
1168 resulting object behaves like an ordered sequence whose members are the
1169 segments within each ``BufferWithSegments``.
1170
1171 ``len()`` returns the number of segments within all ``BufferWithSegments``
1172 instances.
1173
1174 ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at
1175 that offset as if all ``BufferWithSegments`` instances were a single
1176 entity.
1177
1178 If the object is composed of 2 ``BufferWithSegments`` instances with the
1179 first having 2 segments and the second having 3 segments, then ``b[0]``
1180 and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``,
1181 and ``b[4]`` access segments from the second.
1182
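A sketch, assuming the constructor takes the ``BufferWithSegments`` instances
directly and that ``buf1`` and ``buf2`` hold 2 and 3 segments respectively::

   collection = zstd.BufferWithSegmentsCollection(buf1, buf2)

   print(len(collection))   # 5: indexes 0-1 come from buf1, 2-4 from buf2
   segment = collection[3]  # a BufferSegment from buf2
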
1183 Choosing an API
1184 ===============
1185
1186 There are multiple APIs for performing compression and decompression. This is
1187 because different applications have different needs and the library wants to
1188 facilitate optimal use in as many use cases as possible.
1189
1190 At a high level, APIs are divided into *one-shot* and *streaming*. See
1191 the ``Concepts`` section for a description of how these are different at
1192 the C layer.
1193
1194 The *one-shot* APIs are useful for small data, where the input or output
1195 size is known. (The size can come from a buffer length, file size, or
1196 stored in the zstd frame header.) A limitation of the *one-shot* APIs is that
1197 input and output must fit in memory simultaneously. For, say, a 4 GB input,
1198 this is often not feasible.
1199
1200 The *one-shot* APIs also perform all work as a single operation. So, if you
1201 feed it large input, it could take a long time for the function to return.
1202
1203 The streaming APIs do not have the limitations of the *one-shot* APIs. But the
1204 price you pay for this flexibility is that they are more complex than a
1205 single function call.
1206
1207 The streaming APIs put the caller in control of compression and decompression
1208 behavior by allowing them to directly control either the input or output side
1209 of the operation.
1210
1211 With the *streaming input*, *compressor*, and *decompressor* APIs, the caller
1212 has full control over the input to the compression or decompression stream.
1213 They can directly choose when new data is operated on.
1214
1215 With the *streaming output* APIs, the caller has full control over the output
1216 of the compression or decompression stream. It can choose when to receive
1217 new data.
1218
1219 When using the *streaming* APIs that operate on file-like or stream objects,
1220 it is important to consider what happens in that object when I/O is requested.
1221 There is potential for long pauses as data is read or written from the
1222 underlying stream (say from interacting with a filesystem or network). This
1223 could add considerable overhead.
1224
1225 Concepts
1226 ========
1227
1228 It is important to have a basic understanding of how Zstandard works in order
1229 to optimally use this library. In addition, there are some low-level Python
1230 concepts that are worth explaining to aid understanding. This section aims to
1231 provide that knowledge.
1232
1233 Zstandard Frames and Compression Format
1234 ---------------------------------------
1235
1236 Compressed zstandard data almost always exists within a container called a
1237 *frame*. (For the technically curious, see the
1238 `specification <https://github.com/facebook/zstd/blob/3bee41a70eaf343fbcae3637b3f6edbe52f35ed8/doc/zstd_compression_format.md>`_.)
1239
1240 The frame contains a header and optional trailer. The header contains a
1241 magic number to self-identify as a zstd frame and a description of the
1242 compressed data that follows.
1243
1244 Among other things, the frame *optionally* contains the size of the
1245 decompressed data the frame represents, a 32-bit checksum of the
1246 decompressed data (to facilitate verification during decompression),
1247 and the ID of the dictionary used to compress the data.
1248
1249 Storing the original content size in the frame (``write_content_size=True``
1250 to ``ZstdCompressor``) is important for performance in some scenarios. Having
1251 the decompressed size stored there (or storing it elsewhere) allows
1252 decompression to perform a single memory allocation that is exactly sized to
1253 the output. This is faster than continuously growing a memory buffer to hold
1254 output.
1255
1256 Compression and Decompression Contexts
1257 --------------------------------------
1258
1259 In order to perform a compression or decompression operation with the zstd
1260 C API, you need what's called a *context*. A context essentially holds
1261 configuration and state for a compression or decompression operation. For
1262 example, a compression context holds the configured compression level.
1263
1264 Contexts can be reused for multiple operations. Since creating and
1265 destroying contexts is not free, there are performance advantages to
1266 reusing contexts.
1267
1268 The ``ZstdCompressor`` and ``ZstdDecompressor`` types are essentially
1269 wrappers around these contexts in the zstd C API.
1270
1271 One-shot And Streaming Operations
1272 ---------------------------------
1273
1274 A compression or decompression operation can either be performed as a
1275 single *one-shot* operation or as a continuous *streaming* operation.
1276
1277 In one-shot mode (the *simple* APIs provided by the Python interface),
1278 **all** input is handed to the compressor or decompressor as a single buffer
1279 and **all** output is returned as a single buffer.
1280
1281 In streaming mode, input is delivered to the compressor or decompressor as
1282 a series of chunks via multiple function calls. Likewise, output is
1283 obtained in chunks as well.
1284
1285 Streaming operations require an additional *stream* object to be created
1286 to track the operation. These are logical extensions of *context*
1287 instances.
1288
1289 There are advantages and disadvantages to each mode of operation. There
1290 are scenarios where certain modes can't be used. See the
1291 ``Choosing an API`` section for more.
1292
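A sketch contrasting the two modes with this package's APIs (``data`` and
``chunks`` stand in for your input; the compressor ``write_to()`` stream is
assumed to be available, as it is used for the decompressor elsewhere in these
docs)::

   import io
   import zstd

   cctx = zstd.ZstdCompressor()

   # One-shot: all input in, all output back, in a single call.
   compressed = cctx.compress(data)

   # Streaming: feed chunks as they become available.
   out = io.BytesIO()
   with cctx.write_to(out) as compressor:
       for chunk in chunks:
           compressor.write(chunk)
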
1293 Dictionaries
1294 ------------
1295
1296 A compression *dictionary* is essentially data used to seed the compressor
1297 state so it can achieve better compression. The idea is that if you are
1298 compressing a lot of similar pieces of data (e.g. JSON documents or anything
1299 sharing similar structure), then you can find common patterns across multiple
1300 objects and then leverage those common patterns during compression and
1301 decompression operations to achieve better compression ratios.
1302
1303 Dictionary compression is generally only useful for small inputs - data no
1304 larger than a few kilobytes. The upper bound on this range is highly dependent
1305 on the input data and the dictionary.
1306
1307 Python Buffer Protocol
1308 ----------------------
1309
1310 Many functions in the library operate on objects that implement Python's
1311 `buffer protocol <https://docs.python.org/3.6/c-api/buffer.html>`_.
1312
1313 The *buffer protocol* is an internal implementation detail of a Python
1314 type that allows instances of that type (objects) to be exposed as a raw
1315 pointer (or buffer) in the C API. In other words, it allows objects to be
1316 exposed as an array of bytes.
1317
1318 From the perspective of the C API, objects implementing the *buffer protocol*
1319 all look the same: they are just a pointer to a memory address of a defined
1320 length. This allows the C API to be largely type agnostic when accessing their
1321 data. As a result, custom types can be passed in without first converting them
1322 to a specific type.
1323
1324 Many Python types implement the buffer protocol. These include ``bytes``
1325 (``str`` on Python 2), ``bytearray``, ``array.array``, ``io.BytesIO``,
1326 ``mmap.mmap``, and ``memoryview``.
1327
1328 ``python-zstandard`` APIs that accept objects conforming to the buffer
1329 protocol require that the buffer is *C contiguous* and has a single
1330 dimension (``ndim==1``). This is usually the case. An example of where it
1331 is not is a Numpy matrix type.
1332
1333 Requiring Output Sizes for Non-Streaming Decompression APIs
1334 -----------------------------------------------------------
1335
1336 Non-streaming decompression APIs require that either the output size is
1337 explicitly defined (either in the zstd frame header or passed into the
1338 function) or that a max output size is specified. This restriction is for
1339 your safety.
1340
1341 The *one-shot* decompression APIs store the decompressed result in a
1342 single buffer. This means that a buffer needs to be pre-allocated to hold
1343 the result. If the decompressed size is not known, then there is no universal
1344 good default size to use. Any default will fail or will be highly sub-optimal
1345 in some scenarios (it will either be too small or will put stress on the
1346 memory allocator to allocate a too large block).
1347
1348 A *helpful* API may retry decompression with buffers of increasing size.
1349 While useful, there are obvious performance disadvantages, namely redoing
1350 decompression N times until it works. In addition, there is a security
1351 concern. Say the input came from highly compressible data, like 1 GB of the
1352 same byte value. The output size could be several orders of magnitude larger than the
1353 input size. An input of <100KB could decompress to >1GB. Without a bounds
1354 restriction on the decompressed size, certain inputs could exhaust all system
1355 memory. That's not good and is why the maximum output size is limited.
1356
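A sketch of the two ways to satisfy this requirement (``compressed`` stands in
for a zstd frame; treat the exact ``max_output_size`` spelling as an assumption
here)::

   dctx = zstd.ZstdDecompressor()

   # Works when the frame recorded its content size (write_content_size=True).
   decompressed = dctx.decompress(compressed)

   # Otherwise, an explicit upper bound on the output size must be provided.
   decompressed = dctx.decompress(compressed, max_output_size=1048576)
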
907 Note on Zstandard's *Experimental* API
1357 Note on Zstandard's *Experimental* API
908 ======================================
1358 ======================================
909
1359
910 Many of the Zstandard APIs used by this module are marked as *experimental*
1360 Many of the Zstandard APIs used by this module are marked as *experimental*
911 within the Zstandard project. This includes a large number of useful
1361 within the Zstandard project. This includes a large number of useful
912 features, such as compression and frame parameters and parts of dictionary
1362 features, such as compression and frame parameters and parts of dictionary
913 compression.
1363 compression.
914
1364
915 It is unclear how Zstandard's C API will evolve over time, especially with
1365 It is unclear how Zstandard's C API will evolve over time, especially with
916 regards to this *experimental* functionality. We will try to maintain
1366 regards to this *experimental* functionality. We will try to maintain
917 backwards compatibility at the Python API level. However, we cannot
1367 backwards compatibility at the Python API level. However, we cannot
918 guarantee this for things not under our control.
1368 guarantee this for things not under our control.
919
1369
920 Since a copy of the Zstandard source code is distributed with this
1370 Since a copy of the Zstandard source code is distributed with this
921 module and since we compile against it, the behavior of a specific
1371 module and since we compile against it, the behavior of a specific
922 version of this module should be constant for all of time. So if you
1372 version of this module should be constant for all of time. So if you
923 pin the version of this module used in your projects (which is a Python
1373 pin the version of this module used in your projects (which is a Python
924 best practice), you should be insulated from unwanted future changes.
1374 best practice), you should be insulated from unwanted future changes.
925
1375
926 Donate
1376 Donate
927 ======
1377 ======
928
1378
929 A lot of time has been invested into this project by the author.
1379 A lot of time has been invested into this project by the author.
930
1380
931 If you find this project useful and would like to thank the author for
1381 If you find this project useful and would like to thank the author for
932 their work, consider donating some money. Any amount is appreciated.
1382 their work, consider donating some money. Any amount is appreciated.
933
1383
934 .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif
1384 .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif
935 :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted
1385 :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted
936 :alt: Donate via PayPal
1386 :alt: Donate via PayPal
937
1387
938 .. |ci-status| image:: https://travis-ci.org/indygreg/python-zstandard.svg?branch=master
1388 .. |ci-status| image:: https://travis-ci.org/indygreg/python-zstandard.svg?branch=master
939 :target: https://travis-ci.org/indygreg/python-zstandard
1389 :target: https://travis-ci.org/indygreg/python-zstandard
940
1390
941 .. |win-ci-status| image:: https://ci.appveyor.com/api/projects/status/github/indygreg/python-zstandard?svg=true
1391 .. |win-ci-status| image:: https://ci.appveyor.com/api/projects/status/github/indygreg/python-zstandard?svg=true
942 :target: https://ci.appveyor.com/project/indygreg/python-zstandard
1392 :target: https://ci.appveyor.com/project/indygreg/python-zstandard
943 :alt: Windows build status
1393 :alt: Windows build status
@@ -1,248 +1,392
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
13 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
14 static char *kwlist[] = { "dict_size", "samples", "parameters", NULL };
14 static char* kwlist[] = {
15 "dict_size",
16 "samples",
17 "selectivity",
18 "level",
19 "notifications",
20 "dict_id",
21 NULL
22 };
15 size_t capacity;
23 size_t capacity;
16 PyObject* samples;
24 PyObject* samples;
17 Py_ssize_t samplesLen;
25 Py_ssize_t samplesLen;
18 PyObject* parameters = NULL;
26 unsigned selectivity = 0;
27 int level = 0;
28 unsigned notifications = 0;
29 unsigned dictID = 0;
19 ZDICT_params_t zparams;
30 ZDICT_params_t zparams;
20 Py_ssize_t sampleIndex;
31 Py_ssize_t sampleIndex;
21 Py_ssize_t sampleSize;
32 Py_ssize_t sampleSize;
22 PyObject* sampleItem;
33 PyObject* sampleItem;
23 size_t zresult;
34 size_t zresult;
24 void* sampleBuffer;
35 void* sampleBuffer = NULL;
25 void* sampleOffset;
36 void* sampleOffset;
26 size_t samplesSize = 0;
37 size_t samplesSize = 0;
27 size_t* sampleSizes;
38 size_t* sampleSizes = NULL;
28 void* dict;
39 void* dict = NULL;
29 ZstdCompressionDict* result;
40 ZstdCompressionDict* result = NULL;
30
41
31 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!:train_dictionary",
42 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IiII:train_dictionary",
32 kwlist,
43 kwlist,
33 &capacity,
44 &capacity,
34 &PyList_Type, &samples,
45 &PyList_Type, &samples,
35 (PyObject*)&DictParametersType, &parameters)) {
46 &selectivity, &level, &notifications, &dictID)) {
36 return NULL;
47 return NULL;
37 }
48 }
38
49
39 /* Validate parameters first since it is easiest. */
50 memset(&zparams, 0, sizeof(zparams));
40 zparams.selectivityLevel = 0;
41 zparams.compressionLevel = 0;
42 zparams.notificationLevel = 0;
43 zparams.dictID = 0;
44 zparams.reserved[0] = 0;
45 zparams.reserved[1] = 0;
46
51
47 if (parameters) {
52 zparams.selectivityLevel = selectivity;
48 /* TODO validate data ranges */
53 zparams.compressionLevel = level;
49 zparams.selectivityLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 0));
54 zparams.notificationLevel = notifications;
50 zparams.compressionLevel = PyLong_AsLong(PyTuple_GetItem(parameters, 1));
55 zparams.dictID = dictID;
51 zparams.notificationLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 2));
52 zparams.dictID = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 3));
53 }
54
56
55 /* Figure out the size of the raw samples */
57 /* Figure out the size of the raw samples */
56 samplesLen = PyList_Size(samples);
58 samplesLen = PyList_Size(samples);
57 for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
59 for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
58 sampleItem = PyList_GetItem(samples, sampleIndex);
60 sampleItem = PyList_GetItem(samples, sampleIndex);
59 if (!PyBytes_Check(sampleItem)) {
61 if (!PyBytes_Check(sampleItem)) {
60 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
62 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
61 return NULL;
63 return NULL;
62 }
64 }
63 samplesSize += PyBytes_GET_SIZE(sampleItem);
65 samplesSize += PyBytes_GET_SIZE(sampleItem);
64 }
66 }
65
67
66 /* Now that we know the total size of the raw samples, we can allocate
67 a buffer for the raw data */
68 /* Now that we know the total size of the raw samples, we can allocate
69 a buffer for the raw data */
68 sampleBuffer = PyMem_Malloc(samplesSize);
70 sampleBuffer = PyMem_Malloc(samplesSize);
69 if (!sampleBuffer) {
71 if (!sampleBuffer) {
70 PyErr_NoMemory();
72 PyErr_NoMemory();
71 return NULL;
73 goto finally;
72 }
74 }
73 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
75 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
74 if (!sampleSizes) {
76 if (!sampleSizes) {
75 PyMem_Free(sampleBuffer);
76 PyErr_NoMemory();
77 PyErr_NoMemory();
77 return NULL;
78 goto finally;
78 }
79 }
79
80
80 sampleOffset = sampleBuffer;
81 sampleOffset = sampleBuffer;
81 /* Now iterate again and assemble the samples in the buffer */
82 /* Now iterate again and assemble the samples in the buffer */
82 for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
83 for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
83 sampleItem = PyList_GetItem(samples, sampleIndex);
84 sampleItem = PyList_GetItem(samples, sampleIndex);
84 sampleSize = PyBytes_GET_SIZE(sampleItem);
85 sampleSize = PyBytes_GET_SIZE(sampleItem);
85 sampleSizes[sampleIndex] = sampleSize;
86 sampleSizes[sampleIndex] = sampleSize;
86 memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
87 memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
87 sampleOffset = (char*)sampleOffset + sampleSize;
88 sampleOffset = (char*)sampleOffset + sampleSize;
88 }
89 }
89
90
90 dict = PyMem_Malloc(capacity);
91 dict = PyMem_Malloc(capacity);
91 if (!dict) {
92 if (!dict) {
92 PyMem_Free(sampleSizes);
93 PyMem_Free(sampleBuffer);
94 PyErr_NoMemory();
93 PyErr_NoMemory();
95 return NULL;
94 goto finally;
96 }
95 }
97
96
97 /* TODO consider using dup2() to redirect zstd's stderr writing to a buffer */
98 Py_BEGIN_ALLOW_THREADS
98 zresult = ZDICT_trainFromBuffer_advanced(dict, capacity,
99 zresult = ZDICT_trainFromBuffer_advanced(dict, capacity,
99 sampleBuffer, sampleSizes, (unsigned int)samplesLen,
100 sampleBuffer, sampleSizes, (unsigned int)samplesLen,
100 zparams);
101 zparams);
102 Py_END_ALLOW_THREADS
101 if (ZDICT_isError(zresult)) {
103 if (ZDICT_isError(zresult)) {
102 PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult));
104 PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult));
103 PyMem_Free(dict);
105 PyMem_Free(dict);
104 PyMem_Free(sampleSizes);
106 goto finally;
105 PyMem_Free(sampleBuffer);
106 return NULL;
107 }
107 }
108
108
109 result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
109 result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
110 if (!result) {
110 if (!result) {
111 return NULL;
111 goto finally;
112 }
112 }
113
113
114 result->dictData = dict;
114 result->dictData = dict;
115 result->dictSize = zresult;
115 result->dictSize = zresult;
116 result->d = 0;
117 result->k = 0;
118
119 finally:
120 PyMem_Free(sampleBuffer);
121 PyMem_Free(sampleSizes);
122
116 return result;
123 return result;
117 }
124 }
118
125
126 ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
127 static char* kwlist[] = {
128 "dict_size",
129 "samples",
130 "k",
131 "d",
132 "notifications",
133 "dict_id",
134 "level",
135 "optimize",
136 "steps",
137 "threads",
138 NULL
139 };
140
141 size_t capacity;
142 PyObject* samples;
143 unsigned k = 0;
144 unsigned d = 0;
145 unsigned notifications = 0;
146 unsigned dictID = 0;
147 int level = 0;
148 PyObject* optimize = NULL;
149 unsigned steps = 0;
150 int threads = 0;
151 COVER_params_t params;
152 Py_ssize_t samplesLen;
153 Py_ssize_t i;
154 size_t samplesSize = 0;
155 void* sampleBuffer = NULL;
156 size_t* sampleSizes = NULL;
157 void* sampleOffset;
158 Py_ssize_t sampleSize;
159 void* dict = NULL;
160 size_t zresult;
161 ZstdCompressionDict* result = NULL;
162
163 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIiOIi:train_cover_dictionary",
164 kwlist, &capacity, &PyList_Type, &samples,
165 &k, &d, &notifications, &dictID, &level, &optimize, &steps, &threads)) {
166 return NULL;
167 }
168
169 if (threads < 0) {
170 threads = cpu_count();
171 }
172
173 memset(&params, 0, sizeof(params));
174 params.k = k;
175 params.d = d;
176 params.steps = steps;
177 params.nbThreads = threads;
178 params.notificationLevel = notifications;
179 params.dictID = dictID;
180 params.compressionLevel = level;
181
182 /* Figure out total size of input samples. */
183 samplesLen = PyList_Size(samples);
184 for (i = 0; i < samplesLen; i++) {
185 PyObject* sampleItem = PyList_GET_ITEM(samples, i);
186
187 if (!PyBytes_Check(sampleItem)) {
188 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
189 return NULL;
190 }
191 samplesSize += PyBytes_GET_SIZE(sampleItem);
192 }
193
194 sampleBuffer = PyMem_Malloc(samplesSize);
195 if (!sampleBuffer) {
196 PyErr_NoMemory();
197 goto finally;
198 }
199
200 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
201 if (!sampleSizes) {
202 PyErr_NoMemory();
203 goto finally;
204 }
205
206 sampleOffset = sampleBuffer;
207 for (i = 0; i < samplesLen; i++) {
208 PyObject* sampleItem = PyList_GET_ITEM(samples, i);
209 sampleSize = PyBytes_GET_SIZE(sampleItem);
210 sampleSizes[i] = sampleSize;
211 memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
212 sampleOffset = (char*)sampleOffset + sampleSize;
213 }
214
215 dict = PyMem_Malloc(capacity);
216 if (!dict) {
217 PyErr_NoMemory();
218 goto finally;
219 }
220
221 Py_BEGIN_ALLOW_THREADS
222 if (optimize && PyObject_IsTrue(optimize)) {
223 zresult = COVER_optimizeTrainFromBuffer(dict, capacity,
224 sampleBuffer, sampleSizes, (unsigned)samplesLen, &params);
225 }
226 else {
227 zresult = COVER_trainFromBuffer(dict, capacity,
228 sampleBuffer, sampleSizes, (unsigned)samplesLen, params);
229 }
230 Py_END_ALLOW_THREADS
231
232 if (ZDICT_isError(zresult)) {
233 PyMem_Free(dict);
234 PyErr_Format(ZstdError, "cannot train dict: %s", ZDICT_getErrorName(zresult));
235 goto finally;
236 }
237
238 result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
239 if (!result) {
240 PyMem_Free(dict);
241 goto finally;
242 }
243
244 result->dictData = dict;
245 result->dictSize = zresult;
246 result->d = params.d;
247 result->k = params.k;
248
249 finally:
250 PyMem_Free(sampleBuffer);
251 PyMem_Free(sampleSizes);
252
253 return result;
254 }
119
255
120 PyDoc_STRVAR(ZstdCompressionDict__doc__,
256 PyDoc_STRVAR(ZstdCompressionDict__doc__,
121 "ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
257 "ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
122 "\n"
258 "\n"
123 "This type holds the results of a computed Zstandard compression dictionary.\n"
259 "This type holds the results of a computed Zstandard compression dictionary.\n"
124 "Instances are obtained by calling ``train_dictionary()`` or by passing bytes\n"
260 "Instances are obtained by calling ``train_dictionary()`` or by passing bytes\n"
125 "obtained from another source into the constructor.\n"
261 "obtained from another source into the constructor.\n"
126 );
262 );
127
263
128 static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args) {
264 static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args) {
129 const char* source;
265 const char* source;
130 Py_ssize_t sourceSize;
266 Py_ssize_t sourceSize;
131
267
132 self->dictData = NULL;
268 self->dictData = NULL;
133 self->dictSize = 0;
269 self->dictSize = 0;
134
270
135 #if PY_MAJOR_VERSION >= 3
271 #if PY_MAJOR_VERSION >= 3
136 if (!PyArg_ParseTuple(args, "y#:ZstdCompressionDict",
272 if (!PyArg_ParseTuple(args, "y#:ZstdCompressionDict",
137 #else
273 #else
138 if (!PyArg_ParseTuple(args, "s#:ZstdCompressionDict",
274 if (!PyArg_ParseTuple(args, "s#:ZstdCompressionDict",
139 #endif
275 #endif
140 &source, &sourceSize)) {
276 &source, &sourceSize)) {
141 return -1;
277 return -1;
142 }
278 }
143
279
144 self->dictData = PyMem_Malloc(sourceSize);
280 self->dictData = PyMem_Malloc(sourceSize);
145 if (!self->dictData) {
281 if (!self->dictData) {
146 PyErr_NoMemory();
282 PyErr_NoMemory();
147 return -1;
283 return -1;
148 }
284 }
149
285
150 memcpy(self->dictData, source, sourceSize);
286 memcpy(self->dictData, source, sourceSize);
151 self->dictSize = sourceSize;
287 self->dictSize = sourceSize;
152
288
153 return 0;
289 return 0;
154 }
290 }
155
291
156 static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
292 static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
157 if (self->dictData) {
293 if (self->dictData) {
158 PyMem_Free(self->dictData);
294 PyMem_Free(self->dictData);
159 self->dictData = NULL;
295 self->dictData = NULL;
160 }
296 }
161
297
162 PyObject_Del(self);
298 PyObject_Del(self);
163 }
299 }
164
300
165 static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
301 static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
166 unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);
302 unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);
167
303
168 return PyLong_FromLong(dictID);
304 return PyLong_FromLong(dictID);
169 }
305 }
170
306
171 static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) {
307 static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) {
172 return PyBytes_FromStringAndSize(self->dictData, self->dictSize);
308 return PyBytes_FromStringAndSize(self->dictData, self->dictSize);
173 }
309 }
174
310
175 static PyMethodDef ZstdCompressionDict_methods[] = {
311 static PyMethodDef ZstdCompressionDict_methods[] = {
176 { "dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS,
312 { "dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS,
177 PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") },
313 PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") },
178 { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS,
314 { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS,
179 PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") },
315 PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") },
180 { NULL, NULL }
316 { NULL, NULL }
181 };
317 };
182
318
319 static PyMemberDef ZstdCompressionDict_members[] = {
320 { "k", T_UINT, offsetof(ZstdCompressionDict, k), READONLY,
321 "segment size" },
322 { "d", T_UINT, offsetof(ZstdCompressionDict, d), READONLY,
323 "dmer size" },
324 { NULL }
325 };
326
183 static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) {
327 static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) {
184 return self->dictSize;
328 return self->dictSize;
185 }
329 }
186
330
187 static PySequenceMethods ZstdCompressionDict_sq = {
331 static PySequenceMethods ZstdCompressionDict_sq = {
188 (lenfunc)ZstdCompressionDict_length, /* sq_length */
332 (lenfunc)ZstdCompressionDict_length, /* sq_length */
189 0, /* sq_concat */
333 0, /* sq_concat */
190 0, /* sq_repeat */
334 0, /* sq_repeat */
191 0, /* sq_item */
335 0, /* sq_item */
192 0, /* sq_ass_item */
336 0, /* sq_ass_item */
193 0, /* sq_contains */
337 0, /* sq_contains */
194 0, /* sq_inplace_concat */
338 0, /* sq_inplace_concat */
195 0 /* sq_inplace_repeat */
339 0 /* sq_inplace_repeat */
196 };
340 };
197
341
198 PyTypeObject ZstdCompressionDictType = {
342 PyTypeObject ZstdCompressionDictType = {
199 PyVarObject_HEAD_INIT(NULL, 0)
343 PyVarObject_HEAD_INIT(NULL, 0)
200 "zstd.ZstdCompressionDict", /* tp_name */
344 "zstd.ZstdCompressionDict", /* tp_name */
201 sizeof(ZstdCompressionDict), /* tp_basicsize */
345 sizeof(ZstdCompressionDict), /* tp_basicsize */
202 0, /* tp_itemsize */
346 0, /* tp_itemsize */
203 (destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */
347 (destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */
204 0, /* tp_print */
348 0, /* tp_print */
205 0, /* tp_getattr */
349 0, /* tp_getattr */
206 0, /* tp_setattr */
350 0, /* tp_setattr */
207 0, /* tp_compare */
351 0, /* tp_compare */
208 0, /* tp_repr */
352 0, /* tp_repr */
209 0, /* tp_as_number */
353 0, /* tp_as_number */
210 &ZstdCompressionDict_sq, /* tp_as_sequence */
354 &ZstdCompressionDict_sq, /* tp_as_sequence */
211 0, /* tp_as_mapping */
355 0, /* tp_as_mapping */
212 0, /* tp_hash */
356 0, /* tp_hash */
213 0, /* tp_call */
357 0, /* tp_call */
214 0, /* tp_str */
358 0, /* tp_str */
215 0, /* tp_getattro */
359 0, /* tp_getattro */
216 0, /* tp_setattro */
360 0, /* tp_setattro */
217 0, /* tp_as_buffer */
361 0, /* tp_as_buffer */
218 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
362 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
219 ZstdCompressionDict__doc__, /* tp_doc */
363 ZstdCompressionDict__doc__, /* tp_doc */
220 0, /* tp_traverse */
364 0, /* tp_traverse */
221 0, /* tp_clear */
365 0, /* tp_clear */
222 0, /* tp_richcompare */
366 0, /* tp_richcompare */
223 0, /* tp_weaklistoffset */
367 0, /* tp_weaklistoffset */
224 0, /* tp_iter */
368 0, /* tp_iter */
225 0, /* tp_iternext */
369 0, /* tp_iternext */
226 ZstdCompressionDict_methods, /* tp_methods */
370 ZstdCompressionDict_methods, /* tp_methods */
227 0, /* tp_members */
371 ZstdCompressionDict_members, /* tp_members */
228 0, /* tp_getset */
372 0, /* tp_getset */
229 0, /* tp_base */
373 0, /* tp_base */
230 0, /* tp_dict */
374 0, /* tp_dict */
231 0, /* tp_descr_get */
375 0, /* tp_descr_get */
232 0, /* tp_descr_set */
376 0, /* tp_descr_set */
233 0, /* tp_dictoffset */
377 0, /* tp_dictoffset */
234 (initproc)ZstdCompressionDict_init, /* tp_init */
378 (initproc)ZstdCompressionDict_init, /* tp_init */
235 0, /* tp_alloc */
379 0, /* tp_alloc */
236 PyType_GenericNew, /* tp_new */
380 PyType_GenericNew, /* tp_new */
237 };
381 };
238
382
239 void compressiondict_module_init(PyObject* mod) {
383 void compressiondict_module_init(PyObject* mod) {
240 Py_TYPE(&ZstdCompressionDictType) = &PyType_Type;
384 Py_TYPE(&ZstdCompressionDictType) = &PyType_Type;
241 if (PyType_Ready(&ZstdCompressionDictType) < 0) {
385 if (PyType_Ready(&ZstdCompressionDictType) < 0) {
242 return;
386 return;
243 }
387 }
244
388
245 Py_INCREF((PyObject*)&ZstdCompressionDictType);
389 Py_INCREF((PyObject*)&ZstdCompressionDictType);
246 PyModule_AddObject(mod, "ZstdCompressionDict",
390 PyModule_AddObject(mod, "ZstdCompressionDict",
247 (PyObject*)&ZstdCompressionDictType);
391 (PyObject*)&ZstdCompressionDictType);
248 }
392 }
@@ -1,220 +1,253
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams) {
11 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams) {
12 zparams->windowLog = params->windowLog;
12 zparams->windowLog = params->windowLog;
13 zparams->chainLog = params->chainLog;
13 zparams->chainLog = params->chainLog;
14 zparams->hashLog = params->hashLog;
14 zparams->hashLog = params->hashLog;
15 zparams->searchLog = params->searchLog;
15 zparams->searchLog = params->searchLog;
16 zparams->searchLength = params->searchLength;
16 zparams->searchLength = params->searchLength;
17 zparams->targetLength = params->targetLength;
17 zparams->targetLength = params->targetLength;
18 zparams->strategy = params->strategy;
18 zparams->strategy = params->strategy;
19 }
19 }
20
20
21 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args) {
21 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args) {
22 int compressionLevel;
22 int compressionLevel;
23 unsigned PY_LONG_LONG sourceSize = 0;
23 unsigned PY_LONG_LONG sourceSize = 0;
24 Py_ssize_t dictSize = 0;
24 Py_ssize_t dictSize = 0;
25 ZSTD_compressionParameters params;
25 ZSTD_compressionParameters params;
26 CompressionParametersObject* result;
26 CompressionParametersObject* result;
27
27
28 if (!PyArg_ParseTuple(args, "i|Kn:get_compression_parameters",
28 if (!PyArg_ParseTuple(args, "i|Kn:get_compression_parameters",
29 &compressionLevel, &sourceSize, &dictSize)) {
29 &compressionLevel, &sourceSize, &dictSize)) {
30 return NULL;
30 return NULL;
31 }
31 }
32
32
33 params = ZSTD_getCParams(compressionLevel, sourceSize, dictSize);
33 params = ZSTD_getCParams(compressionLevel, sourceSize, dictSize);
34
34
35 result = PyObject_New(CompressionParametersObject, &CompressionParametersType);
35 result = PyObject_New(CompressionParametersObject, &CompressionParametersType);
36 if (!result) {
36 if (!result) {
37 return NULL;
37 return NULL;
38 }
38 }
39
39
40 result->windowLog = params.windowLog;
40 result->windowLog = params.windowLog;
41 result->chainLog = params.chainLog;
41 result->chainLog = params.chainLog;
42 result->hashLog = params.hashLog;
42 result->hashLog = params.hashLog;
43 result->searchLog = params.searchLog;
43 result->searchLog = params.searchLog;
44 result->searchLength = params.searchLength;
44 result->searchLength = params.searchLength;
45 result->targetLength = params.targetLength;
45 result->targetLength = params.targetLength;
46 result->strategy = params.strategy;
46 result->strategy = params.strategy;
47
47
48 return result;
48 return result;
49 }
49 }
50
50
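
For context, get_compression_parameters() above is a thin wrapper that copies the struct returned by ZSTD_getCParams() onto the Python object field by field. A minimal C sketch of the underlying call, assuming the zstd ~1.1.x advanced API vendored with this extension (ZSTD_getCParams() sits behind ZSTD_STATIC_LINKING_ONLY there); this is illustrative and not part of the extension itself:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdio.h>

    int main(void) {
        /* Level 3, unknown source size (0), no dictionary (0): the same
           defaults get_compression_parameters() uses for omitted arguments. */
        ZSTD_compressionParameters cparams = ZSTD_getCParams(3, 0, 0);

        printf("windowLog=%u chainLog=%u hashLog=%u searchLog=%u\n",
               cparams.windowLog, cparams.chainLog, cparams.hashLog,
               cparams.searchLog);
        printf("searchLength=%u targetLength=%u strategy=%d\n",
               cparams.searchLength, cparams.targetLength, (int)cparams.strategy);
        return 0;
    }
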
51 static int CompressionParameters_init(CompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
51 static int CompressionParameters_init(CompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
52 static char* kwlist[] = {
52 static char* kwlist[] = {
53 "window_log",
53 "window_log",
54 "chain_log",
54 "chain_log",
55 "hash_log",
55 "hash_log",
56 "search_log",
56 "search_log",
57 "search_length",
57 "search_length",
58 "target_length",
58 "target_length",
59 "strategy",
59 "strategy",
60 NULL
60 NULL
61 };
61 };
62
62
63 unsigned windowLog;
63 unsigned windowLog;
64 unsigned chainLog;
64 unsigned chainLog;
65 unsigned hashLog;
65 unsigned hashLog;
66 unsigned searchLog;
66 unsigned searchLog;
67 unsigned searchLength;
67 unsigned searchLength;
68 unsigned targetLength;
68 unsigned targetLength;
69 unsigned strategy;
69 unsigned strategy;
70 ZSTD_compressionParameters params;
71 size_t zresult;
70
72
71 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "IIIIIII:CompressionParameters",
73 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "IIIIIII:CompressionParameters",
72 kwlist, &windowLog, &chainLog, &hashLog, &searchLog, &searchLength,
74 kwlist, &windowLog, &chainLog, &hashLog, &searchLog, &searchLength,
73 &targetLength, &strategy)) {
75 &targetLength, &strategy)) {
74 return -1;
76 return -1;
75 }
77 }
76
78
77 if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) {
79 if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) {
78 PyErr_SetString(PyExc_ValueError, "invalid window log value");
80 PyErr_SetString(PyExc_ValueError, "invalid window log value");
79 return -1;
81 return -1;
80 }
82 }
81
83
82 if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) {
84 if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) {
83 PyErr_SetString(PyExc_ValueError, "invalid chain log value");
85 PyErr_SetString(PyExc_ValueError, "invalid chain log value");
84 return -1;
86 return -1;
85 }
87 }
86
88
87 if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) {
89 if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) {
88 PyErr_SetString(PyExc_ValueError, "invalid hash log value");
90 PyErr_SetString(PyExc_ValueError, "invalid hash log value");
89 return -1;
91 return -1;
90 }
92 }
91
93
92 if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) {
94 if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) {
93 PyErr_SetString(PyExc_ValueError, "invalid search log value");
95 PyErr_SetString(PyExc_ValueError, "invalid search log value");
94 return -1;
96 return -1;
95 }
97 }
96
98
97 if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) {
99 if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) {
98 PyErr_SetString(PyExc_ValueError, "invalid search length value");
100 PyErr_SetString(PyExc_ValueError, "invalid search length value");
99 return -1;
101 return -1;
100 }
102 }
101
103
102 if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) {
104 if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) {
103 PyErr_SetString(PyExc_ValueError, "invalid target length value");
105 PyErr_SetString(PyExc_ValueError, "invalid target length value");
104 return -1;
106 return -1;
105 }
107 }
106
108
107 if (strategy < ZSTD_fast || strategy > ZSTD_btopt) {
109 if (strategy < ZSTD_fast || strategy > ZSTD_btopt) {
108 PyErr_SetString(PyExc_ValueError, "invalid strategy value");
110 PyErr_SetString(PyExc_ValueError, "invalid strategy value");
109 return -1;
111 return -1;
110 }
112 }
111
113
112 self->windowLog = windowLog;
114 self->windowLog = windowLog;
113 self->chainLog = chainLog;
115 self->chainLog = chainLog;
114 self->hashLog = hashLog;
116 self->hashLog = hashLog;
115 self->searchLog = searchLog;
117 self->searchLog = searchLog;
116 self->searchLength = searchLength;
118 self->searchLength = searchLength;
117 self->targetLength = targetLength;
119 self->targetLength = targetLength;
118 self->strategy = strategy;
120 self->strategy = strategy;
119
121
122 ztopy_compression_parameters(self, &params);
123 zresult = ZSTD_checkCParams(params);
124
125 if (ZSTD_isError(zresult)) {
126 PyErr_Format(PyExc_ValueError, "invalid compression parameters: %s",
127 ZSTD_getErrorName(zresult));
128 return -1;
129 }
130
120 return 0;
131 return 0;
121 }
132 }
122
133
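
The ZSTD_checkCParams() call added on the new side is what catches combinations that pass the individual range checks above but are still rejected by the library. A hedged stand-alone sketch of that validation step (same zstd version assumption as above; validate_cparams is an illustrative name, not an extension function):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdio.h>

    /* Returns 0 when the hand-assembled parameters are coherent. */
    static int validate_cparams(ZSTD_compressionParameters cparams) {
        size_t zresult = ZSTD_checkCParams(cparams);
        if (ZSTD_isError(zresult)) {
            fprintf(stderr, "invalid compression parameters: %s\n",
                    ZSTD_getErrorName(zresult));
            return -1;
        }
        return 0;
    }
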
134 PyDoc_STRVAR(CompressionParameters_estimated_compression_context_size__doc__,
135 "Estimate the size in bytes of a compression context for compression parameters\n"
136 );
137
138 PyObject* CompressionParameters_estimated_compression_context_size(CompressionParametersObject* self) {
139 ZSTD_compressionParameters params;
140
141 ztopy_compression_parameters(self, &params);
142
143 return PyLong_FromSize_t(ZSTD_estimateCCtxSize(params));
144 }
145
123 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) {
146 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) {
124 CompressionParametersObject* params;
147 CompressionParametersObject* params;
125 ZSTD_compressionParameters zparams;
148 ZSTD_compressionParameters zparams;
126 PyObject* result;
149 PyObject* result;
127
150
128 if (!PyArg_ParseTuple(args, "O!:estimate_compression_context_size",
151 if (!PyArg_ParseTuple(args, "O!:estimate_compression_context_size",
129 &CompressionParametersType, &params)) {
152 &CompressionParametersType, &params)) {
130 return NULL;
153 return NULL;
131 }
154 }
132
155
133 ztopy_compression_parameters(params, &zparams);
156 ztopy_compression_parameters(params, &zparams);
134 result = PyLong_FromSize_t(ZSTD_estimateCCtxSize(zparams));
157 result = PyLong_FromSize_t(ZSTD_estimateCCtxSize(zparams));
135 return result;
158 return result;
136 }
159 }
137
160
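
Both of the entry points above end in ZSTD_estimateCCtxSize(), which in the zstd release vendored here takes a ZSTD_compressionParameters value (later zstd releases changed that signature, so this sketch assumes the bundled version):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdio.h>

    int main(void) {
        ZSTD_compressionParameters cparams = ZSTD_getCParams(3, 0, 0);
        /* Rough upper bound on the memory one compression context will need. */
        size_t ctxSize = ZSTD_estimateCCtxSize(cparams);
        printf("estimated CCtx size: %zu bytes\n", ctxSize);
        return 0;
    }
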
138 PyDoc_STRVAR(CompressionParameters__doc__,
161 PyDoc_STRVAR(CompressionParameters__doc__,
139 "CompressionParameters: low-level control over zstd compression");
162 "CompressionParameters: low-level control over zstd compression");
140
163
141 static void CompressionParameters_dealloc(PyObject* self) {
164 static void CompressionParameters_dealloc(PyObject* self) {
142 PyObject_Del(self);
165 PyObject_Del(self);
143 }
166 }
144
167
168 static PyMethodDef CompressionParameters_methods[] = {
169 {
170 "estimated_compression_context_size",
171 (PyCFunction)CompressionParameters_estimated_compression_context_size,
172 METH_NOARGS,
173 CompressionParameters_estimated_compression_context_size__doc__
174 },
175 { NULL, NULL }
176 };
177
145 static PyMemberDef CompressionParameters_members[] = {
178 static PyMemberDef CompressionParameters_members[] = {
146 { "window_log", T_UINT,
179 { "window_log", T_UINT,
147 offsetof(CompressionParametersObject, windowLog), READONLY,
180 offsetof(CompressionParametersObject, windowLog), READONLY,
148 "window log" },
181 "window log" },
149 { "chain_log", T_UINT,
182 { "chain_log", T_UINT,
150 offsetof(CompressionParametersObject, chainLog), READONLY,
183 offsetof(CompressionParametersObject, chainLog), READONLY,
151 "chain log" },
184 "chain log" },
152 { "hash_log", T_UINT,
185 { "hash_log", T_UINT,
153 offsetof(CompressionParametersObject, hashLog), READONLY,
186 offsetof(CompressionParametersObject, hashLog), READONLY,
154 "hash log" },
187 "hash log" },
155 { "search_log", T_UINT,
188 { "search_log", T_UINT,
156 offsetof(CompressionParametersObject, searchLog), READONLY,
189 offsetof(CompressionParametersObject, searchLog), READONLY,
157 "search log" },
190 "search log" },
158 { "search_length", T_UINT,
191 { "search_length", T_UINT,
159 offsetof(CompressionParametersObject, searchLength), READONLY,
192 offsetof(CompressionParametersObject, searchLength), READONLY,
160 "search length" },
193 "search length" },
161 { "target_length", T_UINT,
194 { "target_length", T_UINT,
162 offsetof(CompressionParametersObject, targetLength), READONLY,
195 offsetof(CompressionParametersObject, targetLength), READONLY,
163 "target length" },
196 "target length" },
164 { "strategy", T_INT,
197 { "strategy", T_INT,
165 offsetof(CompressionParametersObject, strategy), READONLY,
198 offsetof(CompressionParametersObject, strategy), READONLY,
166 "strategy" },
199 "strategy" },
167 { NULL }
200 { NULL }
168 };
201 };
169
202
170 PyTypeObject CompressionParametersType = {
203 PyTypeObject CompressionParametersType = {
171 PyVarObject_HEAD_INIT(NULL, 0)
204 PyVarObject_HEAD_INIT(NULL, 0)
172 "CompressionParameters", /* tp_name */
205 "CompressionParameters", /* tp_name */
173 sizeof(CompressionParametersObject), /* tp_basicsize */
206 sizeof(CompressionParametersObject), /* tp_basicsize */
174 0, /* tp_itemsize */
207 0, /* tp_itemsize */
175 (destructor)CompressionParameters_dealloc, /* tp_dealloc */
208 (destructor)CompressionParameters_dealloc, /* tp_dealloc */
176 0, /* tp_print */
209 0, /* tp_print */
177 0, /* tp_getattr */
210 0, /* tp_getattr */
178 0, /* tp_setattr */
211 0, /* tp_setattr */
179 0, /* tp_compare */
212 0, /* tp_compare */
180 0, /* tp_repr */
213 0, /* tp_repr */
181 0, /* tp_as_number */
214 0, /* tp_as_number */
182 0, /* tp_as_sequence */
215 0, /* tp_as_sequence */
183 0, /* tp_as_mapping */
216 0, /* tp_as_mapping */
184 0, /* tp_hash */
217 0, /* tp_hash */
185 0, /* tp_call */
218 0, /* tp_call */
186 0, /* tp_str */
219 0, /* tp_str */
187 0, /* tp_getattro */
220 0, /* tp_getattro */
188 0, /* tp_setattro */
221 0, /* tp_setattro */
189 0, /* tp_as_buffer */
222 0, /* tp_as_buffer */
190 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
223 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
191 CompressionParameters__doc__, /* tp_doc */
224 CompressionParameters__doc__, /* tp_doc */
192 0, /* tp_traverse */
225 0, /* tp_traverse */
193 0, /* tp_clear */
226 0, /* tp_clear */
194 0, /* tp_richcompare */
227 0, /* tp_richcompare */
195 0, /* tp_weaklistoffset */
228 0, /* tp_weaklistoffset */
196 0, /* tp_iter */
229 0, /* tp_iter */
197 0, /* tp_iternext */
230 0, /* tp_iternext */
198 0, /* tp_methods */
231 CompressionParameters_methods, /* tp_methods */
199 CompressionParameters_members, /* tp_members */
232 CompressionParameters_members, /* tp_members */
200 0, /* tp_getset */
233 0, /* tp_getset */
201 0, /* tp_base */
234 0, /* tp_base */
202 0, /* tp_dict */
235 0, /* tp_dict */
203 0, /* tp_descr_get */
236 0, /* tp_descr_get */
204 0, /* tp_descr_set */
237 0, /* tp_descr_set */
205 0, /* tp_dictoffset */
238 0, /* tp_dictoffset */
206 (initproc)CompressionParameters_init, /* tp_init */
239 (initproc)CompressionParameters_init, /* tp_init */
207 0, /* tp_alloc */
240 0, /* tp_alloc */
208 PyType_GenericNew, /* tp_new */
241 PyType_GenericNew, /* tp_new */
209 };
242 };
210
243
211 void compressionparams_module_init(PyObject* mod) {
244 void compressionparams_module_init(PyObject* mod) {
212 Py_TYPE(&CompressionParametersType) = &PyType_Type;
245 Py_TYPE(&CompressionParametersType) = &PyType_Type;
213 if (PyType_Ready(&CompressionParametersType) < 0) {
246 if (PyType_Ready(&CompressionParametersType) < 0) {
214 return;
247 return;
215 }
248 }
216
249
217 Py_IncRef((PyObject*)&CompressionParametersType);
250 Py_INCREF(&CompressionParametersType);
218 PyModule_AddObject(mod, "CompressionParameters",
251 PyModule_AddObject(mod, "CompressionParameters",
219 (PyObject*)&CompressionParametersType);
252 (PyObject*)&CompressionParametersType);
220 }
253 }
@@ -1,290 +1,305
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(ZstdCompresssionWriter__doc__,
13 PyDoc_STRVAR(ZstdCompresssionWriter__doc__,
14 """A context manager used for writing compressed output to a writer.\n"
14 """A context manager used for writing compressed output to a writer.\n"
15 );
15 );
16
16
17 static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* self) {
17 static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* self) {
18 Py_XDECREF(self->compressor);
18 Py_XDECREF(self->compressor);
19 Py_XDECREF(self->writer);
19 Py_XDECREF(self->writer);
20
20
21 if (self->cstream) {
22 ZSTD_freeCStream(self->cstream);
23 self->cstream = NULL;
24 }
25
26 PyObject_Del(self);
21 PyObject_Del(self);
27 }
22 }
28
23
29 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
24 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
30 if (self->entered) {
25 if (self->entered) {
31 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
26 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
32 return NULL;
27 return NULL;
33 }
28 }
34
29
35 self->cstream = CStream_from_ZstdCompressor(self->compressor, self->sourceSize);
30 if (self->compressor->mtcctx) {
36 if (!self->cstream) {
31 if (init_mtcstream(self->compressor, self->sourceSize)) {
37 return NULL;
32 return NULL;
33 }
34 }
35 else {
36 if (0 != init_cstream(self->compressor, self->sourceSize)) {
37 return NULL;
38 }
38 }
39 }
39
40
40 self->entered = 1;
41 self->entered = 1;
41
42
42 Py_INCREF(self);
43 Py_INCREF(self);
43 return (PyObject*)self;
44 return (PyObject*)self;
44 }
45 }
45
46
46 static PyObject* ZstdCompressionWriter_exit(ZstdCompressionWriter* self, PyObject* args) {
47 static PyObject* ZstdCompressionWriter_exit(ZstdCompressionWriter* self, PyObject* args) {
47 PyObject* exc_type;
48 PyObject* exc_type;
48 PyObject* exc_value;
49 PyObject* exc_value;
49 PyObject* exc_tb;
50 PyObject* exc_tb;
50 size_t zresult;
51 size_t zresult;
51
52
52 ZSTD_outBuffer output;
53 ZSTD_outBuffer output;
53 PyObject* res;
54 PyObject* res;
54
55
55 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
56 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
56 return NULL;
57 return NULL;
57 }
58 }
58
59
59 self->entered = 0;
60 self->entered = 0;
60
61
61 if (self->cstream && exc_type == Py_None && exc_value == Py_None &&
62 if ((self->compressor->cstream || self->compressor->mtcctx) && exc_type == Py_None
62 exc_tb == Py_None) {
63 && exc_value == Py_None && exc_tb == Py_None) {
63
64
64 output.dst = PyMem_Malloc(self->outSize);
65 output.dst = PyMem_Malloc(self->outSize);
65 if (!output.dst) {
66 if (!output.dst) {
66 return PyErr_NoMemory();
67 return PyErr_NoMemory();
67 }
68 }
68 output.size = self->outSize;
69 output.size = self->outSize;
69 output.pos = 0;
70 output.pos = 0;
70
71
71 while (1) {
72 while (1) {
72 zresult = ZSTD_endStream(self->cstream, &output);
73 if (self->compressor->mtcctx) {
74 zresult = ZSTDMT_endStream(self->compressor->mtcctx, &output);
75 }
76 else {
77 zresult = ZSTD_endStream(self->compressor->cstream, &output);
78 }
73 if (ZSTD_isError(zresult)) {
79 if (ZSTD_isError(zresult)) {
74 PyErr_Format(ZstdError, "error ending compression stream: %s",
80 PyErr_Format(ZstdError, "error ending compression stream: %s",
75 ZSTD_getErrorName(zresult));
81 ZSTD_getErrorName(zresult));
76 PyMem_Free(output.dst);
82 PyMem_Free(output.dst);
77 return NULL;
83 return NULL;
78 }
84 }
79
85
80 if (output.pos) {
86 if (output.pos) {
81 #if PY_MAJOR_VERSION >= 3
87 #if PY_MAJOR_VERSION >= 3
82 res = PyObject_CallMethod(self->writer, "write", "y#",
88 res = PyObject_CallMethod(self->writer, "write", "y#",
83 #else
89 #else
84 res = PyObject_CallMethod(self->writer, "write", "s#",
90 res = PyObject_CallMethod(self->writer, "write", "s#",
85 #endif
91 #endif
86 output.dst, output.pos);
92 output.dst, output.pos);
87 Py_XDECREF(res);
93 Py_XDECREF(res);
88 }
94 }
89
95
90 if (!zresult) {
96 if (!zresult) {
91 break;
97 break;
92 }
98 }
93
99
94 output.pos = 0;
100 output.pos = 0;
95 }
101 }
96
102
97 PyMem_Free(output.dst);
103 PyMem_Free(output.dst);
98 ZSTD_freeCStream(self->cstream);
99 self->cstream = NULL;
100 }
104 }
101
105
102 Py_RETURN_FALSE;
106 Py_RETURN_FALSE;
103 }
107 }
104
108
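
__exit__() above finishes the zstd frame by looping on endStream: each pass refills a fixed-size output buffer, the buffer is handed to the writer, and the loop stops once the call returns 0, meaning the frame epilogue has been fully emitted. A stripped-down C sketch of that drain pattern against the single-threaded streaming API (end_frame is an illustrative helper; error reporting is reduced to a return code):

    #include <stdio.h>
    #include <zstd.h>

    static int end_frame(ZSTD_CStream* cstream, FILE* out) {
        char buf[4096];   /* stand-in for ZSTD_CStreamOutSize() */
        size_t remaining;

        do {
            ZSTD_outBuffer output = { buf, sizeof(buf), 0 };
            remaining = ZSTD_endStream(cstream, &output);
            if (ZSTD_isError(remaining)) {
                return -1;   /* caller can report ZSTD_getErrorName(remaining) */
            }
            if (output.pos) {
                fwrite(output.dst, 1, output.pos, out);
            }
        } while (remaining != 0);   /* 0: nothing left to flush */

        return 0;
    }
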
105 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
109 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
106 if (!self->cstream) {
110 if (!self->compressor->cstream) {
107 PyErr_SetString(ZstdError, "cannot determine size of an inactive compressor; "
111 PyErr_SetString(ZstdError, "cannot determine size of an inactive compressor; "
108 "call when a context manager is active");
112 "call when a context manager is active");
109 return NULL;
113 return NULL;
110 }
114 }
111
115
112 return PyLong_FromSize_t(ZSTD_sizeof_CStream(self->cstream));
116 return PyLong_FromSize_t(ZSTD_sizeof_CStream(self->compressor->cstream));
113 }
117 }
114
118
115 static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args) {
119 static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args) {
116 const char* source;
120 const char* source;
117 Py_ssize_t sourceSize;
121 Py_ssize_t sourceSize;
118 size_t zresult;
122 size_t zresult;
119 ZSTD_inBuffer input;
123 ZSTD_inBuffer input;
120 ZSTD_outBuffer output;
124 ZSTD_outBuffer output;
121 PyObject* res;
125 PyObject* res;
122 Py_ssize_t totalWrite = 0;
126 Py_ssize_t totalWrite = 0;
123
127
124 #if PY_MAJOR_VERSION >= 3
128 #if PY_MAJOR_VERSION >= 3
125 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
129 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
126 #else
130 #else
127 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
131 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
128 #endif
132 #endif
129 return NULL;
133 return NULL;
130 }
134 }
131
135
132 if (!self->entered) {
136 if (!self->entered) {
133 PyErr_SetString(ZstdError, "compress must be called from an active context manager");
137 PyErr_SetString(ZstdError, "compress must be called from an active context manager");
134 return NULL;
138 return NULL;
135 }
139 }
136
140
137 output.dst = PyMem_Malloc(self->outSize);
141 output.dst = PyMem_Malloc(self->outSize);
138 if (!output.dst) {
142 if (!output.dst) {
139 return PyErr_NoMemory();
143 return PyErr_NoMemory();
140 }
144 }
141 output.size = self->outSize;
145 output.size = self->outSize;
142 output.pos = 0;
146 output.pos = 0;
143
147
144 input.src = source;
148 input.src = source;
145 input.size = sourceSize;
149 input.size = sourceSize;
146 input.pos = 0;
150 input.pos = 0;
147
151
148 while ((ssize_t)input.pos < sourceSize) {
152 while ((ssize_t)input.pos < sourceSize) {
149 Py_BEGIN_ALLOW_THREADS
153 Py_BEGIN_ALLOW_THREADS
150 zresult = ZSTD_compressStream(self->cstream, &output, &input);
154 if (self->compressor->mtcctx) {
155 zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
156 &output, &input);
157 }
158 else {
159 zresult = ZSTD_compressStream(self->compressor->cstream, &output, &input);
160 }
151 Py_END_ALLOW_THREADS
161 Py_END_ALLOW_THREADS
152
162
153 if (ZSTD_isError(zresult)) {
163 if (ZSTD_isError(zresult)) {
154 PyMem_Free(output.dst);
164 PyMem_Free(output.dst);
155 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
165 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
156 return NULL;
166 return NULL;
157 }
167 }
158
168
159 /* Copy data from output buffer to writer. */
169 /* Copy data from output buffer to writer. */
160 if (output.pos) {
170 if (output.pos) {
161 #if PY_MAJOR_VERSION >= 3
171 #if PY_MAJOR_VERSION >= 3
162 res = PyObject_CallMethod(self->writer, "write", "y#",
172 res = PyObject_CallMethod(self->writer, "write", "y#",
163 #else
173 #else
164 res = PyObject_CallMethod(self->writer, "write", "s#",
174 res = PyObject_CallMethod(self->writer, "write", "s#",
165 #endif
175 #endif
166 output.dst, output.pos);
176 output.dst, output.pos);
167 Py_XDECREF(res);
177 Py_XDECREF(res);
168 totalWrite += output.pos;
178 totalWrite += output.pos;
169 }
179 }
170 output.pos = 0;
180 output.pos = 0;
171 }
181 }
172
182
173 PyMem_Free(output.dst);
183 PyMem_Free(output.dst);
174
184
175 return PyLong_FromSsize_t(totalWrite);
185 return PyLong_FromSsize_t(totalWrite);
176 }
186 }
177
187
178 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
188 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
179 size_t zresult;
189 size_t zresult;
180 ZSTD_outBuffer output;
190 ZSTD_outBuffer output;
181 PyObject* res;
191 PyObject* res;
182 Py_ssize_t totalWrite = 0;
192 Py_ssize_t totalWrite = 0;
183
193
184 if (!self->entered) {
194 if (!self->entered) {
185 PyErr_SetString(ZstdError, "flush must be called from an active context manager");
195 PyErr_SetString(ZstdError, "flush must be called from an active context manager");
186 return NULL;
196 return NULL;
187 }
197 }
188
198
189 output.dst = PyMem_Malloc(self->outSize);
199 output.dst = PyMem_Malloc(self->outSize);
190 if (!output.dst) {
200 if (!output.dst) {
191 return PyErr_NoMemory();
201 return PyErr_NoMemory();
192 }
202 }
193 output.size = self->outSize;
203 output.size = self->outSize;
194 output.pos = 0;
204 output.pos = 0;
195
205
196 while (1) {
206 while (1) {
197 Py_BEGIN_ALLOW_THREADS
207 Py_BEGIN_ALLOW_THREADS
198 zresult = ZSTD_flushStream(self->cstream, &output);
208 if (self->compressor->mtcctx) {
209 zresult = ZSTDMT_flushStream(self->compressor->mtcctx, &output);
210 }
211 else {
212 zresult = ZSTD_flushStream(self->compressor->cstream, &output);
213 }
199 Py_END_ALLOW_THREADS
214 Py_END_ALLOW_THREADS
200
215
201 if (ZSTD_isError(zresult)) {
216 if (ZSTD_isError(zresult)) {
202 PyMem_Free(output.dst);
217 PyMem_Free(output.dst);
203 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
218 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
204 return NULL;
219 return NULL;
205 }
220 }
206
221
207 if (!output.pos) {
222 if (!output.pos) {
208 break;
223 break;
209 }
224 }
210
225
211 /* Copy data from output buffer to writer. */
226 /* Copy data from output buffer to writer. */
212 if (output.pos) {
227 if (output.pos) {
213 #if PY_MAJOR_VERSION >= 3
228 #if PY_MAJOR_VERSION >= 3
214 res = PyObject_CallMethod(self->writer, "write", "y#",
229 res = PyObject_CallMethod(self->writer, "write", "y#",
215 #else
230 #else
216 res = PyObject_CallMethod(self->writer, "write", "s#",
231 res = PyObject_CallMethod(self->writer, "write", "s#",
217 #endif
232 #endif
218 output.dst, output.pos);
233 output.dst, output.pos);
219 Py_XDECREF(res);
234 Py_XDECREF(res);
220 totalWrite += output.pos;
235 totalWrite += output.pos;
221 }
236 }
222 output.pos = 0;
237 output.pos = 0;
223 }
238 }
224
239
225 PyMem_Free(output.dst);
240 PyMem_Free(output.dst);
226
241
227 return PyLong_FromSsize_t(totalWrite);
242 return PyLong_FromSsize_t(totalWrite);
228 }
243 }
229
244
230 static PyMethodDef ZstdCompressionWriter_methods[] = {
245 static PyMethodDef ZstdCompressionWriter_methods[] = {
231 { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
246 { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
232 PyDoc_STR("Enter a compression context.") },
247 PyDoc_STR("Enter a compression context.") },
233 { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS,
248 { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS,
234 PyDoc_STR("Exit a compression context.") },
249 PyDoc_STR("Exit a compression context.") },
235 { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
250 { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
236 PyDoc_STR("Obtain the memory size of the underlying compressor") },
251 PyDoc_STR("Obtain the memory size of the underlying compressor") },
237 { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS,
252 { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS,
238 PyDoc_STR("Compress data") },
253 PyDoc_STR("Compress data") },
239 { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_NOARGS,
254 { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_NOARGS,
240 PyDoc_STR("Flush data and finish a zstd frame") },
255 PyDoc_STR("Flush data and finish a zstd frame") },
241 { NULL, NULL }
256 { NULL, NULL }
242 };
257 };
243
258
244 PyTypeObject ZstdCompressionWriterType = {
259 PyTypeObject ZstdCompressionWriterType = {
245 PyVarObject_HEAD_INIT(NULL, 0)
260 PyVarObject_HEAD_INIT(NULL, 0)
246 "zstd.ZstdCompressionWriter", /* tp_name */
261 "zstd.ZstdCompressionWriter", /* tp_name */
247 sizeof(ZstdCompressionWriter), /* tp_basicsize */
262 sizeof(ZstdCompressionWriter), /* tp_basicsize */
248 0, /* tp_itemsize */
263 0, /* tp_itemsize */
249 (destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */
264 (destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */
250 0, /* tp_print */
265 0, /* tp_print */
251 0, /* tp_getattr */
266 0, /* tp_getattr */
252 0, /* tp_setattr */
267 0, /* tp_setattr */
253 0, /* tp_compare */
268 0, /* tp_compare */
254 0, /* tp_repr */
269 0, /* tp_repr */
255 0, /* tp_as_number */
270 0, /* tp_as_number */
256 0, /* tp_as_sequence */
271 0, /* tp_as_sequence */
257 0, /* tp_as_mapping */
272 0, /* tp_as_mapping */
258 0, /* tp_hash */
273 0, /* tp_hash */
259 0, /* tp_call */
274 0, /* tp_call */
260 0, /* tp_str */
275 0, /* tp_str */
261 0, /* tp_getattro */
276 0, /* tp_getattro */
262 0, /* tp_setattro */
277 0, /* tp_setattro */
263 0, /* tp_as_buffer */
278 0, /* tp_as_buffer */
264 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
279 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
265 ZstdCompresssionWriter__doc__, /* tp_doc */
280 ZstdCompresssionWriter__doc__, /* tp_doc */
266 0, /* tp_traverse */
281 0, /* tp_traverse */
267 0, /* tp_clear */
282 0, /* tp_clear */
268 0, /* tp_richcompare */
283 0, /* tp_richcompare */
269 0, /* tp_weaklistoffset */
284 0, /* tp_weaklistoffset */
270 0, /* tp_iter */
285 0, /* tp_iter */
271 0, /* tp_iternext */
286 0, /* tp_iternext */
272 ZstdCompressionWriter_methods, /* tp_methods */
287 ZstdCompressionWriter_methods, /* tp_methods */
273 0, /* tp_members */
288 0, /* tp_members */
274 0, /* tp_getset */
289 0, /* tp_getset */
275 0, /* tp_base */
290 0, /* tp_base */
276 0, /* tp_dict */
291 0, /* tp_dict */
277 0, /* tp_descr_get */
292 0, /* tp_descr_get */
278 0, /* tp_descr_set */
293 0, /* tp_descr_set */
279 0, /* tp_dictoffset */
294 0, /* tp_dictoffset */
280 0, /* tp_init */
295 0, /* tp_init */
281 0, /* tp_alloc */
296 0, /* tp_alloc */
282 PyType_GenericNew, /* tp_new */
297 PyType_GenericNew, /* tp_new */
283 };
298 };
284
299
285 void compressionwriter_module_init(PyObject* mod) {
300 void compressionwriter_module_init(PyObject* mod) {
286 Py_TYPE(&ZstdCompressionWriterType) = &PyType_Type;
301 Py_TYPE(&ZstdCompressionWriterType) = &PyType_Type;
287 if (PyType_Ready(&ZstdCompressionWriterType) < 0) {
302 if (PyType_Ready(&ZstdCompressionWriterType) < 0) {
288 return;
303 return;
289 }
304 }
290 }
305 }
@@ -1,250 +1,258
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(ZstdCompressionObj__doc__,
13 PyDoc_STRVAR(ZstdCompressionObj__doc__,
14 "Perform compression using a standard library compatible API.\n"
14 "Perform compression using a standard library compatible API.\n"
15 );
15 );
16
16
17 static void ZstdCompressionObj_dealloc(ZstdCompressionObj* self) {
17 static void ZstdCompressionObj_dealloc(ZstdCompressionObj* self) {
18 PyMem_Free(self->output.dst);
18 PyMem_Free(self->output.dst);
19 self->output.dst = NULL;
19 self->output.dst = NULL;
20
20
21 if (self->cstream) {
22 ZSTD_freeCStream(self->cstream);
23 self->cstream = NULL;
24 }
25
26 Py_XDECREF(self->compressor);
21 Py_XDECREF(self->compressor);
27
22
28 PyObject_Del(self);
23 PyObject_Del(self);
29 }
24 }
30
25
31 static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args) {
26 static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args) {
32 const char* source;
27 const char* source;
33 Py_ssize_t sourceSize;
28 Py_ssize_t sourceSize;
34 ZSTD_inBuffer input;
29 ZSTD_inBuffer input;
35 size_t zresult;
30 size_t zresult;
36 PyObject* result = NULL;
31 PyObject* result = NULL;
37 Py_ssize_t resultSize = 0;
32 Py_ssize_t resultSize = 0;
38
33
39 if (self->finished) {
34 if (self->finished) {
40 PyErr_SetString(ZstdError, "cannot call compress() after compressor finished");
35 PyErr_SetString(ZstdError, "cannot call compress() after compressor finished");
41 return NULL;
36 return NULL;
42 }
37 }
43
38
44 #if PY_MAJOR_VERSION >= 3
39 #if PY_MAJOR_VERSION >= 3
45 if (!PyArg_ParseTuple(args, "y#:compress", &source, &sourceSize)) {
40 if (!PyArg_ParseTuple(args, "y#:compress", &source, &sourceSize)) {
46 #else
41 #else
47 if (!PyArg_ParseTuple(args, "s#:compress", &source, &sourceSize)) {
42 if (!PyArg_ParseTuple(args, "s#:compress", &source, &sourceSize)) {
48 #endif
43 #endif
49 return NULL;
44 return NULL;
50 }
45 }
51
46
52 input.src = source;
47 input.src = source;
53 input.size = sourceSize;
48 input.size = sourceSize;
54 input.pos = 0;
49 input.pos = 0;
55
50
56 while ((ssize_t)input.pos < sourceSize) {
51 while ((ssize_t)input.pos < sourceSize) {
57 Py_BEGIN_ALLOW_THREADS
52 Py_BEGIN_ALLOW_THREADS
58 zresult = ZSTD_compressStream(self->cstream, &self->output, &input);
53 if (self->compressor->mtcctx) {
54 zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
55 &self->output, &input);
56 }
57 else {
58 zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, &input);
59 }
59 Py_END_ALLOW_THREADS
60 Py_END_ALLOW_THREADS
60
61
61 if (ZSTD_isError(zresult)) {
62 if (ZSTD_isError(zresult)) {
62 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
63 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
63 return NULL;
64 return NULL;
64 }
65 }
65
66
66 if (self->output.pos) {
67 if (self->output.pos) {
67 if (result) {
68 if (result) {
68 resultSize = PyBytes_GET_SIZE(result);
69 resultSize = PyBytes_GET_SIZE(result);
69 if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
70 if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
70 return NULL;
71 return NULL;
71 }
72 }
72
73
73 memcpy(PyBytes_AS_STRING(result) + resultSize,
74 memcpy(PyBytes_AS_STRING(result) + resultSize,
74 self->output.dst, self->output.pos);
75 self->output.dst, self->output.pos);
75 }
76 }
76 else {
77 else {
77 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
78 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
78 if (!result) {
79 if (!result) {
79 return NULL;
80 return NULL;
80 }
81 }
81 }
82 }
82
83
83 self->output.pos = 0;
84 self->output.pos = 0;
84 }
85 }
85 }
86 }
86
87
87 if (result) {
88 if (result) {
88 return result;
89 return result;
89 }
90 }
90 else {
91 else {
91 return PyBytes_FromString("");
92 return PyBytes_FromString("");
92 }
93 }
93 }
94 }
94
95
95 static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args) {
96 static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args) {
96 int flushMode = compressorobj_flush_finish;
97 int flushMode = compressorobj_flush_finish;
97 size_t zresult;
98 size_t zresult;
98 PyObject* result = NULL;
99 PyObject* result = NULL;
99 Py_ssize_t resultSize = 0;
100 Py_ssize_t resultSize = 0;
100
101
101 if (!PyArg_ParseTuple(args, "|i:flush", &flushMode)) {
102 if (!PyArg_ParseTuple(args, "|i:flush", &flushMode)) {
102 return NULL;
103 return NULL;
103 }
104 }
104
105
105 if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) {
106 if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) {
106 PyErr_SetString(PyExc_ValueError, "flush mode not recognized");
107 PyErr_SetString(PyExc_ValueError, "flush mode not recognized");
107 return NULL;
108 return NULL;
108 }
109 }
109
110
110 if (self->finished) {
111 if (self->finished) {
111 PyErr_SetString(ZstdError, "compressor object already finished");
112 PyErr_SetString(ZstdError, "compressor object already finished");
112 return NULL;
113 return NULL;
113 }
114 }
114
115
115 assert(self->output.pos == 0);
116 assert(self->output.pos == 0);
116
117
117 if (flushMode == compressorobj_flush_block) {
118 if (flushMode == compressorobj_flush_block) {
118 /* The output buffer is of size ZSTD_CStreamOutSize(), which is
119 /* The output buffer is of size ZSTD_CStreamOutSize(), which is
119 guaranteed to hold a full block. */
120 guaranteed to hold a full block. */
120 Py_BEGIN_ALLOW_THREADS
121 Py_BEGIN_ALLOW_THREADS
121 zresult = ZSTD_flushStream(self->cstream, &self->output);
122 if (self->compressor->mtcctx) {
123 zresult = ZSTDMT_flushStream(self->compressor->mtcctx, &self->output);
124 }
125 else {
126 zresult = ZSTD_flushStream(self->compressor->cstream, &self->output);
127 }
122 Py_END_ALLOW_THREADS
128 Py_END_ALLOW_THREADS
123
129
124 if (ZSTD_isError(zresult)) {
130 if (ZSTD_isError(zresult)) {
125 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
131 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
126 return NULL;
132 return NULL;
127 }
133 }
128
134
129 /* Output buffer is guaranteed to hold full block. */
135 /* Output buffer is guaranteed to hold full block. */
130 assert(zresult == 0);
136 assert(zresult == 0);
131
137
132 if (self->output.pos) {
138 if (self->output.pos) {
133 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
139 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
134 if (!result) {
140 if (!result) {
135 return NULL;
141 return NULL;
136 }
142 }
137 }
143 }
138
144
139 self->output.pos = 0;
145 self->output.pos = 0;
140
146
141 if (result) {
147 if (result) {
142 return result;
148 return result;
143 }
149 }
144 else {
150 else {
145 return PyBytes_FromString("");
151 return PyBytes_FromString("");
146 }
152 }
147 }
153 }
148
154
149 assert(flushMode == compressorobj_flush_finish);
155 assert(flushMode == compressorobj_flush_finish);
150 self->finished = 1;
156 self->finished = 1;
151
157
152 while (1) {
158 while (1) {
153 zresult = ZSTD_endStream(self->cstream, &self->output);
159 if (self->compressor->mtcctx) {
160 zresult = ZSTDMT_endStream(self->compressor->mtcctx, &self->output);
161 }
162 else {
163 zresult = ZSTD_endStream(self->compressor->cstream, &self->output);
164 }
154 if (ZSTD_isError(zresult)) {
165 if (ZSTD_isError(zresult)) {
155 PyErr_Format(ZstdError, "error ending compression stream: %s",
166 PyErr_Format(ZstdError, "error ending compression stream: %s",
156 ZSTD_getErrorName(zresult));
167 ZSTD_getErrorName(zresult));
157 return NULL;
168 return NULL;
158 }
169 }
159
170
160 if (self->output.pos) {
171 if (self->output.pos) {
161 if (result) {
172 if (result) {
162 resultSize = PyBytes_GET_SIZE(result);
173 resultSize = PyBytes_GET_SIZE(result);
163 if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
174 if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
164 return NULL;
175 return NULL;
165 }
176 }
166
177
167 memcpy(PyBytes_AS_STRING(result) + resultSize,
178 memcpy(PyBytes_AS_STRING(result) + resultSize,
168 self->output.dst, self->output.pos);
179 self->output.dst, self->output.pos);
169 }
180 }
170 else {
181 else {
171 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
182 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
172 if (!result) {
183 if (!result) {
173 return NULL;
184 return NULL;
174 }
185 }
175 }
186 }
176
187
177 self->output.pos = 0;
188 self->output.pos = 0;
178 }
189 }
179
190
180 if (!zresult) {
191 if (!zresult) {
181 break;
192 break;
182 }
193 }
183 }
194 }
184
195
185 ZSTD_freeCStream(self->cstream);
186 self->cstream = NULL;
187
188 if (result) {
196 if (result) {
189 return result;
197 return result;
190 }
198 }
191 else {
199 else {
192 return PyBytes_FromString("");
200 return PyBytes_FromString("");
193 }
201 }
194 }
202 }
195
203
196 static PyMethodDef ZstdCompressionObj_methods[] = {
204 static PyMethodDef ZstdCompressionObj_methods[] = {
197 { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS,
205 { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS,
198 PyDoc_STR("compress data") },
206 PyDoc_STR("compress data") },
199 { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS,
207 { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS,
200 PyDoc_STR("finish compression operation") },
208 PyDoc_STR("finish compression operation") },
201 { NULL, NULL }
209 { NULL, NULL }
202 };
210 };
203
211
204 PyTypeObject ZstdCompressionObjType = {
212 PyTypeObject ZstdCompressionObjType = {
205 PyVarObject_HEAD_INIT(NULL, 0)
213 PyVarObject_HEAD_INIT(NULL, 0)
206 "zstd.ZstdCompressionObj", /* tp_name */
214 "zstd.ZstdCompressionObj", /* tp_name */
207 sizeof(ZstdCompressionObj), /* tp_basicsize */
215 sizeof(ZstdCompressionObj), /* tp_basicsize */
208 0, /* tp_itemsize */
216 0, /* tp_itemsize */
209 (destructor)ZstdCompressionObj_dealloc, /* tp_dealloc */
217 (destructor)ZstdCompressionObj_dealloc, /* tp_dealloc */
210 0, /* tp_print */
218 0, /* tp_print */
211 0, /* tp_getattr */
219 0, /* tp_getattr */
212 0, /* tp_setattr */
220 0, /* tp_setattr */
213 0, /* tp_compare */
221 0, /* tp_compare */
214 0, /* tp_repr */
222 0, /* tp_repr */
215 0, /* tp_as_number */
223 0, /* tp_as_number */
216 0, /* tp_as_sequence */
224 0, /* tp_as_sequence */
217 0, /* tp_as_mapping */
225 0, /* tp_as_mapping */
218 0, /* tp_hash */
226 0, /* tp_hash */
219 0, /* tp_call */
227 0, /* tp_call */
220 0, /* tp_str */
228 0, /* tp_str */
221 0, /* tp_getattro */
229 0, /* tp_getattro */
222 0, /* tp_setattro */
230 0, /* tp_setattro */
223 0, /* tp_as_buffer */
231 0, /* tp_as_buffer */
224 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
232 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
225 ZstdCompressionObj__doc__, /* tp_doc */
233 ZstdCompressionObj__doc__, /* tp_doc */
226 0, /* tp_traverse */
234 0, /* tp_traverse */
227 0, /* tp_clear */
235 0, /* tp_clear */
228 0, /* tp_richcompare */
236 0, /* tp_richcompare */
229 0, /* tp_weaklistoffset */
237 0, /* tp_weaklistoffset */
230 0, /* tp_iter */
238 0, /* tp_iter */
231 0, /* tp_iternext */
239 0, /* tp_iternext */
232 ZstdCompressionObj_methods, /* tp_methods */
240 ZstdCompressionObj_methods, /* tp_methods */
233 0, /* tp_members */
241 0, /* tp_members */
234 0, /* tp_getset */
242 0, /* tp_getset */
235 0, /* tp_base */
243 0, /* tp_base */
236 0, /* tp_dict */
244 0, /* tp_dict */
237 0, /* tp_descr_get */
245 0, /* tp_descr_get */
238 0, /* tp_descr_set */
246 0, /* tp_descr_set */
239 0, /* tp_dictoffset */
247 0, /* tp_dictoffset */
240 0, /* tp_init */
248 0, /* tp_init */
241 0, /* tp_alloc */
249 0, /* tp_alloc */
242 PyType_GenericNew, /* tp_new */
250 PyType_GenericNew, /* tp_new */
243 };
251 };
244
252
245 void compressobj_module_init(PyObject* module) {
253 void compressobj_module_init(PyObject* module) {
246 Py_TYPE(&ZstdCompressionObjType) = &PyType_Type;
254 Py_TYPE(&ZstdCompressionObjType) = &PyType_Type;
247 if (PyType_Ready(&ZstdCompressionObjType) < 0) {
255 if (PyType_Ready(&ZstdCompressionObjType) < 0) {
248 return;
256 return;
249 }
257 }
250 }
258 }
This diff has been collapsed as it changes many lines (957 lines changed).
@@ -1,791 +1,1544
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10 #include "pool.h"
10
11
11 extern PyObject* ZstdError;
12 extern PyObject* ZstdError;
12
13
13 int populate_cdict(ZstdCompressor* compressor, void* dictData, size_t dictSize, ZSTD_parameters* zparams) {
14 int populate_cdict(ZstdCompressor* compressor, ZSTD_parameters* zparams) {
14 ZSTD_customMem zmem;
15 ZSTD_customMem zmem;
15 assert(!compressor->cdict);
16
17 if (compressor->cdict || !compressor->dict || !compressor->dict->dictData) {
18 return 0;
19 }
20
16 Py_BEGIN_ALLOW_THREADS
21 Py_BEGIN_ALLOW_THREADS
17 memset(&zmem, 0, sizeof(zmem));
22 memset(&zmem, 0, sizeof(zmem));
18 compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData,
23 compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData,
19 compressor->dict->dictSize, 1, *zparams, zmem);
24 compressor->dict->dictSize, 1, *zparams, zmem);
20 Py_END_ALLOW_THREADS
25 Py_END_ALLOW_THREADS
21
26
22 if (!compressor->cdict) {
27 if (!compressor->cdict) {
23 PyErr_SetString(ZstdError, "could not create compression dictionary");
28 PyErr_SetString(ZstdError, "could not create compression dictionary");
24 return 1;
29 return 1;
25 }
30 }
26
31
27 return 0;
32 return 0;
28 }
33 }
29
34
30 /**
35 /**
31 * Initialize a zstd CStream from a ZstdCompressor instance.
36 * Ensure the ZSTD_CStream on a ZstdCompressor instance is initialized.
32 *
37 *
33 * Returns a ZSTD_CStream on success or NULL on failure. If NULL, a Python
38 * Returns 0 on success. Other value on failure. Will set a Python exception
34 * exception will be set.
39 * on failure.
35 */
40 */
36 ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize) {
41 int init_cstream(ZstdCompressor* compressor, unsigned long long sourceSize) {
37 ZSTD_CStream* cstream;
38 ZSTD_parameters zparams;
42 ZSTD_parameters zparams;
39 void* dictData = NULL;
43 void* dictData = NULL;
40 size_t dictSize = 0;
44 size_t dictSize = 0;
41 size_t zresult;
45 size_t zresult;
42
46
43 cstream = ZSTD_createCStream();
47 if (compressor->cstream) {
44 if (!cstream) {
48 zresult = ZSTD_resetCStream(compressor->cstream, sourceSize);
45 PyErr_SetString(ZstdError, "cannot create CStream");
49 if (ZSTD_isError(zresult)) {
46 return NULL;
50 PyErr_Format(ZstdError, "could not reset CStream: %s",
51 ZSTD_getErrorName(zresult));
52 return -1;
53 }
54
55 return 0;
56 }
57
58 compressor->cstream = ZSTD_createCStream();
59 if (!compressor->cstream) {
60 PyErr_SetString(ZstdError, "could not create CStream");
61 return -1;
47 }
62 }
48
63
49 if (compressor->dict) {
64 if (compressor->dict) {
50 dictData = compressor->dict->dictData;
65 dictData = compressor->dict->dictData;
51 dictSize = compressor->dict->dictSize;
66 dictSize = compressor->dict->dictSize;
52 }
67 }
53
68
54 memset(&zparams, 0, sizeof(zparams));
69 memset(&zparams, 0, sizeof(zparams));
55 if (compressor->cparams) {
70 if (compressor->cparams) {
56 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
71 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
57 /* Do NOT call ZSTD_adjustCParams() here because the compression params
72 /* Do NOT call ZSTD_adjustCParams() here because the compression params
58 come from the user. */
73 come from the user. */
59 }
74 }
60 else {
75 else {
61 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize);
76 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize);
62 }
77 }
63
78
64 zparams.fParams = compressor->fparams;
79 zparams.fParams = compressor->fparams;
65
80
66 zresult = ZSTD_initCStream_advanced(cstream, dictData, dictSize, zparams, sourceSize);
81 zresult = ZSTD_initCStream_advanced(compressor->cstream, dictData, dictSize,
82 zparams, sourceSize);
67
83
68 if (ZSTD_isError(zresult)) {
84 if (ZSTD_isError(zresult)) {
69 ZSTD_freeCStream(cstream);
85 ZSTD_freeCStream(compressor->cstream);
86 compressor->cstream = NULL;
70 PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
87 PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
71 return NULL;
88 return -1;
72 }
89 }
73
90
74 return cstream;
91 	return 0;
92 }
93
94 int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize) {
95 size_t zresult;
96 void* dictData = NULL;
97 size_t dictSize = 0;
98 ZSTD_parameters zparams;
99
100 assert(compressor->mtcctx);
101
102 if (compressor->dict) {
103 dictData = compressor->dict->dictData;
104 dictSize = compressor->dict->dictSize;
105 }
106
107 memset(&zparams, 0, sizeof(zparams));
108 if (compressor->cparams) {
109 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
110 }
111 else {
112 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize);
113 }
114
115 zparams.fParams = compressor->fparams;
116
117 zresult = ZSTDMT_initCStream_advanced(compressor->mtcctx, dictData, dictSize,
118 zparams, sourceSize);
119
120 if (ZSTD_isError(zresult)) {
121 PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
122 return -1;
123 }
124
125 return 0;
75 }
126 }
76
127
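
Both init helpers above build a complete ZSTD_parameters value, with cParams taken either from the user-supplied CompressionParameters or from ZSTD_getCParams(), plus the frame parameters, and pass it to the *_initCStream_advanced call. A reduced sketch of the single-threaded path, with the reuse/reset branch and the Python error handling dropped (make_cstream is an illustrative name):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <string.h>
    #include <zstd.h>

    static ZSTD_CStream* make_cstream(int level, unsigned long long sourceSize,
                                      const void* dictData, size_t dictSize,
                                      int writeChecksum) {
        ZSTD_CStream* cstream = ZSTD_createCStream();
        ZSTD_parameters zparams;
        size_t zresult;

        if (!cstream) {
            return NULL;
        }

        memset(&zparams, 0, sizeof(zparams));
        zparams.cParams = ZSTD_getCParams(level, sourceSize, dictSize);
        zparams.fParams.checksumFlag = writeChecksum ? 1 : 0;

        zresult = ZSTD_initCStream_advanced(cstream, dictData, dictSize,
                                            zparams, sourceSize);
        if (ZSTD_isError(zresult)) {
            ZSTD_freeCStream(cstream);
            return NULL;
        }

        return cstream;
    }
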
77 PyDoc_STRVAR(ZstdCompressor__doc__,
128 PyDoc_STRVAR(ZstdCompressor__doc__,
78 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n"
129 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n"
79 "\n"
130 "\n"
80 "Create an object used to perform Zstandard compression.\n"
131 "Create an object used to perform Zstandard compression.\n"
81 "\n"
132 "\n"
82 "An instance can compress data various ways. Instances can be used multiple\n"
133 "An instance can compress data various ways. Instances can be used multiple\n"
83 "times. Each compression operation will use the compression parameters\n"
134 "times. Each compression operation will use the compression parameters\n"
84 "defined at construction time.\n"
135 "defined at construction time.\n"
85 "\n"
136 "\n"
86 "Compression can be configured via the following names arguments:\n"
137 "Compression can be configured via the following names arguments:\n"
87 "\n"
138 "\n"
88 "level\n"
139 "level\n"
89 " Integer compression level.\n"
140 " Integer compression level.\n"
90 "dict_data\n"
141 "dict_data\n"
91 " A ``ZstdCompressionDict`` to be used to compress with dictionary data.\n"
142 " A ``ZstdCompressionDict`` to be used to compress with dictionary data.\n"
92 "compression_params\n"
143 "compression_params\n"
93 " A ``CompressionParameters`` instance defining low-level compression"
144 " A ``CompressionParameters`` instance defining low-level compression"
94 " parameters. If defined, this will overwrite the ``level`` argument.\n"
145 " parameters. If defined, this will overwrite the ``level`` argument.\n"
95 "write_checksum\n"
146 "write_checksum\n"
96 " If True, a 4 byte content checksum will be written with the compressed\n"
147 " If True, a 4 byte content checksum will be written with the compressed\n"
97 " data, allowing the decompressor to perform content verification.\n"
148 " data, allowing the decompressor to perform content verification.\n"
98 "write_content_size\n"
149 "write_content_size\n"
99 " If True, the decompressed content size will be included in the header of\n"
150 " If True, the decompressed content size will be included in the header of\n"
100 " the compressed data. This data will only be written if the compressor\n"
151 " the compressed data. This data will only be written if the compressor\n"
101 " knows the size of the input data.\n"
152 " knows the size of the input data.\n"
102 "write_dict_id\n"
153 "write_dict_id\n"
103 " Determines whether the dictionary ID will be written into the compressed\n"
154 " Determines whether the dictionary ID will be written into the compressed\n"
104 " data. Defaults to True. Only adds content to the compressed data if\n"
155 " data. Defaults to True. Only adds content to the compressed data if\n"
105 " a dictionary is being used.\n"
156 " a dictionary is being used.\n"
157 "threads\n"
158 " Number of threads to use to compress data concurrently. When set,\n"
159 " compression operations are performed on multiple threads. The default\n"
160 " value (0) disables multi-threaded compression. A value of ``-1`` means to\n"
161 " set the number of threads to the number of detected logical CPUs.\n"
106 );
162 );
107
163
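
At the C level, the ``threads`` argument described above decides which kind of context is created: a regular ZSTD_CCtx when it is 0, or a ZSTDMT_CCtx from zstd's (at this version, experimental) multi-threading API when it is positive, with -1 first resolved to the detected CPU count. A hedged sketch of just that dispatch, using only calls that appear in this file (compressor_ctx and init_compressor_ctx are illustrative names; the ZSTDMT declarations are assumed to come from the bundled zstdmt_compress.h):

    #include <zstd.h>
    #include "zstdmt_compress.h"   /* assumed location of the ZSTDMT_* declarations */

    typedef struct {
        ZSTD_CCtx* cctx;       /* used when threads == 0 */
        ZSTDMT_CCtx* mtcctx;   /* used when threads > 0 */
    } compressor_ctx;

    /* Assumes a negative thread count was already replaced by cpu_count(). */
    static int init_compressor_ctx(compressor_ctx* c, int threads) {
        c->cctx = NULL;
        c->mtcctx = NULL;

        if (threads > 0) {
            c->mtcctx = ZSTDMT_createCCtx(threads);
            return c->mtcctx ? 0 : -1;
        }

        c->cctx = ZSTD_createCCtx();
        return c->cctx ? 0 : -1;
    }
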
108 static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
164 static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
109 static char* kwlist[] = {
165 static char* kwlist[] = {
110 "level",
166 "level",
111 "dict_data",
167 "dict_data",
112 "compression_params",
168 "compression_params",
113 "write_checksum",
169 "write_checksum",
114 "write_content_size",
170 "write_content_size",
115 "write_dict_id",
171 "write_dict_id",
172 "threads",
116 NULL
173 NULL
117 };
174 };
118
175
119 int level = 3;
176 int level = 3;
120 ZstdCompressionDict* dict = NULL;
177 ZstdCompressionDict* dict = NULL;
121 CompressionParametersObject* params = NULL;
178 CompressionParametersObject* params = NULL;
122 PyObject* writeChecksum = NULL;
179 PyObject* writeChecksum = NULL;
123 PyObject* writeContentSize = NULL;
180 PyObject* writeContentSize = NULL;
124 PyObject* writeDictID = NULL;
181 PyObject* writeDictID = NULL;
182 int threads = 0;
125
183
126 self->cctx = NULL;
184 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor",
127 self->dict = NULL;
128 self->cparams = NULL;
129 self->cdict = NULL;
130
131 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO:ZstdCompressor",
132 kwlist, &level, &ZstdCompressionDictType, &dict,
185 kwlist, &level, &ZstdCompressionDictType, &dict,
133 &CompressionParametersType, &params,
186 &CompressionParametersType, &params,
134 &writeChecksum, &writeContentSize, &writeDictID)) {
187 &writeChecksum, &writeContentSize, &writeDictID, &threads)) {
135 return -1;
188 return -1;
136 }
189 }
137
190
138 if (level < 1) {
191 if (level < 1) {
139 PyErr_SetString(PyExc_ValueError, "level must be greater than 0");
192 PyErr_SetString(PyExc_ValueError, "level must be greater than 0");
140 return -1;
193 return -1;
141 }
194 }
142
195
143 if (level > ZSTD_maxCLevel()) {
196 if (level > ZSTD_maxCLevel()) {
144 PyErr_Format(PyExc_ValueError, "level must be less than %d",
197 PyErr_Format(PyExc_ValueError, "level must be less than %d",
145 ZSTD_maxCLevel() + 1);
198 ZSTD_maxCLevel() + 1);
146 return -1;
199 return -1;
147 }
200 }
148
201
202 if (threads < 0) {
203 threads = cpu_count();
204 }
205
206 self->threads = threads;
207
149 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
208 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
150 overhead of each compression operation. */
209 overhead of each compression operation. */
151 self->cctx = ZSTD_createCCtx();
210 if (threads) {
152 if (!self->cctx) {
211 self->mtcctx = ZSTDMT_createCCtx(threads);
153 PyErr_NoMemory();
212 if (!self->mtcctx) {
154 return -1;
213 PyErr_NoMemory();
214 return -1;
215 }
216 }
217 else {
218 self->cctx = ZSTD_createCCtx();
219 if (!self->cctx) {
220 PyErr_NoMemory();
221 return -1;
222 }
155 }
223 }
156
224
157 self->compressionLevel = level;
225 self->compressionLevel = level;
158
226
159 if (dict) {
227 if (dict) {
160 self->dict = dict;
228 self->dict = dict;
161 Py_INCREF(dict);
229 Py_INCREF(dict);
162 }
230 }
163
231
164 if (params) {
232 if (params) {
165 self->cparams = params;
233 self->cparams = params;
166 Py_INCREF(params);
234 Py_INCREF(params);
167 }
235 }
168
236
169 memset(&self->fparams, 0, sizeof(self->fparams));
237 memset(&self->fparams, 0, sizeof(self->fparams));
170
238
171 if (writeChecksum && PyObject_IsTrue(writeChecksum)) {
239 if (writeChecksum && PyObject_IsTrue(writeChecksum)) {
172 self->fparams.checksumFlag = 1;
240 self->fparams.checksumFlag = 1;
173 }
241 }
174 if (writeContentSize && PyObject_IsTrue(writeContentSize)) {
242 if (writeContentSize && PyObject_IsTrue(writeContentSize)) {
175 self->fparams.contentSizeFlag = 1;
243 self->fparams.contentSizeFlag = 1;
176 }
244 }
177 if (writeDictID && PyObject_Not(writeDictID)) {
245 if (writeDictID && PyObject_Not(writeDictID)) {
178 self->fparams.noDictIDFlag = 1;
246 self->fparams.noDictIDFlag = 1;
179 }
247 }
180
248
181 return 0;
249 return 0;
182 }
250 }
183
251
184 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
252 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
253 if (self->cstream) {
254 ZSTD_freeCStream(self->cstream);
255 self->cstream = NULL;
256 }
257
185 Py_XDECREF(self->cparams);
258 Py_XDECREF(self->cparams);
186 Py_XDECREF(self->dict);
259 Py_XDECREF(self->dict);
187
260
188 if (self->cdict) {
261 if (self->cdict) {
189 ZSTD_freeCDict(self->cdict);
262 ZSTD_freeCDict(self->cdict);
190 self->cdict = NULL;
263 self->cdict = NULL;
191 }
264 }
192
265
193 if (self->cctx) {
266 if (self->cctx) {
194 ZSTD_freeCCtx(self->cctx);
267 ZSTD_freeCCtx(self->cctx);
195 self->cctx = NULL;
268 self->cctx = NULL;
196 }
269 }
197
270
271 if (self->mtcctx) {
272 ZSTDMT_freeCCtx(self->mtcctx);
273 self->mtcctx = NULL;
274 }
275
198 PyObject_Del(self);
276 PyObject_Del(self);
199 }
277 }
200
278
201 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__,
279 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__,
202 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n"
280 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n"
203 "compress data between streams\n"
281 "compress data between streams\n"
204 "\n"
282 "\n"
205 "Data will be read from ``ifh``, compressed, and written to ``ofh``.\n"
283 "Data will be read from ``ifh``, compressed, and written to ``ofh``.\n"
206 "``ifh`` must have a ``read(size)`` method. ``ofh`` must have a ``write(data)``\n"
284 "``ifh`` must have a ``read(size)`` method. ``ofh`` must have a ``write(data)``\n"
207 "method.\n"
285 "method.\n"
208 "\n"
286 "\n"
209 "An optional ``size`` argument specifies the size of the source stream.\n"
287 "An optional ``size`` argument specifies the size of the source stream.\n"
210 "If defined, compression parameters will be tuned based on the size.\n"
288 "If defined, compression parameters will be tuned based on the size.\n"
211 "\n"
289 "\n"
212 "Optional arguments ``read_size`` and ``write_size`` define the chunk sizes\n"
290 "Optional arguments ``read_size`` and ``write_size`` define the chunk sizes\n"
213 "of ``read()`` and ``write()`` operations, respectively. By default, they use\n"
291 "of ``read()`` and ``write()`` operations, respectively. By default, they use\n"
214 "the default compression stream input and output sizes, respectively.\n"
292 "the default compression stream input and output sizes, respectively.\n"
215 );
293 );
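
For reference, a minimal Python usage sketch of copy_stream() (illustrative only: it assumes the extension is importable as ``zstd``, matching the ``zstd.ZstdCompressor`` type name, and uses placeholder file names):

    import zstd

    cctx = zstd.ZstdCompressor(level=3)
    with open('input.bin', 'rb') as ifh, open('output.zst', 'wb') as ofh:
        bytes_read, bytes_written = cctx.copy_stream(ifh, ofh)

The two returned values correspond to the total bytes read and written, which the implementation below packs into a tuple.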
216
294
217 static PyObject* ZstdCompressor_copy_stream(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
295 static PyObject* ZstdCompressor_copy_stream(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
218 static char* kwlist[] = {
296 static char* kwlist[] = {
219 "ifh",
297 "ifh",
220 "ofh",
298 "ofh",
221 "size",
299 "size",
222 "read_size",
300 "read_size",
223 "write_size",
301 "write_size",
224 NULL
302 NULL
225 };
303 };
226
304
227 PyObject* source;
305 PyObject* source;
228 PyObject* dest;
306 PyObject* dest;
229 Py_ssize_t sourceSize = 0;
307 Py_ssize_t sourceSize = 0;
230 size_t inSize = ZSTD_CStreamInSize();
308 size_t inSize = ZSTD_CStreamInSize();
231 size_t outSize = ZSTD_CStreamOutSize();
309 size_t outSize = ZSTD_CStreamOutSize();
232 ZSTD_CStream* cstream;
233 ZSTD_inBuffer input;
310 ZSTD_inBuffer input;
234 ZSTD_outBuffer output;
311 ZSTD_outBuffer output;
235 Py_ssize_t totalRead = 0;
312 Py_ssize_t totalRead = 0;
236 Py_ssize_t totalWrite = 0;
313 Py_ssize_t totalWrite = 0;
237 char* readBuffer;
314 char* readBuffer;
238 Py_ssize_t readSize;
315 Py_ssize_t readSize;
239 PyObject* readResult;
316 PyObject* readResult;
240 PyObject* res = NULL;
317 PyObject* res = NULL;
241 size_t zresult;
318 size_t zresult;
242 PyObject* writeResult;
319 PyObject* writeResult;
243 PyObject* totalReadPy;
320 PyObject* totalReadPy;
244 PyObject* totalWritePy;
321 PyObject* totalWritePy;
245
322
246 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk:copy_stream", kwlist,
323 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk:copy_stream", kwlist,
247 &source, &dest, &sourceSize, &inSize, &outSize)) {
324 &source, &dest, &sourceSize, &inSize, &outSize)) {
248 return NULL;
325 return NULL;
249 }
326 }
250
327
251 if (!PyObject_HasAttrString(source, "read")) {
328 if (!PyObject_HasAttrString(source, "read")) {
252 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
329 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
253 return NULL;
330 return NULL;
254 }
331 }
255
332
256 if (!PyObject_HasAttrString(dest, "write")) {
333 if (!PyObject_HasAttrString(dest, "write")) {
257 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
334 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
258 return NULL;
335 return NULL;
259 }
336 }
260
337
261 /* Prevent free on uninitialized memory in finally. */
338 /* Prevent free on uninitialized memory in finally. */
262 output.dst = NULL;
339 output.dst = NULL;
263
340
264 cstream = CStream_from_ZstdCompressor(self, sourceSize);
341 if (self->mtcctx) {
265 if (!cstream) {
342 if (init_mtcstream(self, sourceSize)) {
266 res = NULL;
343 res = NULL;
267 goto finally;
344 goto finally;
345 }
346 }
347 else {
348 if (0 != init_cstream(self, sourceSize)) {
349 res = NULL;
350 goto finally;
351 }
268 }
352 }
269
353
270 output.dst = PyMem_Malloc(outSize);
354 output.dst = PyMem_Malloc(outSize);
271 if (!output.dst) {
355 if (!output.dst) {
272 PyErr_NoMemory();
356 PyErr_NoMemory();
273 res = NULL;
357 res = NULL;
274 goto finally;
358 goto finally;
275 }
359 }
276 output.size = outSize;
360 output.size = outSize;
277 output.pos = 0;
361 output.pos = 0;
278
362
279 while (1) {
363 while (1) {
280 /* Try to read from source stream. */
364 /* Try to read from source stream. */
281 readResult = PyObject_CallMethod(source, "read", "n", inSize);
365 readResult = PyObject_CallMethod(source, "read", "n", inSize);
282 if (!readResult) {
366 if (!readResult) {
283 PyErr_SetString(ZstdError, "could not read() from source");
367 PyErr_SetString(ZstdError, "could not read() from source");
284 goto finally;
368 goto finally;
285 }
369 }
286
370
287 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
371 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
288
372
289 /* If no data was read, we're at EOF. */
373 /* If no data was read, we're at EOF. */
290 if (0 == readSize) {
374 if (0 == readSize) {
291 break;
375 break;
292 }
376 }
293
377
294 totalRead += readSize;
378 totalRead += readSize;
295
379
296 /* Send data to compressor */
380 /* Send data to compressor */
297 input.src = readBuffer;
381 input.src = readBuffer;
298 input.size = readSize;
382 input.size = readSize;
299 input.pos = 0;
383 input.pos = 0;
300
384
301 while (input.pos < input.size) {
385 while (input.pos < input.size) {
302 Py_BEGIN_ALLOW_THREADS
386 Py_BEGIN_ALLOW_THREADS
303 zresult = ZSTD_compressStream(cstream, &output, &input);
387 if (self->mtcctx) {
388 zresult = ZSTDMT_compressStream(self->mtcctx, &output, &input);
389 }
390 else {
391 zresult = ZSTD_compressStream(self->cstream, &output, &input);
392 }
304 Py_END_ALLOW_THREADS
393 Py_END_ALLOW_THREADS
305
394
306 if (ZSTD_isError(zresult)) {
395 if (ZSTD_isError(zresult)) {
307 res = NULL;
396 res = NULL;
308 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
397 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
309 goto finally;
398 goto finally;
310 }
399 }
311
400
312 if (output.pos) {
401 if (output.pos) {
313 #if PY_MAJOR_VERSION >= 3
402 #if PY_MAJOR_VERSION >= 3
314 writeResult = PyObject_CallMethod(dest, "write", "y#",
403 writeResult = PyObject_CallMethod(dest, "write", "y#",
315 #else
404 #else
316 writeResult = PyObject_CallMethod(dest, "write", "s#",
405 writeResult = PyObject_CallMethod(dest, "write", "s#",
317 #endif
406 #endif
318 output.dst, output.pos);
407 output.dst, output.pos);
319 Py_XDECREF(writeResult);
408 Py_XDECREF(writeResult);
320 totalWrite += output.pos;
409 totalWrite += output.pos;
321 output.pos = 0;
410 output.pos = 0;
322 }
411 }
323 }
412 }
324 }
413 }
325
414
326 /* We've finished reading. Now flush the compressor stream. */
415 /* We've finished reading. Now flush the compressor stream. */
327 while (1) {
416 while (1) {
328 zresult = ZSTD_endStream(cstream, &output);
417 if (self->mtcctx) {
418 zresult = ZSTDMT_endStream(self->mtcctx, &output);
419 }
420 else {
421 zresult = ZSTD_endStream(self->cstream, &output);
422 }
329 if (ZSTD_isError(zresult)) {
423 if (ZSTD_isError(zresult)) {
330 PyErr_Format(ZstdError, "error ending compression stream: %s",
424 PyErr_Format(ZstdError, "error ending compression stream: %s",
331 ZSTD_getErrorName(zresult));
425 ZSTD_getErrorName(zresult));
332 res = NULL;
426 res = NULL;
333 goto finally;
427 goto finally;
334 }
428 }
335
429
336 if (output.pos) {
430 if (output.pos) {
337 #if PY_MAJOR_VERSION >= 3
431 #if PY_MAJOR_VERSION >= 3
338 writeResult = PyObject_CallMethod(dest, "write", "y#",
432 writeResult = PyObject_CallMethod(dest, "write", "y#",
339 #else
433 #else
340 writeResult = PyObject_CallMethod(dest, "write", "s#",
434 writeResult = PyObject_CallMethod(dest, "write", "s#",
341 #endif
435 #endif
342 output.dst, output.pos);
436 output.dst, output.pos);
343 totalWrite += output.pos;
437 totalWrite += output.pos;
344 Py_XDECREF(writeResult);
438 Py_XDECREF(writeResult);
345 output.pos = 0;
439 output.pos = 0;
346 }
440 }
347
441
348 if (!zresult) {
442 if (!zresult) {
349 break;
443 break;
350 }
444 }
351 }
445 }
352
446
353 ZSTD_freeCStream(cstream);
354 cstream = NULL;
355
356 totalReadPy = PyLong_FromSsize_t(totalRead);
447 totalReadPy = PyLong_FromSsize_t(totalRead);
357 totalWritePy = PyLong_FromSsize_t(totalWrite);
448 totalWritePy = PyLong_FromSsize_t(totalWrite);
358 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
449 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
359 Py_DecRef(totalReadPy);
450 Py_DECREF(totalReadPy);
360 Py_DecRef(totalWritePy);
451 Py_DECREF(totalWritePy);
361
452
362 finally:
453 finally:
363 if (output.dst) {
454 if (output.dst) {
364 PyMem_Free(output.dst);
455 PyMem_Free(output.dst);
365 }
456 }
366
457
367 if (cstream) {
368 ZSTD_freeCStream(cstream);
369 }
370
371 return res;
458 return res;
372 }
459 }
373
460
374 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
461 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
375 "compress(data, allow_empty=False)\n"
462 "compress(data, allow_empty=False)\n"
376 "\n"
463 "\n"
377 "Compress data in a single operation.\n"
464 "Compress data in a single operation.\n"
378 "\n"
465 "\n"
379 "This is the simplest mechanism to perform compression: simply pass in a\n"
466 "This is the simplest mechanism to perform compression: simply pass in a\n"
380 "value and get a compressed value back. It is also the mechanism most prone to abuse.\n"
467 "value and get a compressed value back. It is also the mechanism most prone to abuse.\n"
381 "The input and output values must fit in memory, so passing in very large\n"
468 "The input and output values must fit in memory, so passing in very large\n"
382 "values can result in excessive memory usage. For this reason, one of the\n"
469 "values can result in excessive memory usage. For this reason, one of the\n"
383 "streaming based APIs is preferred for larger values.\n"
470 "streaming based APIs is preferred for larger values.\n"
384 );
471 );
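
A minimal sketch of this one-shot API (illustrative; assumes the extension is importable as ``zstd``):

    import zstd

    cctx = zstd.ZstdCompressor(level=3)
    compressed = cctx.compress(b'data to compress')

Per the guard in the implementation below, compressing an empty input while content sizes are written requires passing ``allow_empty=True``.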
385
472
386 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
473 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
387 static char* kwlist[] = {
474 static char* kwlist[] = {
388 "data",
475 "data",
389 "allow_empty",
476 "allow_empty",
390 NULL
477 NULL
391 };
478 };
392
479
393 const char* source;
480 const char* source;
394 Py_ssize_t sourceSize;
481 Py_ssize_t sourceSize;
395 PyObject* allowEmpty = NULL;
482 PyObject* allowEmpty = NULL;
396 size_t destSize;
483 size_t destSize;
397 PyObject* output;
484 PyObject* output;
398 char* dest;
485 char* dest;
399 void* dictData = NULL;
486 void* dictData = NULL;
400 size_t dictSize = 0;
487 size_t dictSize = 0;
401 size_t zresult;
488 size_t zresult;
402 ZSTD_parameters zparams;
489 ZSTD_parameters zparams;
403
490
404 #if PY_MAJOR_VERSION >= 3
491 #if PY_MAJOR_VERSION >= 3
405 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O:compress",
492 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O:compress",
406 #else
493 #else
407 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O:compress",
494 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O:compress",
408 #endif
495 #endif
409 kwlist, &source, &sourceSize, &allowEmpty)) {
496 kwlist, &source, &sourceSize, &allowEmpty)) {
410 return NULL;
497 return NULL;
411 }
498 }
412
499
500 if (self->threads && self->dict) {
501 PyErr_SetString(ZstdError,
502 "compress() cannot be used with both dictionaries and multi-threaded compression");
503 return NULL;
504 }
505
506 if (self->threads && self->cparams) {
507 PyErr_SetString(ZstdError,
508 "compress() cannot be used with both compression parameters and multi-threaded compression");
509 return NULL;
510 }
511
413 /* Limitation in zstd C API doesn't let decompression side distinguish
512 /* Limitation in zstd C API doesn't let decompression side distinguish
414 between content size of 0 and unknown content size. This can make round
513 between content size of 0 and unknown content size. This can make round
415 tripping via Python difficult. Until this is fixed, require a flag
514 tripping via Python difficult. Until this is fixed, require a flag
416 to fire the footgun.
515 to fire the footgun.
417 https://github.com/indygreg/python-zstandard/issues/11 */
516 https://github.com/indygreg/python-zstandard/issues/11 */
418 if (0 == sourceSize && self->fparams.contentSizeFlag
517 if (0 == sourceSize && self->fparams.contentSizeFlag
419 && (!allowEmpty || PyObject_Not(allowEmpty))) {
518 && (!allowEmpty || PyObject_Not(allowEmpty))) {
420 PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes");
519 PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes");
421 return NULL;
520 return NULL;
422 }
521 }
423
522
424 destSize = ZSTD_compressBound(sourceSize);
523 destSize = ZSTD_compressBound(sourceSize);
425 output = PyBytes_FromStringAndSize(NULL, destSize);
524 output = PyBytes_FromStringAndSize(NULL, destSize);
426 if (!output) {
525 if (!output) {
427 return NULL;
526 return NULL;
428 }
527 }
429
528
430 dest = PyBytes_AsString(output);
529 dest = PyBytes_AsString(output);
431
530
432 if (self->dict) {
531 if (self->dict) {
433 dictData = self->dict->dictData;
532 dictData = self->dict->dictData;
434 dictSize = self->dict->dictSize;
533 dictSize = self->dict->dictSize;
435 }
534 }
436
535
437 memset(&zparams, 0, sizeof(zparams));
536 memset(&zparams, 0, sizeof(zparams));
438 if (!self->cparams) {
537 if (!self->cparams) {
439 zparams.cParams = ZSTD_getCParams(self->compressionLevel, sourceSize, dictSize);
538 zparams.cParams = ZSTD_getCParams(self->compressionLevel, sourceSize, dictSize);
440 }
539 }
441 else {
540 else {
442 ztopy_compression_parameters(self->cparams, &zparams.cParams);
541 ztopy_compression_parameters(self->cparams, &zparams.cParams);
443 /* Do NOT call ZSTD_adjustCParams() here because the compression params
542 /* Do NOT call ZSTD_adjustCParams() here because the compression params
444 come from the user. */
543 come from the user. */
445 }
544 }
446
545
447 zparams.fParams = self->fparams;
546 zparams.fParams = self->fparams;
448
547
449 /* The raw dict data has to be processed before it can be used. Since this
548 /* The raw dict data has to be processed before it can be used. Since this
450 adds overhead - especially if multiple dictionary compression operations
549 adds overhead - especially if multiple dictionary compression operations
451 are performed on the same ZstdCompressor instance - we create a
550 are performed on the same ZstdCompressor instance - we create a
452 ZSTD_CDict once and reuse it for all operations.
551 ZSTD_CDict once and reuse it for all operations.
453
552
454 Note: the compression parameters used for the first invocation (possibly
553 Note: the compression parameters used for the first invocation (possibly
455 derived from the source size) will be reused on all subsequent invocations.
554 derived from the source size) will be reused on all subsequent invocations.
456 https://github.com/facebook/zstd/issues/358 contains more info. We could
555 https://github.com/facebook/zstd/issues/358 contains more info. We could
457 potentially add an argument somewhere to control this behavior.
556 potentially add an argument somewhere to control this behavior.
458 */
557 */
459 if (dictData && !self->cdict) {
558 if (0 != populate_cdict(self, &zparams)) {
460 if (populate_cdict(self, dictData, dictSize, &zparams)) {
559 Py_DECREF(output);
461 Py_DECREF(output);
560 return NULL;
462 return NULL;
463 }
464 }
561 }
465
562
466 Py_BEGIN_ALLOW_THREADS
563 Py_BEGIN_ALLOW_THREADS
467 /* By avoiding ZSTD_compress(), we don't necessarily write out content
564 if (self->mtcctx) {
468 size. This means the argument to ZstdCompressor to control frame
565 zresult = ZSTDMT_compressCCtx(self->mtcctx, dest, destSize,
469 parameters is honored. */
566 source, sourceSize, self->compressionLevel);
470 if (self->cdict) {
471 zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize,
472 source, sourceSize, self->cdict);
473 }
567 }
474 else {
568 else {
475 zresult = ZSTD_compress_advanced(self->cctx, dest, destSize,
569 /* By avoiding ZSTD_compress(), we don't necessarily write out content
476 source, sourceSize, dictData, dictSize, zparams);
570 size. This means the argument to ZstdCompressor to control frame
571 parameters is honored. */
572 if (self->cdict) {
573 zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize,
574 source, sourceSize, self->cdict);
575 }
576 else {
577 zresult = ZSTD_compress_advanced(self->cctx, dest, destSize,
578 source, sourceSize, dictData, dictSize, zparams);
579 }
477 }
580 }
478 Py_END_ALLOW_THREADS
581 Py_END_ALLOW_THREADS
479
582
480 if (ZSTD_isError(zresult)) {
583 if (ZSTD_isError(zresult)) {
481 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
584 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
482 Py_CLEAR(output);
585 Py_CLEAR(output);
483 return NULL;
586 return NULL;
484 }
587 }
485 else {
588 else {
486 Py_SIZE(output) = zresult;
589 Py_SIZE(output) = zresult;
487 }
590 }
488
591
489 return output;
592 return output;
490 }
593 }
491
594
492 PyDoc_STRVAR(ZstdCompressionObj__doc__,
595 PyDoc_STRVAR(ZstdCompressionObj__doc__,
493 "compressobj()\n"
596 "compressobj()\n"
494 "\n"
597 "\n"
495 "Return an object exposing ``compress(data)`` and ``flush()`` methods.\n"
598 "Return an object exposing ``compress(data)`` and ``flush()`` methods.\n"
496 "\n"
599 "\n"
497 "The returned object exposes an API similar to ``zlib.compressobj`` and\n"
600 "The returned object exposes an API similar to ``zlib.compressobj`` and\n"
498 "``bz2.BZ2Compressor`` so that callers can swap in the zstd compressor\n"
601 "``bz2.BZ2Compressor`` so that callers can swap in the zstd compressor\n"
499 "without changing how compression is performed.\n"
602 "without changing how compression is performed.\n"
500 );
603 );
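
A sketch of the zlib-style interface described above (illustrative; assumes the extension is importable as ``zstd``):

    import zstd

    cobj = zstd.ZstdCompressor().compressobj()
    compressed = cobj.compress(b'chunk 0') + cobj.compress(b'chunk 1') + cobj.flush()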
501
604
502 static ZstdCompressionObj* ZstdCompressor_compressobj(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
605 static ZstdCompressionObj* ZstdCompressor_compressobj(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
503 static char* kwlist[] = {
606 static char* kwlist[] = {
504 "size",
607 "size",
505 NULL
608 NULL
506 };
609 };
507
610
508 Py_ssize_t inSize = 0;
611 Py_ssize_t inSize = 0;
509 size_t outSize = ZSTD_CStreamOutSize();
612 size_t outSize = ZSTD_CStreamOutSize();
510 ZstdCompressionObj* result = PyObject_New(ZstdCompressionObj, &ZstdCompressionObjType);
613 ZstdCompressionObj* result = NULL;
511 if (!result) {
512 return NULL;
513 }
514
614
515 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:compressobj", kwlist, &inSize)) {
615 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:compressobj", kwlist, &inSize)) {
516 return NULL;
616 return NULL;
517 }
617 }
518
618
519 result->cstream = CStream_from_ZstdCompressor(self, inSize);
619 result = (ZstdCompressionObj*)PyObject_CallObject((PyObject*)&ZstdCompressionObjType, NULL);
520 if (!result->cstream) {
620 if (!result) {
521 Py_DECREF(result);
522 return NULL;
621 return NULL;
523 }
622 }
524
623
624 if (self->mtcctx) {
625 if (init_mtcstream(self, inSize)) {
626 Py_DECREF(result);
627 return NULL;
628 }
629 }
630 else {
631 if (0 != init_cstream(self, inSize)) {
632 Py_DECREF(result);
633 return NULL;
634 }
635 }
636
525 result->output.dst = PyMem_Malloc(outSize);
637 result->output.dst = PyMem_Malloc(outSize);
526 if (!result->output.dst) {
638 if (!result->output.dst) {
527 PyErr_NoMemory();
639 PyErr_NoMemory();
528 Py_DECREF(result);
640 Py_DECREF(result);
529 return NULL;
641 return NULL;
530 }
642 }
531 result->output.size = outSize;
643 result->output.size = outSize;
532 result->output.pos = 0;
533
534 result->compressor = self;
644 result->compressor = self;
535 Py_INCREF(result->compressor);
645 Py_INCREF(result->compressor);
536
646
537 result->finished = 0;
538
539 return result;
647 return result;
540 }
648 }
541
649
542 PyDoc_STRVAR(ZstdCompressor_read_from__doc__,
650 PyDoc_STRVAR(ZstdCompressor_read_from__doc__,
543 "read_from(reader, [size=0, read_size=default, write_size=default])\n"
651 "read_from(reader, [size=0, read_size=default, write_size=default])\n"
544 "Read uncompressed data from a reader and return an iterator\n"
652 "Read uncompressed data from a reader and return an iterator\n"
545 "\n"
653 "\n"
546 "Returns an iterator of compressed data produced from reading from ``reader``.\n"
654 "Returns an iterator of compressed data produced from reading from ``reader``.\n"
547 "\n"
655 "\n"
548 "Uncompressed data will be obtained from ``reader`` by calling the\n"
656 "Uncompressed data will be obtained from ``reader`` by calling the\n"
549 "``read(size)`` method. The source data will be streamed into a\n"
657 "``read(size)`` method. The source data will be streamed into a\n"
550 "compressor. As compressed data is available, it will be exposed to the\n"
658 "compressor. As compressed data is available, it will be exposed to the\n"
551 "iterator.\n"
659 "iterator.\n"
552 "\n"
660 "\n"
553 "Data is read from the source in chunks of ``read_size``. Compressed chunks\n"
661 "Data is read from the source in chunks of ``read_size``. Compressed chunks\n"
554 "are at most ``write_size`` bytes. Both values default to the zstd input and\n"
662 "are at most ``write_size`` bytes. Both values default to the zstd input and\n"
555 "output defaults, respectively.\n"
663 "output defaults, respectively.\n"
556 "\n"
664 "\n"
557 "The caller is partially in control of how fast data is fed into the\n"
665 "The caller is partially in control of how fast data is fed into the\n"
558 "compressor by how it consumes the returned iterator. The compressor will\n"
666 "compressor by how it consumes the returned iterator. The compressor will\n"
559 "not consume from the reader unless the caller consumes from the iterator.\n"
667 "not consume from the reader unless the caller consumes from the iterator.\n"
560 );
668 );
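
A sketch of the iterator-based API (illustrative; assumes the extension is importable as ``zstd`` and uses a placeholder input file):

    import zstd

    cctx = zstd.ZstdCompressor()
    with open('input.bin', 'rb') as fh:
        for chunk in cctx.read_from(fh):
            pass  # each chunk is a bytes object of compressed data, at most write_size bytes long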
561
669
562 static ZstdCompressorIterator* ZstdCompressor_read_from(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
670 static ZstdCompressorIterator* ZstdCompressor_read_from(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
563 static char* kwlist[] = {
671 static char* kwlist[] = {
564 "reader",
672 "reader",
565 "size",
673 "size",
566 "read_size",
674 "read_size",
567 "write_size",
675 "write_size",
568 NULL
676 NULL
569 };
677 };
570
678
571 PyObject* reader;
679 PyObject* reader;
572 Py_ssize_t sourceSize = 0;
680 Py_ssize_t sourceSize = 0;
573 size_t inSize = ZSTD_CStreamInSize();
681 size_t inSize = ZSTD_CStreamInSize();
574 size_t outSize = ZSTD_CStreamOutSize();
682 size_t outSize = ZSTD_CStreamOutSize();
575 ZstdCompressorIterator* result;
683 ZstdCompressorIterator* result;
576
684
577 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk:read_from", kwlist,
685 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk:read_from", kwlist,
578 &reader, &sourceSize, &inSize, &outSize)) {
686 &reader, &sourceSize, &inSize, &outSize)) {
579 return NULL;
687 return NULL;
580 }
688 }
581
689
582 result = PyObject_New(ZstdCompressorIterator, &ZstdCompressorIteratorType);
690 result = (ZstdCompressorIterator*)PyObject_CallObject((PyObject*)&ZstdCompressorIteratorType, NULL);
583 if (!result) {
691 if (!result) {
584 return NULL;
692 return NULL;
585 }
693 }
586
587 result->compressor = NULL;
588 result->reader = NULL;
589 result->buffer = NULL;
590 result->cstream = NULL;
591 result->input.src = NULL;
592 result->output.dst = NULL;
593 result->readResult = NULL;
594
595 if (PyObject_HasAttrString(reader, "read")) {
694 if (PyObject_HasAttrString(reader, "read")) {
596 result->reader = reader;
695 result->reader = reader;
597 Py_INCREF(result->reader);
696 Py_INCREF(result->reader);
598 }
697 }
599 else if (1 == PyObject_CheckBuffer(reader)) {
698 else if (1 == PyObject_CheckBuffer(reader)) {
600 result->buffer = PyMem_Malloc(sizeof(Py_buffer));
699 result->buffer = PyMem_Malloc(sizeof(Py_buffer));
601 if (!result->buffer) {
700 if (!result->buffer) {
602 goto except;
701 goto except;
603 }
702 }
604
703
605 memset(result->buffer, 0, sizeof(Py_buffer));
704 memset(result->buffer, 0, sizeof(Py_buffer));
606
705
607 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
706 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
608 goto except;
707 goto except;
609 }
708 }
610
709
611 result->bufferOffset = 0;
612 sourceSize = result->buffer->len;
710 sourceSize = result->buffer->len;
613 }
711 }
614 else {
712 else {
615 PyErr_SetString(PyExc_ValueError,
713 PyErr_SetString(PyExc_ValueError,
616 "must pass an object with a read() method or one that conforms to the buffer protocol");
714 "must pass an object with a read() method or one that conforms to the buffer protocol");
617 goto except;
715 goto except;
618 }
716 }
619
717
620 result->compressor = self;
718 result->compressor = self;
621 Py_INCREF(result->compressor);
719 Py_INCREF(result->compressor);
622
720
623 result->sourceSize = sourceSize;
721 result->sourceSize = sourceSize;
624 result->cstream = CStream_from_ZstdCompressor(self, sourceSize);
722
625 if (!result->cstream) {
723 if (self->mtcctx) {
626 goto except;
724 if (init_mtcstream(self, sourceSize)) {
725 goto except;
726 }
727 }
728 else {
729 if (0 != init_cstream(self, sourceSize)) {
730 goto except;
731 }
627 }
732 }
628
733
629 result->inSize = inSize;
734 result->inSize = inSize;
630 result->outSize = outSize;
735 result->outSize = outSize;
631
736
632 result->output.dst = PyMem_Malloc(outSize);
737 result->output.dst = PyMem_Malloc(outSize);
633 if (!result->output.dst) {
738 if (!result->output.dst) {
634 PyErr_NoMemory();
739 PyErr_NoMemory();
635 goto except;
740 goto except;
636 }
741 }
637 result->output.size = outSize;
742 result->output.size = outSize;
638 result->output.pos = 0;
639
640 result->input.src = NULL;
641 result->input.size = 0;
642 result->input.pos = 0;
643
644 result->finishedInput = 0;
645 result->finishedOutput = 0;
646
743
647 goto finally;
744 goto finally;
648
745
649 except:
746 except:
650 if (result->cstream) {
747 Py_XDECREF(result->compressor);
651 ZSTD_freeCStream(result->cstream);
748 Py_XDECREF(result->reader);
652 result->cstream = NULL;
653 }
654
655 Py_DecRef((PyObject*)result->compressor);
656 Py_DecRef(result->reader);
657
658 Py_DECREF(result);
749 Py_DECREF(result);
659 result = NULL;
750 result = NULL;
660
751
661 finally:
752 finally:
662 return result;
753 return result;
663 }
754 }
664
755
665 PyDoc_STRVAR(ZstdCompressor_write_to___doc__,
756 PyDoc_STRVAR(ZstdCompressor_write_to___doc__,
666 "Create a context manager to write compressed data to an object.\n"
757 "Create a context manager to write compressed data to an object.\n"
667 "\n"
758 "\n"
668 "The passed object must have a ``write()`` method.\n"
759 "The passed object must have a ``write()`` method.\n"
669 "\n"
760 "\n"
670 "The caller feeds input data to the object by calling ``write(data)``.\n"
761 "The caller feeds input data to the object by calling ``write(data)``.\n"
671 "Compressed data is written to the argument given to this function.\n"
762 "Compressed data is written to the argument given to this function.\n"
672 "\n"
763 "\n"
673 "The function takes an optional ``size`` argument indicating the total size\n"
764 "The function takes an optional ``size`` argument indicating the total size\n"
674 "of the eventual input. If specified, the size will influence compression\n"
765 "of the eventual input. If specified, the size will influence compression\n"
675 "parameter tuning and could result in the size being written into the\n"
766 "parameter tuning and could result in the size being written into the\n"
676 "header of the compressed data.\n"
767 "header of the compressed data.\n"
677 "\n"
768 "\n"
678 "An optional ``write_size`` argument is also accepted. It defines the maximum\n"
769 "An optional ``write_size`` argument is also accepted. It defines the maximum\n"
679 "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n"
770 "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n"
680 "for a compressor output stream.\n"
771 "for a compressor output stream.\n"
681 );
772 );
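
A sketch of the context manager API (illustrative; assumes the extension is importable as ``zstd``; the returned writer object exposes a ``write()`` method):

    import zstd

    cctx = zstd.ZstdCompressor()
    with open('output.zst', 'wb') as fh:
        with cctx.write_to(fh) as compressor:
            compressor.write(b'chunk 0')
            compressor.write(b'chunk 1')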
682
773
683 static ZstdCompressionWriter* ZstdCompressor_write_to(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
774 static ZstdCompressionWriter* ZstdCompressor_write_to(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
684 static char* kwlist[] = {
775 static char* kwlist[] = {
685 "writer",
776 "writer",
686 "size",
777 "size",
687 "write_size",
778 "write_size",
688 NULL
779 NULL
689 };
780 };
690
781
691 PyObject* writer;
782 PyObject* writer;
692 ZstdCompressionWriter* result;
783 ZstdCompressionWriter* result;
693 Py_ssize_t sourceSize = 0;
784 Py_ssize_t sourceSize = 0;
694 size_t outSize = ZSTD_CStreamOutSize();
785 size_t outSize = ZSTD_CStreamOutSize();
695
786
696 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk:write_to", kwlist,
787 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk:write_to", kwlist,
697 &writer, &sourceSize, &outSize)) {
788 &writer, &sourceSize, &outSize)) {
698 return NULL;
789 return NULL;
699 }
790 }
700
791
701 if (!PyObject_HasAttrString(writer, "write")) {
792 if (!PyObject_HasAttrString(writer, "write")) {
702 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
793 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
703 return NULL;
794 return NULL;
704 }
795 }
705
796
706 result = PyObject_New(ZstdCompressionWriter, &ZstdCompressionWriterType);
797 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
707 if (!result) {
798 if (!result) {
708 return NULL;
799 return NULL;
709 }
800 }
710
801
711 result->compressor = self;
802 result->compressor = self;
712 Py_INCREF(result->compressor);
803 Py_INCREF(result->compressor);
713
804
714 result->writer = writer;
805 result->writer = writer;
715 Py_INCREF(result->writer);
806 Py_INCREF(result->writer);
716
807
717 result->sourceSize = sourceSize;
808 result->sourceSize = sourceSize;
718
719 result->outSize = outSize;
809 result->outSize = outSize;
720
810
721 result->entered = 0;
811 return result;
722 result->cstream = NULL;
812 }
813
814 typedef struct {
815 void* sourceData;
816 size_t sourceSize;
817 } DataSource;
818
819 typedef struct {
820 DataSource* sources;
821 Py_ssize_t sourcesSize;
822 unsigned long long totalSourceSize;
823 } DataSources;
824
825 typedef struct {
826 void* dest;
827 Py_ssize_t destSize;
828 BufferSegment* segments;
829 Py_ssize_t segmentsSize;
830 } DestBuffer;
831
832 typedef enum {
833 WorkerError_none = 0,
834 WorkerError_zstd = 1,
835 WorkerError_no_memory = 2,
836 } WorkerError;
837
838 /**
839 * Holds state for an individual worker performing multi_compress_to_buffer work.
840 */
841 typedef struct {
842 /* Used for compression. */
843 ZSTD_CCtx* cctx;
844 ZSTD_CDict* cdict;
845 int cLevel;
846 CompressionParametersObject* cParams;
847 ZSTD_frameParameters fParams;
848
849 /* What to compress. */
850 DataSource* sources;
851 Py_ssize_t sourcesSize;
852 Py_ssize_t startOffset;
853 Py_ssize_t endOffset;
854 unsigned long long totalSourceSize;
855
856 /* Result storage. */
857 DestBuffer* destBuffers;
858 Py_ssize_t destCount;
859
860 /* Error tracking. */
861 WorkerError error;
862 size_t zresult;
863 Py_ssize_t errorOffset;
864 } WorkerState;
865
866 static void compress_worker(WorkerState* state) {
867 Py_ssize_t inputOffset = state->startOffset;
868 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
869 Py_ssize_t currentBufferStartOffset = state->startOffset;
870 size_t zresult;
871 ZSTD_parameters zparams;
872 void* newDest;
873 size_t allocationSize;
874 size_t boundSize;
875 Py_ssize_t destOffset = 0;
876 DataSource* sources = state->sources;
877 DestBuffer* destBuffer;
878
879 assert(!state->destBuffers);
880 assert(0 == state->destCount);
881
882 if (state->cParams) {
883 ztopy_compression_parameters(state->cParams, &zparams.cParams);
884 }
885
886 zparams.fParams = state->fParams;
887
888 /*
889 * The total size of the compressed data is unknown until we actually
890 * compress data. That means we can't pre-allocate the exact size we need.
891 *
892 * There is a cost to every allocation and reallocation. So, it is in our
893 * interest to minimize the number of allocations.
894 *
895 * There is also a cost to too few allocations. If allocations are too
896 * large they may fail. If buffers are shared and all inputs become
897 * irrelevant at different lifetimes, then a reference to one segment
898 * in the buffer will keep the entire buffer alive. This leads to excessive
899 * memory usage.
900 *
901 * Our current strategy is to assume a compression ratio of 16:1 and
902 * allocate buffers of that size, rounded up to the nearest power of 2
903 * (because computers like round numbers). That ratio is greater than what
904 * most inputs achieve. This is by design: we don't want to over-allocate.
905 * But we don't want to under-allocate and lead to too many buffers either.
906 */
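
A small Python sketch of the sizing heuristic described above (illustrative only; ``roundpow2`` here is a local stand-in for the C helper of the same name, and ``first_item_bound`` stands in for ZSTD_compressBound() of the first input):

    def roundpow2(n):
        # smallest power of two >= n
        p = 1
        while p < n:
            p *= 2
        return p

    def initial_allocation_size(total_source_size, first_item_bound):
        size = roundpow2(total_source_size >> 4)  # assume a 16:1 compression ratio
        if first_item_bound > size:               # never below the worst case for the first item
            size = roundpow2(first_item_bound)
        return size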
907
908 state->destCount = 1;
909
910 state->destBuffers = calloc(1, sizeof(DestBuffer));
911 if (NULL == state->destBuffers) {
912 state->error = WorkerError_no_memory;
913 return;
914 }
915
916 destBuffer = &state->destBuffers[state->destCount - 1];
917
918 /*
919 * Rather than track bounds and grow the segments buffer, allocate space
920 * to hold remaining items then truncate when we're done with it.
921 */
922 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
923 if (NULL == destBuffer->segments) {
924 state->error = WorkerError_no_memory;
925 return;
926 }
927
928 destBuffer->segmentsSize = remainingItems;
929
930 allocationSize = roundpow2(state->totalSourceSize >> 4);
931
932 /* If the maximum size of the output is larger than that, round up. */
933 boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize);
934
935 if (boundSize > allocationSize) {
936 allocationSize = roundpow2(boundSize);
937 }
938
939 destBuffer->dest = malloc(allocationSize);
940 if (NULL == destBuffer->dest) {
941 state->error = WorkerError_no_memory;
942 return;
943 }
944
945 destBuffer->destSize = allocationSize;
946
947 for (inputOffset = state->startOffset; inputOffset <= state->endOffset; inputOffset++) {
948 void* source = sources[inputOffset].sourceData;
949 size_t sourceSize = sources[inputOffset].sourceSize;
950 size_t destAvailable;
951 void* dest;
952
953 destAvailable = destBuffer->destSize - destOffset;
954 boundSize = ZSTD_compressBound(sourceSize);
955
956 /*
957 * Not enough space in current buffer to hold largest compressed output.
958 * So allocate and switch to a new output buffer.
959 */
960 if (boundSize > destAvailable) {
961 /*
962 * The downsizing of the existing buffer is optional. It should be cheap
963 * (unlike growing). So we just do it.
964 */
965 if (destAvailable) {
966 newDest = realloc(destBuffer->dest, destOffset);
967 if (NULL == newDest) {
968 state->error = WorkerError_no_memory;
969 return;
970 }
971
972 destBuffer->dest = newDest;
973 destBuffer->destSize = destOffset;
974 }
975
976 /* Truncate segments buffer. */
977 newDest = realloc(destBuffer->segments,
978 (inputOffset - currentBufferStartOffset + 1) * sizeof(BufferSegment));
979 if (NULL == newDest) {
980 state->error = WorkerError_no_memory;
981 return;
982 }
983
984 destBuffer->segments = newDest;
985 destBuffer->segmentsSize = inputOffset - currentBufferStartOffset;
986
987 /* Grow space for new struct. */
988 /* TODO consider over-allocating so we don't do this every time. */
989 newDest = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
990 if (NULL == newDest) {
991 state->error = WorkerError_no_memory;
992 return;
993 }
994
995 state->destBuffers = newDest;
996 state->destCount++;
997
998 destBuffer = &state->destBuffers[state->destCount - 1];
999
1000 /* Don't take any chances with non-NULL pointers. */
1001 memset(destBuffer, 0, sizeof(DestBuffer));
1002
1003 /**
1004 * We could dynamically update allocation size based on work done so far.
1005 * For now, keep it simple.
1006 */
1007 allocationSize = roundpow2(state->totalSourceSize >> 4);
1008
1009 if (boundSize > allocationSize) {
1010 allocationSize = roundpow2(boundSize);
1011 }
1012
1013 destBuffer->dest = malloc(allocationSize);
1014 if (NULL == destBuffer->dest) {
1015 state->error = WorkerError_no_memory;
1016 return;
1017 }
1018
1019 destBuffer->destSize = allocationSize;
1020 destAvailable = allocationSize;
1021 destOffset = 0;
1022
1023 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1024 if (NULL == destBuffer->segments) {
1025 state->error = WorkerError_no_memory;
1026 return;
1027 }
1028
1029 destBuffer->segmentsSize = remainingItems;
1030 currentBufferStartOffset = inputOffset;
1031 }
1032
1033 dest = (char*)destBuffer->dest + destOffset;
1034
1035 if (state->cdict) {
1036 zresult = ZSTD_compress_usingCDict(state->cctx, dest, destAvailable,
1037 source, sourceSize, state->cdict);
1038 }
1039 else {
1040 if (!state->cParams) {
1041 zparams.cParams = ZSTD_getCParams(state->cLevel, sourceSize, 0);
1042 }
1043
1044 zresult = ZSTD_compress_advanced(state->cctx, dest, destAvailable,
1045 source, sourceSize, NULL, 0, zparams);
1046 }
1047
1048 if (ZSTD_isError(zresult)) {
1049 state->error = WorkerError_zstd;
1050 state->zresult = zresult;
1051 state->errorOffset = inputOffset;
1052 break;
1053 }
1054
1055 destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset;
1056 destBuffer->segments[inputOffset - currentBufferStartOffset].length = zresult;
1057
1058 destOffset += zresult;
1059 remainingItems--;
1060 }
1061
1062 if (destBuffer->destSize > destOffset) {
1063 newDest = realloc(destBuffer->dest, destOffset);
1064 if (NULL == newDest) {
1065 state->error = WorkerError_no_memory;
1066 return;
1067 }
1068
1069 destBuffer->dest = newDest;
1070 destBuffer->destSize = destOffset;
1071 }
1072 }
1073
1074 ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor,
1075 DataSources* sources, unsigned int threadCount) {
1076 ZSTD_parameters zparams;
1077 unsigned long long bytesPerWorker;
1078 POOL_ctx* pool = NULL;
1079 WorkerState* workerStates = NULL;
1080 Py_ssize_t i;
1081 unsigned long long workerBytes = 0;
1082 Py_ssize_t workerStartOffset = 0;
1083 size_t currentThread = 0;
1084 int errored = 0;
1085 Py_ssize_t segmentsCount = 0;
1086 Py_ssize_t segmentIndex;
1087 PyObject* segmentsArg = NULL;
1088 ZstdBufferWithSegments* buffer;
1089 ZstdBufferWithSegmentsCollection* result = NULL;
1090
1091 assert(sources->sourcesSize > 0);
1092 assert(sources->totalSourceSize > 0);
1093 assert(threadCount >= 1);
1094
1095 /* More threads than inputs makes no sense. */
1096 threadCount = sources->sourcesSize < threadCount ? (unsigned int)sources->sourcesSize
1097 : threadCount;
1098
1099 /* TODO lower thread count when input size is too small and threads would add
1100 overhead. */
1101
1102 /*
1103 * When dictionaries are used, parameters are derived from the size of the
1104 * first element.
1105 *
1106 * TODO come up with a better mechanism.
1107 */
1108 memset(&zparams, 0, sizeof(zparams));
1109 if (compressor->cparams) {
1110 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
1111 }
1112 else {
1113 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel,
1114 sources->sources[0].sourceSize,
1115 compressor->dict ? compressor->dict->dictSize : 0);
1116 }
1117
1118 zparams.fParams = compressor->fparams;
1119
1120 if (0 != populate_cdict(compressor, &zparams)) {
1121 return NULL;
1122 }
1123
1124 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1125 if (NULL == workerStates) {
1126 PyErr_NoMemory();
1127 goto finally;
1128 }
1129
1130 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1131
1132 if (threadCount > 1) {
1133 pool = POOL_create(threadCount, 1);
1134 if (NULL == pool) {
1135 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1136 goto finally;
1137 }
1138 }
1139
1140 bytesPerWorker = sources->totalSourceSize / threadCount;
1141
1142 for (i = 0; i < threadCount; i++) {
1143 workerStates[i].cctx = ZSTD_createCCtx();
1144 if (!workerStates[i].cctx) {
1145 PyErr_NoMemory();
1146 goto finally;
1147 }
1148
1149 workerStates[i].cdict = compressor->cdict;
1150 workerStates[i].cLevel = compressor->compressionLevel;
1151 workerStates[i].cParams = compressor->cparams;
1152 workerStates[i].fParams = compressor->fparams;
1153
1154 workerStates[i].sources = sources->sources;
1155 workerStates[i].sourcesSize = sources->sourcesSize;
1156 }
1157
1158 Py_BEGIN_ALLOW_THREADS
1159 for (i = 0; i < sources->sourcesSize; i++) {
1160 workerBytes += sources->sources[i].sourceSize;
1161
1162 /*
1163 * The last worker/thread needs to handle all remaining work. Don't
1164 * trigger it prematurely. Defer to the block outside of the loop
1165 * to run the last worker/thread. But do still process this loop
1166 * so workerBytes is correct.
1167 */
1168 if (currentThread == threadCount - 1) {
1169 continue;
1170 }
1171
1172 if (workerBytes >= bytesPerWorker) {
1173 assert(currentThread < threadCount);
1174 workerStates[currentThread].totalSourceSize = workerBytes;
1175 workerStates[currentThread].startOffset = workerStartOffset;
1176 workerStates[currentThread].endOffset = i;
1177
1178 if (threadCount > 1) {
1179 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1180 }
1181 else {
1182 compress_worker(&workerStates[currentThread]);
1183 }
1184
1185 currentThread++;
1186 workerStartOffset = i + 1;
1187 workerBytes = 0;
1188 }
1189 }
1190
1191 if (workerBytes) {
1192 assert(currentThread < threadCount);
1193 workerStates[currentThread].totalSourceSize = workerBytes;
1194 workerStates[currentThread].startOffset = workerStartOffset;
1195 workerStates[currentThread].endOffset = sources->sourcesSize - 1;
1196
1197 if (threadCount > 1) {
1198 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1199 }
1200 else {
1201 compress_worker(&workerStates[currentThread]);
1202 }
1203 }
1204
1205 if (threadCount > 1) {
1206 POOL_free(pool);
1207 pool = NULL;
1208 }
1209
1210 Py_END_ALLOW_THREADS
1211
1212 for (i = 0; i < threadCount; i++) {
1213 switch (workerStates[i].error) {
1214 case WorkerError_no_memory:
1215 PyErr_NoMemory();
1216 errored = 1;
1217 break;
1218
1219 case WorkerError_zstd:
1220 PyErr_Format(ZstdError, "error compressing item %zd: %s",
1221 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1222 errored = 1;
1223 break;
1224 default:
1225 ;
1226 }
1227
1228 if (errored) {
1229 break;
1230 }
1231
1232 }
1233
1234 if (errored) {
1235 goto finally;
1236 }
1237
1238 segmentsCount = 0;
1239 for (i = 0; i < threadCount; i++) {
1240 WorkerState* state = &workerStates[i];
1241 segmentsCount += state->destCount;
1242 }
1243
1244 segmentsArg = PyTuple_New(segmentsCount);
1245 if (NULL == segmentsArg) {
1246 goto finally;
1247 }
1248
1249 segmentIndex = 0;
1250
1251 for (i = 0; i < threadCount; i++) {
1252 Py_ssize_t j;
1253 WorkerState* state = &workerStates[i];
1254
1255 for (j = 0; j < state->destCount; j++) {
1256 DestBuffer* destBuffer = &state->destBuffers[j];
1257 buffer = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1258 destBuffer->segments, destBuffer->segmentsSize);
1259
1260 if (NULL == buffer) {
1261 goto finally;
1262 }
1263
1264 /* Tell instance to use free() instead of PyMem_Free(). */
1265 buffer->useFree = 1;
1266
1267 /*
1268 * BufferWithSegments_FromMemory takes ownership of the backing memory.
1269 * Unset it here so it doesn't get freed below.
1270 */
1271 destBuffer->dest = NULL;
1272 destBuffer->segments = NULL;
1273
1274 PyTuple_SET_ITEM(segmentsArg, segmentIndex++, (PyObject*)buffer);
1275 }
1276 }
1277
1278 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1279 (PyObject*)&ZstdBufferWithSegmentsCollectionType, segmentsArg);
1280
1281 finally:
1282 Py_CLEAR(segmentsArg);
1283
1284 if (pool) {
1285 POOL_free(pool);
1286 }
1287
1288 if (workerStates) {
1289 Py_ssize_t j;
1290
1291 for (i = 0; i < threadCount; i++) {
1292 WorkerState state = workerStates[i];
1293
1294 if (state.cctx) {
1295 ZSTD_freeCCtx(state.cctx);
1296 }
1297
1298 /* malloc() is used in worker thread. */
1299
1300 for (j = 0; j < state.destCount; j++) {
1301 if (state.destBuffers) {
1302 free(state.destBuffers[j].dest);
1303 free(state.destBuffers[j].segments);
1304 }
1305 }
1306
1307
1308 free(state.destBuffers);
1309 }
1310
1311 PyMem_Free(workerStates);
1312 }
1313
1314 return result;
1315 }
1316
1317 PyDoc_STRVAR(ZstdCompressor_multi_compress_to_buffer__doc__,
1318 "Compress multiple pieces of data as a single operation\n"
1319 "\n"
1320 "Receives a ``BufferWithSegmentsCollection``, a ``BufferWithSegments``, or\n"
1321 "a list of bytes-like objects holding data to compress.\n"
1322 "\n"
1323 "Returns a ``BufferWithSegmentsCollection`` holding compressed data.\n"
1324 "\n"
1325 "This function is optimized to perform multiple compression operations with\n"
1326 "as little overhead as possible.\n"
1327 );
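
A sketch of calling this API with a list of byte strings (illustrative; assumes the extension is importable as ``zstd``; ``threads=-1`` maps to cpu_count() per the argument handling below):

    import zstd

    cctx = zstd.ZstdCompressor()
    collection = cctx.multi_compress_to_buffer([b'item 0', b'item 1', b'item 2'], threads=-1)
    # collection is a BufferWithSegmentsCollection holding one compressed frame per input item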
1328
1329 static ZstdBufferWithSegmentsCollection* ZstdCompressor_multi_compress_to_buffer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
1330 static char* kwlist[] = {
1331 "data",
1332 "threads",
1333 NULL
1334 };
1335
1336 PyObject* data;
1337 int threads = 0;
1338 Py_buffer* dataBuffers = NULL;
1339 DataSources sources;
1340 Py_ssize_t i;
1341 Py_ssize_t sourceCount = 0;
1342 ZstdBufferWithSegmentsCollection* result = NULL;
1343
1344 if (self->mtcctx) {
1345 PyErr_SetString(ZstdError,
1346 "function cannot be called on ZstdCompressor configured for multi-threaded compression");
1347 return NULL;
1348 }
1349
1350 memset(&sources, 0, sizeof(sources));
1351
1352 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist,
1353 &data, &threads)) {
1354 return NULL;
1355 }
1356
1357 if (threads < 0) {
1358 threads = cpu_count();
1359 }
1360
1361 if (threads < 2) {
1362 threads = 1;
1363 }
1364
1365 if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsType)) {
1366 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)data;
1367
1368 sources.sources = PyMem_Malloc(buffer->segmentCount * sizeof(DataSource));
1369 if (NULL == sources.sources) {
1370 PyErr_NoMemory();
1371 goto finally;
1372 }
1373
1374 for (i = 0; i < buffer->segmentCount; i++) {
1375 sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset;
1376 sources.sources[i].sourceSize = buffer->segments[i].length;
1377 sources.totalSourceSize += buffer->segments[i].length;
1378 }
1379
1380 sources.sourcesSize = buffer->segmentCount;
1381 }
1382 else if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsCollectionType)) {
1383 Py_ssize_t j;
1384 Py_ssize_t offset = 0;
1385 ZstdBufferWithSegments* buffer;
1386 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)data;
1387
1388 sourceCount = BufferWithSegmentsCollection_length(collection);
1389
1390 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1391 if (NULL == sources.sources) {
1392 PyErr_NoMemory();
1393 goto finally;
1394 }
1395
1396 for (i = 0; i < collection->bufferCount; i++) {
1397 buffer = collection->buffers[i];
1398
1399 for (j = 0; j < buffer->segmentCount; j++) {
1400 sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset;
1401 sources.sources[offset].sourceSize = buffer->segments[j].length;
1402 sources.totalSourceSize += buffer->segments[j].length;
1403
1404 offset++;
1405 }
1406 }
1407
1408 sources.sourcesSize = sourceCount;
1409 }
1410 else if (PyList_Check(data)) {
1411 sourceCount = PyList_GET_SIZE(data);
1412
1413 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1414 if (NULL == sources.sources) {
1415 PyErr_NoMemory();
1416 goto finally;
1417 }
1418
1419 /*
1420 * It isn't clear whether the address referred to by Py_buffer.buf
1421 * is still valid after PyBuffer_Release. So we hold a reference to all
1422 * Py_buffer instances for the duration of the operation.
1423 */
1424 dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer));
1425 if (NULL == dataBuffers) {
1426 PyErr_NoMemory();
1427 goto finally;
1428 }
1429
1430 memset(dataBuffers, 0, sourceCount * sizeof(Py_buffer));
1431
1432 for (i = 0; i < sourceCount; i++) {
1433 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(data, i),
1434 &dataBuffers[i], PyBUF_CONTIG_RO)) {
1435 PyErr_Clear();
1436 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1437 goto finally;
1438 }
1439
1440 sources.sources[i].sourceData = dataBuffers[i].buf;
1441 sources.sources[i].sourceSize = dataBuffers[i].len;
1442 sources.totalSourceSize += dataBuffers[i].len;
1443 }
1444
1445 sources.sourcesSize = sourceCount;
1446 }
1447 else {
1448 PyErr_SetString(PyExc_TypeError, "argument must be list of BufferWithSegments");
1449 goto finally;
1450 }
1451
1452 if (0 == sources.sourcesSize) {
1453 PyErr_SetString(PyExc_ValueError, "no source elements found");
1454 goto finally;
1455 }
1456
1457 if (0 == sources.totalSourceSize) {
1458 PyErr_SetString(PyExc_ValueError, "source elements are empty");
1459 goto finally;
1460 }
1461
1462 result = compress_from_datasources(self, &sources, threads);
1463
1464 finally:
1465 PyMem_Free(sources.sources);
1466
1467 if (dataBuffers) {
1468 for (i = 0; i < sourceCount; i++) {
1469 PyBuffer_Release(&dataBuffers[i]);
1470 }
1471
1472 PyMem_Free(dataBuffers);
1473 }
723
1474
724 return result;
1475 return result;
725 }
1476 }
726
1477
727 static PyMethodDef ZstdCompressor_methods[] = {
1478 static PyMethodDef ZstdCompressor_methods[] = {
728 { "compress", (PyCFunction)ZstdCompressor_compress,
1479 { "compress", (PyCFunction)ZstdCompressor_compress,
729 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
1480 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
730 { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
1481 { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
731 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
1482 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
732 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
1483 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
733 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
1484 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
734 { "read_from", (PyCFunction)ZstdCompressor_read_from,
1485 { "read_from", (PyCFunction)ZstdCompressor_read_from,
735 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_from__doc__ },
1486 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_from__doc__ },
736 { "write_to", (PyCFunction)ZstdCompressor_write_to,
1487 { "write_to", (PyCFunction)ZstdCompressor_write_to,
737 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_write_to___doc__ },
1488 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_write_to___doc__ },
1489 { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer,
1490 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ },
738 { NULL, NULL }
1491 { NULL, NULL }
739 };
1492 };
740
1493
741 PyTypeObject ZstdCompressorType = {
1494 PyTypeObject ZstdCompressorType = {
742 PyVarObject_HEAD_INIT(NULL, 0)
1495 PyVarObject_HEAD_INIT(NULL, 0)
743 "zstd.ZstdCompressor", /* tp_name */
1496 "zstd.ZstdCompressor", /* tp_name */
744 sizeof(ZstdCompressor), /* tp_basicsize */
1497 sizeof(ZstdCompressor), /* tp_basicsize */
745 0, /* tp_itemsize */
1498 0, /* tp_itemsize */
746 (destructor)ZstdCompressor_dealloc, /* tp_dealloc */
1499 (destructor)ZstdCompressor_dealloc, /* tp_dealloc */
747 0, /* tp_print */
1500 0, /* tp_print */
748 0, /* tp_getattr */
1501 0, /* tp_getattr */
749 0, /* tp_setattr */
1502 0, /* tp_setattr */
750 0, /* tp_compare */
1503 0, /* tp_compare */
751 0, /* tp_repr */
1504 0, /* tp_repr */
752 0, /* tp_as_number */
1505 0, /* tp_as_number */
753 0, /* tp_as_sequence */
1506 0, /* tp_as_sequence */
754 0, /* tp_as_mapping */
1507 0, /* tp_as_mapping */
755 0, /* tp_hash */
1508 0, /* tp_hash */
756 0, /* tp_call */
1509 0, /* tp_call */
757 0, /* tp_str */
1510 0, /* tp_str */
758 0, /* tp_getattro */
1511 0, /* tp_getattro */
759 0, /* tp_setattro */
1512 0, /* tp_setattro */
760 0, /* tp_as_buffer */
1513 0, /* tp_as_buffer */
761 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1514 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
762 ZstdCompressor__doc__, /* tp_doc */
1515 ZstdCompressor__doc__, /* tp_doc */
763 0, /* tp_traverse */
1516 0, /* tp_traverse */
764 0, /* tp_clear */
1517 0, /* tp_clear */
765 0, /* tp_richcompare */
1518 0, /* tp_richcompare */
766 0, /* tp_weaklistoffset */
1519 0, /* tp_weaklistoffset */
767 0, /* tp_iter */
1520 0, /* tp_iter */
768 0, /* tp_iternext */
1521 0, /* tp_iternext */
769 ZstdCompressor_methods, /* tp_methods */
1522 ZstdCompressor_methods, /* tp_methods */
770 0, /* tp_members */
1523 0, /* tp_members */
771 0, /* tp_getset */
1524 0, /* tp_getset */
772 0, /* tp_base */
1525 0, /* tp_base */
773 0, /* tp_dict */
1526 0, /* tp_dict */
774 0, /* tp_descr_get */
1527 0, /* tp_descr_get */
775 0, /* tp_descr_set */
1528 0, /* tp_descr_set */
776 0, /* tp_dictoffset */
1529 0, /* tp_dictoffset */
777 (initproc)ZstdCompressor_init, /* tp_init */
1530 (initproc)ZstdCompressor_init, /* tp_init */
778 0, /* tp_alloc */
1531 0, /* tp_alloc */
779 PyType_GenericNew, /* tp_new */
1532 PyType_GenericNew, /* tp_new */
780 };
1533 };
781
1534
782 void compressor_module_init(PyObject* mod) {
1535 void compressor_module_init(PyObject* mod) {
783 Py_TYPE(&ZstdCompressorType) = &PyType_Type;
1536 Py_TYPE(&ZstdCompressorType) = &PyType_Type;
784 if (PyType_Ready(&ZstdCompressorType) < 0) {
1537 if (PyType_Ready(&ZstdCompressorType) < 0) {
785 return;
1538 return;
786 }
1539 }
787
1540
788 Py_INCREF((PyObject*)&ZstdCompressorType);
1541 Py_INCREF((PyObject*)&ZstdCompressorType);
789 PyModule_AddObject(mod, "ZstdCompressor",
1542 PyModule_AddObject(mod, "ZstdCompressor",
790 (PyObject*)&ZstdCompressorType);
1543 (PyObject*)&ZstdCompressorType);
791 }
1544 }
@@ -1,234 +1,247
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 #define min(a, b) (((a) < (b)) ? (a) : (b))
11 #define min(a, b) (((a) < (b)) ? (a) : (b))
12
12
13 extern PyObject* ZstdError;
13 extern PyObject* ZstdError;
14
14
15 PyDoc_STRVAR(ZstdCompressorIterator__doc__,
15 PyDoc_STRVAR(ZstdCompressorIterator__doc__,
16 "Represents an iterator of compressed data.\n"
16 "Represents an iterator of compressed data.\n"
17 );
17 );
18
18
19 static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator* self) {
19 static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator* self) {
20 Py_XDECREF(self->readResult);
20 Py_XDECREF(self->readResult);
21 Py_XDECREF(self->compressor);
21 Py_XDECREF(self->compressor);
22 Py_XDECREF(self->reader);
22 Py_XDECREF(self->reader);
23
23
24 if (self->buffer) {
24 if (self->buffer) {
25 PyBuffer_Release(self->buffer);
25 PyBuffer_Release(self->buffer);
26 PyMem_FREE(self->buffer);
26 PyMem_FREE(self->buffer);
27 self->buffer = NULL;
27 self->buffer = NULL;
28 }
28 }
29
29
30 if (self->cstream) {
31 ZSTD_freeCStream(self->cstream);
32 self->cstream = NULL;
33 }
34
35 if (self->output.dst) {
30 if (self->output.dst) {
36 PyMem_Free(self->output.dst);
31 PyMem_Free(self->output.dst);
37 self->output.dst = NULL;
32 self->output.dst = NULL;
38 }
33 }
39
34
40 PyObject_Del(self);
35 PyObject_Del(self);
41 }
36 }
42
37
43 static PyObject* ZstdCompressorIterator_iter(PyObject* self) {
38 static PyObject* ZstdCompressorIterator_iter(PyObject* self) {
44 Py_INCREF(self);
39 Py_INCREF(self);
45 return self;
40 return self;
46 }
41 }
47
42
48 static PyObject* ZstdCompressorIterator_iternext(ZstdCompressorIterator* self) {
43 static PyObject* ZstdCompressorIterator_iternext(ZstdCompressorIterator* self) {
49 size_t zresult;
44 size_t zresult;
50 PyObject* readResult = NULL;
45 PyObject* readResult = NULL;
51 PyObject* chunk;
46 PyObject* chunk;
52 char* readBuffer;
47 char* readBuffer;
53 Py_ssize_t readSize = 0;
48 Py_ssize_t readSize = 0;
54 Py_ssize_t bufferRemaining;
49 Py_ssize_t bufferRemaining;
55
50
56 if (self->finishedOutput) {
51 if (self->finishedOutput) {
57 PyErr_SetString(PyExc_StopIteration, "output flushed");
52 PyErr_SetString(PyExc_StopIteration, "output flushed");
58 return NULL;
53 return NULL;
59 }
54 }
60
55
61 feedcompressor:
56 feedcompressor:
62
57
63 /* If we have data left in the input, consume it. */
58 /* If we have data left in the input, consume it. */
64 if (self->input.pos < self->input.size) {
59 if (self->input.pos < self->input.size) {
65 Py_BEGIN_ALLOW_THREADS
60 Py_BEGIN_ALLOW_THREADS
66 zresult = ZSTD_compressStream(self->cstream, &self->output, &self->input);
61 if (self->compressor->mtcctx) {
62 zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
63 &self->output, &self->input);
64 }
65 else {
66 zresult = ZSTD_compressStream(self->compressor->cstream, &self->output,
67 &self->input);
68 }
67 Py_END_ALLOW_THREADS
69 Py_END_ALLOW_THREADS
68
70
69 /* Release the Python object holding the input buffer. */
71 /* Release the Python object holding the input buffer. */
70 if (self->input.pos == self->input.size) {
72 if (self->input.pos == self->input.size) {
71 self->input.src = NULL;
73 self->input.src = NULL;
72 self->input.pos = 0;
74 self->input.pos = 0;
73 self->input.size = 0;
75 self->input.size = 0;
74 Py_DECREF(self->readResult);
76 Py_DECREF(self->readResult);
75 self->readResult = NULL;
77 self->readResult = NULL;
76 }
78 }
77
79
78 if (ZSTD_isError(zresult)) {
80 if (ZSTD_isError(zresult)) {
79 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
81 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
80 return NULL;
82 return NULL;
81 }
83 }
82
84
83 /* If it produced output data, emit it. */
85 /* If it produced output data, emit it. */
84 if (self->output.pos) {
86 if (self->output.pos) {
85 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
87 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
86 self->output.pos = 0;
88 self->output.pos = 0;
87 return chunk;
89 return chunk;
88 }
90 }
89 }
91 }
90
92
91 /* We should never have output data sitting around after a previous call. */
93 /* We should never have output data sitting around after a previous call. */
92 assert(self->output.pos == 0);
94 assert(self->output.pos == 0);
93
95
94 /* The code above should have either emitted a chunk and returned or consumed
96 /* The code above should have either emitted a chunk and returned or consumed
95 the entire input buffer. So the state of the input buffer is not
97 the entire input buffer. So the state of the input buffer is not
96 relevant. */
98 relevant. */
97 if (!self->finishedInput) {
99 if (!self->finishedInput) {
98 if (self->reader) {
100 if (self->reader) {
99 readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
101 readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
100 if (!readResult) {
102 if (!readResult) {
101 PyErr_SetString(ZstdError, "could not read() from source");
103 PyErr_SetString(ZstdError, "could not read() from source");
102 return NULL;
104 return NULL;
103 }
105 }
104
106
105 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
107 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
106 }
108 }
107 else {
109 else {
108 assert(self->buffer && self->buffer->buf);
110 assert(self->buffer && self->buffer->buf);
109
111
110 /* Only support contiguous C arrays. */
112 /* Only support contiguous C arrays. */
111 assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL);
113 assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL);
112 assert(self->buffer->itemsize == 1);
114 assert(self->buffer->itemsize == 1);
113
115
114 readBuffer = (char*)self->buffer->buf + self->bufferOffset;
116 readBuffer = (char*)self->buffer->buf + self->bufferOffset;
115 bufferRemaining = self->buffer->len - self->bufferOffset;
117 bufferRemaining = self->buffer->len - self->bufferOffset;
116 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
118 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
117 self->bufferOffset += readSize;
119 self->bufferOffset += readSize;
118 }
120 }
119
121
120 if (0 == readSize) {
122 if (0 == readSize) {
121 Py_XDECREF(readResult);
123 Py_XDECREF(readResult);
122 self->finishedInput = 1;
124 self->finishedInput = 1;
123 }
125 }
124 else {
126 else {
125 self->readResult = readResult;
127 self->readResult = readResult;
126 }
128 }
127 }
129 }
128
130
129 /* EOF */
131 /* EOF */
130 if (0 == readSize) {
132 if (0 == readSize) {
131 zresult = ZSTD_endStream(self->cstream, &self->output);
133 if (self->compressor->mtcctx) {
134 zresult = ZSTDMT_endStream(self->compressor->mtcctx, &self->output);
135 }
136 else {
137 zresult = ZSTD_endStream(self->compressor->cstream, &self->output);
138 }
132 if (ZSTD_isError(zresult)) {
139 if (ZSTD_isError(zresult)) {
133 PyErr_Format(ZstdError, "error ending compression stream: %s",
140 PyErr_Format(ZstdError, "error ending compression stream: %s",
134 ZSTD_getErrorName(zresult));
141 ZSTD_getErrorName(zresult));
135 return NULL;
142 return NULL;
136 }
143 }
137
144
138 assert(self->output.pos);
145 assert(self->output.pos);
139
146
140 if (0 == zresult) {
147 if (0 == zresult) {
141 self->finishedOutput = 1;
148 self->finishedOutput = 1;
142 }
149 }
143
150
144 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
151 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
145 self->output.pos = 0;
152 self->output.pos = 0;
146 return chunk;
153 return chunk;
147 }
154 }
148
155
149 /* New data from reader. Feed into compressor. */
156 /* New data from reader. Feed into compressor. */
150 self->input.src = readBuffer;
157 self->input.src = readBuffer;
151 self->input.size = readSize;
158 self->input.size = readSize;
152 self->input.pos = 0;
159 self->input.pos = 0;
153
160
154 Py_BEGIN_ALLOW_THREADS
161 Py_BEGIN_ALLOW_THREADS
155 zresult = ZSTD_compressStream(self->cstream, &self->output, &self->input);
162 if (self->compressor->mtcctx) {
163 zresult = ZSTDMT_compressStream(self->compressor->mtcctx, &self->output,
164 &self->input);
165 }
166 else {
167 zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, &self->input);
168 }
156 Py_END_ALLOW_THREADS
169 Py_END_ALLOW_THREADS
157
170
158 /* The input buffer currently points to memory managed by Python
171 /* The input buffer currently points to memory managed by Python
159 (readBuffer). This object was allocated by this function. If it wasn't
172 (readBuffer). This object was allocated by this function. If it wasn't
160 fully consumed, we need to release it in a subsequent function call.
173 fully consumed, we need to release it in a subsequent function call.
161 If it is fully consumed, do that now.
174 If it is fully consumed, do that now.
162 */
175 */
163 if (self->input.pos == self->input.size) {
176 if (self->input.pos == self->input.size) {
164 self->input.src = NULL;
177 self->input.src = NULL;
165 self->input.pos = 0;
178 self->input.pos = 0;
166 self->input.size = 0;
179 self->input.size = 0;
167 Py_XDECREF(self->readResult);
180 Py_XDECREF(self->readResult);
168 self->readResult = NULL;
181 self->readResult = NULL;
169 }
182 }
170
183
171 if (ZSTD_isError(zresult)) {
184 if (ZSTD_isError(zresult)) {
172 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
185 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
173 return NULL;
186 return NULL;
174 }
187 }
175
188
176 assert(self->input.pos <= self->input.size);
189 assert(self->input.pos <= self->input.size);
177
190
178 /* If we didn't write anything, start the process over. */
191 /* If we didn't write anything, start the process over. */
179 if (0 == self->output.pos) {
192 if (0 == self->output.pos) {
180 goto feedcompressor;
193 goto feedcompressor;
181 }
194 }
182
195
183 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
196 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
184 self->output.pos = 0;
197 self->output.pos = 0;
185 return chunk;
198 return chunk;
186 }
199 }
187
200
188 PyTypeObject ZstdCompressorIteratorType = {
201 PyTypeObject ZstdCompressorIteratorType = {
189 PyVarObject_HEAD_INIT(NULL, 0)
202 PyVarObject_HEAD_INIT(NULL, 0)
190 "zstd.ZstdCompressorIterator", /* tp_name */
203 "zstd.ZstdCompressorIterator", /* tp_name */
191 sizeof(ZstdCompressorIterator), /* tp_basicsize */
204 sizeof(ZstdCompressorIterator), /* tp_basicsize */
192 0, /* tp_itemsize */
205 0, /* tp_itemsize */
193 (destructor)ZstdCompressorIterator_dealloc, /* tp_dealloc */
206 (destructor)ZstdCompressorIterator_dealloc, /* tp_dealloc */
194 0, /* tp_print */
207 0, /* tp_print */
195 0, /* tp_getattr */
208 0, /* tp_getattr */
196 0, /* tp_setattr */
209 0, /* tp_setattr */
197 0, /* tp_compare */
210 0, /* tp_compare */
198 0, /* tp_repr */
211 0, /* tp_repr */
199 0, /* tp_as_number */
212 0, /* tp_as_number */
200 0, /* tp_as_sequence */
213 0, /* tp_as_sequence */
201 0, /* tp_as_mapping */
214 0, /* tp_as_mapping */
202 0, /* tp_hash */
215 0, /* tp_hash */
203 0, /* tp_call */
216 0, /* tp_call */
204 0, /* tp_str */
217 0, /* tp_str */
205 0, /* tp_getattro */
218 0, /* tp_getattro */
206 0, /* tp_setattro */
219 0, /* tp_setattro */
207 0, /* tp_as_buffer */
220 0, /* tp_as_buffer */
208 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
221 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
209 ZstdCompressorIterator__doc__, /* tp_doc */
222 ZstdCompressorIterator__doc__, /* tp_doc */
210 0, /* tp_traverse */
223 0, /* tp_traverse */
211 0, /* tp_clear */
224 0, /* tp_clear */
212 0, /* tp_richcompare */
225 0, /* tp_richcompare */
213 0, /* tp_weaklistoffset */
226 0, /* tp_weaklistoffset */
214 ZstdCompressorIterator_iter, /* tp_iter */
227 ZstdCompressorIterator_iter, /* tp_iter */
215 (iternextfunc)ZstdCompressorIterator_iternext, /* tp_iternext */
228 (iternextfunc)ZstdCompressorIterator_iternext, /* tp_iternext */
216 0, /* tp_methods */
229 0, /* tp_methods */
217 0, /* tp_members */
230 0, /* tp_members */
218 0, /* tp_getset */
231 0, /* tp_getset */
219 0, /* tp_base */
232 0, /* tp_base */
220 0, /* tp_dict */
233 0, /* tp_dict */
221 0, /* tp_descr_get */
234 0, /* tp_descr_get */
222 0, /* tp_descr_set */
235 0, /* tp_descr_set */
223 0, /* tp_dictoffset */
236 0, /* tp_dictoffset */
224 0, /* tp_init */
237 0, /* tp_init */
225 0, /* tp_alloc */
238 0, /* tp_alloc */
226 PyType_GenericNew, /* tp_new */
239 PyType_GenericNew, /* tp_new */
227 };
240 };
228
241
229 void compressoriterator_module_init(PyObject* mod) {
242 void compressoriterator_module_init(PyObject* mod) {
230 Py_TYPE(&ZstdCompressorIteratorType) = &PyType_Type;
243 Py_TYPE(&ZstdCompressorIteratorType) = &PyType_Type;
231 if (PyType_Ready(&ZstdCompressorIteratorType) < 0) {
244 if (PyType_Ready(&ZstdCompressorIteratorType) < 0) {
232 return;
245 return;
233 }
246 }
234 }
247 }
@@ -1,87 +1,87
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 static char frame_header[] = {
13 static char frame_header[] = {
14 '\x28',
14 '\x28',
15 '\xb5',
15 '\xb5',
16 '\x2f',
16 '\x2f',
17 '\xfd',
17 '\xfd',
18 };
18 };
19
19
20 void constants_module_init(PyObject* mod) {
20 void constants_module_init(PyObject* mod) {
21 PyObject* version;
21 PyObject* version;
22 PyObject* zstdVersion;
22 PyObject* zstdVersion;
23 PyObject* frameHeader;
23 PyObject* frameHeader;
24
24
25 #if PY_MAJOR_VERSION >= 3
25 #if PY_MAJOR_VERSION >= 3
26 version = PyUnicode_FromString(PYTHON_ZSTANDARD_VERSION);
26 version = PyUnicode_FromString(PYTHON_ZSTANDARD_VERSION);
27 #else
27 #else
28 version = PyString_FromString(PYTHON_ZSTANDARD_VERSION);
28 version = PyString_FromString(PYTHON_ZSTANDARD_VERSION);
29 #endif
29 #endif
30 Py_INCREF(version);
30 Py_INCREF(version);
31 PyModule_AddObject(mod, "__version__", version);
31 PyModule_AddObject(mod, "__version__", version);
32
32
33 ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL);
33 ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL);
34 PyModule_AddObject(mod, "ZstdError", ZstdError);
34 PyModule_AddObject(mod, "ZstdError", ZstdError);
35
35
36 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_FINISH", compressorobj_flush_finish);
36 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_FINISH", compressorobj_flush_finish);
37 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_BLOCK", compressorobj_flush_block);
37 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_BLOCK", compressorobj_flush_block);
38
38
39 /* For now, the version is a simple tuple instead of a dedicated type. */
39 /* For now, the version is a simple tuple instead of a dedicated type. */
40 zstdVersion = PyTuple_New(3);
40 zstdVersion = PyTuple_New(3);
41 PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR));
41 PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR));
42 PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR));
42 PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR));
43 PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE));
43 PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE));
44 Py_IncRef(zstdVersion);
44 Py_INCREF(zstdVersion);
45 PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion);
45 PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion);
46
46
47 frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header));
47 frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header));
48 if (frameHeader) {
48 if (frameHeader) {
49 PyModule_AddObject(mod, "FRAME_HEADER", frameHeader);
49 PyModule_AddObject(mod, "FRAME_HEADER", frameHeader);
50 }
50 }
51 else {
51 else {
52 PyErr_Format(PyExc_ValueError, "could not create frame header object");
52 PyErr_Format(PyExc_ValueError, "could not create frame header object");
53 }
53 }
54
54
55 PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel());
55 PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel());
56 PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE",
56 PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE",
57 (long)ZSTD_CStreamInSize());
57 (long)ZSTD_CStreamInSize());
58 PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_OUTPUT_SIZE",
58 PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_OUTPUT_SIZE",
59 (long)ZSTD_CStreamOutSize());
59 (long)ZSTD_CStreamOutSize());
60 PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_INPUT_SIZE",
60 PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_INPUT_SIZE",
61 (long)ZSTD_DStreamInSize());
61 (long)ZSTD_DStreamInSize());
62 PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE",
62 PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE",
63 (long)ZSTD_DStreamOutSize());
63 (long)ZSTD_DStreamOutSize());
64
64
65 PyModule_AddIntConstant(mod, "MAGIC_NUMBER", ZSTD_MAGICNUMBER);
65 PyModule_AddIntConstant(mod, "MAGIC_NUMBER", ZSTD_MAGICNUMBER);
66 PyModule_AddIntConstant(mod, "WINDOWLOG_MIN", ZSTD_WINDOWLOG_MIN);
66 PyModule_AddIntConstant(mod, "WINDOWLOG_MIN", ZSTD_WINDOWLOG_MIN);
67 PyModule_AddIntConstant(mod, "WINDOWLOG_MAX", ZSTD_WINDOWLOG_MAX);
67 PyModule_AddIntConstant(mod, "WINDOWLOG_MAX", ZSTD_WINDOWLOG_MAX);
68 PyModule_AddIntConstant(mod, "CHAINLOG_MIN", ZSTD_CHAINLOG_MIN);
68 PyModule_AddIntConstant(mod, "CHAINLOG_MIN", ZSTD_CHAINLOG_MIN);
69 PyModule_AddIntConstant(mod, "CHAINLOG_MAX", ZSTD_CHAINLOG_MAX);
69 PyModule_AddIntConstant(mod, "CHAINLOG_MAX", ZSTD_CHAINLOG_MAX);
70 PyModule_AddIntConstant(mod, "HASHLOG_MIN", ZSTD_HASHLOG_MIN);
70 PyModule_AddIntConstant(mod, "HASHLOG_MIN", ZSTD_HASHLOG_MIN);
71 PyModule_AddIntConstant(mod, "HASHLOG_MAX", ZSTD_HASHLOG_MAX);
71 PyModule_AddIntConstant(mod, "HASHLOG_MAX", ZSTD_HASHLOG_MAX);
72 PyModule_AddIntConstant(mod, "HASHLOG3_MAX", ZSTD_HASHLOG3_MAX);
72 PyModule_AddIntConstant(mod, "HASHLOG3_MAX", ZSTD_HASHLOG3_MAX);
73 PyModule_AddIntConstant(mod, "SEARCHLOG_MIN", ZSTD_SEARCHLOG_MIN);
73 PyModule_AddIntConstant(mod, "SEARCHLOG_MIN", ZSTD_SEARCHLOG_MIN);
74 PyModule_AddIntConstant(mod, "SEARCHLOG_MAX", ZSTD_SEARCHLOG_MAX);
74 PyModule_AddIntConstant(mod, "SEARCHLOG_MAX", ZSTD_SEARCHLOG_MAX);
75 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN);
75 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN);
76 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX);
76 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX);
77 PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN);
77 PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN);
78 PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX);
78 PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX);
79
79
80 PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast);
80 PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast);
81 PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast);
81 PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast);
82 PyModule_AddIntConstant(mod, "STRATEGY_GREEDY", ZSTD_greedy);
82 PyModule_AddIntConstant(mod, "STRATEGY_GREEDY", ZSTD_greedy);
83 PyModule_AddIntConstant(mod, "STRATEGY_LAZY", ZSTD_lazy);
83 PyModule_AddIntConstant(mod, "STRATEGY_LAZY", ZSTD_lazy);
84 PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2);
84 PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2);
85 PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2);
85 PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2);
86 PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt);
86 PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt);
87 }
87 }
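The constants registered above surface as plain module attributes. A small illustrative sketch (exact values depend on the bundled zstd library):

    import zstd

    print(zstd.__version__)            # version of the bindings
    print(zstd.ZSTD_VERSION)           # (major, minor, release) of bundled zstd
    print(zstd.MAX_COMPRESSION_LEVEL)  # upper bound for compression level

    # FRAME_HEADER holds the four magic bytes that begin every zstd frame;
    # MAGIC_NUMBER exposes the same value as an integer.
    assert zstd.FRAME_HEADER == b"\x28\xb5\x2f\xfd"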
@@ -1,188 +1,179
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(ZstdDecompressionWriter__doc,
13 PyDoc_STRVAR(ZstdDecompressionWriter__doc,
14 """A context manager used for writing decompressed output.\n"
14 """A context manager used for writing decompressed output.\n"
15 );
15 );
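A usage sketch for this context manager from the Python side. The write_to() factory name is an assumption about this version of the bindings; the writer object it returns is what this file implements.

    import io
    import zstd

    compressed = zstd.ZstdCompressor().compress(b"hello world")

    dctx = zstd.ZstdDecompressor()
    destination = io.BytesIO()

    # write() accepts compressed bytes and sends the decompressed output to
    # `destination`; it must be called inside the `with` block.
    with dctx.write_to(destination) as writer:   # write_to() name is assumed
        writer.write(compressed)

    assert destination.getvalue() == b"hello world"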
16
16
17 static void ZstdDecompressionWriter_dealloc(ZstdDecompressionWriter* self) {
17 static void ZstdDecompressionWriter_dealloc(ZstdDecompressionWriter* self) {
18 Py_XDECREF(self->decompressor);
18 Py_XDECREF(self->decompressor);
19 Py_XDECREF(self->writer);
19 Py_XDECREF(self->writer);
20
20
21 if (self->dstream) {
22 ZSTD_freeDStream(self->dstream);
23 self->dstream = NULL;
24 }
25
26 PyObject_Del(self);
21 PyObject_Del(self);
27 }
22 }
28
23
29 static PyObject* ZstdDecompressionWriter_enter(ZstdDecompressionWriter* self) {
24 static PyObject* ZstdDecompressionWriter_enter(ZstdDecompressionWriter* self) {
30 if (self->entered) {
25 if (self->entered) {
31 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
26 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
32 return NULL;
27 return NULL;
33 }
28 }
34
29
35 self->dstream = DStream_from_ZstdDecompressor(self->decompressor);
30 if (0 != init_dstream(self->decompressor)) {
36 if (!self->dstream) {
37 return NULL;
31 return NULL;
38 }
32 }
39
33
40 self->entered = 1;
34 self->entered = 1;
41
35
42 Py_INCREF(self);
36 Py_INCREF(self);
43 return (PyObject*)self;
37 return (PyObject*)self;
44 }
38 }
45
39
46 static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) {
40 static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) {
47 self->entered = 0;
41 self->entered = 0;
48
42
49 if (self->dstream) {
50 ZSTD_freeDStream(self->dstream);
51 self->dstream = NULL;
52 }
53
54 Py_RETURN_FALSE;
43 Py_RETURN_FALSE;
55 }
44 }
56
45
57 static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) {
46 static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) {
58 if (!self->dstream) {
47 if (!self->decompressor->dstream) {
59 PyErr_SetString(ZstdError, "cannot determine size of inactive decompressor; "
48 PyErr_SetString(ZstdError, "cannot determine size of inactive decompressor; "
60 "call when context manager is active");
49 "call when context manager is active");
61 return NULL;
50 return NULL;
62 }
51 }
63
52
64 return PyLong_FromSize_t(ZSTD_sizeof_DStream(self->dstream));
53 return PyLong_FromSize_t(ZSTD_sizeof_DStream(self->decompressor->dstream));
65 }
54 }
66
55
67 static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args) {
56 static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args) {
68 const char* source;
57 const char* source;
69 Py_ssize_t sourceSize;
58 Py_ssize_t sourceSize;
70 size_t zresult = 0;
59 size_t zresult = 0;
71 ZSTD_inBuffer input;
60 ZSTD_inBuffer input;
72 ZSTD_outBuffer output;
61 ZSTD_outBuffer output;
73 PyObject* res;
62 PyObject* res;
74 Py_ssize_t totalWrite = 0;
63 Py_ssize_t totalWrite = 0;
75
64
76 #if PY_MAJOR_VERSION >= 3
65 #if PY_MAJOR_VERSION >= 3
77 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
66 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
78 #else
67 #else
79 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
68 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
80 #endif
69 #endif
81 return NULL;
70 return NULL;
82 }
71 }
83
72
84 if (!self->entered) {
73 if (!self->entered) {
85 PyErr_SetString(ZstdError, "write must be called from an active context manager");
74 PyErr_SetString(ZstdError, "write must be called from an active context manager");
86 return NULL;
75 return NULL;
87 }
76 }
88
77
78 assert(self->decompressor->dstream);
79
89 output.dst = PyMem_Malloc(self->outSize);
80 output.dst = PyMem_Malloc(self->outSize);
90 if (!output.dst) {
81 if (!output.dst) {
91 return PyErr_NoMemory();
82 return PyErr_NoMemory();
92 }
83 }
93 output.size = self->outSize;
84 output.size = self->outSize;
94 output.pos = 0;
85 output.pos = 0;
95
86
96 input.src = source;
87 input.src = source;
97 input.size = sourceSize;
88 input.size = sourceSize;
98 input.pos = 0;
89 input.pos = 0;
99
90
100 while ((ssize_t)input.pos < sourceSize) {
91 while ((ssize_t)input.pos < sourceSize) {
101 Py_BEGIN_ALLOW_THREADS
92 Py_BEGIN_ALLOW_THREADS
102 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
93 zresult = ZSTD_decompressStream(self->decompressor->dstream, &output, &input);
103 Py_END_ALLOW_THREADS
94 Py_END_ALLOW_THREADS
104
95
105 if (ZSTD_isError(zresult)) {
96 if (ZSTD_isError(zresult)) {
106 PyMem_Free(output.dst);
97 PyMem_Free(output.dst);
107 PyErr_Format(ZstdError, "zstd decompress error: %s",
98 PyErr_Format(ZstdError, "zstd decompress error: %s",
108 ZSTD_getErrorName(zresult));
99 ZSTD_getErrorName(zresult));
109 return NULL;
100 return NULL;
110 }
101 }
111
102
112 if (output.pos) {
103 if (output.pos) {
113 #if PY_MAJOR_VERSION >= 3
104 #if PY_MAJOR_VERSION >= 3
114 res = PyObject_CallMethod(self->writer, "write", "y#",
105 res = PyObject_CallMethod(self->writer, "write", "y#",
115 #else
106 #else
116 res = PyObject_CallMethod(self->writer, "write", "s#",
107 res = PyObject_CallMethod(self->writer, "write", "s#",
117 #endif
108 #endif
118 output.dst, output.pos);
109 output.dst, output.pos);
119 Py_XDECREF(res);
110 Py_XDECREF(res);
120 totalWrite += output.pos;
111 totalWrite += output.pos;
121 output.pos = 0;
112 output.pos = 0;
122 }
113 }
123 }
114 }
124
115
125 PyMem_Free(output.dst);
116 PyMem_Free(output.dst);
126
117
127 return PyLong_FromSsize_t(totalWrite);
118 return PyLong_FromSsize_t(totalWrite);
128 }
119 }
129
120
130 static PyMethodDef ZstdDecompressionWriter_methods[] = {
121 static PyMethodDef ZstdDecompressionWriter_methods[] = {
131 { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
122 { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
132 PyDoc_STR("Enter a decompression context.") },
123 PyDoc_STR("Enter a decompression context.") },
133 { "__exit__", (PyCFunction)ZstdDecompressionWriter_exit, METH_VARARGS,
124 { "__exit__", (PyCFunction)ZstdDecompressionWriter_exit, METH_VARARGS,
134 PyDoc_STR("Exit a decompression context.") },
125 PyDoc_STR("Exit a decompression context.") },
135 { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS,
126 { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS,
136 PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") },
127 PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") },
137 { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS,
128 { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS,
138 PyDoc_STR("Decompress data") },
129 PyDoc_STR("Decompress data") },
139 { NULL, NULL }
130 { NULL, NULL }
140 };
131 };
141
132
142 PyTypeObject ZstdDecompressionWriterType = {
133 PyTypeObject ZstdDecompressionWriterType = {
143 PyVarObject_HEAD_INIT(NULL, 0)
134 PyVarObject_HEAD_INIT(NULL, 0)
144 "zstd.ZstdDecompressionWriter", /* tp_name */
135 "zstd.ZstdDecompressionWriter", /* tp_name */
145 sizeof(ZstdDecompressionWriter),/* tp_basicsize */
136 sizeof(ZstdDecompressionWriter),/* tp_basicsize */
146 0, /* tp_itemsize */
137 0, /* tp_itemsize */
147 (destructor)ZstdDecompressionWriter_dealloc, /* tp_dealloc */
138 (destructor)ZstdDecompressionWriter_dealloc, /* tp_dealloc */
148 0, /* tp_print */
139 0, /* tp_print */
149 0, /* tp_getattr */
140 0, /* tp_getattr */
150 0, /* tp_setattr */
141 0, /* tp_setattr */
151 0, /* tp_compare */
142 0, /* tp_compare */
152 0, /* tp_repr */
143 0, /* tp_repr */
153 0, /* tp_as_number */
144 0, /* tp_as_number */
154 0, /* tp_as_sequence */
145 0, /* tp_as_sequence */
155 0, /* tp_as_mapping */
146 0, /* tp_as_mapping */
156 0, /* tp_hash */
147 0, /* tp_hash */
157 0, /* tp_call */
148 0, /* tp_call */
158 0, /* tp_str */
149 0, /* tp_str */
159 0, /* tp_getattro */
150 0, /* tp_getattro */
160 0, /* tp_setattro */
151 0, /* tp_setattro */
161 0, /* tp_as_buffer */
152 0, /* tp_as_buffer */
162 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
153 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
163 ZstdDecompressionWriter__doc, /* tp_doc */
154 ZstdDecompressionWriter__doc, /* tp_doc */
164 0, /* tp_traverse */
155 0, /* tp_traverse */
165 0, /* tp_clear */
156 0, /* tp_clear */
166 0, /* tp_richcompare */
157 0, /* tp_richcompare */
167 0, /* tp_weaklistoffset */
158 0, /* tp_weaklistoffset */
168 0, /* tp_iter */
159 0, /* tp_iter */
169 0, /* tp_iternext */
160 0, /* tp_iternext */
170 ZstdDecompressionWriter_methods,/* tp_methods */
161 ZstdDecompressionWriter_methods,/* tp_methods */
171 0, /* tp_members */
162 0, /* tp_members */
172 0, /* tp_getset */
163 0, /* tp_getset */
173 0, /* tp_base */
164 0, /* tp_base */
174 0, /* tp_dict */
165 0, /* tp_dict */
175 0, /* tp_descr_get */
166 0, /* tp_descr_get */
176 0, /* tp_descr_set */
167 0, /* tp_descr_set */
177 0, /* tp_dictoffset */
168 0, /* tp_dictoffset */
178 0, /* tp_init */
169 0, /* tp_init */
179 0, /* tp_alloc */
170 0, /* tp_alloc */
180 PyType_GenericNew, /* tp_new */
171 PyType_GenericNew, /* tp_new */
181 };
172 };
182
173
183 void decompressionwriter_module_init(PyObject* mod) {
174 void decompressionwriter_module_init(PyObject* mod) {
184 Py_TYPE(&ZstdDecompressionWriterType) = &PyType_Type;
175 Py_TYPE(&ZstdDecompressionWriterType) = &PyType_Type;
185 if (PyType_Ready(&ZstdDecompressionWriterType) < 0) {
176 if (PyType_Ready(&ZstdDecompressionWriterType) < 0) {
186 return;
177 return;
187 }
178 }
188 }
179 }
@@ -1,170 +1,167
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(DecompressionObj__doc__,
13 PyDoc_STRVAR(DecompressionObj__doc__,
14 "Perform decompression using a standard library compatible API.\n"
14 "Perform decompression using a standard library compatible API.\n"
15 );
15 );
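A minimal sketch of the incremental feeding this object supports, assuming it is obtained via ZstdDecompressor.decompressobj() (documented later in this diff):

    import zstd

    data = b"payload" * 200
    frame = zstd.ZstdCompressor().compress(data)

    dobj = zstd.ZstdDecompressor().decompressobj()

    # Feed the frame in small pieces; each call may return b"" or a partial
    # result. A given decompressobj can only consume a single frame.
    out = b"".join(dobj.decompress(frame[i:i + 64])
                   for i in range(0, len(frame), 64))
    assert out == data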
16
16
17 static void DecompressionObj_dealloc(ZstdDecompressionObj* self) {
17 static void DecompressionObj_dealloc(ZstdDecompressionObj* self) {
18 if (self->dstream) {
19 ZSTD_freeDStream(self->dstream);
20 self->dstream = NULL;
21 }
22
23 Py_XDECREF(self->decompressor);
18 Py_XDECREF(self->decompressor);
24
19
25 PyObject_Del(self);
20 PyObject_Del(self);
26 }
21 }
27
22
28 static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args) {
23 static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args) {
29 const char* source;
24 const char* source;
30 Py_ssize_t sourceSize;
25 Py_ssize_t sourceSize;
31 size_t zresult;
26 size_t zresult;
32 ZSTD_inBuffer input;
27 ZSTD_inBuffer input;
33 ZSTD_outBuffer output;
28 ZSTD_outBuffer output;
34 size_t outSize = ZSTD_DStreamOutSize();
29 size_t outSize = ZSTD_DStreamOutSize();
35 PyObject* result = NULL;
30 PyObject* result = NULL;
36 Py_ssize_t resultSize = 0;
31 Py_ssize_t resultSize = 0;
37
32
33 /* Constructor should ensure stream is populated. */
34 assert(self->decompressor->dstream);
35
38 if (self->finished) {
36 if (self->finished) {
39 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
37 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
40 return NULL;
38 return NULL;
41 }
39 }
42
40
43 #if PY_MAJOR_VERSION >= 3
41 #if PY_MAJOR_VERSION >= 3
44 if (!PyArg_ParseTuple(args, "y#:decompress",
42 if (!PyArg_ParseTuple(args, "y#:decompress",
45 #else
43 #else
46 if (!PyArg_ParseTuple(args, "s#:decompress",
44 if (!PyArg_ParseTuple(args, "s#:decompress",
47 #endif
45 #endif
48 &source, &sourceSize)) {
46 &source, &sourceSize)) {
49 return NULL;
47 return NULL;
50 }
48 }
51
49
52 input.src = source;
50 input.src = source;
53 input.size = sourceSize;
51 input.size = sourceSize;
54 input.pos = 0;
52 input.pos = 0;
55
53
56 output.dst = PyMem_Malloc(outSize);
54 output.dst = PyMem_Malloc(outSize);
57 if (!output.dst) {
55 if (!output.dst) {
58 PyErr_NoMemory();
56 PyErr_NoMemory();
59 return NULL;
57 return NULL;
60 }
58 }
61 output.size = outSize;
59 output.size = outSize;
62 output.pos = 0;
60 output.pos = 0;
63
61
64 /* Read input until exhausted. */
62 /* Read input until exhausted. */
65 while (input.pos < input.size) {
63 while (input.pos < input.size) {
66 Py_BEGIN_ALLOW_THREADS
64 Py_BEGIN_ALLOW_THREADS
67 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
65 zresult = ZSTD_decompressStream(self->decompressor->dstream, &output, &input);
68 Py_END_ALLOW_THREADS
66 Py_END_ALLOW_THREADS
69
67
70 if (ZSTD_isError(zresult)) {
68 if (ZSTD_isError(zresult)) {
71 PyErr_Format(ZstdError, "zstd decompressor error: %s",
69 PyErr_Format(ZstdError, "zstd decompressor error: %s",
72 ZSTD_getErrorName(zresult));
70 ZSTD_getErrorName(zresult));
73 result = NULL;
71 result = NULL;
74 goto finally;
72 goto finally;
75 }
73 }
76
74
77 if (0 == zresult) {
75 if (0 == zresult) {
78 self->finished = 1;
76 self->finished = 1;
79 }
77 }
80
78
81 if (output.pos) {
79 if (output.pos) {
82 if (result) {
80 if (result) {
83 resultSize = PyBytes_GET_SIZE(result);
81 resultSize = PyBytes_GET_SIZE(result);
84 if (-1 == _PyBytes_Resize(&result, resultSize + output.pos)) {
82 if (-1 == _PyBytes_Resize(&result, resultSize + output.pos)) {
85 goto except;
83 goto except;
86 }
84 }
87
85
88 memcpy(PyBytes_AS_STRING(result) + resultSize,
86 memcpy(PyBytes_AS_STRING(result) + resultSize,
89 output.dst, output.pos);
87 output.dst, output.pos);
90 }
88 }
91 else {
89 else {
92 result = PyBytes_FromStringAndSize(output.dst, output.pos);
90 result = PyBytes_FromStringAndSize(output.dst, output.pos);
93 if (!result) {
91 if (!result) {
94 goto except;
92 goto except;
95 }
93 }
96 }
94 }
97
95
98 output.pos = 0;
96 output.pos = 0;
99 }
97 }
100 }
98 }
101
99
102 if (!result) {
100 if (!result) {
103 result = PyBytes_FromString("");
101 result = PyBytes_FromString("");
104 }
102 }
105
103
106 goto finally;
104 goto finally;
107
105
108 except:
106 except:
109 Py_DecRef(result);
107 Py_CLEAR(result);
110 result = NULL;
111
108
112 finally:
109 finally:
113 PyMem_Free(output.dst);
110 PyMem_Free(output.dst);
114
111
115 return result;
112 return result;
116 }
113 }
117
114
118 static PyMethodDef DecompressionObj_methods[] = {
115 static PyMethodDef DecompressionObj_methods[] = {
119 { "decompress", (PyCFunction)DecompressionObj_decompress,
116 { "decompress", (PyCFunction)DecompressionObj_decompress,
120 METH_VARARGS, PyDoc_STR("decompress data") },
117 METH_VARARGS, PyDoc_STR("decompress data") },
121 { NULL, NULL }
118 { NULL, NULL }
122 };
119 };
123
120
124 PyTypeObject ZstdDecompressionObjType = {
121 PyTypeObject ZstdDecompressionObjType = {
125 PyVarObject_HEAD_INIT(NULL, 0)
122 PyVarObject_HEAD_INIT(NULL, 0)
126 "zstd.ZstdDecompressionObj", /* tp_name */
123 "zstd.ZstdDecompressionObj", /* tp_name */
127 sizeof(ZstdDecompressionObj), /* tp_basicsize */
124 sizeof(ZstdDecompressionObj), /* tp_basicsize */
128 0, /* tp_itemsize */
125 0, /* tp_itemsize */
129 (destructor)DecompressionObj_dealloc, /* tp_dealloc */
126 (destructor)DecompressionObj_dealloc, /* tp_dealloc */
130 0, /* tp_print */
127 0, /* tp_print */
131 0, /* tp_getattr */
128 0, /* tp_getattr */
132 0, /* tp_setattr */
129 0, /* tp_setattr */
133 0, /* tp_compare */
130 0, /* tp_compare */
134 0, /* tp_repr */
131 0, /* tp_repr */
135 0, /* tp_as_number */
132 0, /* tp_as_number */
136 0, /* tp_as_sequence */
133 0, /* tp_as_sequence */
137 0, /* tp_as_mapping */
134 0, /* tp_as_mapping */
138 0, /* tp_hash */
135 0, /* tp_hash */
139 0, /* tp_call */
136 0, /* tp_call */
140 0, /* tp_str */
137 0, /* tp_str */
141 0, /* tp_getattro */
138 0, /* tp_getattro */
142 0, /* tp_setattro */
139 0, /* tp_setattro */
143 0, /* tp_as_buffer */
140 0, /* tp_as_buffer */
144 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
141 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
145 DecompressionObj__doc__, /* tp_doc */
142 DecompressionObj__doc__, /* tp_doc */
146 0, /* tp_traverse */
143 0, /* tp_traverse */
147 0, /* tp_clear */
144 0, /* tp_clear */
148 0, /* tp_richcompare */
145 0, /* tp_richcompare */
149 0, /* tp_weaklistoffset */
146 0, /* tp_weaklistoffset */
150 0, /* tp_iter */
147 0, /* tp_iter */
151 0, /* tp_iternext */
148 0, /* tp_iternext */
152 DecompressionObj_methods, /* tp_methods */
149 DecompressionObj_methods, /* tp_methods */
153 0, /* tp_members */
150 0, /* tp_members */
154 0, /* tp_getset */
151 0, /* tp_getset */
155 0, /* tp_base */
152 0, /* tp_base */
156 0, /* tp_dict */
153 0, /* tp_dict */
157 0, /* tp_descr_get */
154 0, /* tp_descr_get */
158 0, /* tp_descr_set */
155 0, /* tp_descr_set */
159 0, /* tp_dictoffset */
156 0, /* tp_dictoffset */
160 0, /* tp_init */
157 0, /* tp_init */
161 0, /* tp_alloc */
158 0, /* tp_alloc */
162 PyType_GenericNew, /* tp_new */
159 PyType_GenericNew, /* tp_new */
163 };
160 };
164
161
165 void decompressobj_module_init(PyObject* module) {
162 void decompressobj_module_init(PyObject* module) {
166 Py_TYPE(&ZstdDecompressionObjType) = &PyType_Type;
163 Py_TYPE(&ZstdDecompressionObjType) = &PyType_Type;
167 if (PyType_Ready(&ZstdDecompressionObjType) < 0) {
164 if (PyType_Ready(&ZstdDecompressionObjType) < 0) {
168 return;
165 return;
169 }
166 }
170 }
167 }
@@ -1,845 +1,1580
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10 #include "pool.h"
10
11
11 extern PyObject* ZstdError;
12 extern PyObject* ZstdError;
12
13
13 ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor) {
14 /**
14 ZSTD_DStream* dstream;
15 * Ensure the ZSTD_DStream on a ZstdDecompressor is initialized and reset.
16 *
17 * This should be called before starting a decompression operation with a
18 * ZSTD_DStream on a ZstdDecompressor.
19 */
20 int init_dstream(ZstdDecompressor* decompressor) {
15 void* dictData = NULL;
21 void* dictData = NULL;
16 size_t dictSize = 0;
22 size_t dictSize = 0;
17 size_t zresult;
23 size_t zresult;
18
24
19 dstream = ZSTD_createDStream();
25 /* Simple case of dstream already exists. Just reset it. */
20 if (!dstream) {
26 if (decompressor->dstream) {
27 zresult = ZSTD_resetDStream(decompressor->dstream);
28 if (ZSTD_isError(zresult)) {
29 PyErr_Format(ZstdError, "could not reset DStream: %s",
30 ZSTD_getErrorName(zresult));
31 return -1;
32 }
33
34 return 0;
35 }
36
37 decompressor->dstream = ZSTD_createDStream();
38 if (!decompressor->dstream) {
21 PyErr_SetString(ZstdError, "could not create DStream");
39 PyErr_SetString(ZstdError, "could not create DStream");
22 return NULL;
40 return -1;
23 }
41 }
24
42
25 if (decompressor->dict) {
43 if (decompressor->dict) {
26 dictData = decompressor->dict->dictData;
44 dictData = decompressor->dict->dictData;
27 dictSize = decompressor->dict->dictSize;
45 dictSize = decompressor->dict->dictSize;
28 }
46 }
29
47
30 if (dictData) {
48 if (dictData) {
31 zresult = ZSTD_initDStream_usingDict(dstream, dictData, dictSize);
49 zresult = ZSTD_initDStream_usingDict(decompressor->dstream, dictData, dictSize);
32 }
50 }
33 else {
51 else {
34 zresult = ZSTD_initDStream(dstream);
52 zresult = ZSTD_initDStream(decompressor->dstream);
35 }
53 }
36
54
37 if (ZSTD_isError(zresult)) {
55 if (ZSTD_isError(zresult)) {
56 /* Don't leave a reference to an invalid object. */
57 ZSTD_freeDStream(decompressor->dstream);
58 decompressor->dstream = NULL;
59
38 PyErr_Format(ZstdError, "could not initialize DStream: %s",
60 PyErr_Format(ZstdError, "could not initialize DStream: %s",
39 ZSTD_getErrorName(zresult));
61 ZSTD_getErrorName(zresult));
40 return NULL;
62 return -1;
41 }
63 }
42
64
43 return dstream;
65 return 0;
44 }
66 }
45
67
46 PyDoc_STRVAR(Decompressor__doc__,
68 PyDoc_STRVAR(Decompressor__doc__,
47 "ZstdDecompressor(dict_data=None)\n"
69 "ZstdDecompressor(dict_data=None)\n"
48 "\n"
70 "\n"
49 "Create an object used to perform Zstandard decompression.\n"
71 "Create an object used to perform Zstandard decompression.\n"
50 "\n"
72 "\n"
51 "An instance can perform multiple decompression operations."
73 "An instance can perform multiple decompression operations."
52 );
74 );
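A brief construction sketch. The Python-level name zstd.ZstdCompressionDict is assumed to correspond to the ZstdCompressionDictType accepted below, and "dictionary.bin" is a placeholder path.

    import zstd

    # A plain decompressor; the same instance can be reused for many operations.
    dctx = zstd.ZstdDecompressor()

    # With a prebuilt dictionary (assumed constructor wrapping raw dict bytes).
    with open("dictionary.bin", "rb") as fh:
        dict_data = zstd.ZstdCompressionDict(fh.read())
    dctx_with_dict = zstd.ZstdDecompressor(dict_data=dict_data)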
53
75
54 static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
76 static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
55 static char* kwlist[] = {
77 static char* kwlist[] = {
56 "dict_data",
78 "dict_data",
57 NULL
79 NULL
58 };
80 };
59
81
60 ZstdCompressionDict* dict = NULL;
82 ZstdCompressionDict* dict = NULL;
61
83
62 self->dctx = NULL;
84 self->dctx = NULL;
63 self->dict = NULL;
85 self->dict = NULL;
64 self->ddict = NULL;
86 self->ddict = NULL;
65
87
66 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist,
88 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist,
67 &ZstdCompressionDictType, &dict)) {
89 &ZstdCompressionDictType, &dict)) {
68 return -1;
90 return -1;
69 }
91 }
70
92
71 /* TODO lazily initialize the reference ZSTD_DCtx on first use since
93 /* TODO lazily initialize the reference ZSTD_DCtx on first use since
72 not all instances of ZstdDecompressor will use a ZSTD_DCtx. */
94 not all instances of ZstdDecompressor will use a ZSTD_DCtx. */
73 self->dctx = ZSTD_createDCtx();
95 self->dctx = ZSTD_createDCtx();
74 if (!self->dctx) {
96 if (!self->dctx) {
75 PyErr_NoMemory();
97 PyErr_NoMemory();
76 goto except;
98 goto except;
77 }
99 }
78
100
79 if (dict) {
101 if (dict) {
80 self->dict = dict;
102 self->dict = dict;
81 Py_INCREF(dict);
103 Py_INCREF(dict);
82 }
104 }
83
105
84 return 0;
106 return 0;
85
107
86 except:
108 except:
87 if (self->dctx) {
109 if (self->dctx) {
88 ZSTD_freeDCtx(self->dctx);
110 ZSTD_freeDCtx(self->dctx);
89 self->dctx = NULL;
111 self->dctx = NULL;
90 }
112 }
91
113
92 return -1;
114 return -1;
93 }
115 }
94
116
95 static void Decompressor_dealloc(ZstdDecompressor* self) {
117 static void Decompressor_dealloc(ZstdDecompressor* self) {
96 if (self->dctx) {
118 Py_CLEAR(self->dict);
97 ZSTD_freeDCtx(self->dctx);
98 }
99
100 Py_XDECREF(self->dict);
101
119
102 if (self->ddict) {
120 if (self->ddict) {
103 ZSTD_freeDDict(self->ddict);
121 ZSTD_freeDDict(self->ddict);
104 self->ddict = NULL;
122 self->ddict = NULL;
105 }
123 }
106
124
125 if (self->dstream) {
126 ZSTD_freeDStream(self->dstream);
127 self->dstream = NULL;
128 }
129
130 if (self->dctx) {
131 ZSTD_freeDCtx(self->dctx);
132 self->dctx = NULL;
133 }
134
107 PyObject_Del(self);
135 PyObject_Del(self);
108 }
136 }
109
137
110 PyDoc_STRVAR(Decompressor_copy_stream__doc__,
138 PyDoc_STRVAR(Decompressor_copy_stream__doc__,
111 "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n"
139 "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n"
112 "\n"
140 "\n"
113 "Compressed data will be read from ``ifh``, decompressed, and written to\n"
141 "Compressed data will be read from ``ifh``, decompressed, and written to\n"
114 "``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n"
142 "``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n"
115 "``write(data)`` method.\n"
143 "``write(data)`` method.\n"
116 "\n"
144 "\n"
117 "The optional ``read_size`` and ``write_size`` arguments control the chunk\n"
145 "The optional ``read_size`` and ``write_size`` arguments control the chunk\n"
118 "size of data that is ``read()`` and ``write()`` between streams. They default\n"
146 "size of data that is ``read()`` and ``write()`` between streams. They default\n"
119 "to the default input and output sizes of zstd decompressor streams.\n"
147 "to the default input and output sizes of zstd decompressor streams.\n"
120 );
148 );
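A usage sketch for copy_stream(); the file names are placeholders.

    import zstd

    dctx = zstd.ZstdDecompressor()

    # Streams data between file objects without holding the whole payload in
    # memory; returns a (bytes_read, bytes_written) tuple.
    with open("input.zst", "rb") as ifh, open("output.bin", "wb") as ofh:
        read_count, written_count = dctx.copy_stream(ifh, ofh)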
121
149
122 static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
150 static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
123 static char* kwlist[] = {
151 static char* kwlist[] = {
124 "ifh",
152 "ifh",
125 "ofh",
153 "ofh",
126 "read_size",
154 "read_size",
127 "write_size",
155 "write_size",
128 NULL
156 NULL
129 };
157 };
130
158
131 PyObject* source;
159 PyObject* source;
132 PyObject* dest;
160 PyObject* dest;
133 size_t inSize = ZSTD_DStreamInSize();
161 size_t inSize = ZSTD_DStreamInSize();
134 size_t outSize = ZSTD_DStreamOutSize();
162 size_t outSize = ZSTD_DStreamOutSize();
135 ZSTD_DStream* dstream;
136 ZSTD_inBuffer input;
163 ZSTD_inBuffer input;
137 ZSTD_outBuffer output;
164 ZSTD_outBuffer output;
138 Py_ssize_t totalRead = 0;
165 Py_ssize_t totalRead = 0;
139 Py_ssize_t totalWrite = 0;
166 Py_ssize_t totalWrite = 0;
140 char* readBuffer;
167 char* readBuffer;
141 Py_ssize_t readSize;
168 Py_ssize_t readSize;
142 PyObject* readResult;
169 PyObject* readResult;
143 PyObject* res = NULL;
170 PyObject* res = NULL;
144 size_t zresult = 0;
171 size_t zresult = 0;
145 PyObject* writeResult;
172 PyObject* writeResult;
146 PyObject* totalReadPy;
173 PyObject* totalReadPy;
147 PyObject* totalWritePy;
174 PyObject* totalWritePy;
148
175
149 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
176 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
150 &source, &dest, &inSize, &outSize)) {
177 &source, &dest, &inSize, &outSize)) {
151 return NULL;
178 return NULL;
152 }
179 }
153
180
154 if (!PyObject_HasAttrString(source, "read")) {
181 if (!PyObject_HasAttrString(source, "read")) {
155 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
182 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
156 return NULL;
183 return NULL;
157 }
184 }
158
185
159 if (!PyObject_HasAttrString(dest, "write")) {
186 if (!PyObject_HasAttrString(dest, "write")) {
160 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
187 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
161 return NULL;
188 return NULL;
162 }
189 }
163
190
164 /* Prevent free on uninitialized memory in finally. */
191 /* Prevent free on uninitialized memory in finally. */
165 output.dst = NULL;
192 output.dst = NULL;
166
193
167 dstream = DStream_from_ZstdDecompressor(self);
194 if (0 != init_dstream(self)) {
168 if (!dstream) {
169 res = NULL;
195 res = NULL;
170 goto finally;
196 goto finally;
171 }
197 }
172
198
173 output.dst = PyMem_Malloc(outSize);
199 output.dst = PyMem_Malloc(outSize);
174 if (!output.dst) {
200 if (!output.dst) {
175 PyErr_NoMemory();
201 PyErr_NoMemory();
176 res = NULL;
202 res = NULL;
177 goto finally;
203 goto finally;
178 }
204 }
179 output.size = outSize;
205 output.size = outSize;
180 output.pos = 0;
206 output.pos = 0;
181
207
182 /* Read source stream until EOF */
208 /* Read source stream until EOF */
183 while (1) {
209 while (1) {
184 readResult = PyObject_CallMethod(source, "read", "n", inSize);
210 readResult = PyObject_CallMethod(source, "read", "n", inSize);
185 if (!readResult) {
211 if (!readResult) {
186 PyErr_SetString(ZstdError, "could not read() from source");
212 PyErr_SetString(ZstdError, "could not read() from source");
187 goto finally;
213 goto finally;
188 }
214 }
189
215
190 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
216 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
191
217
192 /* If no data was read, we're at EOF. */
218 /* If no data was read, we're at EOF. */
193 if (0 == readSize) {
219 if (0 == readSize) {
194 break;
220 break;
195 }
221 }
196
222
197 totalRead += readSize;
223 totalRead += readSize;
198
224
199 /* Send data to decompressor */
225 /* Send data to decompressor */
200 input.src = readBuffer;
226 input.src = readBuffer;
201 input.size = readSize;
227 input.size = readSize;
202 input.pos = 0;
228 input.pos = 0;
203
229
204 while (input.pos < input.size) {
230 while (input.pos < input.size) {
205 Py_BEGIN_ALLOW_THREADS
231 Py_BEGIN_ALLOW_THREADS
206 zresult = ZSTD_decompressStream(dstream, &output, &input);
232 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
207 Py_END_ALLOW_THREADS
233 Py_END_ALLOW_THREADS
208
234
209 if (ZSTD_isError(zresult)) {
235 if (ZSTD_isError(zresult)) {
210 PyErr_Format(ZstdError, "zstd decompressor error: %s",
236 PyErr_Format(ZstdError, "zstd decompressor error: %s",
211 ZSTD_getErrorName(zresult));
237 ZSTD_getErrorName(zresult));
212 res = NULL;
238 res = NULL;
213 goto finally;
239 goto finally;
214 }
240 }
215
241
216 if (output.pos) {
242 if (output.pos) {
217 #if PY_MAJOR_VERSION >= 3
243 #if PY_MAJOR_VERSION >= 3
218 writeResult = PyObject_CallMethod(dest, "write", "y#",
244 writeResult = PyObject_CallMethod(dest, "write", "y#",
219 #else
245 #else
220 writeResult = PyObject_CallMethod(dest, "write", "s#",
246 writeResult = PyObject_CallMethod(dest, "write", "s#",
221 #endif
247 #endif
222 output.dst, output.pos);
248 output.dst, output.pos);
223
249
224 Py_XDECREF(writeResult);
250 Py_XDECREF(writeResult);
225 totalWrite += output.pos;
251 totalWrite += output.pos;
226 output.pos = 0;
252 output.pos = 0;
227 }
253 }
228 }
254 }
229 }
255 }
230
256
231 /* Source stream is exhausted. Finish up. */
257 /* Source stream is exhausted. Finish up. */
232
258
233 ZSTD_freeDStream(dstream);
234 dstream = NULL;
235
236 totalReadPy = PyLong_FromSsize_t(totalRead);
259 totalReadPy = PyLong_FromSsize_t(totalRead);
237 totalWritePy = PyLong_FromSsize_t(totalWrite);
260 totalWritePy = PyLong_FromSsize_t(totalWrite);
238 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
261 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
239 Py_DecRef(totalReadPy);
262 Py_DECREF(totalReadPy);
240 Py_DecRef(totalWritePy);
263 Py_DECREF(totalWritePy);
241
264
242 finally:
265 finally:
243 if (output.dst) {
266 if (output.dst) {
244 PyMem_Free(output.dst);
267 PyMem_Free(output.dst);
245 }
268 }
246
269
247 if (dstream) {
248 ZSTD_freeDStream(dstream);
249 }
250
251 return res;
270 return res;
252 }
271 }
253
272
254 PyDoc_STRVAR(Decompressor_decompress__doc__,
273 PyDoc_STRVAR(Decompressor_decompress__doc__,
255 "decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n"
274 "decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n"
256 "\n"
275 "\n"
257 "This method will decompress the entirety of the argument and return the\n"
276 "This method will decompress the entirety of the argument and return the\n"
258 "result.\n"
277 "result.\n"
259 "\n"
278 "\n"
260 "The input bytes are expected to contain a full Zstandard frame (something\n"
279 "The input bytes are expected to contain a full Zstandard frame (something\n"
261 "compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n"
280 "compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n"
262 "not contain a full frame, an exception will be raised.\n"
281 "not contain a full frame, an exception will be raised.\n"
263 "\n"
282 "\n"
264 "If the frame header of the compressed data does not contain the content size\n"
283 "If the frame header of the compressed data does not contain the content size\n"
265 "``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n"
284 "``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n"
266 "allocation of size ``max_output_size`` will be performed and an attempt will\n"
285 "allocation of size ``max_output_size`` will be performed and an attempt will\n"
267 "be made to perform decompression into that buffer. If the buffer is too\n"
286 "be made to perform decompression into that buffer. If the buffer is too\n"
268 "small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n"
287 "small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n"
269 "be resized if it is too large.\n"
288 "be resized if it is too large.\n"
270 "\n"
289 "\n"
271 "Uncompressed data could be much larger than compressed data. As a result,\n"
290 "Uncompressed data could be much larger than compressed data. As a result,\n"
272 "calling this function could result in a very large memory allocation being\n"
291 "calling this function could result in a very large memory allocation being\n"
273 "performed to hold the uncompressed data. Therefore it is **highly**\n"
292 "performed to hold the uncompressed data. Therefore it is **highly**\n"
274 "recommended to use a streaming decompression method instead of this one.\n"
293 "recommended to use a streaming decompression method instead of this one.\n"
275 );
294 );
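A usage sketch covering both cases described above. The write_content_size flag is assumed to be the compressor option that embeds the decompressed size in the frame header, which decompress() relies on when no max_output_size is given.

    import zstd

    frame = zstd.ZstdCompressor(write_content_size=True).compress(b"x" * 4096)

    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress(frame) == b"x" * 4096

    # If the frame header lacked a content size, a cap would be required:
    #   dctx.decompress(frame_without_size, max_output_size=1048576)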
276
295
277 PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
296 PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
278 static char* kwlist[] = {
297 static char* kwlist[] = {
279 "data",
298 "data",
280 "max_output_size",
299 "max_output_size",
281 NULL
300 NULL
282 };
301 };
283
302
284 const char* source;
303 const char* source;
285 Py_ssize_t sourceSize;
304 Py_ssize_t sourceSize;
286 Py_ssize_t maxOutputSize = 0;
305 Py_ssize_t maxOutputSize = 0;
287 unsigned long long decompressedSize;
306 unsigned long long decompressedSize;
288 size_t destCapacity;
307 size_t destCapacity;
289 PyObject* result = NULL;
308 PyObject* result = NULL;
290 void* dictData = NULL;
309 void* dictData = NULL;
291 size_t dictSize = 0;
310 size_t dictSize = 0;
292 size_t zresult;
311 size_t zresult;
293
312
294 #if PY_MAJOR_VERSION >= 3
313 #if PY_MAJOR_VERSION >= 3
295 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n:decompress",
314 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n:decompress",
296 #else
315 #else
297 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n:decompress",
316 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n:decompress",
298 #endif
317 #endif
299 kwlist, &source, &sourceSize, &maxOutputSize)) {
318 kwlist, &source, &sourceSize, &maxOutputSize)) {
300 return NULL;
319 return NULL;
301 }
320 }
302
321
303 if (self->dict) {
322 if (self->dict) {
304 dictData = self->dict->dictData;
323 dictData = self->dict->dictData;
305 dictSize = self->dict->dictSize;
324 dictSize = self->dict->dictSize;
306 }
325 }
307
326
308 if (dictData && !self->ddict) {
327 if (dictData && !self->ddict) {
309 Py_BEGIN_ALLOW_THREADS
328 Py_BEGIN_ALLOW_THREADS
310 self->ddict = ZSTD_createDDict_byReference(dictData, dictSize);
329 self->ddict = ZSTD_createDDict_byReference(dictData, dictSize);
311 Py_END_ALLOW_THREADS
330 Py_END_ALLOW_THREADS
312
331
313 if (!self->ddict) {
332 if (!self->ddict) {
314 PyErr_SetString(ZstdError, "could not create decompression dict");
333 PyErr_SetString(ZstdError, "could not create decompression dict");
315 return NULL;
334 return NULL;
316 }
335 }
317 }
336 }
318
337
319 decompressedSize = ZSTD_getDecompressedSize(source, sourceSize);
338 decompressedSize = ZSTD_getDecompressedSize(source, sourceSize);
320 /* 0 returned if content size not in the zstd frame header */
339 /* 0 returned if content size not in the zstd frame header */
321 if (0 == decompressedSize) {
340 if (0 == decompressedSize) {
322 if (0 == maxOutputSize) {
341 if (0 == maxOutputSize) {
323 PyErr_SetString(ZstdError, "input data invalid or missing content size "
342 PyErr_SetString(ZstdError, "input data invalid or missing content size "
324 "in frame header");
343 "in frame header");
325 return NULL;
344 return NULL;
326 }
345 }
327 else {
346 else {
328 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
347 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
329 destCapacity = maxOutputSize;
348 destCapacity = maxOutputSize;
330 }
349 }
331 }
350 }
332 else {
351 else {
333 result = PyBytes_FromStringAndSize(NULL, decompressedSize);
352 result = PyBytes_FromStringAndSize(NULL, decompressedSize);
334 destCapacity = decompressedSize;
353 destCapacity = decompressedSize;
335 }
354 }
336
355
337 if (!result) {
356 if (!result) {
338 return NULL;
357 return NULL;
339 }
358 }
340
359
341 Py_BEGIN_ALLOW_THREADS
360 Py_BEGIN_ALLOW_THREADS
342 if (self->ddict) {
361 if (self->ddict) {
343 zresult = ZSTD_decompress_usingDDict(self->dctx,
362 zresult = ZSTD_decompress_usingDDict(self->dctx,
344 PyBytes_AsString(result), destCapacity,
363 PyBytes_AsString(result), destCapacity,
345 source, sourceSize, self->ddict);
364 source, sourceSize, self->ddict);
346 }
365 }
347 else {
366 else {
348 zresult = ZSTD_decompressDCtx(self->dctx,
367 zresult = ZSTD_decompressDCtx(self->dctx,
349 PyBytes_AsString(result), destCapacity, source, sourceSize);
368 PyBytes_AsString(result), destCapacity, source, sourceSize);
350 }
369 }
351 Py_END_ALLOW_THREADS
370 Py_END_ALLOW_THREADS
352
371
353 if (ZSTD_isError(zresult)) {
372 if (ZSTD_isError(zresult)) {
354 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
373 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
355 Py_DecRef(result);
374 Py_DECREF(result);
356 return NULL;
375 return NULL;
357 }
376 }
358 else if (decompressedSize && zresult != decompressedSize) {
377 else if (decompressedSize && zresult != decompressedSize) {
359 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
378 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
360 zresult, decompressedSize);
379 zresult, decompressedSize);
361 Py_DecRef(result);
380 Py_DECREF(result);
362 return NULL;
381 return NULL;
363 }
382 }
364 else if (zresult < destCapacity) {
383 else if (zresult < destCapacity) {
365 if (_PyBytes_Resize(&result, zresult)) {
384 if (_PyBytes_Resize(&result, zresult)) {
366 Py_DecRef(result);
385 Py_DECREF(result);
367 return NULL;
386 return NULL;
368 }
387 }
369 }
388 }
370
389
371 return result;
390 return result;
372 }
391 }
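The content-size check above means one-shot decompression needs either a frame header that records the decompressed size or an explicit output cap. A minimal Python-level sketch, assuming the module name ``zstd`` from the type names in this file and the ``max_output_size`` keyword for the cap (the frame variables are placeholders):

    import zstd

    dctx = zstd.ZstdDecompressor()

    # Frame written with its content size in the header: no hint needed.
    data = dctx.decompress(frame_with_size)

    # Frame without an embedded content size: a cap must be supplied,
    # otherwise the "missing content size in frame header" error above
    # is raised.
    data = dctx.decompress(frame_without_size, max_output_size=1 << 20)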
373
392
374 PyDoc_STRVAR(Decompressor_decompressobj__doc__,
393 PyDoc_STRVAR(Decompressor_decompressobj__doc__,
375 "decompressobj()\n"
394 "decompressobj()\n"
376 "\n"
395 "\n"
377 "Incrementally feed data into a decompressor.\n"
396 "Incrementally feed data into a decompressor.\n"
378 "\n"
397 "\n"
379 "The returned object exposes a ``decompress(data)`` method. This makes it\n"
398 "The returned object exposes a ``decompress(data)`` method. This makes it\n"
380 "compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n"
399 "compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n"
381 "callers can swap in the zstd decompressor while using the same API.\n"
400 "callers can swap in the zstd decompressor while using the same API.\n"
382 );
401 );
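A minimal usage sketch of the ``decompressobj()`` API described above (the chunk source and sink are placeholders):

    import zstd

    dobj = zstd.ZstdDecompressor().decompressobj()

    # Same shape as zlib.decompressobj: feed compressed chunks as they
    # arrive and consume whatever output each call produces.
    for compressed_chunk in compressed_chunks:   # placeholder iterable
        output = dobj.decompress(compressed_chunk)
        if output:
            consume(output)                      # placeholder sink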
383
402
384 static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self) {
403 static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self) {
385 ZstdDecompressionObj* result = PyObject_New(ZstdDecompressionObj, &ZstdDecompressionObjType);
404 ZstdDecompressionObj* result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL);
386 if (!result) {
405 if (!result) {
387 return NULL;
406 return NULL;
388 }
407 }
389
408
390 result->dstream = DStream_from_ZstdDecompressor(self);
409 if (0 != init_dstream(self)) {
391 if (!result->dstream) {
410 Py_DECREF(result);
392 Py_DecRef((PyObject*)result);
393 return NULL;
411 return NULL;
394 }
412 }
395
413
396 result->decompressor = self;
414 result->decompressor = self;
397 Py_INCREF(result->decompressor);
415 Py_INCREF(result->decompressor);
398
416
399 result->finished = 0;
400
401 return result;
417 return result;
402 }
418 }
403
419
404 PyDoc_STRVAR(Decompressor_read_from__doc__,
420 PyDoc_STRVAR(Decompressor_read_from__doc__,
405 "read_from(reader[, read_size=default, write_size=default, skip_bytes=0])\n"
421 "read_from(reader[, read_size=default, write_size=default, skip_bytes=0])\n"
406 "Read compressed data and return an iterator\n"
422 "Read compressed data and return an iterator\n"
407 "\n"
423 "\n"
408 "Returns an iterator of decompressed data chunks produced from reading from\n"
424 "Returns an iterator of decompressed data chunks produced from reading from\n"
409 "the ``reader``.\n"
425 "the ``reader``.\n"
410 "\n"
426 "\n"
411 "Compressed data will be obtained from ``reader`` by calling the\n"
427 "Compressed data will be obtained from ``reader`` by calling the\n"
412 "``read(size)`` method of it. The source data will be streamed into a\n"
428 "``read(size)`` method of it. The source data will be streamed into a\n"
413 "decompressor. As decompressed data is available, it will be exposed to the\n"
429 "decompressor. As decompressed data is available, it will be exposed to the\n"
414 "returned iterator.\n"
430 "returned iterator.\n"
415 "\n"
431 "\n"
416 "Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n"
432 "Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n"
417 "iterator in chunks of size ``write_size``. The default values are the input\n"
433 "iterator in chunks of size ``write_size``. The default values are the input\n"
418 "and output sizes for a zstd streaming decompressor.\n"
434 "and output sizes for a zstd streaming decompressor.\n"
419 "\n"
435 "\n"
420 "There is also support for skipping the first ``skip_bytes`` of data from\n"
436 "There is also support for skipping the first ``skip_bytes`` of data from\n"
421 "the source.\n"
437 "the source.\n"
422 );
438 );
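A sketch of ``read_from()`` as described above, assuming a file opened in binary mode as the reader (the file name and consumer are placeholders):

    import zstd

    dctx = zstd.ZstdDecompressor()
    with open('data.zst', 'rb') as fh:
        # Input is read() in read_size pieces and yielded as decompressed
        # chunks of up to write_size bytes.
        for chunk in dctx.read_from(fh, read_size=8192):
            consume(chunk)                       # placeholder consumer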
423
439
424 static ZstdDecompressorIterator* Decompressor_read_from(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
440 static ZstdDecompressorIterator* Decompressor_read_from(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
425 static char* kwlist[] = {
441 static char* kwlist[] = {
426 "reader",
442 "reader",
427 "read_size",
443 "read_size",
428 "write_size",
444 "write_size",
429 "skip_bytes",
445 "skip_bytes",
430 NULL
446 NULL
431 };
447 };
432
448
433 PyObject* reader;
449 PyObject* reader;
434 size_t inSize = ZSTD_DStreamInSize();
450 size_t inSize = ZSTD_DStreamInSize();
435 size_t outSize = ZSTD_DStreamOutSize();
451 size_t outSize = ZSTD_DStreamOutSize();
436 ZstdDecompressorIterator* result;
452 ZstdDecompressorIterator* result;
437 size_t skipBytes = 0;
453 size_t skipBytes = 0;
438
454
439 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_from", kwlist,
455 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_from", kwlist,
440 &reader, &inSize, &outSize, &skipBytes)) {
456 &reader, &inSize, &outSize, &skipBytes)) {
441 return NULL;
457 return NULL;
442 }
458 }
443
459
444 if (skipBytes >= inSize) {
460 if (skipBytes >= inSize) {
445 PyErr_SetString(PyExc_ValueError,
461 PyErr_SetString(PyExc_ValueError,
446 "skip_bytes must be smaller than read_size");
462 "skip_bytes must be smaller than read_size");
447 return NULL;
463 return NULL;
448 }
464 }
449
465
450 result = PyObject_New(ZstdDecompressorIterator, &ZstdDecompressorIteratorType);
466 result = (ZstdDecompressorIterator*)PyObject_CallObject((PyObject*)&ZstdDecompressorIteratorType, NULL);
451 if (!result) {
467 if (!result) {
452 return NULL;
468 return NULL;
453 }
469 }
454
470
455 result->decompressor = NULL;
456 result->reader = NULL;
457 result->buffer = NULL;
458 result->dstream = NULL;
459 result->input.src = NULL;
460 result->output.dst = NULL;
461
462 if (PyObject_HasAttrString(reader, "read")) {
471 if (PyObject_HasAttrString(reader, "read")) {
463 result->reader = reader;
472 result->reader = reader;
464 Py_INCREF(result->reader);
473 Py_INCREF(result->reader);
465 }
474 }
466 else if (1 == PyObject_CheckBuffer(reader)) {
475 else if (1 == PyObject_CheckBuffer(reader)) {
467 /* Object claims it is a buffer. Try to get a handle to it. */
476 /* Object claims it is a buffer. Try to get a handle to it. */
468 result->buffer = PyMem_Malloc(sizeof(Py_buffer));
477 result->buffer = PyMem_Malloc(sizeof(Py_buffer));
469 if (!result->buffer) {
478 if (!result->buffer) {
470 goto except;
479 goto except;
471 }
480 }
472
481
473 memset(result->buffer, 0, sizeof(Py_buffer));
482 memset(result->buffer, 0, sizeof(Py_buffer));
474
483
475 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
484 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
476 goto except;
485 goto except;
477 }
486 }
478
479 result->bufferOffset = 0;
480 }
487 }
481 else {
488 else {
482 PyErr_SetString(PyExc_ValueError,
489 PyErr_SetString(PyExc_ValueError,
483 "must pass an object with a read() method or conforms to buffer protocol");
490 "must pass an object with a read() method or conforms to buffer protocol");
484 goto except;
491 goto except;
485 }
492 }
486
493
487 result->decompressor = self;
494 result->decompressor = self;
488 Py_INCREF(result->decompressor);
495 Py_INCREF(result->decompressor);
489
496
490 result->inSize = inSize;
497 result->inSize = inSize;
491 result->outSize = outSize;
498 result->outSize = outSize;
492 result->skipBytes = skipBytes;
499 result->skipBytes = skipBytes;
493
500
494 result->dstream = DStream_from_ZstdDecompressor(self);
501 if (0 != init_dstream(self)) {
495 if (!result->dstream) {
496 goto except;
502 goto except;
497 }
503 }
498
504
499 result->input.src = PyMem_Malloc(inSize);
505 result->input.src = PyMem_Malloc(inSize);
500 if (!result->input.src) {
506 if (!result->input.src) {
501 PyErr_NoMemory();
507 PyErr_NoMemory();
502 goto except;
508 goto except;
503 }
509 }
504 result->input.size = 0;
505 result->input.pos = 0;
506
507 result->output.dst = NULL;
508 result->output.size = 0;
509 result->output.pos = 0;
510
511 result->readCount = 0;
512 result->finishedInput = 0;
513 result->finishedOutput = 0;
514
510
515 goto finally;
511 goto finally;
516
512
517 except:
513 except:
518 Py_CLEAR(result->reader);
514 Py_CLEAR(result->reader);
519
515
520 if (result->buffer) {
516 if (result->buffer) {
521 PyBuffer_Release(result->buffer);
517 PyBuffer_Release(result->buffer);
522 Py_CLEAR(result->buffer);
518 Py_CLEAR(result->buffer);
523 }
519 }
524
520
525 Py_CLEAR(result);
521 Py_CLEAR(result);
526
522
527 finally:
523 finally:
528
524
529 return result;
525 return result;
530 }
526 }
531
527
532 PyDoc_STRVAR(Decompressor_write_to__doc__,
528 PyDoc_STRVAR(Decompressor_write_to__doc__,
533 "Create a context manager to write decompressed data to an object.\n"
529 "Create a context manager to write decompressed data to an object.\n"
534 "\n"
530 "\n"
535 "The passed object must have a ``write()`` method.\n"
531 "The passed object must have a ``write()`` method.\n"
536 "\n"
532 "\n"
537 "The caller feeds intput data to the object by calling ``write(data)``.\n"
533 "The caller feeds intput data to the object by calling ``write(data)``.\n"
538 "Decompressed data is written to the argument given as it is decompressed.\n"
534 "Decompressed data is written to the argument given as it is decompressed.\n"
539 "\n"
535 "\n"
540 "An optional ``write_size`` argument defines the size of chunks to\n"
536 "An optional ``write_size`` argument defines the size of chunks to\n"
541 "``write()`` to the writer. It defaults to the default output size for a zstd\n"
537 "``write()`` to the writer. It defaults to the default output size for a zstd\n"
542 "streaming decompressor.\n"
538 "streaming decompressor.\n"
543 );
539 );
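A sketch of the ``write_to()`` context manager described above (the output path and the source of compressed bytes are placeholders):

    import zstd

    dctx = zstd.ZstdDecompressor()
    with open('output.bin', 'wb') as ofh:
        with dctx.write_to(ofh) as decompressor:
            # Compressed input goes in via write(); decompressed output is
            # written to ofh in write_size chunks.
            decompressor.write(compressed_bytes)  # placeholder input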
544
540
545 static ZstdDecompressionWriter* Decompressor_write_to(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
541 static ZstdDecompressionWriter* Decompressor_write_to(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
546 static char* kwlist[] = {
542 static char* kwlist[] = {
547 "writer",
543 "writer",
548 "write_size",
544 "write_size",
549 NULL
545 NULL
550 };
546 };
551
547
552 PyObject* writer;
548 PyObject* writer;
553 size_t outSize = ZSTD_DStreamOutSize();
549 size_t outSize = ZSTD_DStreamOutSize();
554 ZstdDecompressionWriter* result;
550 ZstdDecompressionWriter* result;
555
551
556 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:write_to", kwlist,
552 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:write_to", kwlist,
557 &writer, &outSize)) {
553 &writer, &outSize)) {
558 return NULL;
554 return NULL;
559 }
555 }
560
556
561 if (!PyObject_HasAttrString(writer, "write")) {
557 if (!PyObject_HasAttrString(writer, "write")) {
562 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
558 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
563 return NULL;
559 return NULL;
564 }
560 }
565
561
566 result = PyObject_New(ZstdDecompressionWriter, &ZstdDecompressionWriterType);
562 result = (ZstdDecompressionWriter*)PyObject_CallObject((PyObject*)&ZstdDecompressionWriterType, NULL);
567 if (!result) {
563 if (!result) {
568 return NULL;
564 return NULL;
569 }
565 }
570
566
571 result->decompressor = self;
567 result->decompressor = self;
572 Py_INCREF(result->decompressor);
568 Py_INCREF(result->decompressor);
573
569
574 result->writer = writer;
570 result->writer = writer;
575 Py_INCREF(result->writer);
571 Py_INCREF(result->writer);
576
572
577 result->outSize = outSize;
573 result->outSize = outSize;
578
574
579 result->entered = 0;
580 result->dstream = NULL;
581
582 return result;
575 return result;
583 }
576 }
584
577
585 PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
578 PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
586 "Decompress a series of chunks using the content dictionary chaining technique\n"
579 "Decompress a series of chunks using the content dictionary chaining technique\n"
587 );
580 );
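As the implementation below shows, each frame after the first is decompressed using the previous frame's decompressed output as a content dictionary, and every frame must record its content size. A usage sketch (``chain`` is a placeholder list of such frames):

    import zstd

    dctx = zstd.ZstdDecompressor()

    # chain[0] is a plain zstd frame; chain[N] was compressed using the
    # decompressed output of chain[N-1] as a dictionary. The return value
    # is the fully decompressed final entry in the chain.
    final_text = dctx.decompress_content_dict_chain(chain)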
588
581
589 static PyObject* Decompressor_decompress_content_dict_chain(PyObject* self, PyObject* args, PyObject* kwargs) {
582 static PyObject* Decompressor_decompress_content_dict_chain(PyObject* self, PyObject* args, PyObject* kwargs) {
590 static char* kwlist[] = {
583 static char* kwlist[] = {
591 "frames",
584 "frames",
592 NULL
585 NULL
593 };
586 };
594
587
595 PyObject* chunks;
588 PyObject* chunks;
596 Py_ssize_t chunksLen;
589 Py_ssize_t chunksLen;
597 Py_ssize_t chunkIndex;
590 Py_ssize_t chunkIndex;
598 char parity = 0;
591 char parity = 0;
599 PyObject* chunk;
592 PyObject* chunk;
600 char* chunkData;
593 char* chunkData;
601 Py_ssize_t chunkSize;
594 Py_ssize_t chunkSize;
602 ZSTD_DCtx* dctx = NULL;
595 ZSTD_DCtx* dctx = NULL;
603 size_t zresult;
596 size_t zresult;
604 ZSTD_frameParams frameParams;
597 ZSTD_frameParams frameParams;
605 void* buffer1 = NULL;
598 void* buffer1 = NULL;
606 size_t buffer1Size = 0;
599 size_t buffer1Size = 0;
607 size_t buffer1ContentSize = 0;
600 size_t buffer1ContentSize = 0;
608 void* buffer2 = NULL;
601 void* buffer2 = NULL;
609 size_t buffer2Size = 0;
602 size_t buffer2Size = 0;
610 size_t buffer2ContentSize = 0;
603 size_t buffer2ContentSize = 0;
611 void* destBuffer = NULL;
604 void* destBuffer = NULL;
612 PyObject* result = NULL;
605 PyObject* result = NULL;
613
606
614 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
607 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
615 kwlist, &PyList_Type, &chunks)) {
608 kwlist, &PyList_Type, &chunks)) {
616 return NULL;
609 return NULL;
617 }
610 }
618
611
619 chunksLen = PyList_Size(chunks);
612 chunksLen = PyList_Size(chunks);
620 if (!chunksLen) {
613 if (!chunksLen) {
621 PyErr_SetString(PyExc_ValueError, "empty input chain");
614 PyErr_SetString(PyExc_ValueError, "empty input chain");
622 return NULL;
615 return NULL;
623 }
616 }
624
617
625 /* The first chunk should not be using a dictionary. We handle it specially. */
618 /* The first chunk should not be using a dictionary. We handle it specially. */
626 chunk = PyList_GetItem(chunks, 0);
619 chunk = PyList_GetItem(chunks, 0);
627 if (!PyBytes_Check(chunk)) {
620 if (!PyBytes_Check(chunk)) {
628 PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes");
621 PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes");
629 return NULL;
622 return NULL;
630 }
623 }
631
624
632 /* We require that all chunks be zstd frames and that they have content size set. */
625 /* We require that all chunks be zstd frames and that they have content size set. */
633 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
626 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
634 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
627 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
635 if (ZSTD_isError(zresult)) {
628 if (ZSTD_isError(zresult)) {
636 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
629 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
637 return NULL;
630 return NULL;
638 }
631 }
639 else if (zresult) {
632 else if (zresult) {
640 PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame");
633 PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame");
641 return NULL;
634 return NULL;
642 }
635 }
643
636
644 if (0 == frameParams.frameContentSize) {
637 if (0 == frameParams.frameContentSize) {
645 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
638 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
646 return NULL;
639 return NULL;
647 }
640 }
648
641
649 dctx = ZSTD_createDCtx();
642 dctx = ZSTD_createDCtx();
650 if (!dctx) {
643 if (!dctx) {
651 PyErr_NoMemory();
644 PyErr_NoMemory();
652 goto finally;
645 goto finally;
653 }
646 }
654
647
655 buffer1Size = frameParams.frameContentSize;
648 buffer1Size = frameParams.frameContentSize;
656 buffer1 = PyMem_Malloc(buffer1Size);
649 buffer1 = PyMem_Malloc(buffer1Size);
657 if (!buffer1) {
650 if (!buffer1) {
658 goto finally;
651 goto finally;
659 }
652 }
660
653
661 Py_BEGIN_ALLOW_THREADS
654 Py_BEGIN_ALLOW_THREADS
662 zresult = ZSTD_decompressDCtx(dctx, buffer1, buffer1Size, chunkData, chunkSize);
655 zresult = ZSTD_decompressDCtx(dctx, buffer1, buffer1Size, chunkData, chunkSize);
663 Py_END_ALLOW_THREADS
656 Py_END_ALLOW_THREADS
664 if (ZSTD_isError(zresult)) {
657 if (ZSTD_isError(zresult)) {
665 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
658 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
666 goto finally;
659 goto finally;
667 }
660 }
668
661
669 buffer1ContentSize = zresult;
662 buffer1ContentSize = zresult;
670
663
671 /* Special case of a simple chain. */
664 /* Special case of a simple chain. */
672 if (1 == chunksLen) {
665 if (1 == chunksLen) {
673 result = PyBytes_FromStringAndSize(buffer1, buffer1Size);
666 result = PyBytes_FromStringAndSize(buffer1, buffer1Size);
674 goto finally;
667 goto finally;
675 }
668 }
676
669
677 /* This should ideally look at the next chunk. But this is slightly simpler. */
670 /* This should ideally look at the next chunk. But this is slightly simpler. */
678 buffer2Size = frameParams.frameContentSize;
671 buffer2Size = frameParams.frameContentSize;
679 buffer2 = PyMem_Malloc(buffer2Size);
672 buffer2 = PyMem_Malloc(buffer2Size);
680 if (!buffer2) {
673 if (!buffer2) {
681 goto finally;
674 goto finally;
682 }
675 }
683
676
684 /* For each subsequent chunk, use the previous fulltext as a content dictionary.
677 /* For each subsequent chunk, use the previous fulltext as a content dictionary.
685 Our strategy is to have 2 buffers. One holds the previous fulltext (to be
678 Our strategy is to have 2 buffers. One holds the previous fulltext (to be
686 used as a content dictionary) and the other holds the new fulltext. The
679 used as a content dictionary) and the other holds the new fulltext. The
687 buffers grow when needed but never decrease in size. This limits the
680 buffers grow when needed but never decrease in size. This limits the
688 memory allocator overhead.
681 memory allocator overhead.
689 */
682 */
690 for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
683 for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
691 chunk = PyList_GetItem(chunks, chunkIndex);
684 chunk = PyList_GetItem(chunks, chunkIndex);
692 if (!PyBytes_Check(chunk)) {
685 if (!PyBytes_Check(chunk)) {
693 PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
686 PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
694 goto finally;
687 goto finally;
695 }
688 }
696
689
697 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
690 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
698 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
691 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
699 if (ZSTD_isError(zresult)) {
692 if (ZSTD_isError(zresult)) {
700 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
693 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
701 goto finally;
694 goto finally;
702 }
695 }
703 else if (zresult) {
696 else if (zresult) {
704 PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
697 PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
705 goto finally;
698 goto finally;
706 }
699 }
707
700
708 if (0 == frameParams.frameContentSize) {
701 if (0 == frameParams.frameContentSize) {
709 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
702 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
710 goto finally;
703 goto finally;
711 }
704 }
712
705
713 parity = chunkIndex % 2;
706 parity = chunkIndex % 2;
714
707
715 /* This could definitely be abstracted to reduce code duplication. */
708 /* This could definitely be abstracted to reduce code duplication. */
716 if (parity) {
709 if (parity) {
717 /* Resize destination buffer to hold larger content. */
710 /* Resize destination buffer to hold larger content. */
718 if (buffer2Size < frameParams.frameContentSize) {
711 if (buffer2Size < frameParams.frameContentSize) {
719 buffer2Size = frameParams.frameContentSize;
712 buffer2Size = frameParams.frameContentSize;
720 destBuffer = PyMem_Realloc(buffer2, buffer2Size);
713 destBuffer = PyMem_Realloc(buffer2, buffer2Size);
721 if (!destBuffer) {
714 if (!destBuffer) {
722 goto finally;
715 goto finally;
723 }
716 }
724 buffer2 = destBuffer;
717 buffer2 = destBuffer;
725 }
718 }
726
719
727 Py_BEGIN_ALLOW_THREADS
720 Py_BEGIN_ALLOW_THREADS
728 zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size,
721 zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size,
729 chunkData, chunkSize, buffer1, buffer1ContentSize);
722 chunkData, chunkSize, buffer1, buffer1ContentSize);
730 Py_END_ALLOW_THREADS
723 Py_END_ALLOW_THREADS
731 if (ZSTD_isError(zresult)) {
724 if (ZSTD_isError(zresult)) {
732 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
725 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
733 chunkIndex, ZSTD_getErrorName(zresult));
726 chunkIndex, ZSTD_getErrorName(zresult));
734 goto finally;
727 goto finally;
735 }
728 }
736 buffer2ContentSize = zresult;
729 buffer2ContentSize = zresult;
737 }
730 }
738 else {
731 else {
739 if (buffer1Size < frameParams.frameContentSize) {
732 if (buffer1Size < frameParams.frameContentSize) {
740 buffer1Size = frameParams.frameContentSize;
733 buffer1Size = frameParams.frameContentSize;
741 destBuffer = PyMem_Realloc(buffer1, buffer1Size);
734 destBuffer = PyMem_Realloc(buffer1, buffer1Size);
742 if (!destBuffer) {
735 if (!destBuffer) {
743 goto finally;
736 goto finally;
744 }
737 }
745 buffer1 = destBuffer;
738 buffer1 = destBuffer;
746 }
739 }
747
740
748 Py_BEGIN_ALLOW_THREADS
741 Py_BEGIN_ALLOW_THREADS
749 zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size,
742 zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size,
750 chunkData, chunkSize, buffer2, buffer2ContentSize);
743 chunkData, chunkSize, buffer2, buffer2ContentSize);
751 Py_END_ALLOW_THREADS
744 Py_END_ALLOW_THREADS
752 if (ZSTD_isError(zresult)) {
745 if (ZSTD_isError(zresult)) {
753 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
746 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
754 chunkIndex, ZSTD_getErrorName(zresult));
747 chunkIndex, ZSTD_getErrorName(zresult));
755 goto finally;
748 goto finally;
756 }
749 }
757 buffer1ContentSize = zresult;
750 buffer1ContentSize = zresult;
758 }
751 }
759 }
752 }
760
753
761 result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
754 result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
762 parity ? buffer2ContentSize : buffer1ContentSize);
755 parity ? buffer2ContentSize : buffer1ContentSize);
763
756
764 finally:
757 finally:
765 if (buffer2) {
758 if (buffer2) {
766 PyMem_Free(buffer2);
759 PyMem_Free(buffer2);
767 }
760 }
768 if (buffer1) {
761 if (buffer1) {
769 PyMem_Free(buffer1);
762 PyMem_Free(buffer1);
770 }
763 }
771
764
772 if (dctx) {
765 if (dctx) {
773 ZSTD_freeDCtx(dctx);
766 ZSTD_freeDCtx(dctx);
774 }
767 }
775
768
776 return result;
769 return result;
777 }
770 }
778
771
772 typedef struct {
773 void* sourceData;
774 size_t sourceSize;
775 unsigned long long destSize;
776 } FramePointer;
777
778 typedef struct {
779 FramePointer* frames;
780 Py_ssize_t framesSize;
781 unsigned long long compressedSize;
782 } FrameSources;
783
784 typedef struct {
785 void* dest;
786 Py_ssize_t destSize;
787 BufferSegment* segments;
788 Py_ssize_t segmentsSize;
789 } DestBuffer;
790
791 typedef enum {
792 WorkerError_none = 0,
793 WorkerError_zstd = 1,
794 WorkerError_memory = 2,
795 WorkerError_sizeMismatch = 3,
796 WorkerError_unknownSize = 4,
797 } WorkerError;
798
799 typedef struct {
800 /* Source records and length */
801 FramePointer* framePointers;
802 /* Which records to process. */
803 Py_ssize_t startOffset;
804 Py_ssize_t endOffset;
805 unsigned long long totalSourceSize;
806
807 /* Compression state and settings. */
808 ZSTD_DCtx* dctx;
809 ZSTD_DDict* ddict;
810 int requireOutputSizes;
811
812 /* Output storage. */
813 DestBuffer* destBuffers;
814 Py_ssize_t destCount;
815
816 /* Item that error occurred on. */
817 Py_ssize_t errorOffset;
818 /* If an error occurred. */
819 WorkerError error;
820 /* result from zstd decompression operation */
821 size_t zresult;
822 } WorkerState;
823
824 static void decompress_worker(WorkerState* state) {
825 size_t allocationSize;
826 DestBuffer* destBuffer;
827 Py_ssize_t frameIndex;
828 Py_ssize_t localOffset = 0;
829 Py_ssize_t currentBufferStartIndex = state->startOffset;
830 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
831 void* tmpBuf;
832 Py_ssize_t destOffset = 0;
833 FramePointer* framePointers = state->framePointers;
834 size_t zresult;
835 unsigned long long totalOutputSize = 0;
836
837 assert(NULL == state->destBuffers);
838 assert(0 == state->destCount);
839 assert(state->endOffset - state->startOffset >= 0);
840
841 /*
842 * We need to allocate a buffer to hold decompressed data. How we do this
843 * depends on what we know about the output. The following scenarios are
844 * possible:
845 *
846 * 1. All structs defining frames declare the output size.
847 * 2. The decompressed size is embedded within the zstd frame.
848 * 3. The decompressed size is not stored anywhere.
849 *
850 * For now, we only support #1 and #2.
851 */
852
853 /* Resolve output segments. */
854 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
855 FramePointer* fp = &framePointers[frameIndex];
856
857 if (0 == fp->destSize) {
858 fp->destSize = ZSTD_getDecompressedSize(fp->sourceData, fp->sourceSize);
859 if (0 == fp->destSize && state->requireOutputSizes) {
860 state->error = WorkerError_unknownSize;
861 state->errorOffset = frameIndex;
862 return;
863 }
864 }
865
866 totalOutputSize += fp->destSize;
867 }
868
869 state->destBuffers = calloc(1, sizeof(DestBuffer));
870 if (NULL == state->destBuffers) {
871 state->error = WorkerError_memory;
872 return;
873 }
874
875 state->destCount = 1;
876
877 destBuffer = &state->destBuffers[state->destCount - 1];
878
879 assert(framePointers[state->startOffset].destSize > 0); /* For now. */
880
881 allocationSize = roundpow2(state->totalSourceSize);
882
883 if (framePointers[state->startOffset].destSize > allocationSize) {
884 allocationSize = roundpow2(framePointers[state->startOffset].destSize);
885 }
886
887 destBuffer->dest = malloc(allocationSize);
888 if (NULL == destBuffer->dest) {
889 state->error = WorkerError_memory;
890 return;
891 }
892
893 destBuffer->destSize = allocationSize;
894
895 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
896 if (NULL == destBuffer->segments) {
897 /* Caller will free state->dest as part of cleanup. */
898 state->error = WorkerError_memory;
899 return;
900 }
901
902 destBuffer->segmentsSize = remainingItems;
903
904 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
905 const void* source = framePointers[frameIndex].sourceData;
906 const size_t sourceSize = framePointers[frameIndex].sourceSize;
907 void* dest;
908 const size_t decompressedSize = framePointers[frameIndex].destSize;
909 size_t destAvailable = destBuffer->destSize - destOffset;
910
911 assert(decompressedSize > 0); /* For now. */
912
913 /*
914 * Not enough space in the current buffer. Finish the current one, then
915 * allocate and switch to a new one.
916 */
917 if (decompressedSize > destAvailable) {
918 /*
919 * Shrinking the destination buffer is optional. But it should be cheap,
920 * so we just do it.
921 */
922 if (destAvailable) {
923 tmpBuf = realloc(destBuffer->dest, destOffset);
924 if (NULL == tmpBuf) {
925 state->error = WorkerError_memory;
926 return;
927 }
928
929 destBuffer->dest = tmpBuf;
930 destBuffer->destSize = destOffset;
931 }
932
933 /* Truncate segments buffer. */
934 tmpBuf = realloc(destBuffer->segments,
935 (frameIndex - currentBufferStartIndex) * sizeof(BufferSegment));
936 if (NULL == tmpBuf) {
937 state->error = WorkerError_memory;
938 return;
939 }
940
941 destBuffer->segments = tmpBuf;
942 destBuffer->segmentsSize = frameIndex - currentBufferStartIndex;
943
944 /* Grow space for new DestBuffer. */
945 tmpBuf = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
946 if (NULL == tmpBuf) {
947 state->error = WorkerError_memory;
948 return;
949 }
950
951 state->destBuffers = tmpBuf;
952 state->destCount++;
953
954 destBuffer = &state->destBuffers[state->destCount - 1];
955
956 /* Don't take any chances with non-NULL pointers. */
957 memset(destBuffer, 0, sizeof(DestBuffer));
958
959 allocationSize = roundpow2(state->totalSourceSize);
960
961 if (decompressedSize > allocationSize) {
962 allocationSize = roundpow2(decompressedSize);
963 }
964
965 destBuffer->dest = malloc(allocationSize);
966 if (NULL == destBuffer->dest) {
967 state->error = WorkerError_memory;
968 return;
969 }
970
971 destBuffer->destSize = allocationSize;
972 destAvailable = allocationSize;
973 destOffset = 0;
974 localOffset = 0;
975
976 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
977 if (NULL == destBuffer->segments) {
978 state->error = WorkerError_memory;
979 return;
980 }
981
982 destBuffer->segmentsSize = remainingItems;
983 currentBufferStartIndex = frameIndex;
984 }
985
986 dest = (char*)destBuffer->dest + destOffset;
987
988 if (state->ddict) {
989 zresult = ZSTD_decompress_usingDDict(state->dctx, dest, decompressedSize,
990 source, sourceSize, state->ddict);
991 }
992 else {
993 zresult = ZSTD_decompressDCtx(state->dctx, dest, decompressedSize,
994 source, sourceSize);
995 }
996
997 if (ZSTD_isError(zresult)) {
998 state->error = WorkerError_zstd;
999 state->zresult = zresult;
1000 state->errorOffset = frameIndex;
1001 return;
1002 }
1003 else if (zresult != decompressedSize) {
1004 state->error = WorkerError_sizeMismatch;
1005 state->zresult = zresult;
1006 state->errorOffset = frameIndex;
1007 return;
1008 }
1009
1010 destBuffer->segments[localOffset].offset = destOffset;
1011 destBuffer->segments[localOffset].length = decompressedSize;
1012 destOffset += zresult;
1013 localOffset++;
1014 remainingItems--;
1015 }
1016
1017 if (destBuffer->destSize > destOffset) {
1018 tmpBuf = realloc(destBuffer->dest, destOffset);
1019 if (NULL == tmpBuf) {
1020 state->error = WorkerError_memory;
1021 return;
1022 }
1023
1024 destBuffer->dest = tmpBuf;
1025 destBuffer->destSize = destOffset;
1026 }
1027 }
1028
1029 ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames,
1030 unsigned int threadCount) {
1031 void* dictData = NULL;
1032 size_t dictSize = 0;
1033 Py_ssize_t i = 0;
1034 int errored = 0;
1035 Py_ssize_t segmentsCount;
1036 ZstdBufferWithSegments* bws = NULL;
1037 PyObject* resultArg = NULL;
1038 Py_ssize_t resultIndex;
1039 ZstdBufferWithSegmentsCollection* result = NULL;
1040 FramePointer* framePointers = frames->frames;
1041 unsigned long long workerBytes = 0;
1042 int currentThread = 0;
1043 Py_ssize_t workerStartOffset = 0;
1044 POOL_ctx* pool = NULL;
1045 WorkerState* workerStates = NULL;
1046 unsigned long long bytesPerWorker;
1047
1048 /* Caller should normalize 0 and negative values to 1 or larger. */
1049 assert(threadCount >= 1);
1050
1051 /* More threads than inputs makes no sense under any conditions. */
1052 threadCount = frames->framesSize < threadCount ? (unsigned int)frames->framesSize
1053 : threadCount;
1054
1055 /* TODO lower thread count if input size is too small and threads would just
1056 add overhead. */
1057
1058 if (decompressor->dict) {
1059 dictData = decompressor->dict->dictData;
1060 dictSize = decompressor->dict->dictSize;
1061 }
1062
1063 if (dictData && !decompressor->ddict) {
1064 Py_BEGIN_ALLOW_THREADS
1065 decompressor->ddict = ZSTD_createDDict_byReference(dictData, dictSize);
1066 Py_END_ALLOW_THREADS
1067
1068 if (!decompressor->ddict) {
1069 PyErr_SetString(ZstdError, "could not create decompression dict");
1070 return NULL;
1071 }
1072 }
1073
1074 /* If threadCount==1, we don't start a thread pool. But we do leverage the
1075 same API for dispatching work. */
1076 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1077 if (NULL == workerStates) {
1078 PyErr_NoMemory();
1079 goto finally;
1080 }
1081
1082 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1083
1084 if (threadCount > 1) {
1085 pool = POOL_create(threadCount, 1);
1086 if (NULL == pool) {
1087 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1088 goto finally;
1089 }
1090 }
1091
1092 bytesPerWorker = frames->compressedSize / threadCount;
1093
1094 for (i = 0; i < threadCount; i++) {
1095 workerStates[i].dctx = ZSTD_createDCtx();
1096 if (NULL == workerStates[i].dctx) {
1097 PyErr_NoMemory();
1098 goto finally;
1099 }
1100
1101 ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx);
1102
1103 workerStates[i].ddict = decompressor->ddict;
1104 workerStates[i].framePointers = framePointers;
1105 workerStates[i].requireOutputSizes = 1;
1106 }
1107
1108 Py_BEGIN_ALLOW_THREADS
1109 /* There are many ways to split work among workers.
1110
1111 For now, we take a simple approach of splitting work so each worker
1112 gets roughly the same number of input bytes. This will result in more
1113 starvation than running N>threadCount jobs. But it avoids complications
1114 around state tracking, which could involve extra locking.
1115 */
1116 for (i = 0; i < frames->framesSize; i++) {
1117 workerBytes += frames->frames[i].sourceSize;
1118
1119 /*
1120 * The last worker/thread needs to handle all remaining work. Don't
1121 * trigger it prematurely. Defer to the block outside of the loop.
1122 * (But still process this loop so workerBytes is correct.)
1123 */
1124 if (currentThread == threadCount - 1) {
1125 continue;
1126 }
1127
1128 if (workerBytes >= bytesPerWorker) {
1129 workerStates[currentThread].startOffset = workerStartOffset;
1130 workerStates[currentThread].endOffset = i;
1131 workerStates[currentThread].totalSourceSize = workerBytes;
1132
1133 if (threadCount > 1) {
1134 POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
1135 }
1136 else {
1137 decompress_worker(&workerStates[currentThread]);
1138 }
1139 currentThread++;
1140 workerStartOffset = i + 1;
1141 workerBytes = 0;
1142 }
1143 }
1144
1145 if (workerBytes) {
1146 workerStates[currentThread].startOffset = workerStartOffset;
1147 workerStates[currentThread].endOffset = frames->framesSize - 1;
1148 workerStates[currentThread].totalSourceSize = workerBytes;
1149
1150 if (threadCount > 1) {
1151 POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
1152 }
1153 else {
1154 decompress_worker(&workerStates[currentThread]);
1155 }
1156 }
1157
1158 if (threadCount > 1) {
1159 POOL_free(pool);
1160 pool = NULL;
1161 }
1162 Py_END_ALLOW_THREADS
1163
1164 for (i = 0; i < threadCount; i++) {
1165 switch (workerStates[i].error) {
1166 case WorkerError_none:
1167 break;
1168
1169 case WorkerError_zstd:
1170 PyErr_Format(ZstdError, "error decompressing item %zd: %s",
1171 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1172 errored = 1;
1173 break;
1174
1175 case WorkerError_memory:
1176 PyErr_NoMemory();
1177 errored = 1;
1178 break;
1179
1180 case WorkerError_sizeMismatch:
1181 PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %llu",
1182 workerStates[i].errorOffset, workerStates[i].zresult,
1183 framePointers[workerStates[i].errorOffset].destSize);
1184 errored = 1;
1185 break;
1186
1187 case WorkerError_unknownSize:
1188 PyErr_Format(PyExc_ValueError, "could not determine decompressed size of item %zd",
1189 workerStates[i].errorOffset);
1190 errored = 1;
1191 break;
1192
1193 default:
1194 PyErr_Format(ZstdError, "unhandled error type: %d; this is a bug",
1195 workerStates[i].error);
1196 errored = 1;
1197 break;
1198 }
1199
1200 if (errored) {
1201 break;
1202 }
1203 }
1204
1205 if (errored) {
1206 goto finally;
1207 }
1208
1209 segmentsCount = 0;
1210 for (i = 0; i < threadCount; i++) {
1211 segmentsCount += workerStates[i].destCount;
1212 }
1213
1214 resultArg = PyTuple_New(segmentsCount);
1215 if (NULL == resultArg) {
1216 goto finally;
1217 }
1218
1219 resultIndex = 0;
1220
1221 for (i = 0; i < threadCount; i++) {
1222 Py_ssize_t bufferIndex;
1223 WorkerState* state = &workerStates[i];
1224
1225 for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
1226 DestBuffer* destBuffer = &state->destBuffers[bufferIndex];
1227
1228 bws = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1229 destBuffer->segments, destBuffer->segmentsSize);
1230 if (NULL == bws) {
1231 goto finally;
1232 }
1233
1234 /*
1235 * Memory for the buffer and segments was allocated using malloc() in the worker
1236 * and the memory is transferred to the BufferWithSegments instance. So
1237 * tell the instance to use free() and NULL the reference in the state struct
1238 * so it isn't freed below.
1239 */
1240 bws->useFree = 1;
1241 destBuffer->dest = NULL;
1242 destBuffer->segments = NULL;
1243
1244 PyTuple_SET_ITEM(resultArg, resultIndex++, (PyObject*)bws);
1245 }
1246 }
1247
1248 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1249 (PyObject*)&ZstdBufferWithSegmentsCollectionType, resultArg);
1250
1251 finally:
1252 Py_CLEAR(resultArg);
1253
1254 if (workerStates) {
1255 for (i = 0; i < threadCount; i++) {
1256 Py_ssize_t bufferIndex;
1257 WorkerState* state = &workerStates[i];
1258
1259 if (state->dctx) {
1260 ZSTD_freeDCtx(state->dctx);
1261 }
1262
1263 for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
1264 if (state->destBuffers) {
1265 /*
1266 * Will be NULL if memory was transferred to a BufferWithSegments.
1267 * Otherwise it is left over after an error occurred.
1268 */
1269 free(state->destBuffers[bufferIndex].dest);
1270 free(state->destBuffers[bufferIndex].segments);
1271 }
1272 }
1273
1274 free(state->destBuffers);
1275 }
1276
1277 PyMem_Free(workerStates);
1278 }
1279
1280 POOL_free(pool);
1281
1282 return result;
1283 }
1284
1285 PyDoc_STRVAR(Decompressor_multi_decompress_to_buffer__doc__,
1286 "Decompress multiple frames to output buffers\n"
1287 "\n"
1288 "Receives a ``BufferWithSegments``, a ``BufferWithSegmentsCollection`` or a\n"
1289 "list of bytes-like objects. Each item in the passed collection should be a\n"
1290 "compressed zstd frame.\n"
1291 "\n"
1292 "Unless ``decompressed_sizes`` is specified, the content size *must* be\n"
1293 "written into the zstd frame header. If ``decompressed_sizes`` is specified,\n"
1294 "it is an object conforming to the buffer protocol that represents an array\n"
1295 "of 64-bit unsigned integers in the machine's native format. Specifying\n"
1296 "``decompressed_sizes`` avoids a pre-scan of each frame to determine its\n"
1297 "output size.\n"
1298 "\n"
1299 "Returns a ``BufferWithSegmentsCollection`` containing the decompressed\n"
1300 "data. All decompressed data is allocated in a single memory buffer. The\n"
1301 "``BufferWithSegments`` instance tracks which objects are at which offsets\n"
1302 "and their respective lengths.\n"
1303 "\n"
1304 "The ``threads`` argument controls how many threads to use for operations.\n"
1305 "Negative values will use the same number of threads as logical CPUs on the\n"
1306 "machine.\n"
1307 );
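A sketch of ``multi_decompress_to_buffer()`` per the docstring above (``frames`` and ``expected_sizes`` are placeholders):

    import struct
    import zstd

    dctx = zstd.ZstdDecompressor()

    # frames: a list of bytes-like zstd frames, each recording its content
    # size in the frame header. threads=-1 uses one thread per logical CPU.
    collection = dctx.multi_decompress_to_buffer(frames, threads=-1)

    # If the frames do not record their content size, pass the sizes as a
    # native-format array of 64-bit unsigned integers.
    sizes = struct.pack('=' + 'Q' * len(frames), *expected_sizes)
    collection = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)

The returned ``BufferWithSegmentsCollection`` tracks the offset and length of every decompressed frame within its backing buffer(s).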
1308
1309 static ZstdBufferWithSegmentsCollection* Decompressor_multi_decompress_to_buffer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
1310 static char* kwlist[] = {
1311 "frames",
1312 "decompressed_sizes",
1313 "threads",
1314 NULL
1315 };
1316
1317 PyObject* frames;
1318 Py_buffer frameSizes;
1319 int threads = 0;
1320 Py_ssize_t frameCount;
1321 Py_buffer* frameBuffers = NULL;
1322 FramePointer* framePointers = NULL;
1323 unsigned long long* frameSizesP = NULL;
1324 unsigned long long totalInputSize = 0;
1325 FrameSources frameSources;
1326 ZstdBufferWithSegmentsCollection* result = NULL;
1327 Py_ssize_t i;
1328
1329 memset(&frameSizes, 0, sizeof(frameSizes));
1330
1331 #if PY_MAJOR_VERSION >= 3
1332 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|y*i:multi_decompress_to_buffer",
1333 #else
1334 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s*i:multi_decompress_to_buffer",
1335 #endif
1336 kwlist, &frames, &frameSizes, &threads)) {
1337 return NULL;
1338 }
1339
1340 if (frameSizes.buf) {
1341 if (!PyBuffer_IsContiguous(&frameSizes, 'C') || frameSizes.ndim > 1) {
1342 PyErr_SetString(PyExc_ValueError, "decompressed_sizes buffer should be contiguous and have a single dimension");
1343 goto finally;
1344 }
1345
1346 frameSizesP = (unsigned long long*)frameSizes.buf;
1347 }
1348
1349 if (threads < 0) {
1350 threads = cpu_count();
1351 }
1352
1353 if (threads < 2) {
1354 threads = 1;
1355 }
1356
1357 if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsType)) {
1358 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)frames;
1359 frameCount = buffer->segmentCount;
1360
1361 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1362 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1363 frameCount * sizeof(unsigned long long), frameSizes.len);
1364 goto finally;
1365 }
1366
1367 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1368 if (!framePointers) {
1369 PyErr_NoMemory();
1370 goto finally;
1371 }
1372
1373 for (i = 0; i < frameCount; i++) {
1374 void* sourceData;
1375 unsigned long long sourceSize;
1376 unsigned long long decompressedSize = 0;
1377
1378 if (buffer->segments[i].offset + buffer->segments[i].length > buffer->dataSize) {
1379 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", i);
1380 goto finally;
1381 }
1382
1383 sourceData = (char*)buffer->data + buffer->segments[i].offset;
1384 sourceSize = buffer->segments[i].length;
1385 totalInputSize += sourceSize;
1386
1387 if (frameSizesP) {
1388 decompressedSize = frameSizesP[i];
1389 }
1390
1391 framePointers[i].sourceData = sourceData;
1392 framePointers[i].sourceSize = sourceSize;
1393 framePointers[i].destSize = decompressedSize;
1394 }
1395 }
1396 else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) {
1397 Py_ssize_t offset = 0;
1398 ZstdBufferWithSegments* buffer;
1399 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)frames;
1400
1401 frameCount = BufferWithSegmentsCollection_length(collection);
1402
1403 if (frameSizes.buf && frameSizes.len != frameCount) {
1404 PyErr_Format(PyExc_ValueError,
1405 "decompressed_sizes size mismatch; expected %zd; got %zd",
1406 frameCount * sizeof(unsigned long long), frameSizes.len);
1407 goto finally;
1408 }
1409
1410 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1411 if (NULL == framePointers) {
1412 PyErr_NoMemory();
1413 goto finally;
1414 }
1415
1416 /* Iterate the data structure directly because it is faster. */
1417 for (i = 0; i < collection->bufferCount; i++) {
1418 Py_ssize_t segmentIndex;
1419 buffer = collection->buffers[i];
1420
1421 for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) {
1422 if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) {
1423 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area",
1424 offset);
1425 goto finally;
1426 }
1427
1428 totalInputSize += buffer->segments[segmentIndex].length;
1429
1430 framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset;
1431 framePointers[offset].sourceSize = buffer->segments[segmentIndex].length;
1432 framePointers[offset].destSize = frameSizesP ? frameSizesP[offset] : 0;
1433
1434 offset++;
1435 }
1436 }
1437 }
1438 else if (PyList_Check(frames)) {
1439 frameCount = PyList_GET_SIZE(frames);
1440
1441 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1442 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1443 frameCount * sizeof(unsigned long long), frameSizes.len);
1444 goto finally;
1445 }
1446
1447 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1448 if (!framePointers) {
1449 PyErr_NoMemory();
1450 goto finally;
1451 }
1452
1453 /*
1454 * It is not clear whether Py_buffer.buf is still valid after
1455 * PyBuffer_Release. So, we hold a reference to all Py_buffer instances
1456 * for the duration of the operation.
1457 */
1458 frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer));
1459 if (NULL == frameBuffers) {
1460 PyErr_NoMemory();
1461 goto finally;
1462 }
1463
1464 memset(frameBuffers, 0, frameCount * sizeof(Py_buffer));
1465
1466 /* Do a pass to assemble info about our input buffers and output sizes. */
1467 for (i = 0; i < frameCount; i++) {
1468 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i),
1469 &frameBuffers[i], PyBUF_CONTIG_RO)) {
1470 PyErr_Clear();
1471 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1472 goto finally;
1473 }
1474
1475 totalInputSize += frameBuffers[i].len;
1476
1477 framePointers[i].sourceData = frameBuffers[i].buf;
1478 framePointers[i].sourceSize = frameBuffers[i].len;
1479 framePointers[i].destSize = frameSizesP ? frameSizesP[i] : 0;
1480 }
1481 }
1482 else {
1483 PyErr_SetString(PyExc_TypeError, "argument must be list or BufferWithSegments");
1484 goto finally;
1485 }
1486
1487 /* We now have an array with info about our inputs and outputs. Feed it into
1488 our generic decompression function. */
1489 frameSources.frames = framePointers;
1490 frameSources.framesSize = frameCount;
1491 frameSources.compressedSize = totalInputSize;
1492
1493 result = decompress_from_framesources(self, &frameSources, threads);
1494
1495 finally:
1496 if (frameSizes.buf) {
1497 PyBuffer_Release(&frameSizes);
1498 }
1499 PyMem_Free(framePointers);
1500
1501 if (frameBuffers) {
1502 for (i = 0; i < frameCount; i++) {
1503 PyBuffer_Release(&frameBuffers[i]);
1504 }
1505
1506 PyMem_Free(frameBuffers);
1507 }
1508
1509 return result;
1510 }
1511
779 static PyMethodDef Decompressor_methods[] = {
1512 static PyMethodDef Decompressor_methods[] = {
780 { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
1513 { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
781 Decompressor_copy_stream__doc__ },
1514 Decompressor_copy_stream__doc__ },
782 { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS,
1515 { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS,
783 Decompressor_decompress__doc__ },
1516 Decompressor_decompress__doc__ },
784 { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_NOARGS,
1517 { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_NOARGS,
785 Decompressor_decompressobj__doc__ },
1518 Decompressor_decompressobj__doc__ },
786 { "read_from", (PyCFunction)Decompressor_read_from, METH_VARARGS | METH_KEYWORDS,
1519 { "read_from", (PyCFunction)Decompressor_read_from, METH_VARARGS | METH_KEYWORDS,
787 Decompressor_read_from__doc__ },
1520 Decompressor_read_from__doc__ },
788 { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS,
1521 { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS,
789 Decompressor_write_to__doc__ },
1522 Decompressor_write_to__doc__ },
790 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
1523 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
791 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
1524 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
1525 { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer,
1526 METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ },
792 { NULL, NULL }
1527 { NULL, NULL }
793 };
1528 };
794
1529
795 PyTypeObject ZstdDecompressorType = {
1530 PyTypeObject ZstdDecompressorType = {
796 PyVarObject_HEAD_INIT(NULL, 0)
1531 PyVarObject_HEAD_INIT(NULL, 0)
797 "zstd.ZstdDecompressor", /* tp_name */
1532 "zstd.ZstdDecompressor", /* tp_name */
798 sizeof(ZstdDecompressor), /* tp_basicsize */
1533 sizeof(ZstdDecompressor), /* tp_basicsize */
799 0, /* tp_itemsize */
1534 0, /* tp_itemsize */
800 (destructor)Decompressor_dealloc, /* tp_dealloc */
1535 (destructor)Decompressor_dealloc, /* tp_dealloc */
801 0, /* tp_print */
1536 0, /* tp_print */
802 0, /* tp_getattr */
1537 0, /* tp_getattr */
803 0, /* tp_setattr */
1538 0, /* tp_setattr */
804 0, /* tp_compare */
1539 0, /* tp_compare */
805 0, /* tp_repr */
1540 0, /* tp_repr */
806 0, /* tp_as_number */
1541 0, /* tp_as_number */
807 0, /* tp_as_sequence */
1542 0, /* tp_as_sequence */
808 0, /* tp_as_mapping */
1543 0, /* tp_as_mapping */
809 0, /* tp_hash */
1544 0, /* tp_hash */
810 0, /* tp_call */
1545 0, /* tp_call */
811 0, /* tp_str */
1546 0, /* tp_str */
812 0, /* tp_getattro */
1547 0, /* tp_getattro */
813 0, /* tp_setattro */
1548 0, /* tp_setattro */
814 0, /* tp_as_buffer */
1549 0, /* tp_as_buffer */
815 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1550 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
816 Decompressor__doc__, /* tp_doc */
1551 Decompressor__doc__, /* tp_doc */
817 0, /* tp_traverse */
1552 0, /* tp_traverse */
818 0, /* tp_clear */
1553 0, /* tp_clear */
819 0, /* tp_richcompare */
1554 0, /* tp_richcompare */
820 0, /* tp_weaklistoffset */
1555 0, /* tp_weaklistoffset */
821 0, /* tp_iter */
1556 0, /* tp_iter */
822 0, /* tp_iternext */
1557 0, /* tp_iternext */
823 Decompressor_methods, /* tp_methods */
1558 Decompressor_methods, /* tp_methods */
824 0, /* tp_members */
1559 0, /* tp_members */
825 0, /* tp_getset */
1560 0, /* tp_getset */
826 0, /* tp_base */
1561 0, /* tp_base */
827 0, /* tp_dict */
1562 0, /* tp_dict */
828 0, /* tp_descr_get */
1563 0, /* tp_descr_get */
829 0, /* tp_descr_set */
1564 0, /* tp_descr_set */
830 0, /* tp_dictoffset */
1565 0, /* tp_dictoffset */
831 (initproc)Decompressor_init, /* tp_init */
1566 (initproc)Decompressor_init, /* tp_init */
832 0, /* tp_alloc */
1567 0, /* tp_alloc */
833 PyType_GenericNew, /* tp_new */
1568 PyType_GenericNew, /* tp_new */
834 };
1569 };
835
1570
836 void decompressor_module_init(PyObject* mod) {
1571 void decompressor_module_init(PyObject* mod) {
837 Py_TYPE(&ZstdDecompressorType) = &PyType_Type;
1572 Py_TYPE(&ZstdDecompressorType) = &PyType_Type;
838 if (PyType_Ready(&ZstdDecompressorType) < 0) {
1573 if (PyType_Ready(&ZstdDecompressorType) < 0) {
839 return;
1574 return;
840 }
1575 }
841
1576
842 Py_INCREF((PyObject*)&ZstdDecompressorType);
1577 Py_INCREF((PyObject*)&ZstdDecompressorType);
843 PyModule_AddObject(mod, "ZstdDecompressor",
1578 PyModule_AddObject(mod, "ZstdDecompressor",
844 (PyObject*)&ZstdDecompressorType);
1579 (PyObject*)&ZstdDecompressorType);
845 }
1580 }
@@ -1,254 +1,251
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 #define min(a, b) (((a) < (b)) ? (a) : (b))
11 #define min(a, b) (((a) < (b)) ? (a) : (b))
12
12
13 extern PyObject* ZstdError;
13 extern PyObject* ZstdError;
14
14
15 PyDoc_STRVAR(ZstdDecompressorIterator__doc__,
15 PyDoc_STRVAR(ZstdDecompressorIterator__doc__,
16 "Represents an iterator of decompressed data.\n"
16 "Represents an iterator of decompressed data.\n"
17 );
17 );
18
18
19 static void ZstdDecompressorIterator_dealloc(ZstdDecompressorIterator* self) {
19 static void ZstdDecompressorIterator_dealloc(ZstdDecompressorIterator* self) {
20 Py_XDECREF(self->decompressor);
20 Py_XDECREF(self->decompressor);
21 Py_XDECREF(self->reader);
21 Py_XDECREF(self->reader);
22
22
23 if (self->buffer) {
23 if (self->buffer) {
24 PyBuffer_Release(self->buffer);
24 PyBuffer_Release(self->buffer);
25 PyMem_FREE(self->buffer);
25 PyMem_FREE(self->buffer);
26 self->buffer = NULL;
26 self->buffer = NULL;
27 }
27 }
28
28
29 if (self->dstream) {
30 ZSTD_freeDStream(self->dstream);
31 self->dstream = NULL;
32 }
33
34 if (self->input.src) {
29 if (self->input.src) {
35 PyMem_Free((void*)self->input.src);
30 PyMem_Free((void*)self->input.src);
36 self->input.src = NULL;
31 self->input.src = NULL;
37 }
32 }
38
33
39 PyObject_Del(self);
34 PyObject_Del(self);
40 }
35 }
41
36
42 static PyObject* ZstdDecompressorIterator_iter(PyObject* self) {
37 static PyObject* ZstdDecompressorIterator_iter(PyObject* self) {
43 Py_INCREF(self);
38 Py_INCREF(self);
44 return self;
39 return self;
45 }
40 }
46
41
47 static DecompressorIteratorResult read_decompressor_iterator(ZstdDecompressorIterator* self) {
42 static DecompressorIteratorResult read_decompressor_iterator(ZstdDecompressorIterator* self) {
48 size_t zresult;
43 size_t zresult;
49 PyObject* chunk;
44 PyObject* chunk;
50 DecompressorIteratorResult result;
45 DecompressorIteratorResult result;
51 size_t oldInputPos = self->input.pos;
46 size_t oldInputPos = self->input.pos;
52
47
48 assert(self->decompressor->dstream);
49
53 result.chunk = NULL;
50 result.chunk = NULL;
54
51
55 chunk = PyBytes_FromStringAndSize(NULL, self->outSize);
52 chunk = PyBytes_FromStringAndSize(NULL, self->outSize);
56 if (!chunk) {
53 if (!chunk) {
57 result.errored = 1;
54 result.errored = 1;
58 return result;
55 return result;
59 }
56 }
60
57
61 self->output.dst = PyBytes_AsString(chunk);
58 self->output.dst = PyBytes_AsString(chunk);
62 self->output.size = self->outSize;
59 self->output.size = self->outSize;
63 self->output.pos = 0;
60 self->output.pos = 0;
64
61
65 Py_BEGIN_ALLOW_THREADS
62 Py_BEGIN_ALLOW_THREADS
66 zresult = ZSTD_decompressStream(self->dstream, &self->output, &self->input);
63 zresult = ZSTD_decompressStream(self->decompressor->dstream, &self->output, &self->input);
67 Py_END_ALLOW_THREADS
64 Py_END_ALLOW_THREADS
68
65
69 /* We're done with the pointer. Nullify to prevent anyone from getting a
66 /* We're done with the pointer. Nullify to prevent anyone from getting a
70 handle on a Python object. */
67 handle on a Python object. */
71 self->output.dst = NULL;
68 self->output.dst = NULL;
72
69
73 if (ZSTD_isError(zresult)) {
70 if (ZSTD_isError(zresult)) {
74 Py_DECREF(chunk);
71 Py_DECREF(chunk);
75 PyErr_Format(ZstdError, "zstd decompress error: %s",
72 PyErr_Format(ZstdError, "zstd decompress error: %s",
76 ZSTD_getErrorName(zresult));
73 ZSTD_getErrorName(zresult));
77 result.errored = 1;
74 result.errored = 1;
78 return result;
75 return result;
79 }
76 }
80
77
81 self->readCount += self->input.pos - oldInputPos;
78 self->readCount += self->input.pos - oldInputPos;
82
79
83 /* Frame is fully decoded. Input exhausted and output sitting in buffer. */
80 /* Frame is fully decoded. Input exhausted and output sitting in buffer. */
84 if (0 == zresult) {
81 if (0 == zresult) {
85 self->finishedInput = 1;
82 self->finishedInput = 1;
86 self->finishedOutput = 1;
83 self->finishedOutput = 1;
87 }
84 }
88
85
89 /* If it produced output data, return it. */
86 /* If it produced output data, return it. */
90 if (self->output.pos) {
87 if (self->output.pos) {
91 if (self->output.pos < self->outSize) {
88 if (self->output.pos < self->outSize) {
92 if (_PyBytes_Resize(&chunk, self->output.pos)) {
89 if (_PyBytes_Resize(&chunk, self->output.pos)) {
93 result.errored = 1;
90 result.errored = 1;
94 return result;
91 return result;
95 }
92 }
96 }
93 }
97 }
94 }
98 else {
95 else {
99 Py_DECREF(chunk);
96 Py_DECREF(chunk);
100 chunk = NULL;
97 chunk = NULL;
101 }
98 }
102
99
103 result.errored = 0;
100 result.errored = 0;
104 result.chunk = chunk;
101 result.chunk = chunk;
105
102
106 return result;
103 return result;
107 }
104 }
108
105
109 static PyObject* ZstdDecompressorIterator_iternext(ZstdDecompressorIterator* self) {
106 static PyObject* ZstdDecompressorIterator_iternext(ZstdDecompressorIterator* self) {
110 PyObject* readResult = NULL;
107 PyObject* readResult = NULL;
111 char* readBuffer;
108 char* readBuffer;
112 Py_ssize_t readSize;
109 Py_ssize_t readSize;
113 Py_ssize_t bufferRemaining;
110 Py_ssize_t bufferRemaining;
114 DecompressorIteratorResult result;
111 DecompressorIteratorResult result;
115
112
116 if (self->finishedOutput) {
113 if (self->finishedOutput) {
117 PyErr_SetString(PyExc_StopIteration, "output flushed");
114 PyErr_SetString(PyExc_StopIteration, "output flushed");
118 return NULL;
115 return NULL;
119 }
116 }
120
117
121 /* If we have data left in the input, consume it. */
118 /* If we have data left in the input, consume it. */
122 if (self->input.pos < self->input.size) {
119 if (self->input.pos < self->input.size) {
123 result = read_decompressor_iterator(self);
120 result = read_decompressor_iterator(self);
124 if (result.chunk || result.errored) {
121 if (result.chunk || result.errored) {
125 return result.chunk;
122 return result.chunk;
126 }
123 }
127
124
128 /* Else fall through to get more data from input. */
125 /* Else fall through to get more data from input. */
129 }
126 }
130
127
131 read_from_source:
128 read_from_source:
132
129
133 if (!self->finishedInput) {
130 if (!self->finishedInput) {
134 if (self->reader) {
131 if (self->reader) {
135 readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
132 readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
136 if (!readResult) {
133 if (!readResult) {
137 return NULL;
134 return NULL;
138 }
135 }
139
136
140 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
137 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
141 }
138 }
142 else {
139 else {
143 assert(self->buffer && self->buffer->buf);
140 assert(self->buffer && self->buffer->buf);
144
141
145 /* Only support contiguous C arrays for now */
142 /* Only support contiguous C arrays for now */
146 assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL);
143 assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL);
147 assert(self->buffer->itemsize == 1);
144 assert(self->buffer->itemsize == 1);
148
145
149 /* TODO avoid memcpy() below */
146 /* TODO avoid memcpy() below */
150 readBuffer = (char *)self->buffer->buf + self->bufferOffset;
147 readBuffer = (char *)self->buffer->buf + self->bufferOffset;
151 bufferRemaining = self->buffer->len - self->bufferOffset;
148 bufferRemaining = self->buffer->len - self->bufferOffset;
152 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
149 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
153 self->bufferOffset += readSize;
150 self->bufferOffset += readSize;
154 }
151 }
155
152
156 if (readSize) {
153 if (readSize) {
157 if (!self->readCount && self->skipBytes) {
154 if (!self->readCount && self->skipBytes) {
158 assert(self->skipBytes < self->inSize);
155 assert(self->skipBytes < self->inSize);
159 if ((Py_ssize_t)self->skipBytes >= readSize) {
156 if ((Py_ssize_t)self->skipBytes >= readSize) {
160 PyErr_SetString(PyExc_ValueError,
157 PyErr_SetString(PyExc_ValueError,
161 "skip_bytes larger than first input chunk; "
158 "skip_bytes larger than first input chunk; "
162 "this scenario is currently unsupported");
159 "this scenario is currently unsupported");
163 Py_DecRef(readResult);
160 Py_XDECREF(readResult);
164 return NULL;
161 return NULL;
165 }
162 }
166
163
167 readBuffer = readBuffer + self->skipBytes;
164 readBuffer = readBuffer + self->skipBytes;
168 readSize -= self->skipBytes;
165 readSize -= self->skipBytes;
169 }
166 }
170
167
171 /* Copy input into previously allocated buffer because it can live longer
168 /* Copy input into previously allocated buffer because it can live longer
172 than a single function call and we don't want to keep a ref to a Python
169 than a single function call and we don't want to keep a ref to a Python
173 object around. This could be changed... */
170 object around. This could be changed... */
174 memcpy((void*)self->input.src, readBuffer, readSize);
171 memcpy((void*)self->input.src, readBuffer, readSize);
175 self->input.size = readSize;
172 self->input.size = readSize;
176 self->input.pos = 0;
173 self->input.pos = 0;
177 }
174 }
178 /* No bytes on first read must mean an empty input stream. */
175 /* No bytes on first read must mean an empty input stream. */
179 else if (!self->readCount) {
176 else if (!self->readCount) {
180 self->finishedInput = 1;
177 self->finishedInput = 1;
181 self->finishedOutput = 1;
178 self->finishedOutput = 1;
182 Py_DecRef(readResult);
179 Py_XDECREF(readResult);
183 PyErr_SetString(PyExc_StopIteration, "empty input");
180 PyErr_SetString(PyExc_StopIteration, "empty input");
184 return NULL;
181 return NULL;
185 }
182 }
186 else {
183 else {
187 self->finishedInput = 1;
184 self->finishedInput = 1;
188 }
185 }
189
186
190 /* We've copied the data into memory we manage. Discard the Python object. */
187 /* We've copied the data into memory we manage. Discard the Python object. */
191 Py_DecRef(readResult);
188 Py_XDECREF(readResult);
192 }
189 }
193
190
194 result = read_decompressor_iterator(self);
191 result = read_decompressor_iterator(self);
195 if (result.errored || result.chunk) {
192 if (result.errored || result.chunk) {
196 return result.chunk;
193 return result.chunk;
197 }
194 }
198
195
199 /* No new output data. Try again unless we know there is no more data. */
196 /* No new output data. Try again unless we know there is no more data. */
200 if (!self->finishedInput) {
197 if (!self->finishedInput) {
201 goto read_from_source;
198 goto read_from_source;
202 }
199 }
203
200
204 PyErr_SetString(PyExc_StopIteration, "input exhausted");
201 PyErr_SetString(PyExc_StopIteration, "input exhausted");
205 return NULL;
202 return NULL;
206 }
203 }
207
204
208 PyTypeObject ZstdDecompressorIteratorType = {
205 PyTypeObject ZstdDecompressorIteratorType = {
209 PyVarObject_HEAD_INIT(NULL, 0)
206 PyVarObject_HEAD_INIT(NULL, 0)
210 "zstd.ZstdDecompressorIterator", /* tp_name */
207 "zstd.ZstdDecompressorIterator", /* tp_name */
211 sizeof(ZstdDecompressorIterator), /* tp_basicsize */
208 sizeof(ZstdDecompressorIterator), /* tp_basicsize */
212 0, /* tp_itemsize */
209 0, /* tp_itemsize */
213 (destructor)ZstdDecompressorIterator_dealloc, /* tp_dealloc */
210 (destructor)ZstdDecompressorIterator_dealloc, /* tp_dealloc */
214 0, /* tp_print */
211 0, /* tp_print */
215 0, /* tp_getattr */
212 0, /* tp_getattr */
216 0, /* tp_setattr */
213 0, /* tp_setattr */
217 0, /* tp_compare */
214 0, /* tp_compare */
218 0, /* tp_repr */
215 0, /* tp_repr */
219 0, /* tp_as_number */
216 0, /* tp_as_number */
220 0, /* tp_as_sequence */
217 0, /* tp_as_sequence */
221 0, /* tp_as_mapping */
218 0, /* tp_as_mapping */
222 0, /* tp_hash */
219 0, /* tp_hash */
223 0, /* tp_call */
220 0, /* tp_call */
224 0, /* tp_str */
221 0, /* tp_str */
225 0, /* tp_getattro */
222 0, /* tp_getattro */
226 0, /* tp_setattro */
223 0, /* tp_setattro */
227 0, /* tp_as_buffer */
224 0, /* tp_as_buffer */
228 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
225 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
229 ZstdDecompressorIterator__doc__, /* tp_doc */
226 ZstdDecompressorIterator__doc__, /* tp_doc */
230 0, /* tp_traverse */
227 0, /* tp_traverse */
231 0, /* tp_clear */
228 0, /* tp_clear */
232 0, /* tp_richcompare */
229 0, /* tp_richcompare */
233 0, /* tp_weaklistoffset */
230 0, /* tp_weaklistoffset */
234 ZstdDecompressorIterator_iter, /* tp_iter */
231 ZstdDecompressorIterator_iter, /* tp_iter */
235 (iternextfunc)ZstdDecompressorIterator_iternext, /* tp_iternext */
232 (iternextfunc)ZstdDecompressorIterator_iternext, /* tp_iternext */
236 0, /* tp_methods */
233 0, /* tp_methods */
237 0, /* tp_members */
234 0, /* tp_members */
238 0, /* tp_getset */
235 0, /* tp_getset */
239 0, /* tp_base */
236 0, /* tp_base */
240 0, /* tp_dict */
237 0, /* tp_dict */
241 0, /* tp_descr_get */
238 0, /* tp_descr_get */
242 0, /* tp_descr_set */
239 0, /* tp_descr_set */
243 0, /* tp_dictoffset */
240 0, /* tp_dictoffset */
244 0, /* tp_init */
241 0, /* tp_init */
245 0, /* tp_alloc */
242 0, /* tp_alloc */
246 PyType_GenericNew, /* tp_new */
243 PyType_GenericNew, /* tp_new */
247 };
244 };
248
245
249 void decompressoriterator_module_init(PyObject* mod) {
246 void decompressoriterator_module_init(PyObject* mod) {
250 Py_TYPE(&ZstdDecompressorIteratorType) = &PyType_Type;
247 Py_TYPE(&ZstdDecompressorIteratorType) = &PyType_Type;
251 if (PyType_Ready(&ZstdDecompressorIteratorType) < 0) {
248 if (PyType_Ready(&ZstdDecompressorIteratorType) < 0) {
252 return;
249 return;
253 }
250 }
254 }
251 }
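For context, the iterator implemented above is what the decompressor's streaming read API hands back to callers. A minimal round trip might look like the sketch below; the read_from() method name and its acceptance of a file-like source are assumptions drawn from this version of the bindings rather than a documented contract.

    import io
    import zstd

    cctx = zstd.ZstdCompressor(level=3)
    frame = cctx.compress(b'data to round trip' * 1024)

    dctx = zstd.ZstdDecompressor()
    # read_from() (assumed API) reads compressed bytes from a file-like
    # object and yields decompressed chunks via ZstdDecompressorIterator.
    out = io.BytesIO()
    for chunk in dctx.read_from(io.BytesIO(frame)):
        out.write(chunk)

    assert out.getvalue() == b'data to round trip' * 1024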
@@ -1,132 +1,132
1 /**
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(FrameParameters__doc__,
13 PyDoc_STRVAR(FrameParameters__doc__,
14 "FrameParameters: information about a zstd frame");
14 "FrameParameters: information about a zstd frame");
15
15
16 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args) {
16 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args) {
17 const char* source;
17 const char* source;
18 Py_ssize_t sourceSize;
18 Py_ssize_t sourceSize;
19 ZSTD_frameParams params;
19 ZSTD_frameParams params;
20 FrameParametersObject* result = NULL;
20 FrameParametersObject* result = NULL;
21 size_t zresult;
21 size_t zresult;
22
22
23 #if PY_MAJOR_VERSION >= 3
23 #if PY_MAJOR_VERSION >= 3
24 if (!PyArg_ParseTuple(args, "y#:get_frame_parameters",
24 if (!PyArg_ParseTuple(args, "y#:get_frame_parameters",
25 #else
25 #else
26 if (!PyArg_ParseTuple(args, "s#:get_frame_parameters",
26 if (!PyArg_ParseTuple(args, "s#:get_frame_parameters",
27 #endif
27 #endif
28 &source, &sourceSize)) {
28 &source, &sourceSize)) {
29 return NULL;
29 return NULL;
30 }
30 }
31
31
32 /* Needed for Python 2 to reject unicode */
32 /* Needed for Python 2 to reject unicode */
33 if (!PyBytes_Check(PyTuple_GET_ITEM(args, 0))) {
33 if (!PyBytes_Check(PyTuple_GET_ITEM(args, 0))) {
34 PyErr_SetString(PyExc_TypeError, "argument must be bytes");
34 PyErr_SetString(PyExc_TypeError, "argument must be bytes");
35 return NULL;
35 return NULL;
36 }
36 }
37
37
38 zresult = ZSTD_getFrameParams(&params, (void*)source, sourceSize);
38 zresult = ZSTD_getFrameParams(&params, (void*)source, sourceSize);
39
39
40 if (ZSTD_isError(zresult)) {
40 if (ZSTD_isError(zresult)) {
41 PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult));
41 PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult));
42 return NULL;
42 return NULL;
43 }
43 }
44
44
45 if (zresult) {
45 if (zresult) {
46 PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult);
46 PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult);
47 return NULL;
47 return NULL;
48 }
48 }
49
49
50 result = PyObject_New(FrameParametersObject, &FrameParametersType);
50 result = PyObject_New(FrameParametersObject, &FrameParametersType);
51 if (!result) {
51 if (!result) {
52 return NULL;
52 return NULL;
53 }
53 }
54
54
55 result->frameContentSize = params.frameContentSize;
55 result->frameContentSize = params.frameContentSize;
56 result->windowSize = params.windowSize;
56 result->windowSize = params.windowSize;
57 result->dictID = params.dictID;
57 result->dictID = params.dictID;
58 result->checksumFlag = params.checksumFlag ? 1 : 0;
58 result->checksumFlag = params.checksumFlag ? 1 : 0;
59
59
60 return result;
60 return result;
61 }
61 }
62
62
63 static void FrameParameters_dealloc(PyObject* self) {
63 static void FrameParameters_dealloc(PyObject* self) {
64 PyObject_Del(self);
64 PyObject_Del(self);
65 }
65 }
66
66
67 static PyMemberDef FrameParameters_members[] = {
67 static PyMemberDef FrameParameters_members[] = {
68 { "content_size", T_ULONGLONG,
68 { "content_size", T_ULONGLONG,
69 offsetof(FrameParametersObject, frameContentSize), READONLY,
69 offsetof(FrameParametersObject, frameContentSize), READONLY,
70 "frame content size" },
70 "frame content size" },
71 { "window_size", T_UINT,
71 { "window_size", T_UINT,
72 offsetof(FrameParametersObject, windowSize), READONLY,
72 offsetof(FrameParametersObject, windowSize), READONLY,
73 "window size" },
73 "window size" },
74 { "dict_id", T_UINT,
74 { "dict_id", T_UINT,
75 offsetof(FrameParametersObject, dictID), READONLY,
75 offsetof(FrameParametersObject, dictID), READONLY,
76 "dictionary ID" },
76 "dictionary ID" },
77 { "has_checksum", T_BOOL,
77 { "has_checksum", T_BOOL,
78 offsetof(FrameParametersObject, checksumFlag), READONLY,
78 offsetof(FrameParametersObject, checksumFlag), READONLY,
79 "checksum flag" },
79 "checksum flag" },
80 { NULL }
80 { NULL }
81 };
81 };
82
82
83 PyTypeObject FrameParametersType = {
83 PyTypeObject FrameParametersType = {
84 PyVarObject_HEAD_INIT(NULL, 0)
84 PyVarObject_HEAD_INIT(NULL, 0)
85 "FrameParameters", /* tp_name */
85 "FrameParameters", /* tp_name */
86 sizeof(FrameParametersObject), /* tp_basicsize */
86 sizeof(FrameParametersObject), /* tp_basicsize */
87 0, /* tp_itemsize */
87 0, /* tp_itemsize */
88 (destructor)FrameParameters_dealloc, /* tp_dealloc */
88 (destructor)FrameParameters_dealloc, /* tp_dealloc */
89 0, /* tp_print */
89 0, /* tp_print */
90 0, /* tp_getattr */
90 0, /* tp_getattr */
91 0, /* tp_setattr */
91 0, /* tp_setattr */
92 0, /* tp_compare */
92 0, /* tp_compare */
93 0, /* tp_repr */
93 0, /* tp_repr */
94 0, /* tp_as_number */
94 0, /* tp_as_number */
95 0, /* tp_as_sequence */
95 0, /* tp_as_sequence */
96 0, /* tp_as_mapping */
96 0, /* tp_as_mapping */
97 0, /* tp_hash */
97 0, /* tp_hash */
98 0, /* tp_call */
98 0, /* tp_call */
99 0, /* tp_str */
99 0, /* tp_str */
100 0, /* tp_getattro */
100 0, /* tp_getattro */
101 0, /* tp_setattro */
101 0, /* tp_setattro */
102 0, /* tp_as_buffer */
102 0, /* tp_as_buffer */
103 Py_TPFLAGS_DEFAULT, /* tp_flags */
103 Py_TPFLAGS_DEFAULT, /* tp_flags */
104 FrameParameters__doc__, /* tp_doc */
104 FrameParameters__doc__, /* tp_doc */
105 0, /* tp_traverse */
105 0, /* tp_traverse */
106 0, /* tp_clear */
106 0, /* tp_clear */
107 0, /* tp_richcompare */
107 0, /* tp_richcompare */
108 0, /* tp_weaklistoffset */
108 0, /* tp_weaklistoffset */
109 0, /* tp_iter */
109 0, /* tp_iter */
110 0, /* tp_iternext */
110 0, /* tp_iternext */
111 0, /* tp_methods */
111 0, /* tp_methods */
112 FrameParameters_members, /* tp_members */
112 FrameParameters_members, /* tp_members */
113 0, /* tp_getset */
113 0, /* tp_getset */
114 0, /* tp_base */
114 0, /* tp_base */
115 0, /* tp_dict */
115 0, /* tp_dict */
116 0, /* tp_descr_get */
116 0, /* tp_descr_get */
117 0, /* tp_descr_set */
117 0, /* tp_descr_set */
118 0, /* tp_dictoffset */
118 0, /* tp_dictoffset */
119 0, /* tp_init */
119 0, /* tp_init */
120 0, /* tp_alloc */
120 0, /* tp_alloc */
121 0, /* tp_new */
121 0, /* tp_new */
122 };
122 };
123
123
124 void frameparams_module_init(PyObject* mod) {
124 void frameparams_module_init(PyObject* mod) {
125 Py_TYPE(&FrameParametersType) = &PyType_Type;
125 Py_TYPE(&FrameParametersType) = &PyType_Type;
126 if (PyType_Ready(&FrameParametersType) < 0) {
126 if (PyType_Ready(&FrameParametersType) < 0) {
127 return;
127 return;
128 }
128 }
129
129
130 Py_IncRef((PyObject*)&FrameParametersType);
130 Py_INCREF(&FrameParametersType);
131 PyModule_AddObject(mod, "FrameParameters", (PyObject*)&FrameParametersType);
131 PyModule_AddObject(mod, "FrameParameters", (PyObject*)&FrameParametersType);
132 }
132 }
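The FrameParameters object assembled above is what zstd.get_frame_parameters() returns. A short sketch of inspecting a frame, using the attribute names from the members table (content_size, window_size, dict_id, has_checksum); the expected content_size value matches the test assertions further below:

    import zstd

    cctx = zstd.ZstdCompressor(level=1, write_content_size=True,
                               write_checksum=True)
    frame = cctx.compress(b'foobar' * 256)

    params = zstd.get_frame_parameters(frame)
    # content_size is recorded because write_content_size=True was used.
    assert params.content_size == 1536
    # dict_id stays 0 because no dictionary compressed this frame.
    assert params.dict_id == 0
    assert params.has_checksum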
@@ -1,190 +1,285
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #define PY_SSIZE_T_CLEAN
9 #define PY_SSIZE_T_CLEAN
10 #include <Python.h>
10 #include <Python.h>
11 #include "structmember.h"
11 #include "structmember.h"
12
12
13 #define ZSTD_STATIC_LINKING_ONLY
13 #define ZSTD_STATIC_LINKING_ONLY
14 #define ZDICT_STATIC_LINKING_ONLY
14 #define ZDICT_STATIC_LINKING_ONLY
15 #include "mem.h"
15 #include "mem.h"
16 #include "zstd.h"
16 #include "zstd.h"
17 #include "zdict.h"
17 #include "zdict.h"
18 #include "zstdmt_compress.h"
18
19
19 #define PYTHON_ZSTANDARD_VERSION "0.7.0"
20 #define PYTHON_ZSTANDARD_VERSION "0.8.0"
20
21
21 typedef enum {
22 typedef enum {
22 compressorobj_flush_finish,
23 compressorobj_flush_finish,
23 compressorobj_flush_block,
24 compressorobj_flush_block,
24 } CompressorObj_Flush;
25 } CompressorObj_Flush;
25
26
27 /*
28 Represents a CompressionParameters type.
29
30 This type is basically a wrapper around ZSTD_compressionParameters.
31 */
26 typedef struct {
32 typedef struct {
27 PyObject_HEAD
33 PyObject_HEAD
28 unsigned windowLog;
34 unsigned windowLog;
29 unsigned chainLog;
35 unsigned chainLog;
30 unsigned hashLog;
36 unsigned hashLog;
31 unsigned searchLog;
37 unsigned searchLog;
32 unsigned searchLength;
38 unsigned searchLength;
33 unsigned targetLength;
39 unsigned targetLength;
34 ZSTD_strategy strategy;
40 ZSTD_strategy strategy;
35 } CompressionParametersObject;
41 } CompressionParametersObject;
36
42
37 extern PyTypeObject CompressionParametersType;
43 extern PyTypeObject CompressionParametersType;
38
44
45 /*
46 Represents a FrameParameters type.
47
48 This type is basically a wrapper around ZSTD_frameParams.
49 */
39 typedef struct {
50 typedef struct {
40 PyObject_HEAD
51 PyObject_HEAD
41 unsigned long long frameContentSize;
52 unsigned long long frameContentSize;
42 unsigned windowSize;
53 unsigned windowSize;
43 unsigned dictID;
54 unsigned dictID;
44 char checksumFlag;
55 char checksumFlag;
45 } FrameParametersObject;
56 } FrameParametersObject;
46
57
47 extern PyTypeObject FrameParametersType;
58 extern PyTypeObject FrameParametersType;
48
59
49 typedef struct {
60 /*
50 PyObject_HEAD
61 Represents a ZstdCompressionDict type.
51 unsigned selectivityLevel;
52 int compressionLevel;
53 unsigned notificationLevel;
54 unsigned dictID;
55 } DictParametersObject;
56
62
57 extern PyTypeObject DictParametersType;
63 Instances hold data used for a zstd compression dictionary.
58
64 */
59 typedef struct {
65 typedef struct {
60 PyObject_HEAD
66 PyObject_HEAD
61
67
68 /* Pointer to dictionary data. Owned by self. */
62 void* dictData;
69 void* dictData;
70 /* Size of dictionary data. */
63 size_t dictSize;
71 size_t dictSize;
72 /* k parameter for cover dictionaries. Only populated by train_cover_dictionary(). */
73 unsigned k;
74 /* d parameter for cover dictionaries. Only populated by train_cover_dictionary(). */
75 unsigned d;
64 } ZstdCompressionDict;
76 } ZstdCompressionDict;
65
77
66 extern PyTypeObject ZstdCompressionDictType;
78 extern PyTypeObject ZstdCompressionDictType;
67
79
80 /*
81 Represents a ZstdCompressor type.
82 */
68 typedef struct {
83 typedef struct {
69 PyObject_HEAD
84 PyObject_HEAD
70
85
86 /* Configured compression level. Should be always set. */
71 int compressionLevel;
87 int compressionLevel;
88 /* Number of threads to use for operations. */
89 unsigned int threads;
90 /* Pointer to compression dictionary to use. NULL if not using dictionary
91 compression. */
72 ZstdCompressionDict* dict;
92 ZstdCompressionDict* dict;
93 /* Compression context to use. Populated during object construction. NULL
94 if using multi-threaded compression. */
73 ZSTD_CCtx* cctx;
95 ZSTD_CCtx* cctx;
96 /* Multi-threaded compression context to use. Populated during object
97 construction. NULL if not using multi-threaded compression. */
98 ZSTDMT_CCtx* mtcctx;
99 /* Digested compression dictionary. NULL initially. Populated on first use. */
74 ZSTD_CDict* cdict;
100 ZSTD_CDict* cdict;
101 /* Low-level compression parameter control. NULL unless passed to
102 constructor. Takes precedence over `compressionLevel` if defined. */
75 CompressionParametersObject* cparams;
103 CompressionParametersObject* cparams;
104 /* Controls zstd frame options. */
76 ZSTD_frameParameters fparams;
105 ZSTD_frameParameters fparams;
106 /* Holds state for streaming compression. Shared across all invocations.
107 Populated on first use. */
108 ZSTD_CStream* cstream;
77 } ZstdCompressor;
109 } ZstdCompressor;
78
110
79 extern PyTypeObject ZstdCompressorType;
111 extern PyTypeObject ZstdCompressorType;
80
112
81 typedef struct {
113 typedef struct {
82 PyObject_HEAD
114 PyObject_HEAD
83
115
84 ZstdCompressor* compressor;
116 ZstdCompressor* compressor;
85 ZSTD_CStream* cstream;
86 ZSTD_outBuffer output;
117 ZSTD_outBuffer output;
87 int finished;
118 int finished;
88 } ZstdCompressionObj;
119 } ZstdCompressionObj;
89
120
90 extern PyTypeObject ZstdCompressionObjType;
121 extern PyTypeObject ZstdCompressionObjType;
91
122
92 typedef struct {
123 typedef struct {
93 PyObject_HEAD
124 PyObject_HEAD
94
125
95 ZstdCompressor* compressor;
126 ZstdCompressor* compressor;
96 PyObject* writer;
127 PyObject* writer;
97 Py_ssize_t sourceSize;
128 Py_ssize_t sourceSize;
98 size_t outSize;
129 size_t outSize;
99 ZSTD_CStream* cstream;
100 int entered;
130 int entered;
101 } ZstdCompressionWriter;
131 } ZstdCompressionWriter;
102
132
103 extern PyTypeObject ZstdCompressionWriterType;
133 extern PyTypeObject ZstdCompressionWriterType;
104
134
105 typedef struct {
135 typedef struct {
106 PyObject_HEAD
136 PyObject_HEAD
107
137
108 ZstdCompressor* compressor;
138 ZstdCompressor* compressor;
109 PyObject* reader;
139 PyObject* reader;
110 Py_buffer* buffer;
140 Py_buffer* buffer;
111 Py_ssize_t bufferOffset;
141 Py_ssize_t bufferOffset;
112 Py_ssize_t sourceSize;
142 Py_ssize_t sourceSize;
113 size_t inSize;
143 size_t inSize;
114 size_t outSize;
144 size_t outSize;
115
145
116 ZSTD_CStream* cstream;
117 ZSTD_inBuffer input;
146 ZSTD_inBuffer input;
118 ZSTD_outBuffer output;
147 ZSTD_outBuffer output;
119 int finishedOutput;
148 int finishedOutput;
120 int finishedInput;
149 int finishedInput;
121 PyObject* readResult;
150 PyObject* readResult;
122 } ZstdCompressorIterator;
151 } ZstdCompressorIterator;
123
152
124 extern PyTypeObject ZstdCompressorIteratorType;
153 extern PyTypeObject ZstdCompressorIteratorType;
125
154
126 typedef struct {
155 typedef struct {
127 PyObject_HEAD
156 PyObject_HEAD
128
157
129 ZSTD_DCtx* dctx;
158 ZSTD_DCtx* dctx;
130
159
131 ZstdCompressionDict* dict;
160 ZstdCompressionDict* dict;
132 ZSTD_DDict* ddict;
161 ZSTD_DDict* ddict;
162 ZSTD_DStream* dstream;
133 } ZstdDecompressor;
163 } ZstdDecompressor;
134
164
135 extern PyTypeObject ZstdDecompressorType;
165 extern PyTypeObject ZstdDecompressorType;
136
166
137 typedef struct {
167 typedef struct {
138 PyObject_HEAD
168 PyObject_HEAD
139
169
140 ZstdDecompressor* decompressor;
170 ZstdDecompressor* decompressor;
141 ZSTD_DStream* dstream;
142 int finished;
171 int finished;
143 } ZstdDecompressionObj;
172 } ZstdDecompressionObj;
144
173
145 extern PyTypeObject ZstdDecompressionObjType;
174 extern PyTypeObject ZstdDecompressionObjType;
146
175
147 typedef struct {
176 typedef struct {
148 PyObject_HEAD
177 PyObject_HEAD
149
178
150 ZstdDecompressor* decompressor;
179 ZstdDecompressor* decompressor;
151 PyObject* writer;
180 PyObject* writer;
152 size_t outSize;
181 size_t outSize;
153 ZSTD_DStream* dstream;
154 int entered;
182 int entered;
155 } ZstdDecompressionWriter;
183 } ZstdDecompressionWriter;
156
184
157 extern PyTypeObject ZstdDecompressionWriterType;
185 extern PyTypeObject ZstdDecompressionWriterType;
158
186
159 typedef struct {
187 typedef struct {
160 PyObject_HEAD
188 PyObject_HEAD
161
189
162 ZstdDecompressor* decompressor;
190 ZstdDecompressor* decompressor;
163 PyObject* reader;
191 PyObject* reader;
164 Py_buffer* buffer;
192 Py_buffer* buffer;
165 Py_ssize_t bufferOffset;
193 Py_ssize_t bufferOffset;
166 size_t inSize;
194 size_t inSize;
167 size_t outSize;
195 size_t outSize;
168 size_t skipBytes;
196 size_t skipBytes;
169 ZSTD_DStream* dstream;
170 ZSTD_inBuffer input;
197 ZSTD_inBuffer input;
171 ZSTD_outBuffer output;
198 ZSTD_outBuffer output;
172 Py_ssize_t readCount;
199 Py_ssize_t readCount;
173 int finishedInput;
200 int finishedInput;
174 int finishedOutput;
201 int finishedOutput;
175 } ZstdDecompressorIterator;
202 } ZstdDecompressorIterator;
176
203
177 extern PyTypeObject ZstdDecompressorIteratorType;
204 extern PyTypeObject ZstdDecompressorIteratorType;
178
205
179 typedef struct {
206 typedef struct {
180 int errored;
207 int errored;
181 PyObject* chunk;
208 PyObject* chunk;
182 } DecompressorIteratorResult;
209 } DecompressorIteratorResult;
183
210
211 typedef struct {
212 unsigned long long offset;
213 unsigned long long length;
214 } BufferSegment;
215
216 typedef struct {
217 PyObject_HEAD
218
219 PyObject* parent;
220 BufferSegment* segments;
221 Py_ssize_t segmentCount;
222 } ZstdBufferSegments;
223
224 extern PyTypeObject ZstdBufferSegmentsType;
225
226 typedef struct {
227 PyObject_HEAD
228
229 PyObject* parent;
230 void* data;
231 Py_ssize_t dataSize;
232 unsigned long long offset;
233 } ZstdBufferSegment;
234
235 extern PyTypeObject ZstdBufferSegmentType;
236
237 typedef struct {
238 PyObject_HEAD
239
240 Py_buffer parent;
241 void* data;
242 unsigned long long dataSize;
243 BufferSegment* segments;
244 Py_ssize_t segmentCount;
245 int useFree;
246 } ZstdBufferWithSegments;
247
248 extern PyTypeObject ZstdBufferWithSegmentsType;
249
250 /**
251 * An ordered collection of BufferWithSegments exposed as a squashed collection.
252 *
253 * This type provides a virtual view spanning multiple BufferWithSegments
254 * instances. It allows multiple instances to be "chained" together and
255 * exposed as a single collection. e.g. if there are 2 buffers holding
256 * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer.
257 */
258 typedef struct {
259 PyObject_HEAD
260
261 /* An array of buffers that should be exposed through this instance. */
262 ZstdBufferWithSegments** buffers;
263 /* Number of elements in buffers array. */
264 Py_ssize_t bufferCount;
265 /* Array of first offset in each buffer instance. 0th entry corresponds
266 to number of elements in the 0th buffer. 1st entry corresponds to the
267 sum of elements in 0th and 1st buffers. */
268 Py_ssize_t* firstElements;
269 } ZstdBufferWithSegmentsCollection;
270
271 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
272
184 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams);
273 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams);
185 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args);
274 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args);
186 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args);
275 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args);
187 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args);
276 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args);
188 ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize);
277 int init_cstream(ZstdCompressor* compressor, unsigned long long sourceSize);
189 ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor);
278 int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize);
279 int init_dstream(ZstdDecompressor* decompressor);
190 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
280 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
281 ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
282 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize);
283 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*);
284 int cpu_count(void);
285 size_t roundpow2(size_t);
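Several of the new members above (threads, mtcctx, and the init_cstream/init_mtcstream prototypes) exist to support the multi-threaded compression added in 0.8.0. A rough sketch of how that surfaces in Python, with the threads= keyword taken from the tests further below; the max_output_size argument is only needed here because the frame does not record its content size:

    import zstd

    data = b'payload' * (1 << 18)

    # threads > 0 routes compression through the ZSTDMT_CCtx ('mtcctx')
    # instead of the single-threaded ZSTD_CCtx.
    cctx = zstd.ZstdCompressor(level=3, threads=2)
    frame = cctx.compress(data)

    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress(frame, max_output_size=len(data)) == data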
@@ -1,154 +1,187
1 # Copyright (c) 2016-present, Gregory Szorc
1 # Copyright (c) 2016-present, Gregory Szorc
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # This software may be modified and distributed under the terms
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
5 # of the BSD license. See the LICENSE file for details.
6
6
7 from __future__ import absolute_import
7 from __future__ import absolute_import
8
8
9 import cffi
9 import cffi
10 import distutils.ccompiler
10 import distutils.ccompiler
11 import os
11 import os
12 import re
12 import re
13 import subprocess
13 import subprocess
14 import tempfile
14 import tempfile
15
15
16
16
17 HERE = os.path.abspath(os.path.dirname(__file__))
17 HERE = os.path.abspath(os.path.dirname(__file__))
18
18
19 SOURCES = ['zstd/%s' % p for p in (
19 SOURCES = ['zstd/%s' % p for p in (
20 'common/entropy_common.c',
20 'common/entropy_common.c',
21 'common/error_private.c',
21 'common/error_private.c',
22 'common/fse_decompress.c',
22 'common/fse_decompress.c',
23 'common/pool.c',
23 'common/pool.c',
24 'common/threading.c',
24 'common/threading.c',
25 'common/xxhash.c',
25 'common/xxhash.c',
26 'common/zstd_common.c',
26 'common/zstd_common.c',
27 'compress/fse_compress.c',
27 'compress/fse_compress.c',
28 'compress/huf_compress.c',
28 'compress/huf_compress.c',
29 'compress/zstd_compress.c',
29 'compress/zstd_compress.c',
30 'compress/zstdmt_compress.c',
30 'decompress/huf_decompress.c',
31 'decompress/huf_decompress.c',
31 'decompress/zstd_decompress.c',
32 'decompress/zstd_decompress.c',
32 'dictBuilder/cover.c',
33 'dictBuilder/cover.c',
33 'dictBuilder/divsufsort.c',
34 'dictBuilder/divsufsort.c',
34 'dictBuilder/zdict.c',
35 'dictBuilder/zdict.c',
35 )]
36 )]
36
37
38 # Headers whose preprocessed output will be fed into cdef().
37 HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
39 HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
38 ('zstd.h',),
40 ('zstd.h',),
39 ('common', 'pool.h'),
41 ('compress', 'zstdmt_compress.h'),
40 ('dictBuilder', 'zdict.h'),
42 ('dictBuilder', 'zdict.h'),
41 )]
43 )]
42
44
43 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
45 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
44 'zstd',
46 'zstd',
45 'zstd/common',
47 'zstd/common',
46 'zstd/compress',
48 'zstd/compress',
47 'zstd/decompress',
49 'zstd/decompress',
48 'zstd/dictBuilder',
50 'zstd/dictBuilder',
49 )]
51 )]
50
52
51 # cffi can't parse some of the primitives in zstd.h. So we invoke the
53 # cffi can't parse some of the primitives in zstd.h. So we invoke the
52 # preprocessor and feed its output into cffi.
54 # preprocessor and feed its output into cffi.
53 compiler = distutils.ccompiler.new_compiler()
55 compiler = distutils.ccompiler.new_compiler()
54
56
55 # Needed for MSVC.
57 # Needed for MSVC.
56 if hasattr(compiler, 'initialize'):
58 if hasattr(compiler, 'initialize'):
57 compiler.initialize()
59 compiler.initialize()
58
60
59 # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor
61 # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor
60 # manually.
62 # manually.
61 if compiler.compiler_type == 'unix':
63 if compiler.compiler_type == 'unix':
62 args = list(compiler.executables['compiler'])
64 args = list(compiler.executables['compiler'])
63 args.extend([
65 args.extend([
64 '-E',
66 '-E',
65 '-DZSTD_STATIC_LINKING_ONLY',
67 '-DZSTD_STATIC_LINKING_ONLY',
66 '-DZDICT_STATIC_LINKING_ONLY',
68 '-DZDICT_STATIC_LINKING_ONLY',
67 ])
69 ])
68 elif compiler.compiler_type == 'msvc':
70 elif compiler.compiler_type == 'msvc':
69 args = [compiler.cc]
71 args = [compiler.cc]
70 args.extend([
72 args.extend([
71 '/EP',
73 '/EP',
72 '/DZSTD_STATIC_LINKING_ONLY',
74 '/DZSTD_STATIC_LINKING_ONLY',
73 '/DZDICT_STATIC_LINKING_ONLY',
75 '/DZDICT_STATIC_LINKING_ONLY',
74 ])
76 ])
75 else:
77 else:
76 raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
78 raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
77
79
78 def preprocess(path):
80 def preprocess(path):
79 # zstd.h includes <stddef.h>, which is also included by cffi's boilerplate.
80 # This can lead to duplicate declarations. So we strip this include from the
81 # preprocessor invocation.
82 with open(path, 'rb') as fh:
81 with open(path, 'rb') as fh:
83 lines = [l for l in fh if not l.startswith(b'#include <stddef.h>')]
82 lines = []
83 for l in fh:
84 # zstd.h includes <stddef.h>, which is also included by cffi's
85 # boilerplate. This can lead to duplicate declarations. So we strip
86 # this include from the preprocessor invocation.
87 #
88 # The same thing happens for including zstd.h, so give it the same
89 # treatment.
90 #
91 # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline
92 # #define in zstdmt_compress.h and results in a compiler warning. So drop
93 # the inline #define.
94 if l.startswith((b'#include <stddef.h>',
95 b'#include "zstd.h"',
96 b'#define ZSTD_STATIC_LINKING_ONLY')):
97 continue
98
99 # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't
100 # important so just filter it out.
101 if l.startswith(b'ZSTDLIB_API'):
102 l = l[len(b'ZSTDLIB_API '):]
103
104 lines.append(l)
84
105
85 fd, input_file = tempfile.mkstemp(suffix='.h')
106 fd, input_file = tempfile.mkstemp(suffix='.h')
86 os.write(fd, b''.join(lines))
107 os.write(fd, b''.join(lines))
87 os.close(fd)
108 os.close(fd)
88
109
89 try:
110 try:
90 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE)
111 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE)
91 output = process.communicate()[0]
112 output = process.communicate()[0]
92 ret = process.poll()
113 ret = process.poll()
93 if ret:
114 if ret:
94 raise Exception('preprocessor exited with error')
115 raise Exception('preprocessor exited with error')
95
116
96 return output
117 return output
97 finally:
118 finally:
98 os.unlink(input_file)
119 os.unlink(input_file)
99
120
100
121
101 def normalize_output(output):
122 def normalize_output(output):
102 lines = []
123 lines = []
103 for line in output.splitlines():
124 for line in output.splitlines():
104 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
125 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
105 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
126 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
106 line = line[len(b'__attribute__ ((visibility ("default"))) '):]
127 line = line[len(b'__attribute__ ((visibility ("default"))) '):]
107
128
108 if line.startswith(b'__attribute__((deprecated('):
129 if line.startswith(b'__attribute__((deprecated('):
109 continue
130 continue
110 elif b'__declspec(deprecated(' in line:
131 elif b'__declspec(deprecated(' in line:
111 continue
132 continue
112
133
113 lines.append(line)
134 lines.append(line)
114
135
115 return b'\n'.join(lines)
136 return b'\n'.join(lines)
116
137
117
138
118 ffi = cffi.FFI()
139 ffi = cffi.FFI()
140 # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning
141 # when cffi uses the function. Since we statically link against zstd, even
142 # if we use the deprecated functions it shouldn't be a huge problem.
119 ffi.set_source('_zstd_cffi', '''
143 ffi.set_source('_zstd_cffi', '''
120 #include "mem.h"
144 #include "mem.h"
121 #define ZSTD_STATIC_LINKING_ONLY
145 #define ZSTD_STATIC_LINKING_ONLY
122 #include "zstd.h"
146 #include "zstd.h"
123 #define ZDICT_STATIC_LINKING_ONLY
147 #define ZDICT_STATIC_LINKING_ONLY
124 #include "pool.h"
148 #define ZDICT_DISABLE_DEPRECATE_WARNINGS
125 #include "zdict.h"
149 #include "zdict.h"
150 #include "zstdmt_compress.h"
126 ''', sources=SOURCES, include_dirs=INCLUDE_DIRS)
151 ''', sources=SOURCES, include_dirs=INCLUDE_DIRS)
127
152
128 DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
153 DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
129
154
130 sources = []
155 sources = []
131
156
157 # Feed normalized preprocessor output for headers into the cdef parser.
132 for header in HEADERS:
158 for header in HEADERS:
133 preprocessed = preprocess(header)
159 preprocessed = preprocess(header)
134 sources.append(normalize_output(preprocessed))
160 sources.append(normalize_output(preprocessed))
135
161
136 # Do another pass over source and find constants that were preprocessed
162 # #define's are effectively erased as part of going through the preprocessor.
137 # away.
163 # So perform a manual pass to re-add those to the cdef source.
138 with open(header, 'rb') as fh:
164 with open(header, 'rb') as fh:
139 for line in fh:
165 for line in fh:
140 line = line.strip()
166 line = line.strip()
141 m = DEFINE.match(line)
167 m = DEFINE.match(line)
142 if not m:
168 if not m:
143 continue
169 continue
144
170
171 if m.group(1) == b'ZSTD_STATIC_LINKING_ONLY':
172 continue
173
145 # The parser doesn't like some constants with complex values.
174 # The parser doesn't like some constants with complex values.
146 if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'):
175 if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'):
147 continue
176 continue
148
177
178 # The ... is magic syntax by the cdef parser to resolve the
179 # value at compile time.
149 sources.append(m.group(0) + b' ...')
180 sources.append(m.group(0) + b' ...')
150
181
151 ffi.cdef(u'\n'.join(s.decode('latin1') for s in sources))
182 cdeflines = b'\n'.join(sources).splitlines()
183 cdeflines = [l for l in cdeflines if l.strip()]
184 ffi.cdef(b'\n'.join(cdeflines).decode('latin1'))
152
185
153 if __name__ == '__main__':
186 if __name__ == '__main__':
154 ffi.compile()
187 ffi.compile()
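To make the '...' cdef trick described in the comments above concrete, here is a tiny self-contained sketch of how a single #define is re-added after preprocessing; the regex is copied from the script, and ZSTD_MAGICNUMBER is simply one of the constants zstd.h defines:

    import re

    DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')

    line = b'#define ZSTD_MAGICNUMBER 0xFD2FB528'
    m = DEFINE.match(line)

    # Appending ' ...' tells cffi's cdef() parser to resolve the value at
    # compile time rather than trusting whatever the header spelled out.
    cdef_line = m.group(0) + b' ...'
    # cdef_line can now be fed to ffi.cdef() along with the preprocessed
    # function declarations.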
@@ -1,70 +1,76
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # Copyright (c) 2016-present, Gregory Szorc
2 # Copyright (c) 2016-present, Gregory Szorc
3 # All rights reserved.
3 # All rights reserved.
4 #
4 #
5 # This software may be modified and distributed under the terms
5 # This software may be modified and distributed under the terms
6 # of the BSD license. See the LICENSE file for details.
6 # of the BSD license. See the LICENSE file for details.
7
7
8 import sys
8 import sys
9 from setuptools import setup
9 from setuptools import setup
10
10
11 try:
11 try:
12 import cffi
12 import cffi
13 except ImportError:
13 except ImportError:
14 cffi = None
14 cffi = None
15
15
16 import setup_zstd
16 import setup_zstd
17
17
18 SUPPORT_LEGACY = False
18 SUPPORT_LEGACY = False
19
19
20 if "--legacy" in sys.argv:
20 if "--legacy" in sys.argv:
21 SUPPORT_LEGACY = True
21 SUPPORT_LEGACY = True
22 sys.argv.remove("--legacy")
22 sys.argv.remove("--legacy")
23
23
24 # Code for obtaining the Extension instance is in its own module to
24 # Code for obtaining the Extension instance is in its own module to
25 # facilitate reuse in other projects.
25 # facilitate reuse in other projects.
26 extensions = [setup_zstd.get_c_extension(SUPPORT_LEGACY, 'zstd')]
26 extensions = [setup_zstd.get_c_extension(SUPPORT_LEGACY, 'zstd')]
27
27
28 install_requires = []
29
28 if cffi:
30 if cffi:
29 import make_cffi
31 import make_cffi
30 extensions.append(make_cffi.ffi.distutils_extension())
32 extensions.append(make_cffi.ffi.distutils_extension())
31
33
34 # Need change in 1.8 for ffi.from_buffer() behavior.
35 install_requires.append('cffi>=1.8')
36
32 version = None
37 version = None
33
38
34 with open('c-ext/python-zstandard.h', 'r') as fh:
39 with open('c-ext/python-zstandard.h', 'r') as fh:
35 for line in fh:
40 for line in fh:
36 if not line.startswith('#define PYTHON_ZSTANDARD_VERSION'):
41 if not line.startswith('#define PYTHON_ZSTANDARD_VERSION'):
37 continue
42 continue
38
43
39 version = line.split()[2][1:-1]
44 version = line.split()[2][1:-1]
40 break
45 break
41
46
42 if not version:
47 if not version:
43 raise Exception('could not resolve package version; '
48 raise Exception('could not resolve package version; '
44 'this should never happen')
49 'this should never happen')
45
50
46 setup(
51 setup(
47 name='zstandard',
52 name='zstandard',
48 version=version,
53 version=version,
49 description='Zstandard bindings for Python',
54 description='Zstandard bindings for Python',
50 long_description=open('README.rst', 'r').read(),
55 long_description=open('README.rst', 'r').read(),
51 url='https://github.com/indygreg/python-zstandard',
56 url='https://github.com/indygreg/python-zstandard',
52 author='Gregory Szorc',
57 author='Gregory Szorc',
53 author_email='gregory.szorc@gmail.com',
58 author_email='gregory.szorc@gmail.com',
54 license='BSD',
59 license='BSD',
55 classifiers=[
60 classifiers=[
56 'Development Status :: 4 - Beta',
61 'Development Status :: 4 - Beta',
57 'Intended Audience :: Developers',
62 'Intended Audience :: Developers',
58 'License :: OSI Approved :: BSD License',
63 'License :: OSI Approved :: BSD License',
59 'Programming Language :: C',
64 'Programming Language :: C',
60 'Programming Language :: Python :: 2.6',
65 'Programming Language :: Python :: 2.6',
61 'Programming Language :: Python :: 2.7',
66 'Programming Language :: Python :: 2.7',
62 'Programming Language :: Python :: 3.3',
67 'Programming Language :: Python :: 3.3',
63 'Programming Language :: Python :: 3.4',
68 'Programming Language :: Python :: 3.4',
64 'Programming Language :: Python :: 3.5',
69 'Programming Language :: Python :: 3.5',
65 'Programming Language :: Python :: 3.6',
70 'Programming Language :: Python :: 3.6',
66 ],
71 ],
67 keywords='zstandard zstd compression',
72 keywords='zstandard zstd compression',
68 ext_modules=extensions,
73 ext_modules=extensions,
69 test_suite='tests',
74 test_suite='tests',
75 install_requires=install_requires,
70 )
76 )
@@ -1,96 +1,102
1 # Copyright (c) 2016-present, Gregory Szorc
1 # Copyright (c) 2016-present, Gregory Szorc
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # This software may be modified and distributed under the terms
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
5 # of the BSD license. See the LICENSE file for details.
6
6
7 import os
7 import os
8 from distutils.extension import Extension
8 from distutils.extension import Extension
9
9
10
10
11 zstd_sources = ['zstd/%s' % p for p in (
11 zstd_sources = ['zstd/%s' % p for p in (
12 'common/entropy_common.c',
12 'common/entropy_common.c',
13 'common/error_private.c',
13 'common/error_private.c',
14 'common/fse_decompress.c',
14 'common/fse_decompress.c',
15 'common/pool.c',
15 'common/pool.c',
16 'common/threading.c',
16 'common/threading.c',
17 'common/xxhash.c',
17 'common/xxhash.c',
18 'common/zstd_common.c',
18 'common/zstd_common.c',
19 'compress/fse_compress.c',
19 'compress/fse_compress.c',
20 'compress/huf_compress.c',
20 'compress/huf_compress.c',
21 'compress/zstd_compress.c',
21 'compress/zstd_compress.c',
22 'compress/zstdmt_compress.c',
22 'decompress/huf_decompress.c',
23 'decompress/huf_decompress.c',
23 'decompress/zstd_decompress.c',
24 'decompress/zstd_decompress.c',
24 'dictBuilder/cover.c',
25 'dictBuilder/cover.c',
25 'dictBuilder/divsufsort.c',
26 'dictBuilder/divsufsort.c',
26 'dictBuilder/zdict.c',
27 'dictBuilder/zdict.c',
27 )]
28 )]
28
29
29 zstd_sources_legacy = ['zstd/%s' % p for p in (
30 zstd_sources_legacy = ['zstd/%s' % p for p in (
30 'deprecated/zbuff_common.c',
31 'deprecated/zbuff_common.c',
31 'deprecated/zbuff_compress.c',
32 'deprecated/zbuff_compress.c',
32 'deprecated/zbuff_decompress.c',
33 'deprecated/zbuff_decompress.c',
33 'legacy/zstd_v01.c',
34 'legacy/zstd_v01.c',
34 'legacy/zstd_v02.c',
35 'legacy/zstd_v02.c',
35 'legacy/zstd_v03.c',
36 'legacy/zstd_v03.c',
36 'legacy/zstd_v04.c',
37 'legacy/zstd_v04.c',
37 'legacy/zstd_v05.c',
38 'legacy/zstd_v05.c',
38 'legacy/zstd_v06.c',
39 'legacy/zstd_v06.c',
39 'legacy/zstd_v07.c'
40 'legacy/zstd_v07.c'
40 )]
41 )]
41
42
42 zstd_includes = [
43 zstd_includes = [
43 'c-ext',
44 'c-ext',
44 'zstd',
45 'zstd',
45 'zstd/common',
46 'zstd/common',
46 'zstd/compress',
47 'zstd/compress',
47 'zstd/decompress',
48 'zstd/decompress',
48 'zstd/dictBuilder',
49 'zstd/dictBuilder',
49 ]
50 ]
50
51
51 zstd_includes_legacy = [
52 zstd_includes_legacy = [
52 'zstd/deprecated',
53 'zstd/deprecated',
53 'zstd/legacy',
54 'zstd/legacy',
54 ]
55 ]
55
56
56 ext_sources = [
57 ext_sources = [
57 'zstd.c',
58 'zstd.c',
59 'c-ext/bufferutil.c',
58 'c-ext/compressiondict.c',
60 'c-ext/compressiondict.c',
59 'c-ext/compressobj.c',
61 'c-ext/compressobj.c',
60 'c-ext/compressor.c',
62 'c-ext/compressor.c',
61 'c-ext/compressoriterator.c',
63 'c-ext/compressoriterator.c',
62 'c-ext/compressionparams.c',
64 'c-ext/compressionparams.c',
63 'c-ext/compressionwriter.c',
65 'c-ext/compressionwriter.c',
64 'c-ext/constants.c',
66 'c-ext/constants.c',
65 'c-ext/decompressobj.c',
67 'c-ext/decompressobj.c',
66 'c-ext/decompressor.c',
68 'c-ext/decompressor.c',
67 'c-ext/decompressoriterator.c',
69 'c-ext/decompressoriterator.c',
68 'c-ext/decompressionwriter.c',
70 'c-ext/decompressionwriter.c',
69 'c-ext/dictparams.c',
70 'c-ext/frameparams.c',
71 'c-ext/frameparams.c',
71 ]
72 ]
72
73
73 zstd_depends = [
74 zstd_depends = [
74 'c-ext/python-zstandard.h',
75 'c-ext/python-zstandard.h',
75 ]
76 ]
76
77
77
78
78 def get_c_extension(support_legacy=False, name='zstd'):
79 def get_c_extension(support_legacy=False, name='zstd'):
79 """Obtain a distutils.extension.Extension for the C extension."""
80 """Obtain a distutils.extension.Extension for the C extension."""
80 root = os.path.abspath(os.path.dirname(__file__))
81 root = os.path.abspath(os.path.dirname(__file__))
81
82
82 sources = [os.path.join(root, p) for p in zstd_sources + ext_sources]
83 sources = [os.path.join(root, p) for p in zstd_sources + ext_sources]
83 if support_legacy:
84 if support_legacy:
84 sources.extend([os.path.join(root, p) for p in zstd_sources_legacy])
85 sources.extend([os.path.join(root, p) for p in zstd_sources_legacy])
85
86
86 include_dirs = [os.path.join(root, d) for d in zstd_includes]
87 include_dirs = [os.path.join(root, d) for d in zstd_includes]
87 if support_legacy:
88 if support_legacy:
88 include_dirs.extend([os.path.join(root, d) for d in zstd_includes_legacy])
89 include_dirs.extend([os.path.join(root, d) for d in zstd_includes_legacy])
89
90
90 depends = [os.path.join(root, p) for p in zstd_depends]
91 depends = [os.path.join(root, p) for p in zstd_depends]
91
92
93 extra_args = ['-DZSTD_MULTITHREAD']
94
95 if support_legacy:
96 extra_args.append('-DZSTD_LEGACY_SUPPORT=1')
97
92 # TODO compile with optimizations.
98 # TODO compile with optimizations.
93 return Extension(name, sources,
99 return Extension(name, sources,
94 include_dirs=include_dirs,
100 include_dirs=include_dirs,
95 depends=depends,
101 depends=depends,
96 extra_compile_args=["-DZSTD_LEGACY_SUPPORT=1"] if support_legacy else [])
102 extra_compile_args=extra_args)
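setup.py notes above that this module exists so other projects can reuse the Extension definition. A hypothetical downstream setup.py (the 'myproject' name and module path are invented for illustration) might consume it like this:

    from setuptools import setup

    import setup_zstd

    setup(
        name='myproject',
        version='0.1',
        # support_legacy=True also compiles the legacy format sources and
        # adds -DZSTD_LEGACY_SUPPORT=1 on top of -DZSTD_MULTITHREAD.
        ext_modules=[setup_zstd.get_c_extension(support_legacy=True,
                                                name='myproject.zstd')],
    )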
@@ -1,61 +1,88
1 import inspect
1 import inspect
2 import io
2 import io
3 import os
3 import types
4 import types
4
5
5
6
6 def make_cffi(cls):
7 def make_cffi(cls):
7 """Decorator to add CFFI versions of each test method."""
8 """Decorator to add CFFI versions of each test method."""
8
9
9 try:
10 try:
10 import zstd_cffi
11 import zstd_cffi
11 except ImportError:
12 except ImportError:
12 return cls
13 return cls
13
14
14 # If CFFI version is available, dynamically construct test methods
15 # If CFFI version is available, dynamically construct test methods
15 # that use it.
16 # that use it.
16
17
17 for attr in dir(cls):
18 for attr in dir(cls):
18 fn = getattr(cls, attr)
19 fn = getattr(cls, attr)
19 if not inspect.ismethod(fn) and not inspect.isfunction(fn):
20 if not inspect.ismethod(fn) and not inspect.isfunction(fn):
20 continue
21 continue
21
22
22 if not fn.__name__.startswith('test_'):
23 if not fn.__name__.startswith('test_'):
23 continue
24 continue
24
25
25 name = '%s_cffi' % fn.__name__
26 name = '%s_cffi' % fn.__name__
26
27
27 # Replace the "zstd" symbol with the CFFI module instance. Then copy
28 # Replace the "zstd" symbol with the CFFI module instance. Then copy
28 # the function object and install it in a new attribute.
29 # the function object and install it in a new attribute.
29 if isinstance(fn, types.FunctionType):
30 if isinstance(fn, types.FunctionType):
30 globs = dict(fn.__globals__)
31 globs = dict(fn.__globals__)
31 globs['zstd'] = zstd_cffi
32 globs['zstd'] = zstd_cffi
32 new_fn = types.FunctionType(fn.__code__, globs, name,
33 new_fn = types.FunctionType(fn.__code__, globs, name,
33 fn.__defaults__, fn.__closure__)
34 fn.__defaults__, fn.__closure__)
34 new_method = new_fn
35 new_method = new_fn
35 else:
36 else:
36 globs = dict(fn.__func__.func_globals)
37 globs = dict(fn.__func__.func_globals)
37 globs['zstd'] = zstd_cffi
38 globs['zstd'] = zstd_cffi
38 new_fn = types.FunctionType(fn.__func__.func_code, globs, name,
39 new_fn = types.FunctionType(fn.__func__.func_code, globs, name,
39 fn.__func__.func_defaults,
40 fn.__func__.func_defaults,
40 fn.__func__.func_closure)
41 fn.__func__.func_closure)
41 new_method = types.UnboundMethodType(new_fn, fn.im_self,
42 new_method = types.UnboundMethodType(new_fn, fn.im_self,
42 fn.im_class)
43 fn.im_class)
43
44
44 setattr(cls, name, new_method)
45 setattr(cls, name, new_method)
45
46
46 return cls
47 return cls
47
48
48
49
49 class OpCountingBytesIO(io.BytesIO):
50 class OpCountingBytesIO(io.BytesIO):
50 def __init__(self, *args, **kwargs):
51 def __init__(self, *args, **kwargs):
51 self._read_count = 0
52 self._read_count = 0
52 self._write_count = 0
53 self._write_count = 0
53 return super(OpCountingBytesIO, self).__init__(*args, **kwargs)
54 return super(OpCountingBytesIO, self).__init__(*args, **kwargs)
54
55
55 def read(self, *args):
56 def read(self, *args):
56 self._read_count += 1
57 self._read_count += 1
57 return super(OpCountingBytesIO, self).read(*args)
58 return super(OpCountingBytesIO, self).read(*args)
58
59
59 def write(self, data):
60 def write(self, data):
60 self._write_count += 1
61 self._write_count += 1
61 return super(OpCountingBytesIO, self).write(data)
62 return super(OpCountingBytesIO, self).write(data)
63
64
65 _source_files = []
66
67
68 def random_input_data():
69 """Obtain the raw content of source files.
70
71 This is used for generating "random" data to feed into fuzzing, since it is
72 faster than random content generation.
73 """
74 if _source_files:
75 return _source_files
76
77 for root, dirs, files in os.walk(os.path.dirname(__file__)):
78 dirs[:] = list(sorted(dirs))
79 for f in sorted(files):
80 try:
81 with open(os.path.join(root, f), 'rb') as fh:
82 data = fh.read()
83 if data:
84 _source_files.append(data)
85 except OSError:
86 pass
87
88 return _source_files
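random_input_data() above is new and is not exercised by the test module shown next, so the following is a hedged sketch of the kind of round-trip test it is meant to feed; the test class and method names are invented for illustration:

    import unittest

    import zstd

    from .common import (
        make_cffi,
        random_input_data,
    )


    @make_cffi
    class TestRoundTrip(unittest.TestCase):
        def test_source_files(self):
            # Reuse the repository's own files as cheap, varied inputs.
            for data in random_input_data():
                cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
                frame = cctx.compress(data)

                dctx = zstd.ZstdDecompressor()
                self.assertEqual(dctx.decompress(frame), data)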
@@ -1,675 +1,905
1 import hashlib
1 import hashlib
2 import io
2 import io
3 import struct
3 import struct
4 import sys
4 import sys
5
5
6 try:
6 try:
7 import unittest2 as unittest
7 import unittest2 as unittest
8 except ImportError:
8 except ImportError:
9 import unittest
9 import unittest
10
10
11 import zstd
11 import zstd
12
12
13 from .common import (
13 from .common import (
14 make_cffi,
14 make_cffi,
15 OpCountingBytesIO,
15 OpCountingBytesIO,
16 )
16 )
17
17
18
18
19 if sys.version_info[0] >= 3:
19 if sys.version_info[0] >= 3:
20 next = lambda it: it.__next__()
20 next = lambda it: it.__next__()
21 else:
21 else:
22 next = lambda it: it.next()
22 next = lambda it: it.next()
23
23
24
24
25 def multithreaded_chunk_size(level, source_size=0):
26 params = zstd.get_compression_parameters(level, source_size)
27
28 return 1 << (params.window_log + 2)
29
30
25 @make_cffi
31 @make_cffi
26 class TestCompressor(unittest.TestCase):
32 class TestCompressor(unittest.TestCase):
27 def test_level_bounds(self):
33 def test_level_bounds(self):
28 with self.assertRaises(ValueError):
34 with self.assertRaises(ValueError):
29 zstd.ZstdCompressor(level=0)
35 zstd.ZstdCompressor(level=0)
30
36
31 with self.assertRaises(ValueError):
37 with self.assertRaises(ValueError):
32 zstd.ZstdCompressor(level=23)
38 zstd.ZstdCompressor(level=23)
33
39
34
40
35 @make_cffi
41 @make_cffi
36 class TestCompressor_compress(unittest.TestCase):
42 class TestCompressor_compress(unittest.TestCase):
43 def test_multithreaded_unsupported(self):
44 samples = []
45 for i in range(128):
46 samples.append(b'foo' * 64)
47 samples.append(b'bar' * 64)
48
49 d = zstd.train_dictionary(8192, samples)
50
51 cctx = zstd.ZstdCompressor(dict_data=d, threads=2)
52
53 with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both dictionaries and multi-threaded compression'):
54 cctx.compress(b'foo')
55
56 params = zstd.get_compression_parameters(3)
57 cctx = zstd.ZstdCompressor(compression_params=params, threads=2)
58 with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both compression parameters and multi-threaded compression'):
59 cctx.compress(b'foo')
60
37 def test_compress_empty(self):
61 def test_compress_empty(self):
38 cctx = zstd.ZstdCompressor(level=1)
62 cctx = zstd.ZstdCompressor(level=1)
39 result = cctx.compress(b'')
63 result = cctx.compress(b'')
40 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
64 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
41 params = zstd.get_frame_parameters(result)
65 params = zstd.get_frame_parameters(result)
42 self.assertEqual(params.content_size, 0)
66 self.assertEqual(params.content_size, 0)
43 self.assertEqual(params.window_size, 524288)
67 self.assertEqual(params.window_size, 524288)
44 self.assertEqual(params.dict_id, 0)
68 self.assertEqual(params.dict_id, 0)
45 self.assertFalse(params.has_checksum, 0)
69 self.assertFalse(params.has_checksum, 0)
46
70
47 # TODO should be temporary until https://github.com/facebook/zstd/issues/506
71 # TODO should be temporary until https://github.com/facebook/zstd/issues/506
48 # is fixed.
72 # is fixed.
49 cctx = zstd.ZstdCompressor(write_content_size=True)
73 cctx = zstd.ZstdCompressor(write_content_size=True)
50 with self.assertRaises(ValueError):
74 with self.assertRaises(ValueError):
51 cctx.compress(b'')
75 cctx.compress(b'')
52
76
53 cctx.compress(b'', allow_empty=True)
77 cctx.compress(b'', allow_empty=True)
54
78
55 def test_compress_large(self):
79 def test_compress_large(self):
56 chunks = []
80 chunks = []
57 for i in range(255):
81 for i in range(255):
58 chunks.append(struct.Struct('>B').pack(i) * 16384)
82 chunks.append(struct.Struct('>B').pack(i) * 16384)
59
83
60 cctx = zstd.ZstdCompressor(level=3)
84 cctx = zstd.ZstdCompressor(level=3)
61 result = cctx.compress(b''.join(chunks))
85 result = cctx.compress(b''.join(chunks))
62 self.assertEqual(len(result), 999)
86 self.assertEqual(len(result), 999)
63 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
87 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
64
88
65 # This matches the test for read_from() below.
89 # This matches the test for read_from() below.
66 cctx = zstd.ZstdCompressor(level=1)
90 cctx = zstd.ZstdCompressor(level=1)
67 result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
91 result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
68 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
92 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
69 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
93 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
70 b'\x02\x09\x00\x00\x6f')
94 b'\x02\x09\x00\x00\x6f')
71
95
72 def test_write_checksum(self):
96 def test_write_checksum(self):
73 cctx = zstd.ZstdCompressor(level=1)
97 cctx = zstd.ZstdCompressor(level=1)
74 no_checksum = cctx.compress(b'foobar')
98 no_checksum = cctx.compress(b'foobar')
75 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
99 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
76 with_checksum = cctx.compress(b'foobar')
100 with_checksum = cctx.compress(b'foobar')
77
101
78 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
102 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
79
103
80 no_params = zstd.get_frame_parameters(no_checksum)
104 no_params = zstd.get_frame_parameters(no_checksum)
81 with_params = zstd.get_frame_parameters(with_checksum)
105 with_params = zstd.get_frame_parameters(with_checksum)
82
106
83 self.assertFalse(no_params.has_checksum)
107 self.assertFalse(no_params.has_checksum)
84 self.assertTrue(with_params.has_checksum)
108 self.assertTrue(with_params.has_checksum)
85
109
86 def test_write_content_size(self):
110 def test_write_content_size(self):
87 cctx = zstd.ZstdCompressor(level=1)
111 cctx = zstd.ZstdCompressor(level=1)
88 no_size = cctx.compress(b'foobar' * 256)
112 no_size = cctx.compress(b'foobar' * 256)
89 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
113 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
90 with_size = cctx.compress(b'foobar' * 256)
114 with_size = cctx.compress(b'foobar' * 256)
91
115
92 self.assertEqual(len(with_size), len(no_size) + 1)
116 self.assertEqual(len(with_size), len(no_size) + 1)
93
117
94 no_params = zstd.get_frame_parameters(no_size)
118 no_params = zstd.get_frame_parameters(no_size)
95 with_params = zstd.get_frame_parameters(with_size)
119 with_params = zstd.get_frame_parameters(with_size)
96 self.assertEqual(no_params.content_size, 0)
120 self.assertEqual(no_params.content_size, 0)
97 self.assertEqual(with_params.content_size, 1536)
121 self.assertEqual(with_params.content_size, 1536)
98
122
99 def test_no_dict_id(self):
123 def test_no_dict_id(self):
100 samples = []
124 samples = []
101 for i in range(128):
125 for i in range(128):
102 samples.append(b'foo' * 64)
126 samples.append(b'foo' * 64)
103 samples.append(b'bar' * 64)
127 samples.append(b'bar' * 64)
104 samples.append(b'foobar' * 64)
128 samples.append(b'foobar' * 64)
105
129
106 d = zstd.train_dictionary(1024, samples)
130 d = zstd.train_dictionary(1024, samples)
107
131
108 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
132 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
109 with_dict_id = cctx.compress(b'foobarfoobar')
133 with_dict_id = cctx.compress(b'foobarfoobar')
110
134
111 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
135 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
112 no_dict_id = cctx.compress(b'foobarfoobar')
136 no_dict_id = cctx.compress(b'foobarfoobar')
113
137
114 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
138 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
115
139
116 no_params = zstd.get_frame_parameters(no_dict_id)
140 no_params = zstd.get_frame_parameters(no_dict_id)
117 with_params = zstd.get_frame_parameters(with_dict_id)
141 with_params = zstd.get_frame_parameters(with_dict_id)
118 self.assertEqual(no_params.dict_id, 0)
142 self.assertEqual(no_params.dict_id, 0)
119 self.assertEqual(with_params.dict_id, 1584102229)
143 self.assertEqual(with_params.dict_id, 1584102229)
120
144
121 def test_compress_dict_multiple(self):
145 def test_compress_dict_multiple(self):
122 samples = []
146 samples = []
123 for i in range(128):
147 for i in range(128):
124 samples.append(b'foo' * 64)
148 samples.append(b'foo' * 64)
125 samples.append(b'bar' * 64)
149 samples.append(b'bar' * 64)
126 samples.append(b'foobar' * 64)
150 samples.append(b'foobar' * 64)
127
151
128 d = zstd.train_dictionary(8192, samples)
152 d = zstd.train_dictionary(8192, samples)
129
153
130 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
154 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
131
155
132 for i in range(32):
156 for i in range(32):
133 cctx.compress(b'foo bar foobar foo bar foobar')
157 cctx.compress(b'foo bar foobar foo bar foobar')
134
158
159 def test_multithreaded(self):
160 chunk_size = multithreaded_chunk_size(1)
161 source = b''.join([b'x' * chunk_size, b'y' * chunk_size])
162
163 cctx = zstd.ZstdCompressor(level=1, threads=2)
164 compressed = cctx.compress(source)
165
166 params = zstd.get_frame_parameters(compressed)
167 self.assertEqual(params.content_size, chunk_size * 2)
168 self.assertEqual(params.dict_id, 0)
169 self.assertFalse(params.has_checksum)
170
171 dctx = zstd.ZstdDecompressor()
172 self.assertEqual(dctx.decompress(compressed), source)
173
135
174
136 @make_cffi
175 @make_cffi
137 class TestCompressor_compressobj(unittest.TestCase):
176 class TestCompressor_compressobj(unittest.TestCase):
138 def test_compressobj_empty(self):
177 def test_compressobj_empty(self):
139 cctx = zstd.ZstdCompressor(level=1)
178 cctx = zstd.ZstdCompressor(level=1)
140 cobj = cctx.compressobj()
179 cobj = cctx.compressobj()
141 self.assertEqual(cobj.compress(b''), b'')
180 self.assertEqual(cobj.compress(b''), b'')
142 self.assertEqual(cobj.flush(),
181 self.assertEqual(cobj.flush(),
143 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
182 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
144
183
145 def test_compressobj_large(self):
184 def test_compressobj_large(self):
146 chunks = []
185 chunks = []
147 for i in range(255):
186 for i in range(255):
148 chunks.append(struct.Struct('>B').pack(i) * 16384)
187 chunks.append(struct.Struct('>B').pack(i) * 16384)
149
188
150 cctx = zstd.ZstdCompressor(level=3)
189 cctx = zstd.ZstdCompressor(level=3)
151 cobj = cctx.compressobj()
190 cobj = cctx.compressobj()
152
191
153 result = cobj.compress(b''.join(chunks)) + cobj.flush()
192 result = cobj.compress(b''.join(chunks)) + cobj.flush()
154 self.assertEqual(len(result), 999)
193 self.assertEqual(len(result), 999)
155 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
194 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
156
195
157 params = zstd.get_frame_parameters(result)
196 params = zstd.get_frame_parameters(result)
158 self.assertEqual(params.content_size, 0)
197 self.assertEqual(params.content_size, 0)
159 self.assertEqual(params.window_size, 1048576)
198 self.assertEqual(params.window_size, 1048576)
160 self.assertEqual(params.dict_id, 0)
199 self.assertEqual(params.dict_id, 0)
161 self.assertFalse(params.has_checksum)
200 self.assertFalse(params.has_checksum)
162
201
163 def test_write_checksum(self):
202 def test_write_checksum(self):
164 cctx = zstd.ZstdCompressor(level=1)
203 cctx = zstd.ZstdCompressor(level=1)
165 cobj = cctx.compressobj()
204 cobj = cctx.compressobj()
166 no_checksum = cobj.compress(b'foobar') + cobj.flush()
205 no_checksum = cobj.compress(b'foobar') + cobj.flush()
167 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
206 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
168 cobj = cctx.compressobj()
207 cobj = cctx.compressobj()
169 with_checksum = cobj.compress(b'foobar') + cobj.flush()
208 with_checksum = cobj.compress(b'foobar') + cobj.flush()
170
209
171 no_params = zstd.get_frame_parameters(no_checksum)
210 no_params = zstd.get_frame_parameters(no_checksum)
172 with_params = zstd.get_frame_parameters(with_checksum)
211 with_params = zstd.get_frame_parameters(with_checksum)
173 self.assertEqual(no_params.content_size, 0)
212 self.assertEqual(no_params.content_size, 0)
174 self.assertEqual(with_params.content_size, 0)
213 self.assertEqual(with_params.content_size, 0)
175 self.assertEqual(no_params.dict_id, 0)
214 self.assertEqual(no_params.dict_id, 0)
176 self.assertEqual(with_params.dict_id, 0)
215 self.assertEqual(with_params.dict_id, 0)
177 self.assertFalse(no_params.has_checksum)
216 self.assertFalse(no_params.has_checksum)
178 self.assertTrue(with_params.has_checksum)
217 self.assertTrue(with_params.has_checksum)
179
218
180 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
219 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
181
220
182 def test_write_content_size(self):
221 def test_write_content_size(self):
183 cctx = zstd.ZstdCompressor(level=1)
222 cctx = zstd.ZstdCompressor(level=1)
184 cobj = cctx.compressobj(size=len(b'foobar' * 256))
223 cobj = cctx.compressobj(size=len(b'foobar' * 256))
185 no_size = cobj.compress(b'foobar' * 256) + cobj.flush()
224 no_size = cobj.compress(b'foobar' * 256) + cobj.flush()
186 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
225 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
187 cobj = cctx.compressobj(size=len(b'foobar' * 256))
226 cobj = cctx.compressobj(size=len(b'foobar' * 256))
188 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
227 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
189
228
190 no_params = zstd.get_frame_parameters(no_size)
229 no_params = zstd.get_frame_parameters(no_size)
191 with_params = zstd.get_frame_parameters(with_size)
230 with_params = zstd.get_frame_parameters(with_size)
192 self.assertEqual(no_params.content_size, 0)
231 self.assertEqual(no_params.content_size, 0)
193 self.assertEqual(with_params.content_size, 1536)
232 self.assertEqual(with_params.content_size, 1536)
194 self.assertEqual(no_params.dict_id, 0)
233 self.assertEqual(no_params.dict_id, 0)
195 self.assertEqual(with_params.dict_id, 0)
234 self.assertEqual(with_params.dict_id, 0)
196 self.assertFalse(no_params.has_checksum)
235 self.assertFalse(no_params.has_checksum)
197 self.assertFalse(with_params.has_checksum)
236 self.assertFalse(with_params.has_checksum)
198
237
199 self.assertEqual(len(with_size), len(no_size) + 1)
238 self.assertEqual(len(with_size), len(no_size) + 1)
200
239
201 def test_compress_after_finished(self):
240 def test_compress_after_finished(self):
202 cctx = zstd.ZstdCompressor()
241 cctx = zstd.ZstdCompressor()
203 cobj = cctx.compressobj()
242 cobj = cctx.compressobj()
204
243
205 cobj.compress(b'foo')
244 cobj.compress(b'foo')
206 cobj.flush()
245 cobj.flush()
207
246
208 with self.assertRaisesRegexp(zstd.ZstdError, r'cannot call compress\(\) after compressor'):
247 with self.assertRaisesRegexp(zstd.ZstdError, r'cannot call compress\(\) after compressor'):
209 cobj.compress(b'foo')
248 cobj.compress(b'foo')
210
249
211 with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'):
250 with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'):
212 cobj.flush()
251 cobj.flush()
213
252
214 def test_flush_block_repeated(self):
253 def test_flush_block_repeated(self):
215 cctx = zstd.ZstdCompressor(level=1)
254 cctx = zstd.ZstdCompressor(level=1)
216 cobj = cctx.compressobj()
255 cobj = cctx.compressobj()
217
256
218 self.assertEqual(cobj.compress(b'foo'), b'')
257 self.assertEqual(cobj.compress(b'foo'), b'')
219 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
258 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
220 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
259 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
221 self.assertEqual(cobj.compress(b'bar'), b'')
260 self.assertEqual(cobj.compress(b'bar'), b'')
222 # 3 byte header plus content.
261 # 3 byte header plus content.
223 self.assertEqual(cobj.flush(), b'\x19\x00\x00bar')
262 self.assertEqual(cobj.flush(), b'\x19\x00\x00bar')
224
263
225 def test_flush_empty_block(self):
264 def test_flush_empty_block(self):
226 cctx = zstd.ZstdCompressor(write_checksum=True)
265 cctx = zstd.ZstdCompressor(write_checksum=True)
227 cobj = cctx.compressobj()
266 cobj = cctx.compressobj()
228
267
229 cobj.compress(b'foobar')
268 cobj.compress(b'foobar')
230 cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
269 cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
231 # No-op if no block is active (this is internal to zstd).
270 # No-op if no block is active (this is internal to zstd).
232 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'')
271 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'')
233
272
234 trailing = cobj.flush()
273 trailing = cobj.flush()
235 # 3 bytes block header + 4 bytes frame checksum
274 # 3 bytes block header + 4 bytes frame checksum
236 self.assertEqual(len(trailing), 7)
275 self.assertEqual(len(trailing), 7)
237 header = trailing[0:3]
276 header = trailing[0:3]
238 self.assertEqual(header, b'\x01\x00\x00')
277 self.assertEqual(header, b'\x01\x00\x00')
239
278
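The flush tests above rely on the layout of the 3-byte zstd block header: per the zstd frame format it is a little-endian value whose lowest bit marks the last block, the next two bits the block type, and the remaining bits the block size. A small decoder written against that layout reproduces the test vectors, where b'\x19\x00\x00' is a final raw block of 3 bytes ('bar') and b'\x01\x00\x00' is the empty final block that ends a frame:

import struct

def decode_block_header(header):
    # Treat the 3 header bytes as a little-endian 24-bit integer.
    value = struct.unpack('<I', header[:3] + b'\x00')[0]
    return {
        'last_block': bool(value & 0x01),
        'block_type': (value >> 1) & 0x03,  # 0 = raw, 1 = RLE, 2 = compressed
        'block_size': value >> 3,
    }

assert decode_block_header(b'\x19\x00\x00') == {
    'last_block': True, 'block_type': 0, 'block_size': 3}
assert decode_block_header(b'\x01\x00\x00') == {
    'last_block': True, 'block_type': 0, 'block_size': 0}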
279 def test_multithreaded(self):
280 source = io.BytesIO()
281 source.write(b'a' * 1048576)
282 source.write(b'b' * 1048576)
283 source.write(b'c' * 1048576)
284 source.seek(0)
285
286 cctx = zstd.ZstdCompressor(level=1, threads=2)
287 cobj = cctx.compressobj()
288
289 chunks = []
290 while True:
291 d = source.read(8192)
292 if not d:
293 break
294
295 chunks.append(cobj.compress(d))
296
297 chunks.append(cobj.flush())
298
299 compressed = b''.join(chunks)
300
301 self.assertEqual(len(compressed), 295)
302
240
303
241 @make_cffi
304 @make_cffi
242 class TestCompressor_copy_stream(unittest.TestCase):
305 class TestCompressor_copy_stream(unittest.TestCase):
243 def test_no_read(self):
306 def test_no_read(self):
244 source = object()
307 source = object()
245 dest = io.BytesIO()
308 dest = io.BytesIO()
246
309
247 cctx = zstd.ZstdCompressor()
310 cctx = zstd.ZstdCompressor()
248 with self.assertRaises(ValueError):
311 with self.assertRaises(ValueError):
249 cctx.copy_stream(source, dest)
312 cctx.copy_stream(source, dest)
250
313
251 def test_no_write(self):
314 def test_no_write(self):
252 source = io.BytesIO()
315 source = io.BytesIO()
253 dest = object()
316 dest = object()
254
317
255 cctx = zstd.ZstdCompressor()
318 cctx = zstd.ZstdCompressor()
256 with self.assertRaises(ValueError):
319 with self.assertRaises(ValueError):
257 cctx.copy_stream(source, dest)
320 cctx.copy_stream(source, dest)
258
321
259 def test_empty(self):
322 def test_empty(self):
260 source = io.BytesIO()
323 source = io.BytesIO()
261 dest = io.BytesIO()
324 dest = io.BytesIO()
262
325
263 cctx = zstd.ZstdCompressor(level=1)
326 cctx = zstd.ZstdCompressor(level=1)
264 r, w = cctx.copy_stream(source, dest)
327 r, w = cctx.copy_stream(source, dest)
265 self.assertEqual(int(r), 0)
328 self.assertEqual(int(r), 0)
266 self.assertEqual(w, 9)
329 self.assertEqual(w, 9)
267
330
268 self.assertEqual(dest.getvalue(),
331 self.assertEqual(dest.getvalue(),
269 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
332 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
270
333
271 def test_large_data(self):
334 def test_large_data(self):
272 source = io.BytesIO()
335 source = io.BytesIO()
273 for i in range(255):
336 for i in range(255):
274 source.write(struct.Struct('>B').pack(i) * 16384)
337 source.write(struct.Struct('>B').pack(i) * 16384)
275 source.seek(0)
338 source.seek(0)
276
339
277 dest = io.BytesIO()
340 dest = io.BytesIO()
278 cctx = zstd.ZstdCompressor()
341 cctx = zstd.ZstdCompressor()
279 r, w = cctx.copy_stream(source, dest)
342 r, w = cctx.copy_stream(source, dest)
280
343
281 self.assertEqual(r, 255 * 16384)
344 self.assertEqual(r, 255 * 16384)
282 self.assertEqual(w, 999)
345 self.assertEqual(w, 999)
283
346
284 params = zstd.get_frame_parameters(dest.getvalue())
347 params = zstd.get_frame_parameters(dest.getvalue())
285 self.assertEqual(params.content_size, 0)
348 self.assertEqual(params.content_size, 0)
286 self.assertEqual(params.window_size, 1048576)
349 self.assertEqual(params.window_size, 1048576)
287 self.assertEqual(params.dict_id, 0)
350 self.assertEqual(params.dict_id, 0)
288 self.assertFalse(params.has_checksum)
351 self.assertFalse(params.has_checksum)
289
352
290 def test_write_checksum(self):
353 def test_write_checksum(self):
291 source = io.BytesIO(b'foobar')
354 source = io.BytesIO(b'foobar')
292 no_checksum = io.BytesIO()
355 no_checksum = io.BytesIO()
293
356
294 cctx = zstd.ZstdCompressor(level=1)
357 cctx = zstd.ZstdCompressor(level=1)
295 cctx.copy_stream(source, no_checksum)
358 cctx.copy_stream(source, no_checksum)
296
359
297 source.seek(0)
360 source.seek(0)
298 with_checksum = io.BytesIO()
361 with_checksum = io.BytesIO()
299 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
362 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
300 cctx.copy_stream(source, with_checksum)
363 cctx.copy_stream(source, with_checksum)
301
364
302 self.assertEqual(len(with_checksum.getvalue()),
365 self.assertEqual(len(with_checksum.getvalue()),
303 len(no_checksum.getvalue()) + 4)
366 len(no_checksum.getvalue()) + 4)
304
367
305 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
368 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
306 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
369 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
307 self.assertEqual(no_params.content_size, 0)
370 self.assertEqual(no_params.content_size, 0)
308 self.assertEqual(with_params.content_size, 0)
371 self.assertEqual(with_params.content_size, 0)
309 self.assertEqual(no_params.dict_id, 0)
372 self.assertEqual(no_params.dict_id, 0)
310 self.assertEqual(with_params.dict_id, 0)
373 self.assertEqual(with_params.dict_id, 0)
311 self.assertFalse(no_params.has_checksum)
374 self.assertFalse(no_params.has_checksum)
312 self.assertTrue(with_params.has_checksum)
375 self.assertTrue(with_params.has_checksum)
313
376
314 def test_write_content_size(self):
377 def test_write_content_size(self):
315 source = io.BytesIO(b'foobar' * 256)
378 source = io.BytesIO(b'foobar' * 256)
316 no_size = io.BytesIO()
379 no_size = io.BytesIO()
317
380
318 cctx = zstd.ZstdCompressor(level=1)
381 cctx = zstd.ZstdCompressor(level=1)
319 cctx.copy_stream(source, no_size)
382 cctx.copy_stream(source, no_size)
320
383
321 source.seek(0)
384 source.seek(0)
322 with_size = io.BytesIO()
385 with_size = io.BytesIO()
323 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
386 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
324 cctx.copy_stream(source, with_size)
387 cctx.copy_stream(source, with_size)
325
388
326 # Source content size is unknown, so no content size written.
389 # Source content size is unknown, so no content size written.
327 self.assertEqual(len(with_size.getvalue()),
390 self.assertEqual(len(with_size.getvalue()),
328 len(no_size.getvalue()))
391 len(no_size.getvalue()))
329
392
330 source.seek(0)
393 source.seek(0)
331 with_size = io.BytesIO()
394 with_size = io.BytesIO()
332 cctx.copy_stream(source, with_size, size=len(source.getvalue()))
395 cctx.copy_stream(source, with_size, size=len(source.getvalue()))
333
396
334 # We specified source size, so content size header is present.
397 # We specified source size, so content size header is present.
335 self.assertEqual(len(with_size.getvalue()),
398 self.assertEqual(len(with_size.getvalue()),
336 len(no_size.getvalue()) + 1)
399 len(no_size.getvalue()) + 1)
337
400
338 no_params = zstd.get_frame_parameters(no_size.getvalue())
401 no_params = zstd.get_frame_parameters(no_size.getvalue())
339 with_params = zstd.get_frame_parameters(with_size.getvalue())
402 with_params = zstd.get_frame_parameters(with_size.getvalue())
340 self.assertEqual(no_params.content_size, 0)
403 self.assertEqual(no_params.content_size, 0)
341 self.assertEqual(with_params.content_size, 1536)
404 self.assertEqual(with_params.content_size, 1536)
342 self.assertEqual(no_params.dict_id, 0)
405 self.assertEqual(no_params.dict_id, 0)
343 self.assertEqual(with_params.dict_id, 0)
406 self.assertEqual(with_params.dict_id, 0)
344 self.assertFalse(no_params.has_checksum)
407 self.assertFalse(no_params.has_checksum)
345 self.assertFalse(with_params.has_checksum)
408 self.assertFalse(with_params.has_checksum)
346
409
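As the comments in test_write_content_size note, copy_stream() only records a content size when the caller declares it via size=. A sketch of a convenience wrapper that measures seekable sources automatically; copy_stream_with_size is a hypothetical helper, not a library API:

import io
import zstd

def copy_stream_with_size(cctx, source, dest):
    size = 0
    try:
        pos = source.tell()
        source.seek(0, io.SEEK_END)
        size = source.tell() - pos
        source.seek(pos)
    except (AttributeError, IOError, OSError):
        size = 0  # unseekable source; content size stays unknown
    return cctx.copy_stream(source, dest, size=size)

cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
dest = io.BytesIO()
copy_stream_with_size(cctx, io.BytesIO(b'foobar' * 256), dest)
assert zstd.get_frame_parameters(dest.getvalue()).content_size == 1536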
347 def test_read_write_size(self):
410 def test_read_write_size(self):
348 source = OpCountingBytesIO(b'foobarfoobar')
411 source = OpCountingBytesIO(b'foobarfoobar')
349 dest = OpCountingBytesIO()
412 dest = OpCountingBytesIO()
350 cctx = zstd.ZstdCompressor()
413 cctx = zstd.ZstdCompressor()
351 r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1)
414 r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1)
352
415
353 self.assertEqual(r, len(source.getvalue()))
416 self.assertEqual(r, len(source.getvalue()))
354 self.assertEqual(w, 21)
417 self.assertEqual(w, 21)
355 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
418 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
356 self.assertEqual(dest._write_count, len(dest.getvalue()))
419 self.assertEqual(dest._write_count, len(dest.getvalue()))
357
420
421 def test_multithreaded(self):
422 source = io.BytesIO()
423 source.write(b'a' * 1048576)
424 source.write(b'b' * 1048576)
425 source.write(b'c' * 1048576)
426 source.seek(0)
427
428 dest = io.BytesIO()
429 cctx = zstd.ZstdCompressor(threads=2)
430 r, w = cctx.copy_stream(source, dest)
431 self.assertEqual(r, 3145728)
432 self.assertEqual(w, 295)
433
434 params = zstd.get_frame_parameters(dest.getvalue())
435 self.assertEqual(params.content_size, 0)
436 self.assertEqual(params.dict_id, 0)
437 self.assertFalse(params.has_checksum)
438
439 # Writing content size and checksum works.
440 cctx = zstd.ZstdCompressor(threads=2, write_content_size=True,
441 write_checksum=True)
442 dest = io.BytesIO()
443 source.seek(0)
444 cctx.copy_stream(source, dest, size=len(source.getvalue()))
445
446 params = zstd.get_frame_parameters(dest.getvalue())
447 self.assertEqual(params.content_size, 3145728)
448 self.assertEqual(params.dict_id, 0)
449 self.assertTrue(params.has_checksum)
450
358
451
359 def compress(data, level):
452 def compress(data, level):
360 buffer = io.BytesIO()
453 buffer = io.BytesIO()
361 cctx = zstd.ZstdCompressor(level=level)
454 cctx = zstd.ZstdCompressor(level=level)
362 with cctx.write_to(buffer) as compressor:
455 with cctx.write_to(buffer) as compressor:
363 compressor.write(data)
456 compressor.write(data)
364 return buffer.getvalue()
457 return buffer.getvalue()
365
458
366
459
367 @make_cffi
460 @make_cffi
368 class TestCompressor_write_to(unittest.TestCase):
461 class TestCompressor_write_to(unittest.TestCase):
369 def test_empty(self):
462 def test_empty(self):
370 result = compress(b'', 1)
463 result = compress(b'', 1)
371 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
464 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
372
465
373 params = zstd.get_frame_parameters(result)
466 params = zstd.get_frame_parameters(result)
374 self.assertEqual(params.content_size, 0)
467 self.assertEqual(params.content_size, 0)
375 self.assertEqual(params.window_size, 524288)
468 self.assertEqual(params.window_size, 524288)
376 self.assertEqual(params.dict_id, 0)
469 self.assertEqual(params.dict_id, 0)
377 self.assertFalse(params.has_checksum)
470 self.assertFalse(params.has_checksum)
378
471
379 def test_multiple_compress(self):
472 def test_multiple_compress(self):
380 buffer = io.BytesIO()
473 buffer = io.BytesIO()
381 cctx = zstd.ZstdCompressor(level=5)
474 cctx = zstd.ZstdCompressor(level=5)
382 with cctx.write_to(buffer) as compressor:
475 with cctx.write_to(buffer) as compressor:
383 self.assertEqual(compressor.write(b'foo'), 0)
476 self.assertEqual(compressor.write(b'foo'), 0)
384 self.assertEqual(compressor.write(b'bar'), 0)
477 self.assertEqual(compressor.write(b'bar'), 0)
385 self.assertEqual(compressor.write(b'x' * 8192), 0)
478 self.assertEqual(compressor.write(b'x' * 8192), 0)
386
479
387 result = buffer.getvalue()
480 result = buffer.getvalue()
388 self.assertEqual(result,
481 self.assertEqual(result,
389 b'\x28\xb5\x2f\xfd\x00\x50\x75\x00\x00\x38\x66\x6f'
482 b'\x28\xb5\x2f\xfd\x00\x50\x75\x00\x00\x38\x66\x6f'
390 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
483 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
391
484
392 def test_dictionary(self):
485 def test_dictionary(self):
393 samples = []
486 samples = []
394 for i in range(128):
487 for i in range(128):
395 samples.append(b'foo' * 64)
488 samples.append(b'foo' * 64)
396 samples.append(b'bar' * 64)
489 samples.append(b'bar' * 64)
397 samples.append(b'foobar' * 64)
490 samples.append(b'foobar' * 64)
398
491
399 d = zstd.train_dictionary(8192, samples)
492 d = zstd.train_dictionary(8192, samples)
400
493
401 buffer = io.BytesIO()
494 buffer = io.BytesIO()
402 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
495 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
403 with cctx.write_to(buffer) as compressor:
496 with cctx.write_to(buffer) as compressor:
404 self.assertEqual(compressor.write(b'foo'), 0)
497 self.assertEqual(compressor.write(b'foo'), 0)
405 self.assertEqual(compressor.write(b'bar'), 0)
498 self.assertEqual(compressor.write(b'bar'), 0)
406 self.assertEqual(compressor.write(b'foo' * 16384), 634)
499 self.assertEqual(compressor.write(b'foo' * 16384), 634)
407
500
408 compressed = buffer.getvalue()
501 compressed = buffer.getvalue()
409
502
410 params = zstd.get_frame_parameters(compressed)
503 params = zstd.get_frame_parameters(compressed)
411 self.assertEqual(params.content_size, 0)
504 self.assertEqual(params.content_size, 0)
412 self.assertEqual(params.window_size, 1024)
505 self.assertEqual(params.window_size, 1024)
413 self.assertEqual(params.dict_id, d.dict_id())
506 self.assertEqual(params.dict_id, d.dict_id())
414 self.assertFalse(params.has_checksum)
507 self.assertFalse(params.has_checksum)
415
508
416 self.assertEqual(compressed[0:32],
509 self.assertEqual(compressed[0:32],
417 b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00'
510 b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00'
418 b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54'
511 b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54'
419 b'\x00\x00\x18\x6f\x6f\x66\x01\x00')
512 b'\x00\x00\x18\x6f\x6f\x66\x01\x00')
420
513
421 h = hashlib.sha1(compressed).hexdigest()
514 h = hashlib.sha1(compressed).hexdigest()
422 self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92')
515 self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92')
423
516
424 def test_compression_params(self):
517 def test_compression_params(self):
425 params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST)
518 params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST)
426
519
427 buffer = io.BytesIO()
520 buffer = io.BytesIO()
428 cctx = zstd.ZstdCompressor(compression_params=params)
521 cctx = zstd.ZstdCompressor(compression_params=params)
429 with cctx.write_to(buffer) as compressor:
522 with cctx.write_to(buffer) as compressor:
430 self.assertEqual(compressor.write(b'foo'), 0)
523 self.assertEqual(compressor.write(b'foo'), 0)
431 self.assertEqual(compressor.write(b'bar'), 0)
524 self.assertEqual(compressor.write(b'bar'), 0)
432 self.assertEqual(compressor.write(b'foobar' * 16384), 0)
525 self.assertEqual(compressor.write(b'foobar' * 16384), 0)
433
526
434 compressed = buffer.getvalue()
527 compressed = buffer.getvalue()
435
528
436 params = zstd.get_frame_parameters(compressed)
529 params = zstd.get_frame_parameters(compressed)
437 self.assertEqual(params.content_size, 0)
530 self.assertEqual(params.content_size, 0)
438 self.assertEqual(params.window_size, 1048576)
531 self.assertEqual(params.window_size, 1048576)
439 self.assertEqual(params.dict_id, 0)
532 self.assertEqual(params.dict_id, 0)
440 self.assertFalse(params.has_checksum)
533 self.assertFalse(params.has_checksum)
441
534
442 h = hashlib.sha1(compressed).hexdigest()
535 h = hashlib.sha1(compressed).hexdigest()
443 self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99')
536 self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99')
444
537
445 def test_write_checksum(self):
538 def test_write_checksum(self):
446 no_checksum = io.BytesIO()
539 no_checksum = io.BytesIO()
447 cctx = zstd.ZstdCompressor(level=1)
540 cctx = zstd.ZstdCompressor(level=1)
448 with cctx.write_to(no_checksum) as compressor:
541 with cctx.write_to(no_checksum) as compressor:
449 self.assertEqual(compressor.write(b'foobar'), 0)
542 self.assertEqual(compressor.write(b'foobar'), 0)
450
543
451 with_checksum = io.BytesIO()
544 with_checksum = io.BytesIO()
452 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
545 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
453 with cctx.write_to(with_checksum) as compressor:
546 with cctx.write_to(with_checksum) as compressor:
454 self.assertEqual(compressor.write(b'foobar'), 0)
547 self.assertEqual(compressor.write(b'foobar'), 0)
455
548
456 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
549 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
457 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
550 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
458 self.assertEqual(no_params.content_size, 0)
551 self.assertEqual(no_params.content_size, 0)
459 self.assertEqual(with_params.content_size, 0)
552 self.assertEqual(with_params.content_size, 0)
460 self.assertEqual(no_params.dict_id, 0)
553 self.assertEqual(no_params.dict_id, 0)
461 self.assertEqual(with_params.dict_id, 0)
554 self.assertEqual(with_params.dict_id, 0)
462 self.assertFalse(no_params.has_checksum)
555 self.assertFalse(no_params.has_checksum)
463 self.assertTrue(with_params.has_checksum)
556 self.assertTrue(with_params.has_checksum)
464
557
465 self.assertEqual(len(with_checksum.getvalue()),
558 self.assertEqual(len(with_checksum.getvalue()),
466 len(no_checksum.getvalue()) + 4)
559 len(no_checksum.getvalue()) + 4)
467
560
468 def test_write_content_size(self):
561 def test_write_content_size(self):
469 no_size = io.BytesIO()
562 no_size = io.BytesIO()
470 cctx = zstd.ZstdCompressor(level=1)
563 cctx = zstd.ZstdCompressor(level=1)
471 with cctx.write_to(no_size) as compressor:
564 with cctx.write_to(no_size) as compressor:
472 self.assertEqual(compressor.write(b'foobar' * 256), 0)
565 self.assertEqual(compressor.write(b'foobar' * 256), 0)
473
566
474 with_size = io.BytesIO()
567 with_size = io.BytesIO()
475 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
568 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
476 with cctx.write_to(with_size) as compressor:
569 with cctx.write_to(with_size) as compressor:
477 self.assertEqual(compressor.write(b'foobar' * 256), 0)
570 self.assertEqual(compressor.write(b'foobar' * 256), 0)
478
571
479 # Source size is not known in streaming mode, so the content size header
572 # Source size is not known in streaming mode, so the content size header
480 # is not written.
573 # is not written.
481 self.assertEqual(len(with_size.getvalue()),
574 self.assertEqual(len(with_size.getvalue()),
482 len(no_size.getvalue()))
575 len(no_size.getvalue()))
483
576
484 # Declaring size will write the header.
577 # Declaring size will write the header.
485 with_size = io.BytesIO()
578 with_size = io.BytesIO()
486 with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor:
579 with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor:
487 self.assertEqual(compressor.write(b'foobar' * 256), 0)
580 self.assertEqual(compressor.write(b'foobar' * 256), 0)
488
581
489 no_params = zstd.get_frame_parameters(no_size.getvalue())
582 no_params = zstd.get_frame_parameters(no_size.getvalue())
490 with_params = zstd.get_frame_parameters(with_size.getvalue())
583 with_params = zstd.get_frame_parameters(with_size.getvalue())
491 self.assertEqual(no_params.content_size, 0)
584 self.assertEqual(no_params.content_size, 0)
492 self.assertEqual(with_params.content_size, 1536)
585 self.assertEqual(with_params.content_size, 1536)
493 self.assertEqual(no_params.dict_id, 0)
586 self.assertEqual(no_params.dict_id, 0)
494 self.assertEqual(with_params.dict_id, 0)
587 self.assertEqual(with_params.dict_id, 0)
495 self.assertFalse(no_params.has_checksum)
588 self.assertFalse(no_params.has_checksum)
496 self.assertFalse(with_params.has_checksum)
589 self.assertFalse(with_params.has_checksum)
497
590
498 self.assertEqual(len(with_size.getvalue()),
591 self.assertEqual(len(with_size.getvalue()),
499 len(no_size.getvalue()) + 1)
592 len(no_size.getvalue()) + 1)
500
593
501 def test_no_dict_id(self):
594 def test_no_dict_id(self):
502 samples = []
595 samples = []
503 for i in range(128):
596 for i in range(128):
504 samples.append(b'foo' * 64)
597 samples.append(b'foo' * 64)
505 samples.append(b'bar' * 64)
598 samples.append(b'bar' * 64)
506 samples.append(b'foobar' * 64)
599 samples.append(b'foobar' * 64)
507
600
508 d = zstd.train_dictionary(1024, samples)
601 d = zstd.train_dictionary(1024, samples)
509
602
510 with_dict_id = io.BytesIO()
603 with_dict_id = io.BytesIO()
511 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
604 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
512 with cctx.write_to(with_dict_id) as compressor:
605 with cctx.write_to(with_dict_id) as compressor:
513 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
606 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
514
607
515 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
608 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
516 no_dict_id = io.BytesIO()
609 no_dict_id = io.BytesIO()
517 with cctx.write_to(no_dict_id) as compressor:
610 with cctx.write_to(no_dict_id) as compressor:
518 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
611 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
519
612
520 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
613 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
521 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
614 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
522 self.assertEqual(no_params.content_size, 0)
615 self.assertEqual(no_params.content_size, 0)
523 self.assertEqual(with_params.content_size, 0)
616 self.assertEqual(with_params.content_size, 0)
524 self.assertEqual(no_params.dict_id, 0)
617 self.assertEqual(no_params.dict_id, 0)
525 self.assertEqual(with_params.dict_id, d.dict_id())
618 self.assertEqual(with_params.dict_id, d.dict_id())
526 self.assertFalse(no_params.has_checksum)
619 self.assertFalse(no_params.has_checksum)
527 self.assertFalse(with_params.has_checksum)
620 self.assertFalse(with_params.has_checksum)
528
621
529 self.assertEqual(len(with_dict_id.getvalue()),
622 self.assertEqual(len(with_dict_id.getvalue()),
530 len(no_dict_id.getvalue()) + 4)
623 len(no_dict_id.getvalue()) + 4)
531
624
532 def test_memory_size(self):
625 def test_memory_size(self):
533 cctx = zstd.ZstdCompressor(level=3)
626 cctx = zstd.ZstdCompressor(level=3)
534 buffer = io.BytesIO()
627 buffer = io.BytesIO()
535 with cctx.write_to(buffer) as compressor:
628 with cctx.write_to(buffer) as compressor:
536 size = compressor.memory_size()
629 size = compressor.memory_size()
537
630
538 self.assertGreater(size, 100000)
631 self.assertGreater(size, 100000)
539
632
540 def test_write_size(self):
633 def test_write_size(self):
541 cctx = zstd.ZstdCompressor(level=3)
634 cctx = zstd.ZstdCompressor(level=3)
542 dest = OpCountingBytesIO()
635 dest = OpCountingBytesIO()
543 with cctx.write_to(dest, write_size=1) as compressor:
636 with cctx.write_to(dest, write_size=1) as compressor:
544 self.assertEqual(compressor.write(b'foo'), 0)
637 self.assertEqual(compressor.write(b'foo'), 0)
545 self.assertEqual(compressor.write(b'bar'), 0)
638 self.assertEqual(compressor.write(b'bar'), 0)
546 self.assertEqual(compressor.write(b'foobar'), 0)
639 self.assertEqual(compressor.write(b'foobar'), 0)
547
640
548 self.assertEqual(len(dest.getvalue()), dest._write_count)
641 self.assertEqual(len(dest.getvalue()), dest._write_count)
549
642
550 def test_flush_repeated(self):
643 def test_flush_repeated(self):
551 cctx = zstd.ZstdCompressor(level=3)
644 cctx = zstd.ZstdCompressor(level=3)
552 dest = OpCountingBytesIO()
645 dest = OpCountingBytesIO()
553 with cctx.write_to(dest) as compressor:
646 with cctx.write_to(dest) as compressor:
554 self.assertEqual(compressor.write(b'foo'), 0)
647 self.assertEqual(compressor.write(b'foo'), 0)
555 self.assertEqual(dest._write_count, 0)
648 self.assertEqual(dest._write_count, 0)
556 self.assertEqual(compressor.flush(), 12)
649 self.assertEqual(compressor.flush(), 12)
557 self.assertEqual(dest._write_count, 1)
650 self.assertEqual(dest._write_count, 1)
558 self.assertEqual(compressor.write(b'bar'), 0)
651 self.assertEqual(compressor.write(b'bar'), 0)
559 self.assertEqual(dest._write_count, 1)
652 self.assertEqual(dest._write_count, 1)
560 self.assertEqual(compressor.flush(), 6)
653 self.assertEqual(compressor.flush(), 6)
561 self.assertEqual(dest._write_count, 2)
654 self.assertEqual(dest._write_count, 2)
562 self.assertEqual(compressor.write(b'baz'), 0)
655 self.assertEqual(compressor.write(b'baz'), 0)
563
656
564 self.assertEqual(dest._write_count, 3)
657 self.assertEqual(dest._write_count, 3)
565
658
566 def test_flush_empty_block(self):
659 def test_flush_empty_block(self):
567 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
660 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
568 dest = OpCountingBytesIO()
661 dest = OpCountingBytesIO()
569 with cctx.write_to(dest) as compressor:
662 with cctx.write_to(dest) as compressor:
570 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
663 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
571 count = dest._write_count
664 count = dest._write_count
572 offset = dest.tell()
665 offset = dest.tell()
573 self.assertEqual(compressor.flush(), 23)
666 self.assertEqual(compressor.flush(), 23)
574 self.assertGreater(dest._write_count, count)
667 self.assertGreater(dest._write_count, count)
575 self.assertGreater(dest.tell(), offset)
668 self.assertGreater(dest.tell(), offset)
576 offset = dest.tell()
669 offset = dest.tell()
577 # Ending the write here should cause an empty block to be written
670 # Ending the write here should cause an empty block to be written
578 # to denote end of frame.
671 # to denote end of frame.
579
672
580 trailing = dest.getvalue()[offset:]
673 trailing = dest.getvalue()[offset:]
581 # 3 bytes block header + 4 bytes frame checksum
674 # 3 bytes block header + 4 bytes frame checksum
582 self.assertEqual(len(trailing), 7)
675 self.assertEqual(len(trailing), 7)
583
676
584 header = trailing[0:3]
677 header = trailing[0:3]
585 self.assertEqual(header, b'\x01\x00\x00')
678 self.assertEqual(header, b'\x01\x00\x00')
586
679
680 def test_multithreaded(self):
681 dest = io.BytesIO()
682 cctx = zstd.ZstdCompressor(threads=2)
683 with cctx.write_to(dest) as compressor:
684 compressor.write(b'a' * 1048576)
685 compressor.write(b'b' * 1048576)
686 compressor.write(b'c' * 1048576)
687
688 self.assertEqual(len(dest.getvalue()), 295)
689
587
690
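The write_to() tests above show a streaming writer whose flush() pushes the current block downstream without ending the frame; the frame epilogue is written when the context manager exits. A sketch of the same pattern around a real file, flushing after each logical record so readers see it promptly; the file path and record source are illustrative only:

import zstd

def write_records(records, out_path, level=3):
    cctx = zstd.ZstdCompressor(level=level)
    with open(out_path, 'wb') as fh:
        with cctx.write_to(fh) as compressor:
            for record in records:      # each record is a bytes object
                compressor.write(record)
                compressor.flush()      # flush the block; frame ends on exit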
588 @make_cffi
691 @make_cffi
589 class TestCompressor_read_from(unittest.TestCase):
692 class TestCompressor_read_from(unittest.TestCase):
590 def test_type_validation(self):
693 def test_type_validation(self):
591 cctx = zstd.ZstdCompressor()
694 cctx = zstd.ZstdCompressor()
592
695
593 # Object with read() works.
696 # Object with read() works.
594 for chunk in cctx.read_from(io.BytesIO()):
697 for chunk in cctx.read_from(io.BytesIO()):
595 pass
698 pass
596
699
597 # Buffer protocol works.
700 # Buffer protocol works.
598 for chunk in cctx.read_from(b'foobar'):
701 for chunk in cctx.read_from(b'foobar'):
599 pass
702 pass
600
703
601 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
704 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
602 for chunk in cctx.read_from(True):
705 for chunk in cctx.read_from(True):
603 pass
706 pass
604
707
605 def test_read_empty(self):
708 def test_read_empty(self):
606 cctx = zstd.ZstdCompressor(level=1)
709 cctx = zstd.ZstdCompressor(level=1)
607
710
608 source = io.BytesIO()
711 source = io.BytesIO()
609 it = cctx.read_from(source)
712 it = cctx.read_from(source)
610 chunks = list(it)
713 chunks = list(it)
611 self.assertEqual(len(chunks), 1)
714 self.assertEqual(len(chunks), 1)
612 compressed = b''.join(chunks)
715 compressed = b''.join(chunks)
613 self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
716 self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
614
717
615 # And again with the buffer protocol.
718 # And again with the buffer protocol.
616 it = cctx.read_from(b'')
719 it = cctx.read_from(b'')
617 chunks = list(it)
720 chunks = list(it)
618 self.assertEqual(len(chunks), 1)
721 self.assertEqual(len(chunks), 1)
619 compressed2 = b''.join(chunks)
722 compressed2 = b''.join(chunks)
620 self.assertEqual(compressed2, compressed)
723 self.assertEqual(compressed2, compressed)
621
724
622 def test_read_large(self):
725 def test_read_large(self):
623 cctx = zstd.ZstdCompressor(level=1)
726 cctx = zstd.ZstdCompressor(level=1)
624
727
625 source = io.BytesIO()
728 source = io.BytesIO()
626 source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
729 source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
627 source.write(b'o')
730 source.write(b'o')
628 source.seek(0)
731 source.seek(0)
629
732
630 # Creating an iterator should not perform any compression until
733 # Creating an iterator should not perform any compression until
631 # first read.
734 # first read.
632 it = cctx.read_from(source, size=len(source.getvalue()))
735 it = cctx.read_from(source, size=len(source.getvalue()))
633 self.assertEqual(source.tell(), 0)
736 self.assertEqual(source.tell(), 0)
634
737
635 # We should have exactly 2 output chunks.
738 # We should have exactly 2 output chunks.
636 chunks = []
739 chunks = []
637 chunk = next(it)
740 chunk = next(it)
638 self.assertIsNotNone(chunk)
741 self.assertIsNotNone(chunk)
639 self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
742 self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
640 chunks.append(chunk)
743 chunks.append(chunk)
641 chunk = next(it)
744 chunk = next(it)
642 self.assertIsNotNone(chunk)
745 self.assertIsNotNone(chunk)
643 chunks.append(chunk)
746 chunks.append(chunk)
644
747
645 self.assertEqual(source.tell(), len(source.getvalue()))
748 self.assertEqual(source.tell(), len(source.getvalue()))
646
749
647 with self.assertRaises(StopIteration):
750 with self.assertRaises(StopIteration):
648 next(it)
751 next(it)
649
752
650 # And again for good measure.
753 # And again for good measure.
651 with self.assertRaises(StopIteration):
754 with self.assertRaises(StopIteration):
652 next(it)
755 next(it)
653
756
654 # We should get the same output as the one-shot compression mechanism.
757 # We should get the same output as the one-shot compression mechanism.
655 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
758 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
656
759
657 params = zstd.get_frame_parameters(b''.join(chunks))
760 params = zstd.get_frame_parameters(b''.join(chunks))
658 self.assertEqual(params.content_size, 0)
761 self.assertEqual(params.content_size, 0)
659 self.assertEqual(params.window_size, 262144)
762 self.assertEqual(params.window_size, 262144)
660 self.assertEqual(params.dict_id, 0)
763 self.assertEqual(params.dict_id, 0)
661 self.assertFalse(params.has_checksum)
764 self.assertFalse(params.has_checksum)
662
765
663 # Now check the buffer protocol.
766 # Now check the buffer protocol.
664 it = cctx.read_from(source.getvalue())
767 it = cctx.read_from(source.getvalue())
665 chunks = list(it)
768 chunks = list(it)
666 self.assertEqual(len(chunks), 2)
769 self.assertEqual(len(chunks), 2)
667 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
770 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
668
771
669 def test_read_write_size(self):
772 def test_read_write_size(self):
670 source = OpCountingBytesIO(b'foobarfoobar')
773 source = OpCountingBytesIO(b'foobarfoobar')
671 cctx = zstd.ZstdCompressor(level=3)
774 cctx = zstd.ZstdCompressor(level=3)
672 for chunk in cctx.read_from(source, read_size=1, write_size=1):
775 for chunk in cctx.read_from(source, read_size=1, write_size=1):
673 self.assertEqual(len(chunk), 1)
776 self.assertEqual(len(chunk), 1)
674
777
675 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
778 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
779
780 def test_multithreaded(self):
781 source = io.BytesIO()
782 source.write(b'a' * 1048576)
783 source.write(b'b' * 1048576)
784 source.write(b'c' * 1048576)
785 source.seek(0)
786
787 cctx = zstd.ZstdCompressor(threads=2)
788
789 compressed = b''.join(cctx.read_from(source))
790 self.assertEqual(len(compressed), 295)
791
792
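read_from() yields compressed chunks lazily (nothing is read from the source until the first iteration), so it can stream large inputs with bounded memory. A minimal sketch of compressing a file with it; the paths are illustrative only:

import zstd

def compress_file(in_path, out_path, level=3, threads=0):
    cctx = zstd.ZstdCompressor(level=level, threads=threads)
    with open(in_path, 'rb') as ifh, open(out_path, 'wb') as ofh:
        for chunk in cctx.read_from(ifh):
            ofh.write(chunk)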
793 class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
794 def test_multithreaded_unsupported(self):
795 cctx = zstd.ZstdCompressor(threads=2)
796
797 with self.assertRaisesRegexp(zstd.ZstdError, 'function cannot be called on ZstdCompressor configured for multi-threaded compression'):
798 cctx.multi_compress_to_buffer([b'foo'])
799
800 def test_invalid_inputs(self):
801 cctx = zstd.ZstdCompressor()
802
803 with self.assertRaises(TypeError):
804 cctx.multi_compress_to_buffer(True)
805
806 with self.assertRaises(TypeError):
807 cctx.multi_compress_to_buffer((1, 2))
808
809 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
810 cctx.multi_compress_to_buffer([u'foo'])
811
812 def test_empty_input(self):
813 cctx = zstd.ZstdCompressor()
814
815 with self.assertRaisesRegexp(ValueError, 'no source elements found'):
816 cctx.multi_compress_to_buffer([])
817
818 with self.assertRaisesRegexp(ValueError, 'source elements are empty'):
819 cctx.multi_compress_to_buffer([b'', b'', b''])
820
821 def test_list_input(self):
822 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
823
824 original = [b'foo' * 12, b'bar' * 6]
825 frames = [cctx.compress(c) for c in original]
826 b = cctx.multi_compress_to_buffer(original)
827
828 self.assertIsInstance(b, zstd.BufferWithSegmentsCollection)
829
830 self.assertEqual(len(b), 2)
831 self.assertEqual(b.size(), 44)
832
833 self.assertEqual(b[0].tobytes(), frames[0])
834 self.assertEqual(b[1].tobytes(), frames[1])
835
836 def test_buffer_with_segments_input(self):
837 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
838
839 original = [b'foo' * 4, b'bar' * 6]
840 frames = [cctx.compress(c) for c in original]
841
842 offsets = struct.pack('=QQQQ', 0, len(original[0]),
843 len(original[0]), len(original[1]))
844 segments = zstd.BufferWithSegments(b''.join(original), offsets)
845
846 result = cctx.multi_compress_to_buffer(segments)
847
848 self.assertEqual(len(result), 2)
849 self.assertEqual(result.size(), 47)
850
851 self.assertEqual(result[0].tobytes(), frames[0])
852 self.assertEqual(result[1].tobytes(), frames[1])
853
854 def test_buffer_with_segments_collection_input(self):
855 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
856
857 original = [
858 b'foo1',
859 b'foo2' * 2,
860 b'foo3' * 3,
861 b'foo4' * 4,
862 b'foo5' * 5,
863 ]
864
865 frames = [cctx.compress(c) for c in original]
866
867 b = b''.join([original[0], original[1]])
868 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
869 0, len(original[0]),
870 len(original[0]), len(original[1])))
871 b = b''.join([original[2], original[3], original[4]])
872 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
873 0, len(original[2]),
874 len(original[2]), len(original[3]),
875 len(original[2]) + len(original[3]), len(original[4])))
876
877 c = zstd.BufferWithSegmentsCollection(b1, b2)
878
879 result = cctx.multi_compress_to_buffer(c)
880
881 self.assertEqual(len(result), len(frames))
882
883 for i, frame in enumerate(frames):
884 self.assertEqual(result[i].tobytes(), frame)
885
886 def test_multiple_threads(self):
887 # Passing threads to ZstdCompressor would use the multi-threaded ZSTD APIs and
888 # produce different output; parallelism is requested via multi_compress_to_buffer instead.
889 refcctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
890 reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)]
891
892 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
893
894 frames = []
895 frames.extend(b'x' * 64 for i in range(256))
896 frames.extend(b'y' * 64 for i in range(256))
897
898 result = cctx.multi_compress_to_buffer(frames, threads=-1)
899
900 self.assertEqual(len(result), 512)
901 for i in range(512):
902 if i < 256:
903 self.assertEqual(result[i].tobytes(), reference[0])
904 else:
905 self.assertEqual(result[i].tobytes(), reference[1])
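The BufferWithSegments inputs above are described by an array of native-endian unsigned 64-bit (offset, length) pairs packed with struct. A sketch of building that array from a list of byte chunks; make_buffer_with_segments is a hypothetical helper, not part of the library:

import struct
import zstd

def make_buffer_with_segments(chunks):
    offsets = []
    pos = 0
    for chunk in chunks:
        offsets.extend((pos, len(chunk)))   # one (offset, length) pair per chunk
        pos += len(chunk)
    packed = struct.pack('=' + 'QQ' * len(chunks), *offsets)
    return zstd.BufferWithSegments(b''.join(chunks), packed)

segments = make_buffer_with_segments([b'foo' * 4, b'bar' * 6])
cctx = zstd.ZstdCompressor(write_content_size=True)
result = cctx.multi_compress_to_buffer(segments)
assert len(result) == 2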
@@ -1,186 +1,123
1 import io
2
3 try:
1 try:
4 import unittest2 as unittest
2 import unittest2 as unittest
5 except ImportError:
3 except ImportError:
6 import unittest
4 import unittest
7
5
8 try:
9 import hypothesis
10 import hypothesis.strategies as strategies
11 except ImportError:
12 hypothesis = None
13
14 import zstd
6 import zstd
15
7
16 from . common import (
8 from . common import (
17 make_cffi,
9 make_cffi,
18 )
10 )
19
11
20
12
21 @make_cffi
13 @make_cffi
22 class TestCompressionParameters(unittest.TestCase):
14 class TestCompressionParameters(unittest.TestCase):
23 def test_init_bad_arg_type(self):
15 def test_init_bad_arg_type(self):
24 with self.assertRaises(TypeError):
16 with self.assertRaises(TypeError):
25 zstd.CompressionParameters()
17 zstd.CompressionParameters()
26
18
27 with self.assertRaises(TypeError):
19 with self.assertRaises(TypeError):
28 zstd.CompressionParameters(0, 1)
20 zstd.CompressionParameters(0, 1)
29
21
30 def test_bounds(self):
22 def test_bounds(self):
31 zstd.CompressionParameters(zstd.WINDOWLOG_MIN,
23 zstd.CompressionParameters(zstd.WINDOWLOG_MIN,
32 zstd.CHAINLOG_MIN,
24 zstd.CHAINLOG_MIN,
33 zstd.HASHLOG_MIN,
25 zstd.HASHLOG_MIN,
34 zstd.SEARCHLOG_MIN,
26 zstd.SEARCHLOG_MIN,
35 zstd.SEARCHLENGTH_MIN,
27 zstd.SEARCHLENGTH_MIN + 1,
36 zstd.TARGETLENGTH_MIN,
28 zstd.TARGETLENGTH_MIN,
37 zstd.STRATEGY_FAST)
29 zstd.STRATEGY_FAST)
38
30
39 zstd.CompressionParameters(zstd.WINDOWLOG_MAX,
31 zstd.CompressionParameters(zstd.WINDOWLOG_MAX,
40 zstd.CHAINLOG_MAX,
32 zstd.CHAINLOG_MAX,
41 zstd.HASHLOG_MAX,
33 zstd.HASHLOG_MAX,
42 zstd.SEARCHLOG_MAX,
34 zstd.SEARCHLOG_MAX,
43 zstd.SEARCHLENGTH_MAX,
35 zstd.SEARCHLENGTH_MAX - 1,
44 zstd.TARGETLENGTH_MAX,
36 zstd.TARGETLENGTH_MAX,
45 zstd.STRATEGY_BTOPT)
37 zstd.STRATEGY_BTOPT)
46
38
47 def test_get_compression_parameters(self):
39 def test_get_compression_parameters(self):
48 p = zstd.get_compression_parameters(1)
40 p = zstd.get_compression_parameters(1)
49 self.assertIsInstance(p, zstd.CompressionParameters)
41 self.assertIsInstance(p, zstd.CompressionParameters)
50
42
51 self.assertEqual(p.window_log, 19)
43 self.assertEqual(p.window_log, 19)
52
44
53 def test_members(self):
45 def test_members(self):
54 p = zstd.CompressionParameters(10, 6, 7, 4, 5, 8, 1)
46 p = zstd.CompressionParameters(10, 6, 7, 4, 5, 8, 1)
55 self.assertEqual(p.window_log, 10)
47 self.assertEqual(p.window_log, 10)
56 self.assertEqual(p.chain_log, 6)
48 self.assertEqual(p.chain_log, 6)
57 self.assertEqual(p.hash_log, 7)
49 self.assertEqual(p.hash_log, 7)
58 self.assertEqual(p.search_log, 4)
50 self.assertEqual(p.search_log, 4)
59 self.assertEqual(p.search_length, 5)
51 self.assertEqual(p.search_length, 5)
60 self.assertEqual(p.target_length, 8)
52 self.assertEqual(p.target_length, 8)
61 self.assertEqual(p.strategy, 1)
53 self.assertEqual(p.strategy, 1)
62
54
55 def test_estimated_compression_context_size(self):
56 p = zstd.CompressionParameters(20, 16, 17, 1, 5, 16, zstd.STRATEGY_DFAST)
57
58 # 32-bit has slightly different values from 64-bit.
59 self.assertAlmostEqual(p.estimated_compression_context_size(), 1287076,
60 delta=110)
61
63
62
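test_members above fixes the positional order of CompressionParameters (window_log, chain_log, hash_log, search_log, search_length, target_length, strategy). A short usage sketch that reuses the values from test_estimated_compression_context_size and feeds the result to a compressor via compression_params, as the write_to tests do:

import io
import zstd

params = zstd.CompressionParameters(20, 16, 17, 1, 5, 16, zstd.STRATEGY_DFAST)
print(params.estimated_compression_context_size())  # roughly 1.2 MB

cctx = zstd.ZstdCompressor(compression_params=params)
with cctx.write_to(io.BytesIO()) as compressor:
    compressor.write(b'data to compress')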
64 @make_cffi
63 @make_cffi
65 class TestFrameParameters(unittest.TestCase):
64 class TestFrameParameters(unittest.TestCase):
66 def test_invalid_type(self):
65 def test_invalid_type(self):
67 with self.assertRaises(TypeError):
66 with self.assertRaises(TypeError):
68 zstd.get_frame_parameters(None)
67 zstd.get_frame_parameters(None)
69
68
70 with self.assertRaises(TypeError):
69 with self.assertRaises(TypeError):
71 zstd.get_frame_parameters(u'foobarbaz')
70 zstd.get_frame_parameters(u'foobarbaz')
72
71
73 def test_invalid_input_sizes(self):
72 def test_invalid_input_sizes(self):
74 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
73 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
75 zstd.get_frame_parameters(b'')
74 zstd.get_frame_parameters(b'')
76
75
77 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
76 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
78 zstd.get_frame_parameters(zstd.FRAME_HEADER)
77 zstd.get_frame_parameters(zstd.FRAME_HEADER)
79
78
80 def test_invalid_frame(self):
79 def test_invalid_frame(self):
81 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
80 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
82 zstd.get_frame_parameters(b'foobarbaz')
81 zstd.get_frame_parameters(b'foobarbaz')
83
82
84 def test_attributes(self):
83 def test_attributes(self):
85 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
84 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
86 self.assertEqual(params.content_size, 0)
85 self.assertEqual(params.content_size, 0)
87 self.assertEqual(params.window_size, 1024)
86 self.assertEqual(params.window_size, 1024)
88 self.assertEqual(params.dict_id, 0)
87 self.assertEqual(params.dict_id, 0)
89 self.assertFalse(params.has_checksum)
88 self.assertFalse(params.has_checksum)
90
89
91 # Lowest 2 bits indicate whether a dictionary ID is present and how wide its field is. Here, the dict id field is 1 byte.
90 # Lowest 2 bits indicate whether a dictionary ID is present and how wide its field is. Here, the dict id field is 1 byte.
92 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
91 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
93 self.assertEqual(params.content_size, 0)
92 self.assertEqual(params.content_size, 0)
94 self.assertEqual(params.window_size, 1024)
93 self.assertEqual(params.window_size, 1024)
95 self.assertEqual(params.dict_id, 255)
94 self.assertEqual(params.dict_id, 255)
96 self.assertFalse(params.has_checksum)
95 self.assertFalse(params.has_checksum)
97
96
98 # The 3rd lowest bit indicates whether a checksum is present.
97 # The 3rd lowest bit indicates whether a checksum is present.
99 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
98 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
100 self.assertEqual(params.content_size, 0)
99 self.assertEqual(params.content_size, 0)
101 self.assertEqual(params.window_size, 1024)
100 self.assertEqual(params.window_size, 1024)
102 self.assertEqual(params.dict_id, 0)
101 self.assertEqual(params.dict_id, 0)
103 self.assertTrue(params.has_checksum)
102 self.assertTrue(params.has_checksum)
104
103
105 # Upper 2 bits indicate the size of the content size field.
104 # Upper 2 bits indicate the size of the content size field.
106 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00')
105 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00')
107 self.assertEqual(params.content_size, 511)
106 self.assertEqual(params.content_size, 511)
108 self.assertEqual(params.window_size, 1024)
107 self.assertEqual(params.window_size, 1024)
109 self.assertEqual(params.dict_id, 0)
108 self.assertEqual(params.dict_id, 0)
110 self.assertFalse(params.has_checksum)
109 self.assertFalse(params.has_checksum)
111
110
112 # Window descriptor is 2nd byte after frame header.
111 # Window descriptor is 2nd byte after frame header.
113 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
112 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
114 self.assertEqual(params.content_size, 0)
113 self.assertEqual(params.content_size, 0)
115 self.assertEqual(params.window_size, 262144)
114 self.assertEqual(params.window_size, 262144)
116 self.assertEqual(params.dict_id, 0)
115 self.assertEqual(params.dict_id, 0)
117 self.assertFalse(params.has_checksum)
116 self.assertFalse(params.has_checksum)
118
117
119 # Set multiple things.
118 # Set multiple things.
120 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00')
119 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00')
121 self.assertEqual(params.content_size, 272)
120 self.assertEqual(params.content_size, 272)
122 self.assertEqual(params.window_size, 262144)
121 self.assertEqual(params.window_size, 262144)
123 self.assertEqual(params.dict_id, 15)
122 self.assertEqual(params.dict_id, 15)
124 self.assertTrue(params.has_checksum)
123 self.assertTrue(params.has_checksum)
125
126
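As an aside for readers of the assertions above: the byte immediately after the frame magic is the frame header descriptor, and the bit layout the comments describe can be decoded in a few lines of Python. This is an illustrative sketch only (not part of the test suite); the helper name is made up.

import struct

def describe_frame_descriptor(byte):
    # Low 2 bits: dictionary ID field width selector (0 = no dict id).
    dict_id_flag = byte & 0x03
    # 3rd lowest bit: content checksum present?
    has_checksum = bool(byte & 0x04)
    # Upper 2 bits: content size field width selector.
    fcs_flag = (byte & 0xc0) >> 6
    return dict_id_flag, has_checksum, fcs_flag

# 0x45, the descriptor used in the last case above, selects a 1-byte dict id,
# a checksum, and a 2-byte content size field.
assert describe_frame_descriptor(0x45) == (1, True, 1)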
127 if hypothesis:
128 s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
129 max_value=zstd.WINDOWLOG_MAX)
130 s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN,
131 max_value=zstd.CHAINLOG_MAX)
132 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN,
133 max_value=zstd.HASHLOG_MAX)
134 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
135 max_value=zstd.SEARCHLOG_MAX)
136 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
137 max_value=zstd.SEARCHLENGTH_MAX)
138 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
139 max_value=zstd.TARGETLENGTH_MAX)
140 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
141 zstd.STRATEGY_DFAST,
142 zstd.STRATEGY_GREEDY,
143 zstd.STRATEGY_LAZY,
144 zstd.STRATEGY_LAZY2,
145 zstd.STRATEGY_BTLAZY2,
146 zstd.STRATEGY_BTOPT))
147
148
149 @make_cffi
150 class TestCompressionParametersHypothesis(unittest.TestCase):
151 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
152 s_searchlength, s_targetlength, s_strategy)
153 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
154 searchlength, targetlength, strategy):
155 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
156 searchlog, searchlength,
157 targetlength, strategy)
158
159 # Verify we can instantiate a compressor with the supplied values.
160 # ZSTD_checkCParams() enforces stricter, strategy-dependent bounds than
161 # the advertised constants, so adjust the values to match.
162 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
163 searchlength += 1
164 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
165 searchlog, searchlength,
166 targetlength, strategy)
167 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
168 searchlength -= 1
169 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
170 searchlog, searchlength,
171 targetlength, strategy)
172
173 cctx = zstd.ZstdCompressor(compression_params=p)
174 with cctx.write_to(io.BytesIO()):
175 pass
176
177 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
178 s_searchlength, s_targetlength, s_strategy)
179 def test_estimate_compression_context_size(self, windowlog, chainlog,
180 hashlog, searchlog,
181 searchlength, targetlength,
182 strategy):
183 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
184 searchlog, searchlength,
185 targetlength, strategy)
186 size = zstd.estimate_compression_context_size(p)
@@ -1,577 +1,741
1 import io
1 import io
2 import random
2 import random
3 import struct
3 import struct
4 import sys
4 import sys
5
5
6 try:
6 try:
7 import unittest2 as unittest
7 import unittest2 as unittest
8 except ImportError:
8 except ImportError:
9 import unittest
9 import unittest
10
10
11 import zstd
11 import zstd
12
12
13 from .common import (
13 from .common import (
14 make_cffi,
14 make_cffi,
15 OpCountingBytesIO,
15 OpCountingBytesIO,
16 )
16 )
17
17
18
18
19 if sys.version_info[0] >= 3:
19 if sys.version_info[0] >= 3:
20 next = lambda it: it.__next__()
20 next = lambda it: it.__next__()
21 else:
21 else:
22 next = lambda it: it.next()
22 next = lambda it: it.next()
23
23
24
24
25 @make_cffi
25 @make_cffi
26 class TestDecompressor_decompress(unittest.TestCase):
26 class TestDecompressor_decompress(unittest.TestCase):
27 def test_empty_input(self):
27 def test_empty_input(self):
28 dctx = zstd.ZstdDecompressor()
28 dctx = zstd.ZstdDecompressor()
29
29
30 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
30 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
31 dctx.decompress(b'')
31 dctx.decompress(b'')
32
32
33 def test_invalid_input(self):
33 def test_invalid_input(self):
34 dctx = zstd.ZstdDecompressor()
34 dctx = zstd.ZstdDecompressor()
35
35
36 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
36 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
37 dctx.decompress(b'foobar')
37 dctx.decompress(b'foobar')
38
38
39 def test_no_content_size_in_frame(self):
39 def test_no_content_size_in_frame(self):
40 cctx = zstd.ZstdCompressor(write_content_size=False)
40 cctx = zstd.ZstdCompressor(write_content_size=False)
41 compressed = cctx.compress(b'foobar')
41 compressed = cctx.compress(b'foobar')
42
42
43 dctx = zstd.ZstdDecompressor()
43 dctx = zstd.ZstdDecompressor()
44 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
44 with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
45 dctx.decompress(compressed)
45 dctx.decompress(compressed)
46
46
47 def test_content_size_present(self):
47 def test_content_size_present(self):
48 cctx = zstd.ZstdCompressor(write_content_size=True)
48 cctx = zstd.ZstdCompressor(write_content_size=True)
49 compressed = cctx.compress(b'foobar')
49 compressed = cctx.compress(b'foobar')
50
50
51 dctx = zstd.ZstdDecompressor()
51 dctx = zstd.ZstdDecompressor()
52 decompressed = dctx.decompress(compressed)
52 decompressed = dctx.decompress(compressed)
53 self.assertEqual(decompressed, b'foobar')
53 self.assertEqual(decompressed, b'foobar')
54
54
55 def test_max_output_size(self):
55 def test_max_output_size(self):
56 cctx = zstd.ZstdCompressor(write_content_size=False)
56 cctx = zstd.ZstdCompressor(write_content_size=False)
57 source = b'foobar' * 256
57 source = b'foobar' * 256
58 compressed = cctx.compress(source)
58 compressed = cctx.compress(source)
59
59
60 dctx = zstd.ZstdDecompressor()
60 dctx = zstd.ZstdDecompressor()
61 # Output will fit into a buffer exactly the size of the input.
61 # Output will fit into a buffer exactly the size of the input.
62 decompressed = dctx.decompress(compressed, max_output_size=len(source))
62 decompressed = dctx.decompress(compressed, max_output_size=len(source))
63 self.assertEqual(decompressed, source)
63 self.assertEqual(decompressed, source)
64
64
65 # Input size - 1 fails
65 # Input size - 1 fails
66 with self.assertRaisesRegexp(zstd.ZstdError, 'Destination buffer is too small'):
66 with self.assertRaisesRegexp(zstd.ZstdError, 'Destination buffer is too small'):
67 dctx.decompress(compressed, max_output_size=len(source) - 1)
67 dctx.decompress(compressed, max_output_size=len(source) - 1)
68
68
69 # Input size + 1 works
69 # Input size + 1 works
70 decompressed = dctx.decompress(compressed, max_output_size=len(source) + 1)
70 decompressed = dctx.decompress(compressed, max_output_size=len(source) + 1)
71 self.assertEqual(decompressed, source)
71 self.assertEqual(decompressed, source)
72
72
73 # A much larger buffer works.
73 # A much larger buffer works.
74 decompressed = dctx.decompress(compressed, max_output_size=len(source) * 64)
74 decompressed = dctx.decompress(compressed, max_output_size=len(source) * 64)
75 self.assertEqual(decompressed, source)
75 self.assertEqual(decompressed, source)
76
76
77 def test_stupidly_large_output_buffer(self):
77 def test_stupidly_large_output_buffer(self):
78 cctx = zstd.ZstdCompressor(write_content_size=False)
78 cctx = zstd.ZstdCompressor(write_content_size=False)
79 compressed = cctx.compress(b'foobar' * 256)
79 compressed = cctx.compress(b'foobar' * 256)
80 dctx = zstd.ZstdDecompressor()
80 dctx = zstd.ZstdDecompressor()
81
81
82 # Will get OverflowError on some Python distributions that can't
82 # Will get OverflowError on some Python distributions that can't
83 # handle really large integers.
83 # handle really large integers.
84 with self.assertRaises((MemoryError, OverflowError)):
84 with self.assertRaises((MemoryError, OverflowError)):
85 dctx.decompress(compressed, max_output_size=2**62)
85 dctx.decompress(compressed, max_output_size=2**62)
86
86
87 def test_dictionary(self):
87 def test_dictionary(self):
88 samples = []
88 samples = []
89 for i in range(128):
89 for i in range(128):
90 samples.append(b'foo' * 64)
90 samples.append(b'foo' * 64)
91 samples.append(b'bar' * 64)
91 samples.append(b'bar' * 64)
92 samples.append(b'foobar' * 64)
92 samples.append(b'foobar' * 64)
93
93
94 d = zstd.train_dictionary(8192, samples)
94 d = zstd.train_dictionary(8192, samples)
95
95
96 orig = b'foobar' * 16384
96 orig = b'foobar' * 16384
97 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True)
97 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True)
98 compressed = cctx.compress(orig)
98 compressed = cctx.compress(orig)
99
99
100 dctx = zstd.ZstdDecompressor(dict_data=d)
100 dctx = zstd.ZstdDecompressor(dict_data=d)
101 decompressed = dctx.decompress(compressed)
101 decompressed = dctx.decompress(compressed)
102
102
103 self.assertEqual(decompressed, orig)
103 self.assertEqual(decompressed, orig)
104
104
105 def test_dictionary_multiple(self):
105 def test_dictionary_multiple(self):
106 samples = []
106 samples = []
107 for i in range(128):
107 for i in range(128):
108 samples.append(b'foo' * 64)
108 samples.append(b'foo' * 64)
109 samples.append(b'bar' * 64)
109 samples.append(b'bar' * 64)
110 samples.append(b'foobar' * 64)
110 samples.append(b'foobar' * 64)
111
111
112 d = zstd.train_dictionary(8192, samples)
112 d = zstd.train_dictionary(8192, samples)
113
113
114 sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192)
114 sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192)
115 compressed = []
115 compressed = []
116 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True)
116 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True)
117 for source in sources:
117 for source in sources:
118 compressed.append(cctx.compress(source))
118 compressed.append(cctx.compress(source))
119
119
120 dctx = zstd.ZstdDecompressor(dict_data=d)
120 dctx = zstd.ZstdDecompressor(dict_data=d)
121 for i in range(len(sources)):
121 for i in range(len(sources)):
122 decompressed = dctx.decompress(compressed[i])
122 decompressed = dctx.decompress(compressed[i])
123 self.assertEqual(decompressed, sources[i])
123 self.assertEqual(decompressed, sources[i])
124
124
125
125
126 @make_cffi
126 @make_cffi
127 class TestDecompressor_copy_stream(unittest.TestCase):
127 class TestDecompressor_copy_stream(unittest.TestCase):
128 def test_no_read(self):
128 def test_no_read(self):
129 source = object()
129 source = object()
130 dest = io.BytesIO()
130 dest = io.BytesIO()
131
131
132 dctx = zstd.ZstdDecompressor()
132 dctx = zstd.ZstdDecompressor()
133 with self.assertRaises(ValueError):
133 with self.assertRaises(ValueError):
134 dctx.copy_stream(source, dest)
134 dctx.copy_stream(source, dest)
135
135
136 def test_no_write(self):
136 def test_no_write(self):
137 source = io.BytesIO()
137 source = io.BytesIO()
138 dest = object()
138 dest = object()
139
139
140 dctx = zstd.ZstdDecompressor()
140 dctx = zstd.ZstdDecompressor()
141 with self.assertRaises(ValueError):
141 with self.assertRaises(ValueError):
142 dctx.copy_stream(source, dest)
142 dctx.copy_stream(source, dest)
143
143
144 def test_empty(self):
144 def test_empty(self):
145 source = io.BytesIO()
145 source = io.BytesIO()
146 dest = io.BytesIO()
146 dest = io.BytesIO()
147
147
148 dctx = zstd.ZstdDecompressor()
148 dctx = zstd.ZstdDecompressor()
149 # TODO should this raise an error?
149 # TODO should this raise an error?
150 r, w = dctx.copy_stream(source, dest)
150 r, w = dctx.copy_stream(source, dest)
151
151
152 self.assertEqual(r, 0)
152 self.assertEqual(r, 0)
153 self.assertEqual(w, 0)
153 self.assertEqual(w, 0)
154 self.assertEqual(dest.getvalue(), b'')
154 self.assertEqual(dest.getvalue(), b'')
155
155
156 def test_large_data(self):
156 def test_large_data(self):
157 source = io.BytesIO()
157 source = io.BytesIO()
158 for i in range(255):
158 for i in range(255):
159 source.write(struct.Struct('>B').pack(i) * 16384)
159 source.write(struct.Struct('>B').pack(i) * 16384)
160 source.seek(0)
160 source.seek(0)
161
161
162 compressed = io.BytesIO()
162 compressed = io.BytesIO()
163 cctx = zstd.ZstdCompressor()
163 cctx = zstd.ZstdCompressor()
164 cctx.copy_stream(source, compressed)
164 cctx.copy_stream(source, compressed)
165
165
166 compressed.seek(0)
166 compressed.seek(0)
167 dest = io.BytesIO()
167 dest = io.BytesIO()
168 dctx = zstd.ZstdDecompressor()
168 dctx = zstd.ZstdDecompressor()
169 r, w = dctx.copy_stream(compressed, dest)
169 r, w = dctx.copy_stream(compressed, dest)
170
170
171 self.assertEqual(r, len(compressed.getvalue()))
171 self.assertEqual(r, len(compressed.getvalue()))
172 self.assertEqual(w, len(source.getvalue()))
172 self.assertEqual(w, len(source.getvalue()))
173
173
174 def test_read_write_size(self):
174 def test_read_write_size(self):
175 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(
175 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(
176 b'foobarfoobar'))
176 b'foobarfoobar'))
177
177
178 dest = OpCountingBytesIO()
178 dest = OpCountingBytesIO()
179 dctx = zstd.ZstdDecompressor()
179 dctx = zstd.ZstdDecompressor()
180 r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1)
180 r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1)
181
181
182 self.assertEqual(r, len(source.getvalue()))
182 self.assertEqual(r, len(source.getvalue()))
183 self.assertEqual(w, len(b'foobarfoobar'))
183 self.assertEqual(w, len(b'foobarfoobar'))
184 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
184 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
185 self.assertEqual(dest._write_count, len(dest.getvalue()))
185 self.assertEqual(dest._write_count, len(dest.getvalue()))
186
186
187
187
188 @make_cffi
188 @make_cffi
189 class TestDecompressor_decompressobj(unittest.TestCase):
189 class TestDecompressor_decompressobj(unittest.TestCase):
190 def test_simple(self):
190 def test_simple(self):
191 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
191 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
192
192
193 dctx = zstd.ZstdDecompressor()
193 dctx = zstd.ZstdDecompressor()
194 dobj = dctx.decompressobj()
194 dobj = dctx.decompressobj()
195 self.assertEqual(dobj.decompress(data), b'foobar')
195 self.assertEqual(dobj.decompress(data), b'foobar')
196
196
197 def test_reuse(self):
197 def test_reuse(self):
198 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
198 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
199
199
200 dctx = zstd.ZstdDecompressor()
200 dctx = zstd.ZstdDecompressor()
201 dobj = dctx.decompressobj()
201 dobj = dctx.decompressobj()
202 dobj.decompress(data)
202 dobj.decompress(data)
203
203
204 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'):
204 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'):
205 dobj.decompress(data)
205 dobj.decompress(data)
206
206
207
207
208 def decompress_via_writer(data):
208 def decompress_via_writer(data):
209 buffer = io.BytesIO()
209 buffer = io.BytesIO()
210 dctx = zstd.ZstdDecompressor()
210 dctx = zstd.ZstdDecompressor()
211 with dctx.write_to(buffer) as decompressor:
211 with dctx.write_to(buffer) as decompressor:
212 decompressor.write(data)
212 decompressor.write(data)
213 return buffer.getvalue()
213 return buffer.getvalue()
214
214
215
215
216 @make_cffi
216 @make_cffi
217 class TestDecompressor_write_to(unittest.TestCase):
217 class TestDecompressor_write_to(unittest.TestCase):
218 def test_empty_roundtrip(self):
218 def test_empty_roundtrip(self):
219 cctx = zstd.ZstdCompressor()
219 cctx = zstd.ZstdCompressor()
220 empty = cctx.compress(b'')
220 empty = cctx.compress(b'')
221 self.assertEqual(decompress_via_writer(empty), b'')
221 self.assertEqual(decompress_via_writer(empty), b'')
222
222
223 def test_large_roundtrip(self):
223 def test_large_roundtrip(self):
224 chunks = []
224 chunks = []
225 for i in range(255):
225 for i in range(255):
226 chunks.append(struct.Struct('>B').pack(i) * 16384)
226 chunks.append(struct.Struct('>B').pack(i) * 16384)
227 orig = b''.join(chunks)
227 orig = b''.join(chunks)
228 cctx = zstd.ZstdCompressor()
228 cctx = zstd.ZstdCompressor()
229 compressed = cctx.compress(orig)
229 compressed = cctx.compress(orig)
230
230
231 self.assertEqual(decompress_via_writer(compressed), orig)
231 self.assertEqual(decompress_via_writer(compressed), orig)
232
232
233 def test_multiple_calls(self):
233 def test_multiple_calls(self):
234 chunks = []
234 chunks = []
235 for i in range(255):
235 for i in range(255):
236 for j in range(255):
236 for j in range(255):
237 chunks.append(struct.Struct('>B').pack(j) * i)
237 chunks.append(struct.Struct('>B').pack(j) * i)
238
238
239 orig = b''.join(chunks)
239 orig = b''.join(chunks)
240 cctx = zstd.ZstdCompressor()
240 cctx = zstd.ZstdCompressor()
241 compressed = cctx.compress(orig)
241 compressed = cctx.compress(orig)
242
242
243 buffer = io.BytesIO()
243 buffer = io.BytesIO()
244 dctx = zstd.ZstdDecompressor()
244 dctx = zstd.ZstdDecompressor()
245 with dctx.write_to(buffer) as decompressor:
245 with dctx.write_to(buffer) as decompressor:
246 pos = 0
246 pos = 0
247 while pos < len(compressed):
247 while pos < len(compressed):
248 pos2 = pos + 8192
248 pos2 = pos + 8192
249 decompressor.write(compressed[pos:pos2])
249 decompressor.write(compressed[pos:pos2])
250 pos += 8192
250 pos += 8192
251 self.assertEqual(buffer.getvalue(), orig)
251 self.assertEqual(buffer.getvalue(), orig)
252
252
253 def test_dictionary(self):
253 def test_dictionary(self):
254 samples = []
254 samples = []
255 for i in range(128):
255 for i in range(128):
256 samples.append(b'foo' * 64)
256 samples.append(b'foo' * 64)
257 samples.append(b'bar' * 64)
257 samples.append(b'bar' * 64)
258 samples.append(b'foobar' * 64)
258 samples.append(b'foobar' * 64)
259
259
260 d = zstd.train_dictionary(8192, samples)
260 d = zstd.train_dictionary(8192, samples)
261
261
262 orig = b'foobar' * 16384
262 orig = b'foobar' * 16384
263 buffer = io.BytesIO()
263 buffer = io.BytesIO()
264 cctx = zstd.ZstdCompressor(dict_data=d)
264 cctx = zstd.ZstdCompressor(dict_data=d)
265 with cctx.write_to(buffer) as compressor:
265 with cctx.write_to(buffer) as compressor:
266 self.assertEqual(compressor.write(orig), 1544)
266 self.assertEqual(compressor.write(orig), 1544)
267
267
268 compressed = buffer.getvalue()
268 compressed = buffer.getvalue()
269 buffer = io.BytesIO()
269 buffer = io.BytesIO()
270
270
271 dctx = zstd.ZstdDecompressor(dict_data=d)
271 dctx = zstd.ZstdDecompressor(dict_data=d)
272 with dctx.write_to(buffer) as decompressor:
272 with dctx.write_to(buffer) as decompressor:
273 self.assertEqual(decompressor.write(compressed), len(orig))
273 self.assertEqual(decompressor.write(compressed), len(orig))
274
274
275 self.assertEqual(buffer.getvalue(), orig)
275 self.assertEqual(buffer.getvalue(), orig)
276
276
277 def test_memory_size(self):
277 def test_memory_size(self):
278 dctx = zstd.ZstdDecompressor()
278 dctx = zstd.ZstdDecompressor()
279 buffer = io.BytesIO()
279 buffer = io.BytesIO()
280 with dctx.write_to(buffer) as decompressor:
280 with dctx.write_to(buffer) as decompressor:
281 size = decompressor.memory_size()
281 size = decompressor.memory_size()
282
282
283 self.assertGreater(size, 100000)
283 self.assertGreater(size, 100000)
284
284
285 def test_write_size(self):
285 def test_write_size(self):
286 source = zstd.ZstdCompressor().compress(b'foobarfoobar')
286 source = zstd.ZstdCompressor().compress(b'foobarfoobar')
287 dest = OpCountingBytesIO()
287 dest = OpCountingBytesIO()
288 dctx = zstd.ZstdDecompressor()
288 dctx = zstd.ZstdDecompressor()
289 with dctx.write_to(dest, write_size=1) as decompressor:
289 with dctx.write_to(dest, write_size=1) as decompressor:
290 s = struct.Struct('>B')
290 s = struct.Struct('>B')
291 for c in source:
291 for c in source:
292 if not isinstance(c, str):
292 if not isinstance(c, str):
293 c = s.pack(c)
293 c = s.pack(c)
294 decompressor.write(c)
294 decompressor.write(c)
295
295
296
297 self.assertEqual(dest.getvalue(), b'foobarfoobar')
296 self.assertEqual(dest.getvalue(), b'foobarfoobar')
298 self.assertEqual(dest._write_count, len(dest.getvalue()))
297 self.assertEqual(dest._write_count, len(dest.getvalue()))
299
298
300
299
301 @make_cffi
300 @make_cffi
302 class TestDecompressor_read_from(unittest.TestCase):
301 class TestDecompressor_read_from(unittest.TestCase):
303 def test_type_validation(self):
302 def test_type_validation(self):
304 dctx = zstd.ZstdDecompressor()
303 dctx = zstd.ZstdDecompressor()
305
304
306 # Object with read() works.
305 # Object with read() works.
307 dctx.read_from(io.BytesIO())
306 dctx.read_from(io.BytesIO())
308
307
309 # Buffer protocol works.
308 # Buffer protocol works.
310 dctx.read_from(b'foobar')
309 dctx.read_from(b'foobar')
311
310
312 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
311 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
313 b''.join(dctx.read_from(True))
312 b''.join(dctx.read_from(True))
314
313
315 def test_empty_input(self):
314 def test_empty_input(self):
316 dctx = zstd.ZstdDecompressor()
315 dctx = zstd.ZstdDecompressor()
317
316
318 source = io.BytesIO()
317 source = io.BytesIO()
319 it = dctx.read_from(source)
318 it = dctx.read_from(source)
320 # TODO this is arguably wrong. Should get an error about a missing frame.
319 # TODO this is arguably wrong. Should get an error about a missing frame.
321 with self.assertRaises(StopIteration):
320 with self.assertRaises(StopIteration):
322 next(it)
321 next(it)
323
322
324 it = dctx.read_from(b'')
323 it = dctx.read_from(b'')
325 with self.assertRaises(StopIteration):
324 with self.assertRaises(StopIteration):
326 next(it)
325 next(it)
327
326
328 def test_invalid_input(self):
327 def test_invalid_input(self):
329 dctx = zstd.ZstdDecompressor()
328 dctx = zstd.ZstdDecompressor()
330
329
331 source = io.BytesIO(b'foobar')
330 source = io.BytesIO(b'foobar')
332 it = dctx.read_from(source)
331 it = dctx.read_from(source)
333 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
332 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
334 next(it)
333 next(it)
335
334
336 it = dctx.read_from(b'foobar')
335 it = dctx.read_from(b'foobar')
337 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
336 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
338 next(it)
337 next(it)
339
338
340 def test_empty_roundtrip(self):
339 def test_empty_roundtrip(self):
341 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
340 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
342 empty = cctx.compress(b'')
341 empty = cctx.compress(b'')
343
342
344 source = io.BytesIO(empty)
343 source = io.BytesIO(empty)
345 source.seek(0)
344 source.seek(0)
346
345
347 dctx = zstd.ZstdDecompressor()
346 dctx = zstd.ZstdDecompressor()
348 it = dctx.read_from(source)
347 it = dctx.read_from(source)
349
348
350 # No chunks should be emitted since there is no data.
349 # No chunks should be emitted since there is no data.
351 with self.assertRaises(StopIteration):
350 with self.assertRaises(StopIteration):
352 next(it)
351 next(it)
353
352
354 # Again for good measure.
353 # Again for good measure.
355 with self.assertRaises(StopIteration):
354 with self.assertRaises(StopIteration):
356 next(it)
355 next(it)
357
356
358 def test_skip_bytes_too_large(self):
357 def test_skip_bytes_too_large(self):
359 dctx = zstd.ZstdDecompressor()
358 dctx = zstd.ZstdDecompressor()
360
359
361 with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
360 with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
362 b''.join(dctx.read_from(b'', skip_bytes=1, read_size=1))
361 b''.join(dctx.read_from(b'', skip_bytes=1, read_size=1))
363
362
364 with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
363 with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
365 b''.join(dctx.read_from(b'foobar', skip_bytes=10))
364 b''.join(dctx.read_from(b'foobar', skip_bytes=10))
366
365
367 def test_skip_bytes(self):
366 def test_skip_bytes(self):
368 cctx = zstd.ZstdCompressor(write_content_size=False)
367 cctx = zstd.ZstdCompressor(write_content_size=False)
369 compressed = cctx.compress(b'foobar')
368 compressed = cctx.compress(b'foobar')
370
369
371 dctx = zstd.ZstdDecompressor()
370 dctx = zstd.ZstdDecompressor()
372 output = b''.join(dctx.read_from(b'hdr' + compressed, skip_bytes=3))
371 output = b''.join(dctx.read_from(b'hdr' + compressed, skip_bytes=3))
373 self.assertEqual(output, b'foobar')
372 self.assertEqual(output, b'foobar')
374
373
375 def test_large_output(self):
374 def test_large_output(self):
376 source = io.BytesIO()
375 source = io.BytesIO()
377 source.write(b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
376 source.write(b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
378 source.write(b'o')
377 source.write(b'o')
379 source.seek(0)
378 source.seek(0)
380
379
381 cctx = zstd.ZstdCompressor(level=1)
380 cctx = zstd.ZstdCompressor(level=1)
382 compressed = io.BytesIO(cctx.compress(source.getvalue()))
381 compressed = io.BytesIO(cctx.compress(source.getvalue()))
383 compressed.seek(0)
382 compressed.seek(0)
384
383
385 dctx = zstd.ZstdDecompressor()
384 dctx = zstd.ZstdDecompressor()
386 it = dctx.read_from(compressed)
385 it = dctx.read_from(compressed)
387
386
388 chunks = []
387 chunks = []
389 chunks.append(next(it))
388 chunks.append(next(it))
390 chunks.append(next(it))
389 chunks.append(next(it))
391
390
392 with self.assertRaises(StopIteration):
391 with self.assertRaises(StopIteration):
393 next(it)
392 next(it)
394
393
395 decompressed = b''.join(chunks)
394 decompressed = b''.join(chunks)
396 self.assertEqual(decompressed, source.getvalue())
395 self.assertEqual(decompressed, source.getvalue())
397
396
398 # And again with buffer protocol.
397 # And again with buffer protocol.
399 it = dctx.read_from(compressed.getvalue())
398 it = dctx.read_from(compressed.getvalue())
400 chunks = []
399 chunks = []
401 chunks.append(next(it))
400 chunks.append(next(it))
402 chunks.append(next(it))
401 chunks.append(next(it))
403
402
404 with self.assertRaises(StopIteration):
403 with self.assertRaises(StopIteration):
405 next(it)
404 next(it)
406
405
407 decompressed = b''.join(chunks)
406 decompressed = b''.join(chunks)
408 self.assertEqual(decompressed, source.getvalue())
407 self.assertEqual(decompressed, source.getvalue())
409
408
410 def test_large_input(self):
409 def test_large_input(self):
411 bytes = list(struct.Struct('>B').pack(i) for i in range(256))
410 bytes = list(struct.Struct('>B').pack(i) for i in range(256))
412 compressed = io.BytesIO()
411 compressed = io.BytesIO()
413 input_size = 0
412 input_size = 0
414 cctx = zstd.ZstdCompressor(level=1)
413 cctx = zstd.ZstdCompressor(level=1)
415 with cctx.write_to(compressed) as compressor:
414 with cctx.write_to(compressed) as compressor:
416 while True:
415 while True:
417 compressor.write(random.choice(bytes))
416 compressor.write(random.choice(bytes))
418 input_size += 1
417 input_size += 1
419
418
420 have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
419 have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
421 have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2
420 have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2
422 if have_compressed and have_raw:
421 if have_compressed and have_raw:
423 break
422 break
424
423
425 compressed.seek(0)
424 compressed.seek(0)
426 self.assertGreater(len(compressed.getvalue()),
425 self.assertGreater(len(compressed.getvalue()),
427 zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE)
426 zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE)
428
427
429 dctx = zstd.ZstdDecompressor()
428 dctx = zstd.ZstdDecompressor()
430 it = dctx.read_from(compressed)
429 it = dctx.read_from(compressed)
431
430
432 chunks = []
431 chunks = []
433 chunks.append(next(it))
432 chunks.append(next(it))
434 chunks.append(next(it))
433 chunks.append(next(it))
435 chunks.append(next(it))
434 chunks.append(next(it))
436
435
437 with self.assertRaises(StopIteration):
436 with self.assertRaises(StopIteration):
438 next(it)
437 next(it)
439
438
440 decompressed = b''.join(chunks)
439 decompressed = b''.join(chunks)
441 self.assertEqual(len(decompressed), input_size)
440 self.assertEqual(len(decompressed), input_size)
442
441
443 # And again with buffer protocol.
442 # And again with buffer protocol.
444 it = dctx.read_from(compressed.getvalue())
443 it = dctx.read_from(compressed.getvalue())
445
444
446 chunks = []
445 chunks = []
447 chunks.append(next(it))
446 chunks.append(next(it))
448 chunks.append(next(it))
447 chunks.append(next(it))
449 chunks.append(next(it))
448 chunks.append(next(it))
450
449
451 with self.assertRaises(StopIteration):
450 with self.assertRaises(StopIteration):
452 next(it)
451 next(it)
453
452
454 decompressed = b''.join(chunks)
453 decompressed = b''.join(chunks)
455 self.assertEqual(len(decompressed), input_size)
454 self.assertEqual(len(decompressed), input_size)
456
455
457 def test_interesting(self):
456 def test_interesting(self):
458 # Found this edge case via fuzzing.
457 # Found this edge case via fuzzing.
459 cctx = zstd.ZstdCompressor(level=1)
458 cctx = zstd.ZstdCompressor(level=1)
460
459
461 source = io.BytesIO()
460 source = io.BytesIO()
462
461
463 compressed = io.BytesIO()
462 compressed = io.BytesIO()
464 with cctx.write_to(compressed) as compressor:
463 with cctx.write_to(compressed) as compressor:
465 for i in range(256):
464 for i in range(256):
466 chunk = b'\0' * 1024
465 chunk = b'\0' * 1024
467 compressor.write(chunk)
466 compressor.write(chunk)
468 source.write(chunk)
467 source.write(chunk)
469
468
470 dctx = zstd.ZstdDecompressor()
469 dctx = zstd.ZstdDecompressor()
471
470
472 simple = dctx.decompress(compressed.getvalue(),
471 simple = dctx.decompress(compressed.getvalue(),
473 max_output_size=len(source.getvalue()))
472 max_output_size=len(source.getvalue()))
474 self.assertEqual(simple, source.getvalue())
473 self.assertEqual(simple, source.getvalue())
475
474
476 compressed.seek(0)
475 compressed.seek(0)
477 streamed = b''.join(dctx.read_from(compressed))
476 streamed = b''.join(dctx.read_from(compressed))
478 self.assertEqual(streamed, source.getvalue())
477 self.assertEqual(streamed, source.getvalue())
479
478
480 def test_read_write_size(self):
479 def test_read_write_size(self):
481 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar'))
480 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar'))
482 dctx = zstd.ZstdDecompressor()
481 dctx = zstd.ZstdDecompressor()
483 for chunk in dctx.read_from(source, read_size=1, write_size=1):
482 for chunk in dctx.read_from(source, read_size=1, write_size=1):
484 self.assertEqual(len(chunk), 1)
483 self.assertEqual(len(chunk), 1)
485
484
486 self.assertEqual(source._read_count, len(source.getvalue()))
485 self.assertEqual(source._read_count, len(source.getvalue()))
487
486
488
487
489 @make_cffi
488 @make_cffi
490 class TestDecompressor_content_dict_chain(unittest.TestCase):
489 class TestDecompressor_content_dict_chain(unittest.TestCase):
491 def test_bad_inputs_simple(self):
490 def test_bad_inputs_simple(self):
492 dctx = zstd.ZstdDecompressor()
491 dctx = zstd.ZstdDecompressor()
493
492
494 with self.assertRaises(TypeError):
493 with self.assertRaises(TypeError):
495 dctx.decompress_content_dict_chain(b'foo')
494 dctx.decompress_content_dict_chain(b'foo')
496
495
497 with self.assertRaises(TypeError):
496 with self.assertRaises(TypeError):
498 dctx.decompress_content_dict_chain((b'foo', b'bar'))
497 dctx.decompress_content_dict_chain((b'foo', b'bar'))
499
498
500 with self.assertRaisesRegexp(ValueError, 'empty input chain'):
499 with self.assertRaisesRegexp(ValueError, 'empty input chain'):
501 dctx.decompress_content_dict_chain([])
500 dctx.decompress_content_dict_chain([])
502
501
503 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
502 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
504 dctx.decompress_content_dict_chain([u'foo'])
503 dctx.decompress_content_dict_chain([u'foo'])
505
504
506 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
505 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
507 dctx.decompress_content_dict_chain([True])
506 dctx.decompress_content_dict_chain([True])
508
507
509 with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'):
508 with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'):
510 dctx.decompress_content_dict_chain([zstd.FRAME_HEADER])
509 dctx.decompress_content_dict_chain([zstd.FRAME_HEADER])
511
510
512 with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'):
511 with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'):
513 dctx.decompress_content_dict_chain([b'foo' * 8])
512 dctx.decompress_content_dict_chain([b'foo' * 8])
514
513
515 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
514 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
516
515
517 with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'):
516 with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'):
518 dctx.decompress_content_dict_chain([no_size])
517 dctx.decompress_content_dict_chain([no_size])
519
518
520 # Corrupt first frame.
519 # Corrupt first frame.
521 frame = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
520 frame = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
522 frame = frame[0:12] + frame[15:]
521 frame = frame[0:12] + frame[15:]
523 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 0'):
522 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 0'):
524 dctx.decompress_content_dict_chain([frame])
523 dctx.decompress_content_dict_chain([frame])
525
524
526 def test_bad_subsequent_input(self):
525 def test_bad_subsequent_input(self):
527 initial = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
526 initial = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
528
527
529 dctx = zstd.ZstdDecompressor()
528 dctx = zstd.ZstdDecompressor()
530
529
531 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
530 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
532 dctx.decompress_content_dict_chain([initial, u'foo'])
531 dctx.decompress_content_dict_chain([initial, u'foo'])
533
532
534 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
533 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
535 dctx.decompress_content_dict_chain([initial, None])
534 dctx.decompress_content_dict_chain([initial, None])
536
535
537 with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'):
536 with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'):
538 dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER])
537 dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER])
539
538
540 with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'):
539 with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'):
541 dctx.decompress_content_dict_chain([initial, b'foo' * 8])
540 dctx.decompress_content_dict_chain([initial, b'foo' * 8])
542
541
543 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
542 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
544
543
545 with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'):
544 with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'):
546 dctx.decompress_content_dict_chain([initial, no_size])
545 dctx.decompress_content_dict_chain([initial, no_size])
547
546
548 # Corrupt second frame.
547 # Corrupt second frame.
549 cctx = zstd.ZstdCompressor(write_content_size=True, dict_data=zstd.ZstdCompressionDict(b'foo' * 64))
548 cctx = zstd.ZstdCompressor(write_content_size=True, dict_data=zstd.ZstdCompressionDict(b'foo' * 64))
550 frame = cctx.compress(b'bar' * 64)
549 frame = cctx.compress(b'bar' * 64)
551 frame = frame[0:12] + frame[15:]
550 frame = frame[0:12] + frame[15:]
552
551
553 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 1'):
552 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 1'):
554 dctx.decompress_content_dict_chain([initial, frame])
553 dctx.decompress_content_dict_chain([initial, frame])
555
554
556 def test_simple(self):
555 def test_simple(self):
557 original = [
556 original = [
558 b'foo' * 64,
557 b'foo' * 64,
559 b'foobar' * 64,
558 b'foobar' * 64,
560 b'baz' * 64,
559 b'baz' * 64,
561 b'foobaz' * 64,
560 b'foobaz' * 64,
562 b'foobarbaz' * 64,
561 b'foobarbaz' * 64,
563 ]
562 ]
564
563
565 chunks = []
564 chunks = []
566 chunks.append(zstd.ZstdCompressor(write_content_size=True).compress(original[0]))
565 chunks.append(zstd.ZstdCompressor(write_content_size=True).compress(original[0]))
567 for i, chunk in enumerate(original[1:]):
566 for i, chunk in enumerate(original[1:]):
568 d = zstd.ZstdCompressionDict(original[i])
567 d = zstd.ZstdCompressionDict(original[i])
569 cctx = zstd.ZstdCompressor(dict_data=d, write_content_size=True)
568 cctx = zstd.ZstdCompressor(dict_data=d, write_content_size=True)
570 chunks.append(cctx.compress(chunk))
569 chunks.append(cctx.compress(chunk))
571
570
572 for i in range(1, len(original)):
571 for i in range(1, len(original)):
573 chain = chunks[0:i]
572 chain = chunks[0:i]
574 expected = original[i - 1]
573 expected = original[i - 1]
575 dctx = zstd.ZstdDecompressor()
574 dctx = zstd.ZstdDecompressor()
576 decompressed = dctx.decompress_content_dict_chain(chain)
575 decompressed = dctx.decompress_content_dict_chain(chain)
577 self.assertEqual(decompressed, expected)
576 self.assertEqual(decompressed, expected)
577
578
579 # TODO enable for CFFI
580 class TestDecompressor_multi_decompress_to_buffer(unittest.TestCase):
581 def test_invalid_inputs(self):
582 dctx = zstd.ZstdDecompressor()
583
584 with self.assertRaises(TypeError):
585 dctx.multi_decompress_to_buffer(True)
586
587 with self.assertRaises(TypeError):
588 dctx.multi_decompress_to_buffer((1, 2))
589
590 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
591 dctx.multi_decompress_to_buffer([u'foo'])
592
593 with self.assertRaisesRegexp(ValueError, 'could not determine decompressed size of item 0'):
594 dctx.multi_decompress_to_buffer([b'foobarbaz'])
595
596 def test_list_input(self):
597 cctx = zstd.ZstdCompressor(write_content_size=True)
598
599 original = [b'foo' * 4, b'bar' * 6]
600 frames = [cctx.compress(d) for d in original]
601
602 dctx = zstd.ZstdDecompressor()
603 result = dctx.multi_decompress_to_buffer(frames)
604
605 self.assertEqual(len(result), len(frames))
606 self.assertEqual(result.size(), sum(map(len, original)))
607
608 for i, data in enumerate(original):
609 self.assertEqual(result[i].tobytes(), data)
610
611 self.assertEqual(result[0].offset, 0)
612 self.assertEqual(len(result[0]), 12)
613 self.assertEqual(result[1].offset, 12)
614 self.assertEqual(len(result[1]), 18)
615
616 def test_list_input_frame_sizes(self):
617 cctx = zstd.ZstdCompressor(write_content_size=False)
618
619 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
620 frames = [cctx.compress(d) for d in original]
621 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
622
623 dctx = zstd.ZstdDecompressor()
624 result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
625
626 self.assertEqual(len(result), len(frames))
627 self.assertEqual(result.size(), sum(map(len, original)))
628
629 for i, data in enumerate(original):
630 self.assertEqual(result[i].tobytes(), data)
631
632 def test_buffer_with_segments_input(self):
633 cctx = zstd.ZstdCompressor(write_content_size=True)
634
635 original = [b'foo' * 4, b'bar' * 6]
636 frames = [cctx.compress(d) for d in original]
637
638 dctx = zstd.ZstdDecompressor()
639
640 segments = struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]), len(frames[1]))
641 b = zstd.BufferWithSegments(b''.join(frames), segments)
642
643 result = dctx.multi_decompress_to_buffer(b)
644
645 self.assertEqual(len(result), len(frames))
646 self.assertEqual(result[0].offset, 0)
647 self.assertEqual(len(result[0]), 12)
648 self.assertEqual(result[1].offset, 12)
649 self.assertEqual(len(result[1]), 18)
650
651 def test_buffer_with_segments_sizes(self):
652 cctx = zstd.ZstdCompressor(write_content_size=False)
653 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
654 frames = [cctx.compress(d) for d in original]
655 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
656
657 segments = struct.pack('=QQQQQQ', 0, len(frames[0]),
658 len(frames[0]), len(frames[1]),
659 len(frames[0]) + len(frames[1]), len(frames[2]))
660 b = zstd.BufferWithSegments(b''.join(frames), segments)
661
662 dctx = zstd.ZstdDecompressor()
663 result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes)
664
665 self.assertEqual(len(result), len(frames))
666 self.assertEqual(result.size(), sum(map(len, original)))
667
668 for i, data in enumerate(original):
669 self.assertEqual(result[i].tobytes(), data)
670
671 def test_buffer_with_segments_collection_input(self):
672 cctx = zstd.ZstdCompressor(write_content_size=True)
673
674 original = [
675 b'foo0' * 2,
676 b'foo1' * 3,
677 b'foo2' * 4,
678 b'foo3' * 5,
679 b'foo4' * 6,
680 ]
681
682 frames = cctx.multi_compress_to_buffer(original)
683
684 # Check round trip.
685 dctx = zstd.ZstdDecompressor()
686 decompressed = dctx.multi_decompress_to_buffer(frames, threads=3)
687
688 self.assertEqual(len(decompressed), len(original))
689
690 for i, data in enumerate(original):
691 self.assertEqual(data, decompressed[i].tobytes())
692
693 # And a manual mode.
694 b = b''.join([frames[0].tobytes(), frames[1].tobytes()])
695 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
696 0, len(frames[0]),
697 len(frames[0]), len(frames[1])))
698
699 b = b''.join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()])
700 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
701 0, len(frames[2]),
702 len(frames[2]), len(frames[3]),
703 len(frames[2]) + len(frames[3]), len(frames[4])))
704
705 c = zstd.BufferWithSegmentsCollection(b1, b2)
706
707 dctx = zstd.ZstdDecompressor()
708 decompressed = dctx.multi_decompress_to_buffer(c)
709
710 self.assertEqual(len(decompressed), 5)
711 for i in range(5):
712 self.assertEqual(decompressed[i].tobytes(), original[i])
713
714 def test_multiple_threads(self):
715 cctx = zstd.ZstdCompressor(write_content_size=True)
716
717 frames = []
718 frames.extend(cctx.compress(b'x' * 64) for i in range(256))
719 frames.extend(cctx.compress(b'y' * 64) for i in range(256))
720
721 dctx = zstd.ZstdDecompressor()
722 result = dctx.multi_decompress_to_buffer(frames, threads=-1)
723
724 self.assertEqual(len(result), len(frames))
725 self.assertEqual(result.size(), 2 * 64 * 256)
726 self.assertEqual(result[0].tobytes(), b'x' * 64)
727 self.assertEqual(result[256].tobytes(), b'y' * 64)
728
729 def test_item_failure(self):
730 cctx = zstd.ZstdCompressor(write_content_size=True)
731 frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)]
732
733 frames[1] = frames[1] + b'extra'
734
735 dctx = zstd.ZstdDecompressor()
736
737 with self.assertRaisesRegexp(zstd.ZstdError, 'error decompressing item 1: Src size incorrect'):
738 dctx.multi_decompress_to_buffer(frames)
739
740 with self.assertRaisesRegexp(zstd.ZstdError, 'error decompressing item 1: Src size incorrect'):
741 dctx.multi_decompress_to_buffer(frames, threads=2)
@@ -1,50 +1,110
1 import sys
1 import sys
2
2
3 try:
3 try:
4 import unittest2 as unittest
4 import unittest2 as unittest
5 except ImportError:
5 except ImportError:
6 import unittest
6 import unittest
7
7
8 import zstd
8 import zstd
9
9
10 from . common import (
10 from . common import (
11 make_cffi,
11 make_cffi,
12 )
12 )
13
13
14 if sys.version_info[0] >= 3:
14 if sys.version_info[0] >= 3:
15 int_type = int
15 int_type = int
16 else:
16 else:
17 int_type = long
17 int_type = long
18
18
19
19
20 @make_cffi
20 @make_cffi
21 class TestTrainDictionary(unittest.TestCase):
21 class TestTrainDictionary(unittest.TestCase):
22 def test_no_args(self):
22 def test_no_args(self):
23 with self.assertRaises(TypeError):
23 with self.assertRaises(TypeError):
24 zstd.train_dictionary()
24 zstd.train_dictionary()
25
25
26 def test_bad_args(self):
26 def test_bad_args(self):
27 with self.assertRaises(TypeError):
27 with self.assertRaises(TypeError):
28 zstd.train_dictionary(8192, u'foo')
28 zstd.train_dictionary(8192, u'foo')
29
29
30 with self.assertRaises(ValueError):
30 with self.assertRaises(ValueError):
31 zstd.train_dictionary(8192, [u'foo'])
31 zstd.train_dictionary(8192, [u'foo'])
32
32
33 def test_basic(self):
33 def test_basic(self):
34 samples = []
34 samples = []
35 for i in range(128):
35 for i in range(128):
36 samples.append(b'foo' * 64)
36 samples.append(b'foo' * 64)
37 samples.append(b'bar' * 64)
37 samples.append(b'bar' * 64)
38 samples.append(b'foobar' * 64)
38 samples.append(b'foobar' * 64)
39 samples.append(b'baz' * 64)
39 samples.append(b'baz' * 64)
40 samples.append(b'foobaz' * 64)
40 samples.append(b'foobaz' * 64)
41 samples.append(b'bazfoo' * 64)
41 samples.append(b'bazfoo' * 64)
42
42
43 d = zstd.train_dictionary(8192, samples)
43 d = zstd.train_dictionary(8192, samples)
44 self.assertLessEqual(len(d), 8192)
44 self.assertLessEqual(len(d), 8192)
45
45
46 dict_id = d.dict_id()
46 dict_id = d.dict_id()
47 self.assertIsInstance(dict_id, int_type)
47 self.assertIsInstance(dict_id, int_type)
48
48
49 data = d.as_bytes()
49 data = d.as_bytes()
50 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
50 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
51
52 def test_set_dict_id(self):
53 samples = []
54 for i in range(128):
55 samples.append(b'foo' * 64)
56 samples.append(b'foobar' * 64)
57
58 d = zstd.train_dictionary(8192, samples, dict_id=42)
59 self.assertEqual(d.dict_id(), 42)
60
61
62 @make_cffi
63 class TestTrainCoverDictionary(unittest.TestCase):
64 def test_no_args(self):
65 with self.assertRaises(TypeError):
66 zstd.train_cover_dictionary()
67
68 def test_bad_args(self):
69 with self.assertRaises(TypeError):
70 zstd.train_cover_dictionary(8192, u'foo')
71
72 with self.assertRaises(ValueError):
73 zstd.train_cover_dictionary(8192, [u'foo'])
74
75 def test_basic(self):
76 samples = []
77 for i in range(128):
78 samples.append(b'foo' * 64)
79 samples.append(b'foobar' * 64)
80
81 d = zstd.train_cover_dictionary(8192, samples, k=64, d=16)
82 self.assertIsInstance(d.dict_id(), int_type)
83
84 data = d.as_bytes()
85 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
86
87 self.assertEqual(d.k, 64)
88 self.assertEqual(d.d, 16)
89
90 def test_set_dict_id(self):
91 samples = []
92 for i in range(128):
93 samples.append(b'foo' * 64)
94 samples.append(b'foobar' * 64)
95
96 d = zstd.train_cover_dictionary(8192, samples, k=64, d=16,
97 dict_id=42)
98 self.assertEqual(d.dict_id(), 42)
99
100 def test_optimize(self):
101 samples = []
102 for i in range(128):
103 samples.append(b'foo' * 64)
104 samples.append(b'foobar' * 64)
105
106 d = zstd.train_cover_dictionary(8192, samples, optimize=True,
107 threads=-1, steps=1, d=16)
108
109 self.assertEqual(d.k, 16)
110 self.assertEqual(d.d, 16)
@@ -1,145 +1,210
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 /* A Python C extension for Zstandard. */
9 /* A Python C extension for Zstandard. */
10
10
11 #if defined(_WIN32)
12 #define WIN32_LEAN_AND_MEAN
13 #include <Windows.h>
14 #endif
15
11 #include "python-zstandard.h"
16 #include "python-zstandard.h"
12
17
13 PyObject *ZstdError;
18 PyObject *ZstdError;
14
19
15 PyDoc_STRVAR(estimate_compression_context_size__doc__,
20 PyDoc_STRVAR(estimate_compression_context_size__doc__,
16 "estimate_compression_context_size(compression_parameters)\n"
21 "estimate_compression_context_size(compression_parameters)\n"
17 "\n"
22 "\n"
18 "Give the amount of memory allocated for a compression context given a\n"
23 "Give the amount of memory allocated for a compression context given a\n"
19 "CompressionParameters instance");
24 "CompressionParameters instance");
20
25
21 PyDoc_STRVAR(estimate_decompression_context_size__doc__,
26 PyDoc_STRVAR(estimate_decompression_context_size__doc__,
22 "estimate_decompression_context_size()\n"
27 "estimate_decompression_context_size()\n"
23 "\n"
28 "\n"
24 "Estimate the amount of memory allocated to a decompression context.\n"
29 "Estimate the amount of memory allocated to a decompression context.\n"
25 );
30 );
26
31
27 static PyObject* estimate_decompression_context_size(PyObject* self) {
32 static PyObject* estimate_decompression_context_size(PyObject* self) {
28 return PyLong_FromSize_t(ZSTD_estimateDCtxSize());
33 return PyLong_FromSize_t(ZSTD_estimateDCtxSize());
29 }
34 }
30
35
31 PyDoc_STRVAR(get_compression_parameters__doc__,
36 PyDoc_STRVAR(get_compression_parameters__doc__,
32 "get_compression_parameters(compression_level[, source_size[, dict_size]])\n"
37 "get_compression_parameters(compression_level[, source_size[, dict_size]])\n"
33 "\n"
38 "\n"
34 "Obtains a ``CompressionParameters`` instance from a compression level and\n"
39 "Obtains a ``CompressionParameters`` instance from a compression level and\n"
35 "optional input size and dictionary size");
40 "optional input size and dictionary size");
36
41
37 PyDoc_STRVAR(get_frame_parameters__doc__,
42 PyDoc_STRVAR(get_frame_parameters__doc__,
38 "get_frame_parameters(data)\n"
43 "get_frame_parameters(data)\n"
39 "\n"
44 "\n"
40 "Obtains a ``FrameParameters`` instance by parsing data.\n");
45 "Obtains a ``FrameParameters`` instance by parsing data.\n");
41
46
42 PyDoc_STRVAR(train_dictionary__doc__,
47 PyDoc_STRVAR(train_dictionary__doc__,
43 "train_dictionary(dict_size, samples)\n"
48 "train_dictionary(dict_size, samples)\n"
44 "\n"
49 "\n"
45 "Train a dictionary from sample data.\n"
50 "Train a dictionary from sample data.\n"
46 "\n"
51 "\n"
47 "A compression dictionary of size ``dict_size`` will be created from the\n"
52 "A compression dictionary of size ``dict_size`` will be created from the\n"
48 "iterable of samples provided by ``samples``.\n"
53 "iterable of samples provided by ``samples``.\n"
49 "\n"
54 "\n"
50 "The raw dictionary content will be returned\n");
55 "The raw dictionary content will be returned\n");
51
56
57 PyDoc_STRVAR(train_cover_dictionary__doc__,
58 "train_cover_dictionary(dict_size, samples, k=None, d=None, notifications=0, dict_id=0, level=0)\n"
59 "\n"
60 "Train a dictionary from sample data using the COVER algorithm.\n"
61 "\n"
62 "This behaves like ``train_dictionary()`` except a different algorithm is\n"
63 "used to create the dictionary. The algorithm has 2 parameters: ``k`` and\n"
64 "``d``. These control the *segment size* and *dmer size*. A reasonable range\n"
65 "for ``k`` is ``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n"
66 "``d`` must be less than or equal to ``k``.\n"
67 );
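As a usage sketch for the docstring above (mirroring the TestTrainCoverDictionary tests earlier in this change; the sample data is illustrative, not meaningful):

import zstd

samples = [b'foo' * 64, b'foobar' * 64] * 128
d = zstd.train_cover_dictionary(8192, samples, k=64, d=16)
assert d.k == 64 and d.d == 16   # the parameters used are exposed on the result

# The trained dictionary plugs into compression contexts like any other.
cctx = zstd.ZstdCompressor(dict_data=d)
frame = cctx.compress(b'foobar' * 64)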
68
52 static char zstd_doc[] = "Interface to zstandard";
69 static char zstd_doc[] = "Interface to zstandard";
53
70
54 static PyMethodDef zstd_methods[] = {
71 static PyMethodDef zstd_methods[] = {
72 /* TODO remove since it is a method on CompressionParameters. */
55 { "estimate_compression_context_size", (PyCFunction)estimate_compression_context_size,
73 { "estimate_compression_context_size", (PyCFunction)estimate_compression_context_size,
56 METH_VARARGS, estimate_compression_context_size__doc__ },
74 METH_VARARGS, estimate_compression_context_size__doc__ },
57 { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
75 { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
58 METH_NOARGS, estimate_decompression_context_size__doc__ },
76 METH_NOARGS, estimate_decompression_context_size__doc__ },
59 { "get_compression_parameters", (PyCFunction)get_compression_parameters,
77 { "get_compression_parameters", (PyCFunction)get_compression_parameters,
60 METH_VARARGS, get_compression_parameters__doc__ },
78 METH_VARARGS, get_compression_parameters__doc__ },
61 { "get_frame_parameters", (PyCFunction)get_frame_parameters,
79 { "get_frame_parameters", (PyCFunction)get_frame_parameters,
62 METH_VARARGS, get_frame_parameters__doc__ },
80 METH_VARARGS, get_frame_parameters__doc__ },
63 { "train_dictionary", (PyCFunction)train_dictionary,
81 { "train_dictionary", (PyCFunction)train_dictionary,
64 METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
82 METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
83 { "train_cover_dictionary", (PyCFunction)train_cover_dictionary,
84 METH_VARARGS | METH_KEYWORDS, train_cover_dictionary__doc__ },
65 { NULL, NULL }
85 { NULL, NULL }
66 };
86 };
67
87
88 void bufferutil_module_init(PyObject* mod);
68 void compressobj_module_init(PyObject* mod);
89 void compressobj_module_init(PyObject* mod);
69 void compressor_module_init(PyObject* mod);
90 void compressor_module_init(PyObject* mod);
70 void compressionparams_module_init(PyObject* mod);
91 void compressionparams_module_init(PyObject* mod);
71 void constants_module_init(PyObject* mod);
92 void constants_module_init(PyObject* mod);
72 void dictparams_module_init(PyObject* mod);
73 void compressiondict_module_init(PyObject* mod);
93 void compressiondict_module_init(PyObject* mod);
74 void compressionwriter_module_init(PyObject* mod);
94 void compressionwriter_module_init(PyObject* mod);
75 void compressoriterator_module_init(PyObject* mod);
95 void compressoriterator_module_init(PyObject* mod);
76 void decompressor_module_init(PyObject* mod);
96 void decompressor_module_init(PyObject* mod);
77 void decompressobj_module_init(PyObject* mod);
97 void decompressobj_module_init(PyObject* mod);
78 void decompressionwriter_module_init(PyObject* mod);
98 void decompressionwriter_module_init(PyObject* mod);
79 void decompressoriterator_module_init(PyObject* mod);
99 void decompressoriterator_module_init(PyObject* mod);
80 void frameparams_module_init(PyObject* mod);
100 void frameparams_module_init(PyObject* mod);
81
101
82 void zstd_module_init(PyObject* m) {
102 void zstd_module_init(PyObject* m) {
83 /* python-zstandard relies on unstable zstd C API features. This means
103 /* python-zstandard relies on unstable zstd C API features. This means
84 that changes in zstd may break expectations in python-zstandard.
104 that changes in zstd may break expectations in python-zstandard.
85
105
86 python-zstandard is distributed with a copy of the zstd sources.
106 python-zstandard is distributed with a copy of the zstd sources.
87 python-zstandard is only guaranteed to work with the bundled version
107 python-zstandard is only guaranteed to work with the bundled version
88 of zstd.
108 of zstd.
89
109
90 However, downstream redistributors or packagers may unbundle zstd
110 However, downstream redistributors or packagers may unbundle zstd
91 from python-zstandard. This can result in a mismatch between zstd
111 from python-zstandard. This can result in a mismatch between zstd
92 versions and API semantics. This essentially "voids the warranty"
112 versions and API semantics. This essentially "voids the warranty"
93 of python-zstandard and may cause undefined behavior.
113 of python-zstandard and may cause undefined behavior.
94
114
95 We detect this mismatch here and refuse to load the module if this
115 We detect this mismatch here and refuse to load the module if this
96 scenario is detected.
116 scenario is detected.
97 */
117 */
98 if (ZSTD_VERSION_NUMBER != 10103 || ZSTD_versionNumber() != 10103) {
118 if (ZSTD_VERSION_NUMBER != 10103 || ZSTD_versionNumber() != 10103) {
99 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
119 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
100 return;
120 return;
101 }
121 }
102
122
123 bufferutil_module_init(m);
103 compressionparams_module_init(m);
124 compressionparams_module_init(m);
104 dictparams_module_init(m);
105 compressiondict_module_init(m);
125 compressiondict_module_init(m);
106 compressobj_module_init(m);
126 compressobj_module_init(m);
107 compressor_module_init(m);
127 compressor_module_init(m);
108 compressionwriter_module_init(m);
128 compressionwriter_module_init(m);
109 compressoriterator_module_init(m);
129 compressoriterator_module_init(m);
110 constants_module_init(m);
130 constants_module_init(m);
111 decompressor_module_init(m);
131 decompressor_module_init(m);
112 decompressobj_module_init(m);
132 decompressobj_module_init(m);
113 decompressionwriter_module_init(m);
133 decompressionwriter_module_init(m);
114 decompressoriterator_module_init(m);
134 decompressoriterator_module_init(m);
115 frameparams_module_init(m);
135 frameparams_module_init(m);
116 }
136 }
117
137
118 #if PY_MAJOR_VERSION >= 3
138 #if PY_MAJOR_VERSION >= 3
119 static struct PyModuleDef zstd_module = {
139 static struct PyModuleDef zstd_module = {
120 PyModuleDef_HEAD_INIT,
140 PyModuleDef_HEAD_INIT,
121 "zstd",
141 "zstd",
122 zstd_doc,
142 zstd_doc,
123 -1,
143 -1,
124 zstd_methods
144 zstd_methods
125 };
145 };
126
146
127 PyMODINIT_FUNC PyInit_zstd(void) {
147 PyMODINIT_FUNC PyInit_zstd(void) {
128 PyObject *m = PyModule_Create(&zstd_module);
148 PyObject *m = PyModule_Create(&zstd_module);
129 if (m) {
149 if (m) {
130 zstd_module_init(m);
150 zstd_module_init(m);
131 if (PyErr_Occurred()) {
151 if (PyErr_Occurred()) {
132 Py_DECREF(m);
152 Py_DECREF(m);
133 m = NULL;
153 m = NULL;
134 }
154 }
135 }
155 }
136 return m;
156 return m;
137 }
157 }
138 #else
158 #else
139 PyMODINIT_FUNC initzstd(void) {
159 PyMODINIT_FUNC initzstd(void) {
140 PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc);
160 PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc);
141 if (m) {
161 if (m) {
142 zstd_module_init(m);
162 zstd_module_init(m);
143 }
163 }
144 }
164 }
145 #endif
165 #endif
166
167 /* Attempt to resolve the number of CPUs in the system. */
168 int cpu_count() {
169 int count = 0;
170
171 #if defined(_WIN32)
172 SYSTEM_INFO si;
173 si.dwNumberOfProcessors = 0;
174 GetSystemInfo(&si);
175 count = si.dwNumberOfProcessors;
176 #elif defined(__APPLE__)
177 int num;
178 size_t size = sizeof(int);
179
180 if (0 == sysctlbyname("hw.logicalcpu", &num, &size, NULL, 0)) {
181 count = num;
182 }
183 #elif defined(__linux__)
184 count = sysconf(_SC_NPROCESSORS_ONLN);
185 #elif defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
186 int mib[2];
187 size_t len = sizeof(count);
188 mib[0] = CTL_HW;
189 mib[1] = HW_NCPU;
190 if (0 != sysctl(mib, 2, &count, &len, NULL, 0)) {
191 count = 0;
192 }
193 #elif defined(__hpux)
194 count = mpctl(MPC_GETNUMSPUS, NULL, NULL);
195 #endif
196
197 return count;
198 }
199
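/* Round up to the next power of two by smearing the highest set bit into all
   lower bits, then adding one. Powers of two map to themselves and 0 maps to 0.
   The shift chain covers 32 bits, so inputs are assumed to fit in 32 bits. */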
200 size_t roundpow2(size_t i) {
201 i--;
202 i |= i >> 1;
203 i |= i >> 2;
204 i |= i >> 4;
205 i |= i >> 8;
206 i |= i >> 16;
207 i++;
208
209 return i;
210 }
@@ -1,1042 +1,1257
1 # Copyright (c) 2016-present, Gregory Szorc
1 # Copyright (c) 2016-present, Gregory Szorc
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # This software may be modified and distributed under the terms
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
5 # of the BSD license. See the LICENSE file for details.
6
6
7 """Python interface to the Zstandard (zstd) compression library."""
7 """Python interface to the Zstandard (zstd) compression library."""
8
8
9 from __future__ import absolute_import, unicode_literals
9 from __future__ import absolute_import, unicode_literals
10
10
11 import os
11 import sys
12 import sys
12
13
13 from _zstd_cffi import (
14 from _zstd_cffi import (
14 ffi,
15 ffi,
15 lib,
16 lib,
16 )
17 )
17
18
18 if sys.version_info[0] == 2:
19 if sys.version_info[0] == 2:
19 bytes_type = str
20 bytes_type = str
20 int_type = long
21 int_type = long
21 else:
22 else:
22 bytes_type = bytes
23 bytes_type = bytes
23 int_type = int
24 int_type = int
24
25
25
26
26 COMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_CStreamInSize()
27 COMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_CStreamInSize()
27 COMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_CStreamOutSize()
28 COMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_CStreamOutSize()
28 DECOMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_DStreamInSize()
29 DECOMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_DStreamInSize()
29 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_DStreamOutSize()
30 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_DStreamOutSize()
30
31
31 new_nonzero = ffi.new_allocator(should_clear_after_alloc=False)
32 new_nonzero = ffi.new_allocator(should_clear_after_alloc=False)
32
33
33
34
34 MAX_COMPRESSION_LEVEL = lib.ZSTD_maxCLevel()
35 MAX_COMPRESSION_LEVEL = lib.ZSTD_maxCLevel()
35 MAGIC_NUMBER = lib.ZSTD_MAGICNUMBER
36 MAGIC_NUMBER = lib.ZSTD_MAGICNUMBER
36 FRAME_HEADER = b'\x28\xb5\x2f\xfd'
37 FRAME_HEADER = b'\x28\xb5\x2f\xfd'
37 ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE)
38 ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE)
38
39
39 WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN
40 WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN
40 WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX
41 WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX
41 CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN
42 CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN
42 CHAINLOG_MAX = lib.ZSTD_CHAINLOG_MAX
43 CHAINLOG_MAX = lib.ZSTD_CHAINLOG_MAX
43 HASHLOG_MIN = lib.ZSTD_HASHLOG_MIN
44 HASHLOG_MIN = lib.ZSTD_HASHLOG_MIN
44 HASHLOG_MAX = lib.ZSTD_HASHLOG_MAX
45 HASHLOG_MAX = lib.ZSTD_HASHLOG_MAX
45 HASHLOG3_MAX = lib.ZSTD_HASHLOG3_MAX
46 HASHLOG3_MAX = lib.ZSTD_HASHLOG3_MAX
46 SEARCHLOG_MIN = lib.ZSTD_SEARCHLOG_MIN
47 SEARCHLOG_MIN = lib.ZSTD_SEARCHLOG_MIN
47 SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX
48 SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX
48 SEARCHLENGTH_MIN = lib.ZSTD_SEARCHLENGTH_MIN
49 SEARCHLENGTH_MIN = lib.ZSTD_SEARCHLENGTH_MIN
49 SEARCHLENGTH_MAX = lib.ZSTD_SEARCHLENGTH_MAX
50 SEARCHLENGTH_MAX = lib.ZSTD_SEARCHLENGTH_MAX
50 TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN
51 TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN
51 TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX
52 TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX
52
53
53 STRATEGY_FAST = lib.ZSTD_fast
54 STRATEGY_FAST = lib.ZSTD_fast
54 STRATEGY_DFAST = lib.ZSTD_dfast
55 STRATEGY_DFAST = lib.ZSTD_dfast
55 STRATEGY_GREEDY = lib.ZSTD_greedy
56 STRATEGY_GREEDY = lib.ZSTD_greedy
56 STRATEGY_LAZY = lib.ZSTD_lazy
57 STRATEGY_LAZY = lib.ZSTD_lazy
57 STRATEGY_LAZY2 = lib.ZSTD_lazy2
58 STRATEGY_LAZY2 = lib.ZSTD_lazy2
58 STRATEGY_BTLAZY2 = lib.ZSTD_btlazy2
59 STRATEGY_BTLAZY2 = lib.ZSTD_btlazy2
59 STRATEGY_BTOPT = lib.ZSTD_btopt
60 STRATEGY_BTOPT = lib.ZSTD_btopt
60
61
61 COMPRESSOBJ_FLUSH_FINISH = 0
62 COMPRESSOBJ_FLUSH_FINISH = 0
62 COMPRESSOBJ_FLUSH_BLOCK = 1
63 COMPRESSOBJ_FLUSH_BLOCK = 1
63
64
64
65
66 def _cpu_count():
67 # os.cpu_count() was introduced in Python 3.4.
68 try:
69 return os.cpu_count() or 0
70 except AttributeError:
71 pass
72
73 # Linux.
74 try:
75 if sys.version_info[0] == 2:
76 return os.sysconf(b'SC_NPROCESSORS_ONLN')
77 else:
78 return os.sysconf(u'SC_NPROCESSORS_ONLN')
79 except (AttributeError, ValueError):
80 pass
81
82 # TODO implement on other platforms.
83 return 0
84
85
65 class ZstdError(Exception):
86 class ZstdError(Exception):
66 pass
87 pass
67
88
68
89
69 class CompressionParameters(object):
90 class CompressionParameters(object):
70 def __init__(self, window_log, chain_log, hash_log, search_log,
91 def __init__(self, window_log, chain_log, hash_log, search_log,
71 search_length, target_length, strategy):
92 search_length, target_length, strategy):
72 if window_log < WINDOWLOG_MIN or window_log > WINDOWLOG_MAX:
93 if window_log < WINDOWLOG_MIN or window_log > WINDOWLOG_MAX:
73 raise ValueError('invalid window log value')
94 raise ValueError('invalid window log value')
74
95
75 if chain_log < CHAINLOG_MIN or chain_log > CHAINLOG_MAX:
96 if chain_log < CHAINLOG_MIN or chain_log > CHAINLOG_MAX:
76 raise ValueError('invalid chain log value')
97 raise ValueError('invalid chain log value')
77
98
78 if hash_log < HASHLOG_MIN or hash_log > HASHLOG_MAX:
99 if hash_log < HASHLOG_MIN or hash_log > HASHLOG_MAX:
79 raise ValueError('invalid hash log value')
100 raise ValueError('invalid hash log value')
80
101
81 if search_log < SEARCHLOG_MIN or search_log > SEARCHLOG_MAX:
102 if search_log < SEARCHLOG_MIN or search_log > SEARCHLOG_MAX:
82 raise ValueError('invalid search log value')
103 raise ValueError('invalid search log value')
83
104
84 if search_length < SEARCHLENGTH_MIN or search_length > SEARCHLENGTH_MAX:
105 if search_length < SEARCHLENGTH_MIN or search_length > SEARCHLENGTH_MAX:
85 raise ValueError('invalid search length value')
106 raise ValueError('invalid search length value')
86
107
87 if target_length < TARGETLENGTH_MIN or target_length > TARGETLENGTH_MAX:
108 if target_length < TARGETLENGTH_MIN or target_length > TARGETLENGTH_MAX:
88 raise ValueError('invalid target length value')
109 raise ValueError('invalid target length value')
89
110
90 if strategy < STRATEGY_FAST or strategy > STRATEGY_BTOPT:
111 if strategy < STRATEGY_FAST or strategy > STRATEGY_BTOPT:
91 raise ValueError('invalid strategy value')
112 raise ValueError('invalid strategy value')
92
113
93 self.window_log = window_log
114 self.window_log = window_log
94 self.chain_log = chain_log
115 self.chain_log = chain_log
95 self.hash_log = hash_log
116 self.hash_log = hash_log
96 self.search_log = search_log
117 self.search_log = search_log
97 self.search_length = search_length
118 self.search_length = search_length
98 self.target_length = target_length
119 self.target_length = target_length
99 self.strategy = strategy
120 self.strategy = strategy
100
121
122 zresult = lib.ZSTD_checkCParams(self.as_compression_parameters())
123 if lib.ZSTD_isError(zresult):
124 raise ValueError('invalid compression parameters: %s' %
125 ffi.string(lib.ZSTD_getErrorName(zresult)))
126
127 def estimated_compression_context_size(self):
128 return lib.ZSTD_estimateCCtxSize(self.as_compression_parameters())
129
101 def as_compression_parameters(self):
130 def as_compression_parameters(self):
102 p = ffi.new('ZSTD_compressionParameters *')[0]
131 p = ffi.new('ZSTD_compressionParameters *')[0]
103 p.windowLog = self.window_log
132 p.windowLog = self.window_log
104 p.chainLog = self.chain_log
133 p.chainLog = self.chain_log
105 p.hashLog = self.hash_log
134 p.hashLog = self.hash_log
106 p.searchLog = self.search_log
135 p.searchLog = self.search_log
107 p.searchLength = self.search_length
136 p.searchLength = self.search_length
108 p.targetLength = self.target_length
137 p.targetLength = self.target_length
109 p.strategy = self.strategy
138 p.strategy = self.strategy
110
139
111 return p
140 return p
112
141
113 def get_compression_parameters(level, source_size=0, dict_size=0):
142 def get_compression_parameters(level, source_size=0, dict_size=0):
114 params = lib.ZSTD_getCParams(level, source_size, dict_size)
143 params = lib.ZSTD_getCParams(level, source_size, dict_size)
115 return CompressionParameters(window_log=params.windowLog,
144 return CompressionParameters(window_log=params.windowLog,
116 chain_log=params.chainLog,
145 chain_log=params.chainLog,
117 hash_log=params.hashLog,
146 hash_log=params.hashLog,
118 search_log=params.searchLog,
147 search_log=params.searchLog,
119 search_length=params.searchLength,
148 search_length=params.searchLength,
120 target_length=params.targetLength,
149 target_length=params.targetLength,
121 strategy=params.strategy)
150 strategy=params.strategy)
122
151
123
152
124 def estimate_compression_context_size(params):
153 def estimate_compression_context_size(params):
125 if not isinstance(params, CompressionParameters):
154 if not isinstance(params, CompressionParameters):
126 raise ValueError('argument must be a CompressionParameters')
155 raise ValueError('argument must be a CompressionParameters')
127
156
128 cparams = params.as_compression_parameters()
157 cparams = params.as_compression_parameters()
129 return lib.ZSTD_estimateCCtxSize(cparams)
158 return lib.ZSTD_estimateCCtxSize(cparams)
130
159
131
160
132 def estimate_decompression_context_size():
161 def estimate_decompression_context_size():
133 return lib.ZSTD_estimateDCtxSize()
162 return lib.ZSTD_estimateDCtxSize()
134
163
135
164
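
A minimal usage sketch for the parameter helpers above, using only names defined in this module (the level and source size are arbitrary examples):

    params = get_compression_parameters(3, source_size=1048576)
    custom = CompressionParameters(params.window_log, params.chain_log,
                                   params.hash_log, params.search_log,
                                   params.search_length, params.target_length,
                                   STRATEGY_LAZY)
    # New in this revision: size estimate straight from the parameters object.
    ctx_size = custom.estimated_compression_context_size()
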
136 class ZstdCompressionWriter(object):
165 class ZstdCompressionWriter(object):
137 def __init__(self, compressor, writer, source_size, write_size):
166 def __init__(self, compressor, writer, source_size, write_size):
138 self._compressor = compressor
167 self._compressor = compressor
139 self._writer = writer
168 self._writer = writer
140 self._source_size = source_size
169 self._source_size = source_size
141 self._write_size = write_size
170 self._write_size = write_size
142 self._entered = False
171 self._entered = False
172 self._mtcctx = compressor._cctx if compressor._multithreaded else None
143
173
144 def __enter__(self):
174 def __enter__(self):
145 if self._entered:
175 if self._entered:
146 raise ZstdError('cannot __enter__ multiple times')
176 raise ZstdError('cannot __enter__ multiple times')
147
177
148 self._cstream = self._compressor._get_cstream(self._source_size)
178 if self._mtcctx:
179 self._compressor._init_mtcstream(self._source_size)
180 else:
181 self._compressor._ensure_cstream(self._source_size)
149 self._entered = True
182 self._entered = True
150 return self
183 return self
151
184
152 def __exit__(self, exc_type, exc_value, exc_tb):
185 def __exit__(self, exc_type, exc_value, exc_tb):
153 self._entered = False
186 self._entered = False
154
187
155 if not exc_type and not exc_value and not exc_tb:
188 if not exc_type and not exc_value and not exc_tb:
156 out_buffer = ffi.new('ZSTD_outBuffer *')
189 out_buffer = ffi.new('ZSTD_outBuffer *')
157 dst_buffer = ffi.new('char[]', self._write_size)
190 dst_buffer = ffi.new('char[]', self._write_size)
158 out_buffer.dst = dst_buffer
191 out_buffer.dst = dst_buffer
159 out_buffer.size = self._write_size
192 out_buffer.size = self._write_size
160 out_buffer.pos = 0
193 out_buffer.pos = 0
161
194
162 while True:
195 while True:
163 zresult = lib.ZSTD_endStream(self._cstream, out_buffer)
196 if self._mtcctx:
197 zresult = lib.ZSTDMT_endStream(self._mtcctx, out_buffer)
198 else:
199 zresult = lib.ZSTD_endStream(self._compressor._cstream, out_buffer)
164 if lib.ZSTD_isError(zresult):
200 if lib.ZSTD_isError(zresult):
165 raise ZstdError('error ending compression stream: %s' %
201 raise ZstdError('error ending compression stream: %s' %
166 ffi.string(lib.ZSTD_getErrorName(zresult)))
202 ffi.string(lib.ZSTD_getErrorName(zresult)))
167
203
168 if out_buffer.pos:
204 if out_buffer.pos:
169 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
205 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
170 out_buffer.pos = 0
206 out_buffer.pos = 0
171
207
172 if zresult == 0:
208 if zresult == 0:
173 break
209 break
174
210
175 self._cstream = None
176 self._compressor = None
211 self._compressor = None
177
212
178 return False
213 return False
179
214
180 def memory_size(self):
215 def memory_size(self):
181 if not self._entered:
216 if not self._entered:
182 raise ZstdError('cannot determine size of an inactive compressor; '
217 raise ZstdError('cannot determine size of an inactive compressor; '
183 'call when a context manager is active')
218 'call when a context manager is active')
184
219
185 return lib.ZSTD_sizeof_CStream(self._cstream)
220 return lib.ZSTD_sizeof_CStream(self._compressor._cstream)
186
221
187 def write(self, data):
222 def write(self, data):
188 if not self._entered:
223 if not self._entered:
189 raise ZstdError('write() must be called from an active context '
224 raise ZstdError('write() must be called from an active context '
190 'manager')
225 'manager')
191
226
192 total_write = 0
227 total_write = 0
193
228
194 data_buffer = ffi.from_buffer(data)
229 data_buffer = ffi.from_buffer(data)
195
230
196 in_buffer = ffi.new('ZSTD_inBuffer *')
231 in_buffer = ffi.new('ZSTD_inBuffer *')
197 in_buffer.src = data_buffer
232 in_buffer.src = data_buffer
198 in_buffer.size = len(data_buffer)
233 in_buffer.size = len(data_buffer)
199 in_buffer.pos = 0
234 in_buffer.pos = 0
200
235
201 out_buffer = ffi.new('ZSTD_outBuffer *')
236 out_buffer = ffi.new('ZSTD_outBuffer *')
202 dst_buffer = ffi.new('char[]', self._write_size)
237 dst_buffer = ffi.new('char[]', self._write_size)
203 out_buffer.dst = dst_buffer
238 out_buffer.dst = dst_buffer
204 out_buffer.size = self._write_size
239 out_buffer.size = self._write_size
205 out_buffer.pos = 0
240 out_buffer.pos = 0
206
241
207 while in_buffer.pos < in_buffer.size:
242 while in_buffer.pos < in_buffer.size:
208 zresult = lib.ZSTD_compressStream(self._cstream, out_buffer, in_buffer)
243 if self._mtcctx:
244 zresult = lib.ZSTDMT_compressStream(self._mtcctx, out_buffer,
245 in_buffer)
246 else:
247 zresult = lib.ZSTD_compressStream(self._compressor._cstream, out_buffer,
248 in_buffer)
209 if lib.ZSTD_isError(zresult):
249 if lib.ZSTD_isError(zresult):
210 raise ZstdError('zstd compress error: %s' %
250 raise ZstdError('zstd compress error: %s' %
211 ffi.string(lib.ZSTD_getErrorName(zresult)))
251 ffi.string(lib.ZSTD_getErrorName(zresult)))
212
252
213 if out_buffer.pos:
253 if out_buffer.pos:
214 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
254 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
215 total_write += out_buffer.pos
255 total_write += out_buffer.pos
216 out_buffer.pos = 0
256 out_buffer.pos = 0
217
257
218 return total_write
258 return total_write
219
259
220 def flush(self):
260 def flush(self):
221 if not self._entered:
261 if not self._entered:
222 raise ZstdError('flush must be called from an active context manager')
262 raise ZstdError('flush must be called from an active context manager')
223
263
224 total_write = 0
264 total_write = 0
225
265
226 out_buffer = ffi.new('ZSTD_outBuffer *')
266 out_buffer = ffi.new('ZSTD_outBuffer *')
227 dst_buffer = ffi.new('char[]', self._write_size)
267 dst_buffer = ffi.new('char[]', self._write_size)
228 out_buffer.dst = dst_buffer
268 out_buffer.dst = dst_buffer
229 out_buffer.size = self._write_size
269 out_buffer.size = self._write_size
230 out_buffer.pos = 0
270 out_buffer.pos = 0
231
271
232 while True:
272 while True:
233 zresult = lib.ZSTD_flushStream(self._cstream, out_buffer)
273 if self._mtcctx:
274 zresult = lib.ZSTDMT_flushStream(self._mtcctx, out_buffer)
275 else:
276 zresult = lib.ZSTD_flushStream(self._compressor._cstream, out_buffer)
234 if lib.ZSTD_isError(zresult):
277 if lib.ZSTD_isError(zresult):
235 raise ZstdError('zstd compress error: %s' %
278 raise ZstdError('zstd compress error: %s' %
236 ffi.string(lib.ZSTD_getErrorName(zresult)))
279 ffi.string(lib.ZSTD_getErrorName(zresult)))
237
280
238 if not out_buffer.pos:
281 if not out_buffer.pos:
239 break
282 break
240
283
241 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
284 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
242 total_write += out_buffer.pos
285 total_write += out_buffer.pos
243 out_buffer.pos = 0
286 out_buffer.pos = 0
244
287
245 return total_write
288 return total_write
246
289
247
290
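
A hedged sketch of driving ZstdCompressionWriter through ZstdCompressor.write_to() (defined later in this file); the output path is illustrative:

    cctx = ZstdCompressor(level=10)
    with open('data.zst', 'wb') as fh:        # illustrative path
        with cctx.write_to(fh) as compressor:
            compressor.write(b'chunk one')
            compressor.write(b'chunk two')
    # Leaving the inner context manager ends the zstd frame and flushes it to fh.
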
248 class ZstdCompressionObj(object):
291 class ZstdCompressionObj(object):
249 def compress(self, data):
292 def compress(self, data):
250 if self._finished:
293 if self._finished:
251 raise ZstdError('cannot call compress() after compressor finished')
294 raise ZstdError('cannot call compress() after compressor finished')
252
295
253 data_buffer = ffi.from_buffer(data)
296 data_buffer = ffi.from_buffer(data)
254 source = ffi.new('ZSTD_inBuffer *')
297 source = ffi.new('ZSTD_inBuffer *')
255 source.src = data_buffer
298 source.src = data_buffer
256 source.size = len(data_buffer)
299 source.size = len(data_buffer)
257 source.pos = 0
300 source.pos = 0
258
301
259 chunks = []
302 chunks = []
260
303
261 while source.pos < len(data):
304 while source.pos < len(data):
262 zresult = lib.ZSTD_compressStream(self._cstream, self._out, source)
305 if self._mtcctx:
306 zresult = lib.ZSTDMT_compressStream(self._mtcctx,
307 self._out, source)
308 else:
309 zresult = lib.ZSTD_compressStream(self._compressor._cstream, self._out,
310 source)
263 if lib.ZSTD_isError(zresult):
311 if lib.ZSTD_isError(zresult):
264 raise ZstdError('zstd compress error: %s' %
312 raise ZstdError('zstd compress error: %s' %
265 ffi.string(lib.ZSTD_getErrorName(zresult)))
313 ffi.string(lib.ZSTD_getErrorName(zresult)))
266
314
267 if self._out.pos:
315 if self._out.pos:
268 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
316 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
269 self._out.pos = 0
317 self._out.pos = 0
270
318
271 return b''.join(chunks)
319 return b''.join(chunks)
272
320
273 def flush(self, flush_mode=COMPRESSOBJ_FLUSH_FINISH):
321 def flush(self, flush_mode=COMPRESSOBJ_FLUSH_FINISH):
274 if flush_mode not in (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK):
322 if flush_mode not in (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK):
275 raise ValueError('flush mode not recognized')
323 raise ValueError('flush mode not recognized')
276
324
277 if self._finished:
325 if self._finished:
278 raise ZstdError('compressor object already finished')
326 raise ZstdError('compressor object already finished')
279
327
280 assert self._out.pos == 0
328 assert self._out.pos == 0
281
329
282 if flush_mode == COMPRESSOBJ_FLUSH_BLOCK:
330 if flush_mode == COMPRESSOBJ_FLUSH_BLOCK:
283 zresult = lib.ZSTD_flushStream(self._cstream, self._out)
331 if self._mtcctx:
332 zresult = lib.ZSTDMT_flushStream(self._mtcctx, self._out)
333 else:
334 zresult = lib.ZSTD_flushStream(self._compressor._cstream, self._out)
284 if lib.ZSTD_isError(zresult):
335 if lib.ZSTD_isError(zresult):
285 raise ZstdError('zstd compress error: %s' %
336 raise ZstdError('zstd compress error: %s' %
286 ffi.string(lib.ZSTD_getErrorName(zresult)))
337 ffi.string(lib.ZSTD_getErrorName(zresult)))
287
338
288 # Output buffer is guaranteed to hold full block.
339 # Output buffer is guaranteed to hold full block.
289 assert zresult == 0
340 assert zresult == 0
290
341
291 if self._out.pos:
342 if self._out.pos:
292 result = ffi.buffer(self._out.dst, self._out.pos)[:]
343 result = ffi.buffer(self._out.dst, self._out.pos)[:]
293 self._out.pos = 0
344 self._out.pos = 0
294 return result
345 return result
295 else:
346 else:
296 return b''
347 return b''
297
348
298 assert flush_mode == COMPRESSOBJ_FLUSH_FINISH
349 assert flush_mode == COMPRESSOBJ_FLUSH_FINISH
299 self._finished = True
350 self._finished = True
300
351
301 chunks = []
352 chunks = []
302
353
303 while True:
354 while True:
304 zresult = lib.ZSTD_endStream(self._cstream, self._out)
355 if self._mtcctx:
356 zresult = lib.ZSTDMT_endStream(self._mtcctx, self._out)
357 else:
358 zresult = lib.ZSTD_endStream(self._compressor._cstream, self._out)
305 if lib.ZSTD_isError(zresult):
359 if lib.ZSTD_isError(zresult):
306 raise ZstdError('error ending compression stream: %s' %
360 raise ZstdError('error ending compression stream: %s' %
307 ffi.string(lib.ZSTD_getErrorName(zresult)))
361 ffi.string(lib.ZSTD_getErrorName(zresult)))
308
362
309 if self._out.pos:
363 if self._out.pos:
310 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
364 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
311 self._out.pos = 0
365 self._out.pos = 0
312
366
313 if not zresult:
367 if not zresult:
314 break
368 break
315
369
316 # GC compression stream immediately.
317 self._cstream = None
318
319 return b''.join(chunks)
370 return b''.join(chunks)
320
371
321
372
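
For reference, a sketch of the compressobj()-style streaming API implemented above, assuming a ZstdCompressor as defined later in this file:

    cctx = ZstdCompressor()
    cobj = cctx.compressobj()
    chunks = [cobj.compress(b'some data'), cobj.compress(b'more data')]
    chunks.append(cobj.flush(COMPRESSOBJ_FLUSH_BLOCK))  # emit a full block, keep the stream open
    chunks.append(cobj.flush())                         # COMPRESSOBJ_FLUSH_FINISH ends the frame
    frame = b''.join(chunks)
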
322 class ZstdCompressor(object):
373 class ZstdCompressor(object):
323 def __init__(self, level=3, dict_data=None, compression_params=None,
374 def __init__(self, level=3, dict_data=None, compression_params=None,
324 write_checksum=False, write_content_size=False,
375 write_checksum=False, write_content_size=False,
325 write_dict_id=True):
376 write_dict_id=True, threads=0):
326 if level < 1:
377 if level < 1:
327 raise ValueError('level must be greater than 0')
378 raise ValueError('level must be greater than 0')
328 elif level > lib.ZSTD_maxCLevel():
379 elif level > lib.ZSTD_maxCLevel():
329 raise ValueError('level must be less than %d' % lib.ZSTD_maxCLevel())
380 raise ValueError('level must be less than %d' % lib.ZSTD_maxCLevel())
330
381
382 if threads < 0:
383 threads = _cpu_count()
384
331 self._compression_level = level
385 self._compression_level = level
332 self._dict_data = dict_data
386 self._dict_data = dict_data
333 self._cparams = compression_params
387 self._cparams = compression_params
334 self._fparams = ffi.new('ZSTD_frameParameters *')[0]
388 self._fparams = ffi.new('ZSTD_frameParameters *')[0]
335 self._fparams.checksumFlag = write_checksum
389 self._fparams.checksumFlag = write_checksum
336 self._fparams.contentSizeFlag = write_content_size
390 self._fparams.contentSizeFlag = write_content_size
337 self._fparams.noDictIDFlag = not write_dict_id
391 self._fparams.noDictIDFlag = not write_dict_id
338
392
339 cctx = lib.ZSTD_createCCtx()
393 if threads:
340 if cctx == ffi.NULL:
394 cctx = lib.ZSTDMT_createCCtx(threads)
341 raise MemoryError()
395 if cctx == ffi.NULL:
396 raise MemoryError()
342
397
343 self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx)
398 self._cctx = ffi.gc(cctx, lib.ZSTDMT_freeCCtx)
399 self._multithreaded = True
400 else:
401 cctx = lib.ZSTD_createCCtx()
402 if cctx == ffi.NULL:
403 raise MemoryError()
404
405 self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx)
406 self._multithreaded = False
407
408 self._cstream = None
344
409
345 def compress(self, data, allow_empty=False):
410 def compress(self, data, allow_empty=False):
346 if len(data) == 0 and self._fparams.contentSizeFlag and not allow_empty:
411 if len(data) == 0 and self._fparams.contentSizeFlag and not allow_empty:
347 raise ValueError('cannot write empty inputs when writing content sizes')
412 raise ValueError('cannot write empty inputs when writing content sizes')
348
413
414 if self._multithreaded and self._dict_data:
415 raise ZstdError('compress() cannot be used with both dictionaries and multi-threaded compression')
416
417 if self._multithreaded and self._cparams:
418 raise ZstdError('compress() cannot be used with both compression parameters and multi-threaded compression')
419
349 # TODO use a CDict for performance.
420 # TODO use a CDict for performance.
350 dict_data = ffi.NULL
421 dict_data = ffi.NULL
351 dict_size = 0
422 dict_size = 0
352
423
353 if self._dict_data:
424 if self._dict_data:
354 dict_data = self._dict_data.as_bytes()
425 dict_data = self._dict_data.as_bytes()
355 dict_size = len(self._dict_data)
426 dict_size = len(self._dict_data)
356
427
357 params = ffi.new('ZSTD_parameters *')[0]
428 params = ffi.new('ZSTD_parameters *')[0]
358 if self._cparams:
429 if self._cparams:
359 params.cParams = self._cparams.as_compression_parameters()
430 params.cParams = self._cparams.as_compression_parameters()
360 else:
431 else:
361 params.cParams = lib.ZSTD_getCParams(self._compression_level, len(data),
432 params.cParams = lib.ZSTD_getCParams(self._compression_level, len(data),
362 dict_size)
433 dict_size)
363 params.fParams = self._fparams
434 params.fParams = self._fparams
364
435
365 dest_size = lib.ZSTD_compressBound(len(data))
436 dest_size = lib.ZSTD_compressBound(len(data))
366 out = new_nonzero('char[]', dest_size)
437 out = new_nonzero('char[]', dest_size)
367
438
368 zresult = lib.ZSTD_compress_advanced(self._cctx,
439 if self._multithreaded:
369 ffi.addressof(out), dest_size,
440 zresult = lib.ZSTDMT_compressCCtx(self._cctx,
370 data, len(data),
441 ffi.addressof(out), dest_size,
371 dict_data, dict_size,
442 data, len(data),
372 params)
443 self._compression_level)
444 else:
445 zresult = lib.ZSTD_compress_advanced(self._cctx,
446 ffi.addressof(out), dest_size,
447 data, len(data),
448 dict_data, dict_size,
449 params)
373
450
374 if lib.ZSTD_isError(zresult):
451 if lib.ZSTD_isError(zresult):
375 raise ZstdError('cannot compress: %s' %
452 raise ZstdError('cannot compress: %s' %
376 ffi.string(lib.ZSTD_getErrorName(zresult)))
453 ffi.string(lib.ZSTD_getErrorName(zresult)))
377
454
378 return ffi.buffer(out, zresult)[:]
455 return ffi.buffer(out, zresult)[:]
379
456
380 def compressobj(self, size=0):
457 def compressobj(self, size=0):
381 cstream = self._get_cstream(size)
458 if self._multithreaded:
459 self._init_mtcstream(size)
460 else:
461 self._ensure_cstream(size)
462
382 cobj = ZstdCompressionObj()
463 cobj = ZstdCompressionObj()
383 cobj._cstream = cstream
384 cobj._out = ffi.new('ZSTD_outBuffer *')
464 cobj._out = ffi.new('ZSTD_outBuffer *')
385 cobj._dst_buffer = ffi.new('char[]', COMPRESSION_RECOMMENDED_OUTPUT_SIZE)
465 cobj._dst_buffer = ffi.new('char[]', COMPRESSION_RECOMMENDED_OUTPUT_SIZE)
386 cobj._out.dst = cobj._dst_buffer
466 cobj._out.dst = cobj._dst_buffer
387 cobj._out.size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE
467 cobj._out.size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE
388 cobj._out.pos = 0
468 cobj._out.pos = 0
389 cobj._compressor = self
469 cobj._compressor = self
390 cobj._finished = False
470 cobj._finished = False
391
471
472 if self._multithreaded:
473 cobj._mtcctx = self._cctx
474 else:
475 cobj._mtcctx = None
476
392 return cobj
477 return cobj
393
478
394 def copy_stream(self, ifh, ofh, size=0,
479 def copy_stream(self, ifh, ofh, size=0,
395 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
480 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
396 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
481 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
397
482
398 if not hasattr(ifh, 'read'):
483 if not hasattr(ifh, 'read'):
399 raise ValueError('first argument must have a read() method')
484 raise ValueError('first argument must have a read() method')
400 if not hasattr(ofh, 'write'):
485 if not hasattr(ofh, 'write'):
401 raise ValueError('second argument must have a write() method')
486 raise ValueError('second argument must have a write() method')
402
487
403 cstream = self._get_cstream(size)
488 mt = self._multithreaded
489 if mt:
490 self._init_mtcstream(size)
491 else:
492 self._ensure_cstream(size)
404
493
405 in_buffer = ffi.new('ZSTD_inBuffer *')
494 in_buffer = ffi.new('ZSTD_inBuffer *')
406 out_buffer = ffi.new('ZSTD_outBuffer *')
495 out_buffer = ffi.new('ZSTD_outBuffer *')
407
496
408 dst_buffer = ffi.new('char[]', write_size)
497 dst_buffer = ffi.new('char[]', write_size)
409 out_buffer.dst = dst_buffer
498 out_buffer.dst = dst_buffer
410 out_buffer.size = write_size
499 out_buffer.size = write_size
411 out_buffer.pos = 0
500 out_buffer.pos = 0
412
501
413 total_read, total_write = 0, 0
502 total_read, total_write = 0, 0
414
503
415 while True:
504 while True:
416 data = ifh.read(read_size)
505 data = ifh.read(read_size)
417 if not data:
506 if not data:
418 break
507 break
419
508
420 data_buffer = ffi.from_buffer(data)
509 data_buffer = ffi.from_buffer(data)
421 total_read += len(data_buffer)
510 total_read += len(data_buffer)
422 in_buffer.src = data_buffer
511 in_buffer.src = data_buffer
423 in_buffer.size = len(data_buffer)
512 in_buffer.size = len(data_buffer)
424 in_buffer.pos = 0
513 in_buffer.pos = 0
425
514
426 while in_buffer.pos < in_buffer.size:
515 while in_buffer.pos < in_buffer.size:
427 zresult = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer)
516 if mt:
517 zresult = lib.ZSTDMT_compressStream(self._cctx, out_buffer, in_buffer)
518 else:
519 zresult = lib.ZSTD_compressStream(self._cstream,
520 out_buffer, in_buffer)
428 if lib.ZSTD_isError(zresult):
521 if lib.ZSTD_isError(zresult):
429 raise ZstdError('zstd compress error: %s' %
522 raise ZstdError('zstd compress error: %s' %
430 ffi.string(lib.ZSTD_getErrorName(zresult)))
523 ffi.string(lib.ZSTD_getErrorName(zresult)))
431
524
432 if out_buffer.pos:
525 if out_buffer.pos:
433 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
526 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
434 total_write += out_buffer.pos
527 total_write += out_buffer.pos
435 out_buffer.pos = 0
528 out_buffer.pos = 0
436
529
437 # We've finished reading. Flush the compressor.
530 # We've finished reading. Flush the compressor.
438 while True:
531 while True:
439 zresult = lib.ZSTD_endStream(cstream, out_buffer)
532 if mt:
533 zresult = lib.ZSTDMT_endStream(self._cctx, out_buffer)
534 else:
535 zresult = lib.ZSTD_endStream(self._cstream, out_buffer)
440 if lib.ZSTD_isError(zresult):
536 if lib.ZSTD_isError(zresult):
441 raise ZstdError('error ending compression stream: %s' %
537 raise ZstdError('error ending compression stream: %s' %
442 ffi.string(lib.ZSTD_getErrorName(zresult)))
538 ffi.string(lib.ZSTD_getErrorName(zresult)))
443
539
444 if out_buffer.pos:
540 if out_buffer.pos:
445 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
541 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
446 total_write += out_buffer.pos
542 total_write += out_buffer.pos
447 out_buffer.pos = 0
543 out_buffer.pos = 0
448
544
449 if zresult == 0:
545 if zresult == 0:
450 break
546 break
451
547
452 return total_read, total_write
548 return total_read, total_write
453
549
454 def write_to(self, writer, size=0,
550 def write_to(self, writer, size=0,
455 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
551 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
456
552
457 if not hasattr(writer, 'write'):
553 if not hasattr(writer, 'write'):
458 raise ValueError('must pass an object with a write() method')
554 raise ValueError('must pass an object with a write() method')
459
555
460 return ZstdCompressionWriter(self, writer, size, write_size)
556 return ZstdCompressionWriter(self, writer, size, write_size)
461
557
462 def read_from(self, reader, size=0,
558 def read_from(self, reader, size=0,
463 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
559 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
464 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
560 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
465 if hasattr(reader, 'read'):
561 if hasattr(reader, 'read'):
466 have_read = True
562 have_read = True
467 elif hasattr(reader, '__getitem__'):
563 elif hasattr(reader, '__getitem__'):
468 have_read = False
564 have_read = False
469 buffer_offset = 0
565 buffer_offset = 0
470 size = len(reader)
566 size = len(reader)
471 else:
567 else:
472 raise ValueError('must pass an object with a read() method or '
568 raise ValueError('must pass an object with a read() method or '
473 'an object conforming to the buffer protocol')
569 'an object conforming to the buffer protocol')
474
570
475 cstream = self._get_cstream(size)
571 if self._multithreaded:
572 self._init_mtcstream(size)
573 else:
574 self._ensure_cstream(size)
476
575
477 in_buffer = ffi.new('ZSTD_inBuffer *')
576 in_buffer = ffi.new('ZSTD_inBuffer *')
478 out_buffer = ffi.new('ZSTD_outBuffer *')
577 out_buffer = ffi.new('ZSTD_outBuffer *')
479
578
480 in_buffer.src = ffi.NULL
579 in_buffer.src = ffi.NULL
481 in_buffer.size = 0
580 in_buffer.size = 0
482 in_buffer.pos = 0
581 in_buffer.pos = 0
483
582
484 dst_buffer = ffi.new('char[]', write_size)
583 dst_buffer = ffi.new('char[]', write_size)
485 out_buffer.dst = dst_buffer
584 out_buffer.dst = dst_buffer
486 out_buffer.size = write_size
585 out_buffer.size = write_size
487 out_buffer.pos = 0
586 out_buffer.pos = 0
488
587
489 while True:
588 while True:
490 # We should never have output data sitting around after a previous
589 # We should never have output data sitting around after a previous
491 # iteration.
590 # iteration.
492 assert out_buffer.pos == 0
591 assert out_buffer.pos == 0
493
592
494 # Collect input data.
593 # Collect input data.
495 if have_read:
594 if have_read:
496 read_result = reader.read(read_size)
595 read_result = reader.read(read_size)
497 else:
596 else:
498 remaining = len(reader) - buffer_offset
597 remaining = len(reader) - buffer_offset
499 slice_size = min(remaining, read_size)
598 slice_size = min(remaining, read_size)
500 read_result = reader[buffer_offset:buffer_offset + slice_size]
599 read_result = reader[buffer_offset:buffer_offset + slice_size]
501 buffer_offset += slice_size
600 buffer_offset += slice_size
502
601
503 # No new input data. Break out of the read loop.
602 # No new input data. Break out of the read loop.
504 if not read_result:
603 if not read_result:
505 break
604 break
506
605
507 # Feed all read data into the compressor and emit output until
606 # Feed all read data into the compressor and emit output until
508 # exhausted.
607 # exhausted.
509 read_buffer = ffi.from_buffer(read_result)
608 read_buffer = ffi.from_buffer(read_result)
510 in_buffer.src = read_buffer
609 in_buffer.src = read_buffer
511 in_buffer.size = len(read_buffer)
610 in_buffer.size = len(read_buffer)
512 in_buffer.pos = 0
611 in_buffer.pos = 0
513
612
514 while in_buffer.pos < in_buffer.size:
613 while in_buffer.pos < in_buffer.size:
515 zresult = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer)
614 if self._multithreaded:
615 zresult = lib.ZSTDMT_compressStream(self._cctx, out_buffer, in_buffer)
616 else:
617 zresult = lib.ZSTD_compressStream(self._cstream, out_buffer, in_buffer)
516 if lib.ZSTD_isError(zresult):
618 if lib.ZSTD_isError(zresult):
517 raise ZstdError('zstd compress error: %s' %
619 raise ZstdError('zstd compress error: %s' %
518 ffi.string(lib.ZSTD_getErrorName(zresult)))
620 ffi.string(lib.ZSTD_getErrorName(zresult)))
519
621
520 if out_buffer.pos:
622 if out_buffer.pos:
521 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
623 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
522 out_buffer.pos = 0
624 out_buffer.pos = 0
523 yield data
625 yield data
524
626
525 assert out_buffer.pos == 0
627 assert out_buffer.pos == 0
526
628
527 # And repeat the loop to collect more data.
629 # And repeat the loop to collect more data.
528 continue
630 continue
529
631
530 # If we get here, input is exhausted. End the stream and emit what
632 # If we get here, input is exhausted. End the stream and emit what
531 # remains.
633 # remains.
532 while True:
634 while True:
533 assert out_buffer.pos == 0
635 assert out_buffer.pos == 0
534 zresult = lib.ZSTD_endStream(cstream, out_buffer)
636 if self._multithreaded:
637 zresult = lib.ZSTDMT_endStream(self._cctx, out_buffer)
638 else:
639 zresult = lib.ZSTD_endStream(self._cstream, out_buffer)
535 if lib.ZSTD_isError(zresult):
640 if lib.ZSTD_isError(zresult):
536 raise ZstdError('error ending compression stream: %s' %
641 raise ZstdError('error ending compression stream: %s' %
537 ffi.string(lib.ZSTD_getErrorName(zresult)))
642 ffi.string(lib.ZSTD_getErrorName(zresult)))
538
643
539 if out_buffer.pos:
644 if out_buffer.pos:
540 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
645 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
541 out_buffer.pos = 0
646 out_buffer.pos = 0
542 yield data
647 yield data
543
648
544 if zresult == 0:
649 if zresult == 0:
545 break
650 break
546
651
547 def _get_cstream(self, size):
652 def _ensure_cstream(self, size):
653 if self._cstream:
654 zresult = lib.ZSTD_resetCStream(self._cstream, size)
655 if lib.ZSTD_isError(zresult):
656 raise ZstdError('could not reset CStream: %s' %
657 ffi.string(lib.ZSTD_getErrorName(zresult)))
658
659 return
660
548 cstream = lib.ZSTD_createCStream()
661 cstream = lib.ZSTD_createCStream()
549 if cstream == ffi.NULL:
662 if cstream == ffi.NULL:
550 raise MemoryError()
663 raise MemoryError()
551
664
552 cstream = ffi.gc(cstream, lib.ZSTD_freeCStream)
665 cstream = ffi.gc(cstream, lib.ZSTD_freeCStream)
553
666
554 dict_data = ffi.NULL
667 dict_data = ffi.NULL
555 dict_size = 0
668 dict_size = 0
556 if self._dict_data:
669 if self._dict_data:
557 dict_data = self._dict_data.as_bytes()
670 dict_data = self._dict_data.as_bytes()
558 dict_size = len(self._dict_data)
671 dict_size = len(self._dict_data)
559
672
560 zparams = ffi.new('ZSTD_parameters *')[0]
673 zparams = ffi.new('ZSTD_parameters *')[0]
561 if self._cparams:
674 if self._cparams:
562 zparams.cParams = self._cparams.as_compression_parameters()
675 zparams.cParams = self._cparams.as_compression_parameters()
563 else:
676 else:
564 zparams.cParams = lib.ZSTD_getCParams(self._compression_level,
677 zparams.cParams = lib.ZSTD_getCParams(self._compression_level,
565 size, dict_size)
678 size, dict_size)
566 zparams.fParams = self._fparams
679 zparams.fParams = self._fparams
567
680
568 zresult = lib.ZSTD_initCStream_advanced(cstream, dict_data, dict_size,
681 zresult = lib.ZSTD_initCStream_advanced(cstream, dict_data, dict_size,
569 zparams, size)
682 zparams, size)
570 if lib.ZSTD_isError(zresult):
683 if lib.ZSTD_isError(zresult):
571 raise ZstdError('cannot init CStream: %s' %
684 raise ZstdError('cannot init CStream: %s' %
572 ffi.string(lib.ZSTD_getErrorName(zresult)))
685 ffi.string(lib.ZSTD_getErrorName(zresult)))
573
686
574 return cstream
687 self._cstream = cstream
688
689 def _init_mtcstream(self, size):
690 assert self._multithreaded
691
692 dict_data = ffi.NULL
693 dict_size = 0
694 if self._dict_data:
695 dict_data = self._dict_data.as_bytes()
696 dict_size = len(self._dict_data)
697
698 zparams = ffi.new('ZSTD_parameters *')[0]
699 if self._cparams:
700 zparams.cParams = self._cparams.as_compression_parameters()
701 else:
702 zparams.cParams = lib.ZSTD_getCParams(self._compression_level,
703 size, dict_size)
704
705 zparams.fParams = self._fparams
706
707 zresult = lib.ZSTDMT_initCStream_advanced(self._cctx, dict_data, dict_size,
708 zparams, size)
709
710 if lib.ZSTD_isError(zresult):
711 raise ZstdError('cannot init CStream: %s' %
712 ffi.string(lib.ZSTD_getErrorName(zresult)))
575
713
576
714
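
A sketch of one-shot compression with the class above, including the new multi-threaded path (threads=-1 resolves to _cpu_count(); the guards in compress() disallow combining it with dict_data or compression_params):

    cctx = ZstdCompressor(level=5, write_checksum=True)
    frame = cctx.compress(b'data to compress')

    mt_cctx = ZstdCompressor(level=5, threads=-1)   # one worker per detected CPU
    mt_frame = mt_cctx.compress(b'data to compress')
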
577 class FrameParameters(object):
715 class FrameParameters(object):
578 def __init__(self, fparams):
716 def __init__(self, fparams):
579 self.content_size = fparams.frameContentSize
717 self.content_size = fparams.frameContentSize
580 self.window_size = fparams.windowSize
718 self.window_size = fparams.windowSize
581 self.dict_id = fparams.dictID
719 self.dict_id = fparams.dictID
582 self.has_checksum = bool(fparams.checksumFlag)
720 self.has_checksum = bool(fparams.checksumFlag)
583
721
584
722
585 def get_frame_parameters(data):
723 def get_frame_parameters(data):
586 if not isinstance(data, bytes_type):
724 if not isinstance(data, bytes_type):
587 raise TypeError('argument must be bytes')
725 raise TypeError('argument must be bytes')
588
726
589 params = ffi.new('ZSTD_frameParams *')
727 params = ffi.new('ZSTD_frameParams *')
590
728
591 zresult = lib.ZSTD_getFrameParams(params, data, len(data))
729 zresult = lib.ZSTD_getFrameParams(params, data, len(data))
592 if lib.ZSTD_isError(zresult):
730 if lib.ZSTD_isError(zresult):
593 raise ZstdError('cannot get frame parameters: %s' %
731 raise ZstdError('cannot get frame parameters: %s' %
594 ffi.string(lib.ZSTD_getErrorName(zresult)))
732 ffi.string(lib.ZSTD_getErrorName(zresult)))
595
733
596 if zresult:
734 if zresult:
597 raise ZstdError('not enough data for frame parameters; need %d bytes' %
735 raise ZstdError('not enough data for frame parameters; need %d bytes' %
598 zresult)
736 zresult)
599
737
600 return FrameParameters(params[0])
738 return FrameParameters(params[0])
601
739
602
740
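
A short sketch tying get_frame_parameters() to a frame produced as above (the values are whatever the frame header records):

    cctx = ZstdCompressor(write_checksum=True, write_content_size=True)
    frame = cctx.compress(b'hello world')
    fparams = get_frame_parameters(frame)
    # fparams.content_size, fparams.window_size, fparams.dict_id, fparams.has_checksum
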
603 class ZstdCompressionDict(object):
741 class ZstdCompressionDict(object):
604 def __init__(self, data):
742 def __init__(self, data, k=0, d=0):
605 assert isinstance(data, bytes_type)
743 assert isinstance(data, bytes_type)
606 self._data = data
744 self._data = data
745 self.k = k
746 self.d = d
607
747
608 def __len__(self):
748 def __len__(self):
609 return len(self._data)
749 return len(self._data)
610
750
611 def dict_id(self):
751 def dict_id(self):
612 return int_type(lib.ZDICT_getDictID(self._data, len(self._data)))
752 return int_type(lib.ZDICT_getDictID(self._data, len(self._data)))
613
753
614 def as_bytes(self):
754 def as_bytes(self):
615 return self._data
755 return self._data
616
756
617
757
618 def train_dictionary(dict_size, samples, parameters=None):
758 def train_dictionary(dict_size, samples, selectivity=0, level=0,
759 notifications=0, dict_id=0):
619 if not isinstance(samples, list):
760 if not isinstance(samples, list):
620 raise TypeError('samples must be a list')
761 raise TypeError('samples must be a list')
621
762
622 total_size = sum(map(len, samples))
763 total_size = sum(map(len, samples))
623
764
624 samples_buffer = new_nonzero('char[]', total_size)
765 samples_buffer = new_nonzero('char[]', total_size)
625 sample_sizes = new_nonzero('size_t[]', len(samples))
766 sample_sizes = new_nonzero('size_t[]', len(samples))
626
767
627 offset = 0
768 offset = 0
628 for i, sample in enumerate(samples):
769 for i, sample in enumerate(samples):
629 if not isinstance(sample, bytes_type):
770 if not isinstance(sample, bytes_type):
630 raise ValueError('samples must be bytes')
771 raise ValueError('samples must be bytes')
631
772
632 l = len(sample)
773 l = len(sample)
633 ffi.memmove(samples_buffer + offset, sample, l)
774 ffi.memmove(samples_buffer + offset, sample, l)
634 offset += l
775 offset += l
635 sample_sizes[i] = l
776 sample_sizes[i] = l
636
777
637 dict_data = new_nonzero('char[]', dict_size)
778 dict_data = new_nonzero('char[]', dict_size)
638
779
639 zresult = lib.ZDICT_trainFromBuffer(ffi.addressof(dict_data), dict_size,
780 dparams = ffi.new('ZDICT_params_t *')[0]
640 ffi.addressof(samples_buffer),
781 dparams.selectivityLevel = selectivity
641 ffi.addressof(sample_sizes, 0),
782 dparams.compressionLevel = level
642 len(samples))
783 dparams.notificationLevel = notifications
784 dparams.dictID = dict_id
785
786 zresult = lib.ZDICT_trainFromBuffer_advanced(
787 ffi.addressof(dict_data), dict_size,
788 ffi.addressof(samples_buffer),
789 ffi.addressof(sample_sizes, 0), len(samples),
790 dparams)
791
643 if lib.ZDICT_isError(zresult):
792 if lib.ZDICT_isError(zresult):
644 raise ZstdError('Cannot train dict: %s' %
793 raise ZstdError('Cannot train dict: %s' %
645 ffi.string(lib.ZDICT_getErrorName(zresult)))
794 ffi.string(lib.ZDICT_getErrorName(zresult)))
646
795
647 return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:])
796 return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:])
648
797
649
798
799 def train_cover_dictionary(dict_size, samples, k=0, d=0,
800 notifications=0, dict_id=0, level=0, optimize=False,
801 steps=0, threads=0):
802 if not isinstance(samples, list):
803 raise TypeError('samples must be a list')
804
805 if threads < 0:
806 threads = _cpu_count()
807
808 total_size = sum(map(len, samples))
809
810 samples_buffer = new_nonzero('char[]', total_size)
811 sample_sizes = new_nonzero('size_t[]', len(samples))
812
813 offset = 0
814 for i, sample in enumerate(samples):
815 if not isinstance(sample, bytes_type):
816 raise ValueError('samples must be bytes')
817
818 l = len(sample)
819 ffi.memmove(samples_buffer + offset, sample, l)
820 offset += l
821 sample_sizes[i] = l
822
823 dict_data = new_nonzero('char[]', dict_size)
824
825 dparams = ffi.new('COVER_params_t *')[0]
826 dparams.k = k
827 dparams.d = d
828 dparams.steps = steps
829 dparams.nbThreads = threads
830 dparams.notificationLevel = notifications
831 dparams.dictID = dict_id
832 dparams.compressionLevel = level
833
834 if optimize:
835 zresult = lib.COVER_optimizeTrainFromBuffer(
836 ffi.addressof(dict_data), dict_size,
837 ffi.addressof(samples_buffer),
838 ffi.addressof(sample_sizes, 0), len(samples),
839 ffi.addressof(dparams))
840 else:
841 zresult = lib.COVER_trainFromBuffer(
842 ffi.addressof(dict_data), dict_size,
843 ffi.addressof(samples_buffer),
844 ffi.addressof(sample_sizes, 0), len(samples),
845 dparams)
846
847 if lib.ZDICT_isError(zresult):
848 raise ZstdError('cannot train dict: %s' %
849 ffi.string(lib.ZDICT_getErrorName(zresult)))
850
851 return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:],
852 k=dparams.k, d=dparams.d)
853
854
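
A hedged sketch of the two dictionary trainers above; the corpus here is a toy, and real training wants many varied samples (degenerate input can make ZDICT report an error):

    samples = [('sample content %d' % i).encode('ascii') * 16 for i in range(1000)]
    plain_dict = train_dictionary(16384, samples)
    cover_dict = train_cover_dictionary(16384, samples, k=64, d=8)
    cctx = ZstdCompressor(dict_data=plain_dict)
    compressed = cctx.compress(b'sample content 42')
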
650 class ZstdDecompressionObj(object):
855 class ZstdDecompressionObj(object):
651 def __init__(self, decompressor):
856 def __init__(self, decompressor):
652 self._decompressor = decompressor
857 self._decompressor = decompressor
653 self._dstream = self._decompressor._get_dstream()
654 self._finished = False
858 self._finished = False
655
859
656 def decompress(self, data):
860 def decompress(self, data):
657 if self._finished:
861 if self._finished:
658 raise ZstdError('cannot use a decompressobj multiple times')
862 raise ZstdError('cannot use a decompressobj multiple times')
659
863
864 assert(self._decompressor._dstream)
865
660 in_buffer = ffi.new('ZSTD_inBuffer *')
866 in_buffer = ffi.new('ZSTD_inBuffer *')
661 out_buffer = ffi.new('ZSTD_outBuffer *')
867 out_buffer = ffi.new('ZSTD_outBuffer *')
662
868
663 data_buffer = ffi.from_buffer(data)
869 data_buffer = ffi.from_buffer(data)
664 in_buffer.src = data_buffer
870 in_buffer.src = data_buffer
665 in_buffer.size = len(data_buffer)
871 in_buffer.size = len(data_buffer)
666 in_buffer.pos = 0
872 in_buffer.pos = 0
667
873
668 dst_buffer = ffi.new('char[]', DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
874 dst_buffer = ffi.new('char[]', DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
669 out_buffer.dst = dst_buffer
875 out_buffer.dst = dst_buffer
670 out_buffer.size = len(dst_buffer)
876 out_buffer.size = len(dst_buffer)
671 out_buffer.pos = 0
877 out_buffer.pos = 0
672
878
673 chunks = []
879 chunks = []
674
880
675 while in_buffer.pos < in_buffer.size:
881 while in_buffer.pos < in_buffer.size:
676 zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
882 zresult = lib.ZSTD_decompressStream(self._decompressor._dstream,
883 out_buffer, in_buffer)
677 if lib.ZSTD_isError(zresult):
884 if lib.ZSTD_isError(zresult):
678 raise ZstdError('zstd decompressor error: %s' %
885 raise ZstdError('zstd decompressor error: %s' %
679 ffi.string(lib.ZSTD_getErrorName(zresult)))
886 ffi.string(lib.ZSTD_getErrorName(zresult)))
680
887
681 if zresult == 0:
888 if zresult == 0:
682 self._finished = True
889 self._finished = True
683 self._dstream = None
684 self._decompressor = None
890 self._decompressor = None
685
891
686 if out_buffer.pos:
892 if out_buffer.pos:
687 chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
893 chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
688 out_buffer.pos = 0
894 out_buffer.pos = 0
689
895
690 return b''.join(chunks)
896 return b''.join(chunks)
691
897
692
898
693 class ZstdDecompressionWriter(object):
899 class ZstdDecompressionWriter(object):
694 def __init__(self, decompressor, writer, write_size):
900 def __init__(self, decompressor, writer, write_size):
695 self._decompressor = decompressor
901 self._decompressor = decompressor
696 self._writer = writer
902 self._writer = writer
697 self._write_size = write_size
903 self._write_size = write_size
698 self._dstream = None
699 self._entered = False
904 self._entered = False
700
905
701 def __enter__(self):
906 def __enter__(self):
702 if self._entered:
907 if self._entered:
703 raise ZstdError('cannot __enter__ multiple times')
908 raise ZstdError('cannot __enter__ multiple times')
704
909
705 self._dstream = self._decompressor._get_dstream()
910 self._decompressor._ensure_dstream()
706 self._entered = True
911 self._entered = True
707
912
708 return self
913 return self
709
914
710 def __exit__(self, exc_type, exc_value, exc_tb):
915 def __exit__(self, exc_type, exc_value, exc_tb):
711 self._entered = False
916 self._entered = False
712 self._dstream = None
713
917
714 def memory_size(self):
918 def memory_size(self):
715 if not self._dstream:
919 if not self._decompressor._dstream:
716 raise ZstdError('cannot determine size of inactive decompressor; '
920 raise ZstdError('cannot determine size of inactive decompressor; '
717 'call when context manager is active')
921 'call when context manager is active')
718
922
719 return lib.ZSTD_sizeof_DStream(self._dstream)
923 return lib.ZSTD_sizeof_DStream(self._decompressor._dstream)
720
924
721 def write(self, data):
925 def write(self, data):
722 if not self._entered:
926 if not self._entered:
723 raise ZstdError('write must be called from an active context manager')
927 raise ZstdError('write must be called from an active context manager')
724
928
725 total_write = 0
929 total_write = 0
726
930
727 in_buffer = ffi.new('ZSTD_inBuffer *')
931 in_buffer = ffi.new('ZSTD_inBuffer *')
728 out_buffer = ffi.new('ZSTD_outBuffer *')
932 out_buffer = ffi.new('ZSTD_outBuffer *')
729
933
730 data_buffer = ffi.from_buffer(data)
934 data_buffer = ffi.from_buffer(data)
731 in_buffer.src = data_buffer
935 in_buffer.src = data_buffer
732 in_buffer.size = len(data_buffer)
936 in_buffer.size = len(data_buffer)
733 in_buffer.pos = 0
937 in_buffer.pos = 0
734
938
735 dst_buffer = ffi.new('char[]', self._write_size)
939 dst_buffer = ffi.new('char[]', self._write_size)
736 out_buffer.dst = dst_buffer
940 out_buffer.dst = dst_buffer
737 out_buffer.size = len(dst_buffer)
941 out_buffer.size = len(dst_buffer)
738 out_buffer.pos = 0
942 out_buffer.pos = 0
739
943
944 dstream = self._decompressor._dstream
945
740 while in_buffer.pos < in_buffer.size:
946 while in_buffer.pos < in_buffer.size:
741 zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
947 zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer)
742 if lib.ZSTD_isError(zresult):
948 if lib.ZSTD_isError(zresult):
743 raise ZstdError('zstd decompress error: %s' %
949 raise ZstdError('zstd decompress error: %s' %
744 ffi.string(lib.ZSTD_getErrorName(zresult)))
950 ffi.string(lib.ZSTD_getErrorName(zresult)))
745
951
746 if out_buffer.pos:
952 if out_buffer.pos:
747 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
953 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
748 total_write += out_buffer.pos
954 total_write += out_buffer.pos
749 out_buffer.pos = 0
955 out_buffer.pos = 0
750
956
751 return total_write
957 return total_write
752
958
753
959
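
A sketch of decompression using the ZstdDecompressor defined just below, assuming `frame` was produced with write_content_size=True as in the earlier sketch:

    dctx = ZstdDecompressor()
    original = dctx.decompress(frame)     # one-shot; needs the content size in the frame header
    dobj = dctx.decompressobj()           # incremental, zlib-style
    original = b''.join([dobj.decompress(frame[:10]), dobj.decompress(frame[10:])])
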
754 class ZstdDecompressor(object):
960 class ZstdDecompressor(object):
755 def __init__(self, dict_data=None):
961 def __init__(self, dict_data=None):
756 self._dict_data = dict_data
962 self._dict_data = dict_data
757
963
758 dctx = lib.ZSTD_createDCtx()
964 dctx = lib.ZSTD_createDCtx()
759 if dctx == ffi.NULL:
965 if dctx == ffi.NULL:
760 raise MemoryError()
966 raise MemoryError()
761
967
762 self._refdctx = ffi.gc(dctx, lib.ZSTD_freeDCtx)
968 self._refdctx = ffi.gc(dctx, lib.ZSTD_freeDCtx)
969 self._dstream = None
763
970
764 @property
971 @property
765 def _ddict(self):
972 def _ddict(self):
766 if self._dict_data:
973 if self._dict_data:
767 dict_data = self._dict_data.as_bytes()
974 dict_data = self._dict_data.as_bytes()
768 dict_size = len(self._dict_data)
975 dict_size = len(self._dict_data)
769
976
770 ddict = lib.ZSTD_createDDict(dict_data, dict_size)
977 ddict = lib.ZSTD_createDDict(dict_data, dict_size)
771 if ddict == ffi.NULL:
978 if ddict == ffi.NULL:
772 raise ZstdError('could not create decompression dict')
979 raise ZstdError('could not create decompression dict')
773 else:
980 else:
774 ddict = None
981 ddict = None
775
982
776 self.__dict__['_ddict'] = ddict
983 self.__dict__['_ddict'] = ddict
777 return ddict
984 return ddict
778
985
779 def decompress(self, data, max_output_size=0):
986 def decompress(self, data, max_output_size=0):
780 data_buffer = ffi.from_buffer(data)
987 data_buffer = ffi.from_buffer(data)
781
988
782 orig_dctx = new_nonzero('char[]', lib.ZSTD_sizeof_DCtx(self._refdctx))
989 orig_dctx = new_nonzero('char[]', lib.ZSTD_sizeof_DCtx(self._refdctx))
783 dctx = ffi.cast('ZSTD_DCtx *', orig_dctx)
990 dctx = ffi.cast('ZSTD_DCtx *', orig_dctx)
784 lib.ZSTD_copyDCtx(dctx, self._refdctx)
991 lib.ZSTD_copyDCtx(dctx, self._refdctx)
785
992
786 ddict = self._ddict
993 ddict = self._ddict
787
994
788 output_size = lib.ZSTD_getDecompressedSize(data_buffer, len(data_buffer))
995 output_size = lib.ZSTD_getDecompressedSize(data_buffer, len(data_buffer))
789 if output_size:
996 if output_size:
790 result_buffer = ffi.new('char[]', output_size)
997 result_buffer = ffi.new('char[]', output_size)
791 result_size = output_size
998 result_size = output_size
792 else:
999 else:
793 if not max_output_size:
1000 if not max_output_size:
794 raise ZstdError('input data invalid or missing content size '
1001 raise ZstdError('input data invalid or missing content size '
795 'in frame header')
1002 'in frame header')
796
1003
797 result_buffer = ffi.new('char[]', max_output_size)
1004 result_buffer = ffi.new('char[]', max_output_size)
798 result_size = max_output_size
1005 result_size = max_output_size
799
1006
800 if ddict:
1007 if ddict:
801 zresult = lib.ZSTD_decompress_usingDDict(dctx,
1008 zresult = lib.ZSTD_decompress_usingDDict(dctx,
802 result_buffer, result_size,
1009 result_buffer, result_size,
803 data_buffer, len(data_buffer),
1010 data_buffer, len(data_buffer),
804 ddict)
1011 ddict)
805 else:
1012 else:
806 zresult = lib.ZSTD_decompressDCtx(dctx,
1013 zresult = lib.ZSTD_decompressDCtx(dctx,
807 result_buffer, result_size,
1014 result_buffer, result_size,
808 data_buffer, len(data_buffer))
1015 data_buffer, len(data_buffer))
809 if lib.ZSTD_isError(zresult):
1016 if lib.ZSTD_isError(zresult):
810 raise ZstdError('decompression error: %s' %
1017 raise ZstdError('decompression error: %s' %
811 ffi.string(lib.ZSTD_getErrorName(zresult)))
1018 ffi.string(lib.ZSTD_getErrorName(zresult)))
812 elif output_size and zresult != output_size:
1019 elif output_size and zresult != output_size:
813 raise ZstdError('decompression error: decompressed %d bytes; expected %d' %
1020 raise ZstdError('decompression error: decompressed %d bytes; expected %d' %
814 (zresult, output_size))
1021 (zresult, output_size))
815
1022
816 return ffi.buffer(result_buffer, zresult)[:]
1023 return ffi.buffer(result_buffer, zresult)[:]
817
1024
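As the branches above show, one-shot decompress() sizes its output from the frame's recorded content size and otherwise requires an explicit cap. A small sketch of both paths (the import name `zstd` and the compressor-side write_content_size flag are assumed, not shown in this diff):

    import zstd  # assumed import name

    data = b'x' * 1000

    # Frame that records its content size: decompress() can size the output itself.
    frame = zstd.ZstdCompressor(write_content_size=True).compress(data)
    assert zstd.ZstdDecompressor().decompress(frame) == data

    # Frame without a content size: a max_output_size bound is required,
    # otherwise the ZstdError raised above is triggered.
    frame = zstd.ZstdCompressor().compress(data)
    assert zstd.ZstdDecompressor().decompress(frame, max_output_size=len(data)) == data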
818 def decompressobj(self):
1025 def decompressobj(self):
1026 self._ensure_dstream()
819 return ZstdDecompressionObj(self)
1027 return ZstdDecompressionObj(self)
820
1028
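decompressobj() provides a zlib-like incremental interface; the _ensure_dstream() call added above guarantees the object starts from a fresh or reset stream. A hedged usage sketch (import name and chunk size assumed):

    import zstd  # assumed import name

    payload = b'a' * 4096
    frame = zstd.ZstdCompressor().compress(payload)

    dobj = zstd.ZstdDecompressor().decompressobj()
    # Feed the frame in arbitrary slices; decompress() returns whatever output
    # is available for each slice.
    chunks = [dobj.decompress(frame[i:i + 512]) for i in range(0, len(frame), 512)]
    assert b''.join(chunks) == payload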
821 def read_from(self, reader, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
1029 def read_from(self, reader, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
822 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
1030 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
823 skip_bytes=0):
1031 skip_bytes=0):
824 if skip_bytes >= read_size:
1032 if skip_bytes >= read_size:
825 raise ValueError('skip_bytes must be smaller than read_size')
1033 raise ValueError('skip_bytes must be smaller than read_size')
826
1034
827 if hasattr(reader, 'read'):
1035 if hasattr(reader, 'read'):
828 have_read = True
1036 have_read = True
829 elif hasattr(reader, '__getitem__'):
1037 elif hasattr(reader, '__getitem__'):
830 have_read = False
1038 have_read = False
831 buffer_offset = 0
1039 buffer_offset = 0
832 size = len(reader)
1040 size = len(reader)
833 else:
1041 else:
834 raise ValueError('must pass an object with a read() method or '
1042 raise ValueError('must pass an object with a read() method or '
835 'that conforms to the buffer protocol')
1043 'that conforms to the buffer protocol')
836
1044
837 if skip_bytes:
1045 if skip_bytes:
838 if have_read:
1046 if have_read:
839 reader.read(skip_bytes)
1047 reader.read(skip_bytes)
840 else:
1048 else:
841 if skip_bytes > size:
1049 if skip_bytes > size:
842 raise ValueError('skip_bytes larger than first input chunk')
1050 raise ValueError('skip_bytes larger than first input chunk')
843
1051
844 buffer_offset = skip_bytes
1052 buffer_offset = skip_bytes
845
1053
846 dstream = self._get_dstream()
1054 self._ensure_dstream()
847
1055
848 in_buffer = ffi.new('ZSTD_inBuffer *')
1056 in_buffer = ffi.new('ZSTD_inBuffer *')
849 out_buffer = ffi.new('ZSTD_outBuffer *')
1057 out_buffer = ffi.new('ZSTD_outBuffer *')
850
1058
851 dst_buffer = ffi.new('char[]', write_size)
1059 dst_buffer = ffi.new('char[]', write_size)
852 out_buffer.dst = dst_buffer
1060 out_buffer.dst = dst_buffer
853 out_buffer.size = len(dst_buffer)
1061 out_buffer.size = len(dst_buffer)
854 out_buffer.pos = 0
1062 out_buffer.pos = 0
855
1063
856 while True:
1064 while True:
857 assert out_buffer.pos == 0
1065 assert out_buffer.pos == 0
858
1066
859 if have_read:
1067 if have_read:
860 read_result = reader.read(read_size)
1068 read_result = reader.read(read_size)
861 else:
1069 else:
862 remaining = size - buffer_offset
1070 remaining = size - buffer_offset
863 slice_size = min(remaining, read_size)
1071 slice_size = min(remaining, read_size)
864 read_result = reader[buffer_offset:buffer_offset + slice_size]
1072 read_result = reader[buffer_offset:buffer_offset + slice_size]
865 buffer_offset += slice_size
1073 buffer_offset += slice_size
866
1074
867 # No new input. Break out of read loop.
1075 # No new input. Break out of read loop.
868 if not read_result:
1076 if not read_result:
869 break
1077 break
870
1078
871 # Feed all read data into decompressor and emit output until
1079 # Feed all read data into decompressor and emit output until
872 # exhausted.
1080 # exhausted.
873 read_buffer = ffi.from_buffer(read_result)
1081 read_buffer = ffi.from_buffer(read_result)
874 in_buffer.src = read_buffer
1082 in_buffer.src = read_buffer
875 in_buffer.size = len(read_buffer)
1083 in_buffer.size = len(read_buffer)
876 in_buffer.pos = 0
1084 in_buffer.pos = 0
877
1085
878 while in_buffer.pos < in_buffer.size:
1086 while in_buffer.pos < in_buffer.size:
879 assert out_buffer.pos == 0
1087 assert out_buffer.pos == 0
880
1088
881 zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer)
1089 zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
882 if lib.ZSTD_isError(zresult):
1090 if lib.ZSTD_isError(zresult):
883 raise ZstdError('zstd decompress error: %s' %
1091 raise ZstdError('zstd decompress error: %s' %
884 ffi.string(lib.ZSTD_getErrorName(zresult)))
1092 ffi.string(lib.ZSTD_getErrorName(zresult)))
885
1093
886 if out_buffer.pos:
1094 if out_buffer.pos:
887 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1095 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
888 out_buffer.pos = 0
1096 out_buffer.pos = 0
889 yield data
1097 yield data
890
1098
891 if zresult == 0:
1099 if zresult == 0:
892 return
1100 return
893
1101
894 # Repeat loop to collect more input data.
1102 # Repeat loop to collect more input data.
895 continue
1103 continue
896
1104
897 # If we get here, input is exhausted.
1105 # If we get here, input is exhausted.
898
1106
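read_from() is a generator: it pulls read_size bytes at a time from a file-like object or buffer, feeds them through the shared DStream, and yields decompressed chunks of up to write_size bytes. A minimal sketch (import name and sample data assumed):

    import io
    import zstd  # assumed import name

    payload = b'streaming payload' * 100
    frame = zstd.ZstdCompressor().compress(payload)

    dctx = zstd.ZstdDecompressor()
    decompressed = b''.join(dctx.read_from(io.BytesIO(frame), read_size=8192))
    assert decompressed == payload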
899 def write_to(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1107 def write_to(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
900 if not hasattr(writer, 'write'):
1108 if not hasattr(writer, 'write'):
901 raise ValueError('must pass an object with a write() method')
1109 raise ValueError('must pass an object with a write() method')
902
1110
903 return ZstdDecompressionWriter(self, writer, write_size)
1111 return ZstdDecompressionWriter(self, writer, write_size)
904
1112
905 def copy_stream(self, ifh, ofh,
1113 def copy_stream(self, ifh, ofh,
906 read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
1114 read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
907 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1115 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
908 if not hasattr(ifh, 'read'):
1116 if not hasattr(ifh, 'read'):
909 raise ValueError('first argument must have a read() method')
1117 raise ValueError('first argument must have a read() method')
910 if not hasattr(ofh, 'write'):
1118 if not hasattr(ofh, 'write'):
911 raise ValueError('second argument must have a write() method')
1119 raise ValueError('second argument must have a write() method')
912
1120
913 dstream = self._get_dstream()
1121 self._ensure_dstream()
914
1122
915 in_buffer = ffi.new('ZSTD_inBuffer *')
1123 in_buffer = ffi.new('ZSTD_inBuffer *')
916 out_buffer = ffi.new('ZSTD_outBuffer *')
1124 out_buffer = ffi.new('ZSTD_outBuffer *')
917
1125
918 dst_buffer = ffi.new('char[]', write_size)
1126 dst_buffer = ffi.new('char[]', write_size)
919 out_buffer.dst = dst_buffer
1127 out_buffer.dst = dst_buffer
920 out_buffer.size = write_size
1128 out_buffer.size = write_size
921 out_buffer.pos = 0
1129 out_buffer.pos = 0
922
1130
923 total_read, total_write = 0, 0
1131 total_read, total_write = 0, 0
924
1132
925 # Read all available input.
1133 # Read all available input.
926 while True:
1134 while True:
927 data = ifh.read(read_size)
1135 data = ifh.read(read_size)
928 if not data:
1136 if not data:
929 break
1137 break
930
1138
931 data_buffer = ffi.from_buffer(data)
1139 data_buffer = ffi.from_buffer(data)
932 total_read += len(data_buffer)
1140 total_read += len(data_buffer)
933 in_buffer.src = data_buffer
1141 in_buffer.src = data_buffer
934 in_buffer.size = len(data_buffer)
1142 in_buffer.size = len(data_buffer)
935 in_buffer.pos = 0
1143 in_buffer.pos = 0
936
1144
937 # Flush all read data to output.
1145 # Flush all read data to output.
938 while in_buffer.pos < in_buffer.size:
1146 while in_buffer.pos < in_buffer.size:
939 zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer)
1147 zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
940 if lib.ZSTD_isError(zresult):
1148 if lib.ZSTD_isError(zresult):
941 raise ZstdError('zstd decompressor error: %s' %
1149 raise ZstdError('zstd decompressor error: %s' %
942 ffi.string(lib.ZSTD_getErrorName(zresult)))
1150 ffi.string(lib.ZSTD_getErrorName(zresult)))
943
1151
944 if out_buffer.pos:
1152 if out_buffer.pos:
945 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
1153 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
946 total_write += out_buffer.pos
1154 total_write += out_buffer.pos
947 out_buffer.pos = 0
1155 out_buffer.pos = 0
948
1156
949 # Continue loop to keep reading.
1157 # Continue loop to keep reading.
950
1158
951 return total_read, total_write
1159 return total_read, total_write
952
1160
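copy_stream() pipes a compressed source into a decompressed sink and reports how many bytes moved in each direction, matching the (total_read, total_write) return above. A hedged sketch using in-memory streams (import name and sample data assumed):

    import io
    import zstd  # assumed import name

    payload = b'copy me' * 50
    src = io.BytesIO(zstd.ZstdCompressor().compress(payload))
    dst = io.BytesIO()

    read_count, write_count = zstd.ZstdDecompressor().copy_stream(src, dst)
    assert dst.getvalue() == payload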
953 def decompress_content_dict_chain(self, frames):
1161 def decompress_content_dict_chain(self, frames):
954 if not isinstance(frames, list):
1162 if not isinstance(frames, list):
955 raise TypeError('argument must be a list')
1163 raise TypeError('argument must be a list')
956
1164
957 if not frames:
1165 if not frames:
958 raise ValueError('empty input chain')
1166 raise ValueError('empty input chain')
959
1167
960 # First chunk should not be using a dictionary. We handle it specially.
1168 # First chunk should not be using a dictionary. We handle it specially.
961 chunk = frames[0]
1169 chunk = frames[0]
962 if not isinstance(chunk, bytes_type):
1170 if not isinstance(chunk, bytes_type):
963 raise ValueError('chunk 0 must be bytes')
1171 raise ValueError('chunk 0 must be bytes')
964
1172
965 # All chunks should be zstd frames and should have content size set.
1173 # All chunks should be zstd frames and should have content size set.
966 chunk_buffer = ffi.from_buffer(chunk)
1174 chunk_buffer = ffi.from_buffer(chunk)
967 params = ffi.new('ZSTD_frameParams *')
1175 params = ffi.new('ZSTD_frameParams *')
968 zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer))
1176 zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer))
969 if lib.ZSTD_isError(zresult):
1177 if lib.ZSTD_isError(zresult):
970 raise ValueError('chunk 0 is not a valid zstd frame')
1178 raise ValueError('chunk 0 is not a valid zstd frame')
971 elif zresult:
1179 elif zresult:
972 raise ValueError('chunk 0 is too small to contain a zstd frame')
1180 raise ValueError('chunk 0 is too small to contain a zstd frame')
973
1181
974 if not params.frameContentSize:
1182 if not params.frameContentSize:
975 raise ValueError('chunk 0 missing content size in frame')
1183 raise ValueError('chunk 0 missing content size in frame')
976
1184
977 dctx = lib.ZSTD_createDCtx()
1185 dctx = lib.ZSTD_createDCtx()
978 if dctx == ffi.NULL:
1186 if dctx == ffi.NULL:
979 raise MemoryError()
1187 raise MemoryError()
980
1188
981 dctx = ffi.gc(dctx, lib.ZSTD_freeDCtx)
1189 dctx = ffi.gc(dctx, lib.ZSTD_freeDCtx)
982
1190
983 last_buffer = ffi.new('char[]', params.frameContentSize)
1191 last_buffer = ffi.new('char[]', params.frameContentSize)
984
1192
985 zresult = lib.ZSTD_decompressDCtx(dctx, last_buffer, len(last_buffer),
1193 zresult = lib.ZSTD_decompressDCtx(dctx, last_buffer, len(last_buffer),
986 chunk_buffer, len(chunk_buffer))
1194 chunk_buffer, len(chunk_buffer))
987 if lib.ZSTD_isError(zresult):
1195 if lib.ZSTD_isError(zresult):
988 raise ZstdError('could not decompress chunk 0: %s' %
1196 raise ZstdError('could not decompress chunk 0: %s' %
989 ffi.string(lib.ZSTD_getErrorName(zresult)))
1197 ffi.string(lib.ZSTD_getErrorName(zresult)))
990
1198
991 # Special case of chain length of 1
1199 # Special case of chain length of 1
992 if len(frames) == 1:
1200 if len(frames) == 1:
993 return ffi.buffer(last_buffer, len(last_buffer))[:]
1201 return ffi.buffer(last_buffer, len(last_buffer))[:]
994
1202
995 i = 1
1203 i = 1
996 while i < len(frames):
1204 while i < len(frames):
997 chunk = frames[i]
1205 chunk = frames[i]
998 if not isinstance(chunk, bytes_type):
1206 if not isinstance(chunk, bytes_type):
999 raise ValueError('chunk %d must be bytes' % i)
1207 raise ValueError('chunk %d must be bytes' % i)
1000
1208
1001 chunk_buffer = ffi.from_buffer(chunk)
1209 chunk_buffer = ffi.from_buffer(chunk)
1002 zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer))
1210 zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer))
1003 if lib.ZSTD_isError(zresult):
1211 if lib.ZSTD_isError(zresult):
1004 raise ValueError('chunk %d is not a valid zstd frame' % i)
1212 raise ValueError('chunk %d is not a valid zstd frame' % i)
1005 elif zresult:
1213 elif zresult:
1006 raise ValueError('chunk %d is too small to contain a zstd frame' % i)
1214 raise ValueError('chunk %d is too small to contain a zstd frame' % i)
1007
1215
1008 if not params.frameContentSize:
1216 if not params.frameContentSize:
1009 raise ValueError('chunk %d missing content size in frame' % i)
1217 raise ValueError('chunk %d missing content size in frame' % i)
1010
1218
1011 dest_buffer = ffi.new('char[]', params.frameContentSize)
1219 dest_buffer = ffi.new('char[]', params.frameContentSize)
1012
1220
1013 zresult = lib.ZSTD_decompress_usingDict(dctx, dest_buffer, len(dest_buffer),
1221 zresult = lib.ZSTD_decompress_usingDict(dctx, dest_buffer, len(dest_buffer),
1014 chunk_buffer, len(chunk_buffer),
1222 chunk_buffer, len(chunk_buffer),
1015 last_buffer, len(last_buffer))
1223 last_buffer, len(last_buffer))
1016 if lib.ZSTD_isError(zresult):
1224 if lib.ZSTD_isError(zresult):
1017 raise ZstdError('could not decompress chunk %d' % i)
1225 raise ZstdError('could not decompress chunk %d' % i)
1018
1226
1019 last_buffer = dest_buffer
1227 last_buffer = dest_buffer
1020 i += 1
1228 i += 1
1021
1229
1022 return ffi.buffer(last_buffer, len(last_buffer))[:]
1230 return ffi.buffer(last_buffer, len(last_buffer))[:]
1023
1231
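decompress_content_dict_chain() walks a delta chain in which every frame after the first was compressed using the previous uncompressed frame as its dictionary, and every frame records its content size. A sketch of producing and consuming such a chain (the revision texts are invented; ZstdCompressionDict and write_content_size are assumed from the compressor half of this package):

    import zstd  # assumed import name

    revisions = [b'first revision of a file', b'first revision of a file, amended']

    frames = [zstd.ZstdCompressor(write_content_size=True).compress(revisions[0])]
    for prev, cur in zip(revisions, revisions[1:]):
        cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(prev),
                                   write_content_size=True)
        frames.append(cctx.compress(cur))

    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress_content_dict_chain(frames) == revisions[-1]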
1024 def _get_dstream(self):
1232 def _ensure_dstream(self):
1025 dstream = lib.ZSTD_createDStream()
1233 if self._dstream:
1026 if dstream == ffi.NULL:
1234 zresult = lib.ZSTD_resetDStream(self._dstream)
1235 if lib.ZSTD_isError(zresult):
1236 raise ZstdError('could not reset DStream: %s' %
1237 ffi.string(lib.ZSTD_getErrorName(zresult)))
1238
1239 return
1240
1241 self._dstream = lib.ZSTD_createDStream()
1242 if self._dstream == ffi.NULL:
1027 raise MemoryError()
1243 raise MemoryError()
1028
1244
1029 dstream = ffi.gc(dstream, lib.ZSTD_freeDStream)
1245 self._dstream = ffi.gc(self._dstream, lib.ZSTD_freeDStream)
1030
1246
1031 if self._dict_data:
1247 if self._dict_data:
1032 zresult = lib.ZSTD_initDStream_usingDict(dstream,
1248 zresult = lib.ZSTD_initDStream_usingDict(self._dstream,
1033 self._dict_data.as_bytes(),
1249 self._dict_data.as_bytes(),
1034 len(self._dict_data))
1250 len(self._dict_data))
1035 else:
1251 else:
1036 zresult = lib.ZSTD_initDStream(dstream)
1252 zresult = lib.ZSTD_initDStream(self._dstream)
1037
1253
1038 if lib.ZSTD_isError(zresult):
1254 if lib.ZSTD_isError(zresult):
1255 self._dstream = None
1039 raise ZstdError('could not initialize DStream: %s' %
1256 raise ZstdError('could not initialize DStream: %s' %
1040 ffi.string(lib.ZSTD_getErrorName(zresult)))
1257 ffi.string(lib.ZSTD_getErrorName(zresult)))
1041
1042 return dstream
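The hunk above replaces _get_dstream(), which allocated a fresh DStream for every streaming call and returned it, with _ensure_dstream(), which caches the stream on self._dstream, resets it with ZSTD_resetDStream on reuse, and clears the attribute if initialization fails. The practical effect, sketched below under the assumed import name `zstd`, is that one ZstdDecompressor can be reused across streaming operations without reallocating the DStream:

    import io
    import zstd  # assumed import name

    frame = zstd.ZstdCompressor().compress(b'reusable')
    dctx = zstd.ZstdDecompressor()

    # Both iterations share the cached DStream; the second call only resets it.
    first = b''.join(dctx.read_from(io.BytesIO(frame)))
    second = b''.join(dctx.read_from(io.BytesIO(frame)))
    assert first == second == b'reusable'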
@@ -1,41 +1,44 @@
1 #require test-repo
1 #require test-repo
2
2
3 $ . "$TESTDIR/helpers-testrepo.sh"
3 $ . "$TESTDIR/helpers-testrepo.sh"
4 $ cd "$TESTDIR"/..
4 $ cd "$TESTDIR"/..
5
5
6 $ hg files 'set:(**.py)' | sed 's|\\|/|g' | xargs python contrib/check-py3-compat.py
6 $ hg files 'set:(**.py)' | sed 's|\\|/|g' | xargs python contrib/check-py3-compat.py
7 contrib/python-zstandard/setup.py not using absolute_import
7 contrib/python-zstandard/setup.py not using absolute_import
8 contrib/python-zstandard/setup_zstd.py not using absolute_import
8 contrib/python-zstandard/setup_zstd.py not using absolute_import
9 contrib/python-zstandard/tests/common.py not using absolute_import
9 contrib/python-zstandard/tests/common.py not using absolute_import
10 contrib/python-zstandard/tests/test_buffer_util.py not using absolute_import
10 contrib/python-zstandard/tests/test_compressor.py not using absolute_import
11 contrib/python-zstandard/tests/test_compressor.py not using absolute_import
12 contrib/python-zstandard/tests/test_compressor_fuzzing.py not using absolute_import
11 contrib/python-zstandard/tests/test_data_structures.py not using absolute_import
13 contrib/python-zstandard/tests/test_data_structures.py not using absolute_import
14 contrib/python-zstandard/tests/test_data_structures_fuzzing.py not using absolute_import
12 contrib/python-zstandard/tests/test_decompressor.py not using absolute_import
15 contrib/python-zstandard/tests/test_decompressor.py not using absolute_import
16 contrib/python-zstandard/tests/test_decompressor_fuzzing.py not using absolute_import
13 contrib/python-zstandard/tests/test_estimate_sizes.py not using absolute_import
17 contrib/python-zstandard/tests/test_estimate_sizes.py not using absolute_import
14 contrib/python-zstandard/tests/test_module_attributes.py not using absolute_import
18 contrib/python-zstandard/tests/test_module_attributes.py not using absolute_import
15 contrib/python-zstandard/tests/test_roundtrip.py not using absolute_import
16 contrib/python-zstandard/tests/test_train_dictionary.py not using absolute_import
19 contrib/python-zstandard/tests/test_train_dictionary.py not using absolute_import
17 i18n/check-translation.py not using absolute_import
20 i18n/check-translation.py not using absolute_import
18 setup.py not using absolute_import
21 setup.py not using absolute_import
19 tests/test-demandimport.py not using absolute_import
22 tests/test-demandimport.py not using absolute_import
20
23
21 #if py3exe
24 #if py3exe
22 $ hg files 'set:(**.py) - grep(pygments)' -X hgext/fsmonitor/pywatchman \
25 $ hg files 'set:(**.py) - grep(pygments)' -X hgext/fsmonitor/pywatchman \
23 > | sed 's|\\|/|g' | xargs $PYTHON3 contrib/check-py3-compat.py \
26 > | sed 's|\\|/|g' | xargs $PYTHON3 contrib/check-py3-compat.py \
24 > | sed 's/[0-9][0-9]*)$/*)/'
27 > | sed 's/[0-9][0-9]*)$/*)/'
25 hgext/convert/transport.py: error importing: <*Error> No module named 'svn.client' (error at transport.py:*) (glob)
28 hgext/convert/transport.py: error importing: <*Error> No module named 'svn.client' (error at transport.py:*) (glob)
26 hgext/fsmonitor/state.py: error importing: <SyntaxError> from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at watchmanclient.py:*)
29 hgext/fsmonitor/state.py: error importing: <SyntaxError> from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at watchmanclient.py:*)
27 hgext/fsmonitor/watchmanclient.py: error importing: <SyntaxError> from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at watchmanclient.py:*)
30 hgext/fsmonitor/watchmanclient.py: error importing: <SyntaxError> from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at watchmanclient.py:*)
28 mercurial/cffi/bdiff.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob)
31 mercurial/cffi/bdiff.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob)
29 mercurial/cffi/mpatch.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob)
32 mercurial/cffi/mpatch.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob)
30 mercurial/cffi/osutil.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob)
33 mercurial/cffi/osutil.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob)
31 mercurial/scmwindows.py: error importing: <*Error> No module named 'msvcrt' (error at win32.py:*) (glob)
34 mercurial/scmwindows.py: error importing: <*Error> No module named 'msvcrt' (error at win32.py:*) (glob)
32 mercurial/win32.py: error importing: <*Error> No module named 'msvcrt' (error at win32.py:*) (glob)
35 mercurial/win32.py: error importing: <*Error> No module named 'msvcrt' (error at win32.py:*) (glob)
33 mercurial/windows.py: error importing: <*Error> No module named 'msvcrt' (error at windows.py:*) (glob)
36 mercurial/windows.py: error importing: <*Error> No module named 'msvcrt' (error at windows.py:*) (glob)
34
37
35 #endif
38 #endif
36
39
37 #if py3exe py3pygments
40 #if py3exe py3pygments
38 $ hg files 'set:(**.py) and grep(pygments)' | sed 's|\\|/|g' \
41 $ hg files 'set:(**.py) and grep(pygments)' | sed 's|\\|/|g' \
39 > | xargs $PYTHON3 contrib/check-py3-compat.py \
42 > | xargs $PYTHON3 contrib/check-py3-compat.py \
40 > | sed 's/[0-9][0-9]*)$/*)/'
43 > | sed 's/[0-9][0-9]*)$/*)/'
41 #endif
44 #endif