zstandard: vendor python-zstandard 0.10.1...
Gregory Szorc
r40157:73fef626 default

The requested changes are too big and content was truncated.

@@ -0,0 +1,360 b''
1 /**
2 * Copyright (c) 2018-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8
9 #include "python-zstandard.h"
10
11 extern PyObject* ZstdError;
12
13 PyDoc_STRVAR(ZstdCompressionChunkerIterator__doc__,
14 "Iterator of output chunks from ZstdCompressionChunker.\n"
15 );
16
17 static void ZstdCompressionChunkerIterator_dealloc(ZstdCompressionChunkerIterator* self) {
18 Py_XDECREF(self->chunker);
19
20 PyObject_Del(self);
21 }
22
23 static PyObject* ZstdCompressionChunkerIterator_iter(PyObject* self) {
24 Py_INCREF(self);
25 return self;
26 }
27
28 static PyObject* ZstdCompressionChunkerIterator_iternext(ZstdCompressionChunkerIterator* self) {
29 size_t zresult;
30 PyObject* chunk;
31 ZstdCompressionChunker* chunker = self->chunker;
32 ZSTD_EndDirective zFlushMode;
33
34 if (self->mode != compressionchunker_mode_normal && chunker->input.pos != chunker->input.size) {
35 PyErr_SetString(ZstdError, "input should have been fully consumed before calling flush() or finish()");
36 return NULL;
37 }
38
39 if (chunker->finished) {
40 return NULL;
41 }
42
43 /* If we have data left in the input, consume it. */
44 while (chunker->input.pos < chunker->input.size) {
45 Py_BEGIN_ALLOW_THREADS
46 zresult = ZSTD_compress_generic(chunker->compressor->cctx, &chunker->output,
47 &chunker->input, ZSTD_e_continue);
48 Py_END_ALLOW_THREADS
49
50 /* Input is fully consumed. */
51 if (chunker->input.pos == chunker->input.size) {
52 chunker->input.src = NULL;
53 chunker->input.pos = 0;
54 chunker->input.size = 0;
55 PyBuffer_Release(&chunker->inBuffer);
56 }
57
58 if (ZSTD_isError(zresult)) {
59 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
60 return NULL;
61 }
62
63 /* If it produced a full output chunk, emit it. */
64 if (chunker->output.pos == chunker->output.size) {
65 chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
66 if (!chunk) {
67 return NULL;
68 }
69
70 chunker->output.pos = 0;
71
72 return chunk;
73 }
74
75 /* Else continue to compress available input data. */
76 }
77
78 /* We also need this here for the special case of an empty input buffer. */
79 if (chunker->input.pos == chunker->input.size) {
80 chunker->input.src = NULL;
81 chunker->input.pos = 0;
82 chunker->input.size = 0;
83 PyBuffer_Release(&chunker->inBuffer);
84 }
85
86 /* No more input data. A partial chunk may be in chunker->output.
87 * If we're in normal compression mode, we're done. Otherwise if we're in
88 * flush or finish mode, we need to emit what data remains.
89 */
90 if (self->mode == compressionchunker_mode_normal) {
91 /* We don't need to set StopIteration. */
92 return NULL;
93 }
94
95 if (self->mode == compressionchunker_mode_flush) {
96 zFlushMode = ZSTD_e_flush;
97 }
98 else if (self->mode == compressionchunker_mode_finish) {
99 zFlushMode = ZSTD_e_end;
100 }
101 else {
102 PyErr_SetString(ZstdError, "unhandled compression mode; this should never happen");
103 return NULL;
104 }
105
106 Py_BEGIN_ALLOW_THREADS
107 zresult = ZSTD_compress_generic(chunker->compressor->cctx, &chunker->output,
108 &chunker->input, zFlushMode);
109 Py_END_ALLOW_THREADS
110
111 if (ZSTD_isError(zresult)) {
112 PyErr_Format(ZstdError, "zstd compress error: %s",
113 ZSTD_getErrorName(zresult));
114 return NULL;
115 }
116
117 if (!zresult && chunker->output.pos == 0) {
118 return NULL;
119 }
120
121 chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
122 if (!chunk) {
123 return NULL;
124 }
125
126 chunker->output.pos = 0;
127
128 if (!zresult && self->mode == compressionchunker_mode_finish) {
129 chunker->finished = 1;
130 }
131
132 return chunk;
133 }
134
135 PyTypeObject ZstdCompressionChunkerIteratorType = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "zstd.ZstdCompressionChunkerIterator", /* tp_name */
138 sizeof(ZstdCompressionChunkerIterator), /* tp_basicsize */
139 0, /* tp_itemsize */
140 (destructor)ZstdCompressionChunkerIterator_dealloc, /* tp_dealloc */
141 0, /* tp_print */
142 0, /* tp_getattr */
143 0, /* tp_setattr */
144 0, /* tp_compare */
145 0, /* tp_repr */
146 0, /* tp_as_number */
147 0, /* tp_as_sequence */
148 0, /* tp_as_mapping */
149 0, /* tp_hash */
150 0, /* tp_call */
151 0, /* tp_str */
152 0, /* tp_getattro */
153 0, /* tp_setattro */
154 0, /* tp_as_buffer */
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
156 ZstdCompressionChunkerIterator__doc__, /* tp_doc */
157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 ZstdCompressionChunkerIterator_iter, /* tp_iter */
162 (iternextfunc)ZstdCompressionChunkerIterator_iternext, /* tp_iternext */
163 0, /* tp_methods */
164 0, /* tp_members */
165 0, /* tp_getset */
166 0, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 PyType_GenericNew, /* tp_new */
174 };
175
176 PyDoc_STRVAR(ZstdCompressionChunker__doc__,
177 "Compress chunks iteratively into exact chunk sizes.\n"
178 );
179
180 static void ZstdCompressionChunker_dealloc(ZstdCompressionChunker* self) {
181 PyBuffer_Release(&self->inBuffer);
182 self->input.src = NULL;
183
184 PyMem_Free(self->output.dst);
185 self->output.dst = NULL;
186
187 Py_XDECREF(self->compressor);
188
189 PyObject_Del(self);
190 }
191
192 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_compress(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
193 static char* kwlist[] = {
194 "data",
195 NULL
196 };
197
198 ZstdCompressionChunkerIterator* result;
199
200 if (self->finished) {
201 PyErr_SetString(ZstdError, "cannot call compress() after compression finished");
202 return NULL;
203 }
204
205 if (self->inBuffer.obj) {
206 PyErr_SetString(ZstdError,
207 "cannot perform operation before consuming output from previous operation");
208 return NULL;
209 }
210
211 #if PY_MAJOR_VERSION >= 3
212 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
213 #else
214 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
215 #endif
216 kwlist, &self->inBuffer)) {
217 return NULL;
218 }
219
220 if (!PyBuffer_IsContiguous(&self->inBuffer, 'C') || self->inBuffer.ndim > 1) {
221 PyErr_SetString(PyExc_ValueError,
222 "data buffer should be contiguous and have at most one dimension");
223 PyBuffer_Release(&self->inBuffer);
224 return NULL;
225 }
226
227 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
228 if (!result) {
229 PyBuffer_Release(&self->inBuffer);
230 return NULL;
231 }
232
233 self->input.src = self->inBuffer.buf;
234 self->input.size = self->inBuffer.len;
235 self->input.pos = 0;
236
237 result->chunker = self;
238 Py_INCREF(result->chunker);
239
240 result->mode = compressionchunker_mode_normal;
241
242 return result;
243 }
244
245 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_finish(ZstdCompressionChunker* self) {
246 ZstdCompressionChunkerIterator* result;
247
248 if (self->finished) {
249 PyErr_SetString(ZstdError, "cannot call finish() after compression finished");
250 return NULL;
251 }
252
253 if (self->inBuffer.obj) {
254 PyErr_SetString(ZstdError,
255 "cannot call finish() before consuming output from previous operation");
256 return NULL;
257 }
258
259 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
260 if (!result) {
261 return NULL;
262 }
263
264 result->chunker = self;
265 Py_INCREF(result->chunker);
266
267 result->mode = compressionchunker_mode_finish;
268
269 return result;
270 }
271
272 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_flush(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
273 ZstdCompressionChunkerIterator* result;
274
275 if (self->finished) {
276 PyErr_SetString(ZstdError, "cannot call flush() after compression finished");
277 return NULL;
278 }
279
280 if (self->inBuffer.obj) {
281 PyErr_SetString(ZstdError,
282 "cannot call flush() before consuming output from previous operation");
283 return NULL;
284 }
285
286 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
287 if (!result) {
288 return NULL;
289 }
290
291 result->chunker = self;
292 Py_INCREF(result->chunker);
293
294 result->mode = compressionchunker_mode_flush;
295
296 return result;
297 }
298
299 static PyMethodDef ZstdCompressionChunker_methods[] = {
300 { "compress", (PyCFunction)ZstdCompressionChunker_compress, METH_VARARGS | METH_KEYWORDS,
301 PyDoc_STR("compress data") },
302 { "finish", (PyCFunction)ZstdCompressionChunker_finish, METH_NOARGS,
303 PyDoc_STR("finish compression operation") },
304 { "flush", (PyCFunction)ZstdCompressionChunker_flush, METH_VARARGS | METH_KEYWORDS,
305 PyDoc_STR("finish compression operation") },
306 { NULL, NULL }
307 };
308
309 PyTypeObject ZstdCompressionChunkerType = {
310 PyVarObject_HEAD_INIT(NULL, 0)
311 "zstd.ZstdCompressionChunkerType", /* tp_name */
312 sizeof(ZstdCompressionChunker), /* tp_basicsize */
313 0, /* tp_itemsize */
314 (destructor)ZstdCompressionChunker_dealloc, /* tp_dealloc */
315 0, /* tp_print */
316 0, /* tp_getattr */
317 0, /* tp_setattr */
318 0, /* tp_compare */
319 0, /* tp_repr */
320 0, /* tp_as_number */
321 0, /* tp_as_sequence */
322 0, /* tp_as_mapping */
323 0, /* tp_hash */
324 0, /* tp_call */
325 0, /* tp_str */
326 0, /* tp_getattro */
327 0, /* tp_setattro */
328 0, /* tp_as_buffer */
329 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
330 ZstdCompressionChunker__doc__, /* tp_doc */
331 0, /* tp_traverse */
332 0, /* tp_clear */
333 0, /* tp_richcompare */
334 0, /* tp_weaklistoffset */
335 0, /* tp_iter */
336 0, /* tp_iternext */
337 ZstdCompressionChunker_methods, /* tp_methods */
338 0, /* tp_members */
339 0, /* tp_getset */
340 0, /* tp_base */
341 0, /* tp_dict */
342 0, /* tp_descr_get */
343 0, /* tp_descr_set */
344 0, /* tp_dictoffset */
345 0, /* tp_init */
346 0, /* tp_alloc */
347 PyType_GenericNew, /* tp_new */
348 };
349
350 void compressionchunker_module_init(PyObject* module) {
351 Py_TYPE(&ZstdCompressionChunkerIteratorType) = &PyType_Type;
352 if (PyType_Ready(&ZstdCompressionChunkerIteratorType) < 0) {
353 return;
354 }
355
356 Py_TYPE(&ZstdCompressionChunkerType) = &PyType_Type;
357 if (PyType_Ready(&ZstdCompressionChunkerType) < 0) {
358 return;
359 }
360 }
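
For orientation, the chunker implemented by this file is exposed in Python as
``ZstdCompressor.chunker()``: ``compress()``, ``flush()``, and ``finish()`` each
return an iterator of output chunks, and the iterator from a previous call must
be fully consumed before the next call (see the "consuming output from previous
operation" errors above). A minimal usage sketch follows; the ``chunk_size``
keyword, the buffer sizes, and the ``write_output()`` helper are illustrative
assumptions, not taken from this diff::

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    # chunk_size is an assumed keyword controlling the fixed output chunk size.
    chunker = cctx.chunker(chunk_size=32768)

    with open('input.bin', 'rb') as fh:
        while True:
            data = fh.read(65536)
            if not data:
                break
            # compress() returns an iterator; each yielded chunk is exactly
            # chunk_size bytes.
            for out_chunk in chunker.compress(data):
                # write_output() is a placeholder for whatever consumes chunks.
                write_output(out_chunk)

    # finish() ends the frame; the final chunk(s) may be smaller than chunk_size.
    for out_chunk in chunker.finish():
        write_output(out_chunk)
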
@@ -0,0 +1,44 b''
1 /* ******************************************************************
2 debug
3 Part of FSE library
4 Copyright (C) 2013-present, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
34
35
36 /*
37 * This module only hosts one global variable
38 * which can be used to dynamically influence the verbosity of traces,
39 * such as DEBUGLOG and RAWLOG
40 */
41
42 #include "debug.h"
43
44 int g_debuglevel = DEBUGLEVEL;
@@ -0,0 +1,123 b''
1 /* ******************************************************************
2 debug
3 Part of FSE library
4 Copyright (C) 2013-present, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
34
35
36 /*
37 * The purpose of this header is to enable debug functions.
38 * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time,
39 * and DEBUG_STATIC_ASSERT() for compile-time.
40 *
41 * By default, DEBUGLEVEL==0, which means run-time debug is disabled.
42 *
43 * Level 1 enables assert() only.
44 * Starting level 2, traces can be generated and pushed to stderr.
45 * The higher the level, the more verbose the traces.
46 *
47 * It's possible to dynamically adjust level using variable g_debug_level,
48 * which is only declared if DEBUGLEVEL>=2,
49 * and is a global variable, not multi-thread protected (use with care)
50 */
51
52 #ifndef DEBUG_H_12987983217
53 #define DEBUG_H_12987983217
54
55 #if defined (__cplusplus)
56 extern "C" {
57 #endif
58
59
60 /* static assert is triggered at compile time, leaving no runtime artefact,
61 * but can only work with compile-time constants.
62 * This variant can only be used inside a function. */
63 #define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
64
65
66 /* DEBUGLEVEL is expected to be defined externally,
67 * typically through compiler command line.
68 * Value must be a number. */
69 #ifndef DEBUGLEVEL
70 # define DEBUGLEVEL 0
71 #endif
72
73 /* recommended values for DEBUGLEVEL :
74 * 0 : no debug, all run-time functions disabled
75 * 1 : no display, enables assert() only
76 * 2 : reserved, for currently active debug path
77 * 3 : events once per object lifetime (CCtx, CDict, etc.)
78 * 4 : events once per frame
79 * 5 : events once per block
80 * 6 : events once per sequence (verbose)
81 * 7+: events at every position (*very* verbose)
82 *
83 * It's generally inconvenient to output traces > 5.
84 * In which case, it's possible to selectively enable higher verbosity levels
85 * by modifying g_debug_level.
86 */
87
88 #if (DEBUGLEVEL>=1)
89 # include <assert.h>
90 #else
91 # ifndef assert /* assert may be already defined, due to prior #include <assert.h> */
92 # define assert(condition) ((void)0) /* disable assert (default) */
93 # endif
94 #endif
95
96 #if (DEBUGLEVEL>=2)
97 # include <stdio.h>
98 extern int g_debuglevel; /* here, this variable is only declared,
99 it actually lives in debug.c,
100 and is shared by the whole process.
101 It's typically used to enable very verbose levels
102 on selective conditions (such as position in src) */
103
104 # define RAWLOG(l, ...) { \
105 if (l<=g_debuglevel) { \
106 fprintf(stderr, __VA_ARGS__); \
107 } }
108 # define DEBUGLOG(l, ...) { \
109 if (l<=g_debuglevel) { \
110 fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
111 fprintf(stderr, " \n"); \
112 } }
113 #else
114 # define RAWLOG(l, ...) {} /* disabled */
115 # define DEBUGLOG(l, ...) {} /* disabled */
116 #endif
117
118
119 #if defined (__cplusplus)
120 }
121 #endif
122
123 #endif /* DEBUG_H_12987983217 */
@@ -1,88 +1,95 b''
1 1 # Files that just need to be migrated to the formatter.
2 2 # Do not add new files here!
3 3 mercurial/cext/dirs.c
4 4 mercurial/cext/manifest.c
5 5 mercurial/cext/osutil.c
6 6 mercurial/cext/revlog.c
7 7 # Vendored code that we should never format:
8 8 contrib/python-zstandard/c-ext/bufferutil.c
9 contrib/python-zstandard/c-ext/compressionchunker.c
9 10 contrib/python-zstandard/c-ext/compressiondict.c
10 11 contrib/python-zstandard/c-ext/compressionparams.c
11 12 contrib/python-zstandard/c-ext/compressionreader.c
12 13 contrib/python-zstandard/c-ext/compressionwriter.c
13 14 contrib/python-zstandard/c-ext/compressobj.c
14 15 contrib/python-zstandard/c-ext/compressor.c
15 16 contrib/python-zstandard/c-ext/compressoriterator.c
16 17 contrib/python-zstandard/c-ext/constants.c
17 18 contrib/python-zstandard/c-ext/decompressionreader.c
18 19 contrib/python-zstandard/c-ext/decompressionwriter.c
19 20 contrib/python-zstandard/c-ext/decompressobj.c
20 21 contrib/python-zstandard/c-ext/decompressor.c
21 22 contrib/python-zstandard/c-ext/decompressoriterator.c
22 23 contrib/python-zstandard/c-ext/frameparams.c
23 24 contrib/python-zstandard/c-ext/python-zstandard.h
24 25 contrib/python-zstandard/zstd.c
25 26 contrib/python-zstandard/zstd/common/bitstream.h
26 27 contrib/python-zstandard/zstd/common/compiler.h
27 28 contrib/python-zstandard/zstd/common/cpu.h
29 contrib/python-zstandard/zstd/common/debug.c
30 contrib/python-zstandard/zstd/common/debug.h
28 31 contrib/python-zstandard/zstd/common/entropy_common.c
29 32 contrib/python-zstandard/zstd/common/error_private.c
30 33 contrib/python-zstandard/zstd/common/error_private.h
31 34 contrib/python-zstandard/zstd/common/fse_decompress.c
32 35 contrib/python-zstandard/zstd/common/fse.h
33 36 contrib/python-zstandard/zstd/common/huf.h
34 37 contrib/python-zstandard/zstd/common/mem.h
35 38 contrib/python-zstandard/zstd/common/pool.c
36 39 contrib/python-zstandard/zstd/common/pool.h
37 40 contrib/python-zstandard/zstd/common/threading.c
38 41 contrib/python-zstandard/zstd/common/threading.h
39 42 contrib/python-zstandard/zstd/common/xxhash.c
40 43 contrib/python-zstandard/zstd/common/xxhash.h
41 44 contrib/python-zstandard/zstd/common/zstd_common.c
42 45 contrib/python-zstandard/zstd/common/zstd_errors.h
43 46 contrib/python-zstandard/zstd/common/zstd_internal.h
44 47 contrib/python-zstandard/zstd/compress/fse_compress.c
48 contrib/python-zstandard/zstd/compress/hist.c
49 contrib/python-zstandard/zstd/compress/hist.h
45 50 contrib/python-zstandard/zstd/compress/huf_compress.c
46 51 contrib/python-zstandard/zstd/compress/zstd_compress.c
47 52 contrib/python-zstandard/zstd/compress/zstd_compress_internal.h
48 53 contrib/python-zstandard/zstd/compress/zstd_double_fast.c
49 54 contrib/python-zstandard/zstd/compress/zstd_double_fast.h
50 55 contrib/python-zstandard/zstd/compress/zstd_fast.c
51 56 contrib/python-zstandard/zstd/compress/zstd_fast.h
52 57 contrib/python-zstandard/zstd/compress/zstd_lazy.c
53 58 contrib/python-zstandard/zstd/compress/zstd_lazy.h
54 59 contrib/python-zstandard/zstd/compress/zstd_ldm.c
55 60 contrib/python-zstandard/zstd/compress/zstd_ldm.h
56 61 contrib/python-zstandard/zstd/compress/zstdmt_compress.c
57 62 contrib/python-zstandard/zstd/compress/zstdmt_compress.h
58 63 contrib/python-zstandard/zstd/compress/zstd_opt.c
59 64 contrib/python-zstandard/zstd/compress/zstd_opt.h
60 65 contrib/python-zstandard/zstd/decompress/huf_decompress.c
61 66 contrib/python-zstandard/zstd/decompress/zstd_decompress.c
62 67 contrib/python-zstandard/zstd/deprecated/zbuff_common.c
63 68 contrib/python-zstandard/zstd/deprecated/zbuff_compress.c
64 69 contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c
65 70 contrib/python-zstandard/zstd/deprecated/zbuff.h
66 71 contrib/python-zstandard/zstd/dictBuilder/cover.c
72 contrib/python-zstandard/zstd/dictBuilder/cover.h
67 73 contrib/python-zstandard/zstd/dictBuilder/divsufsort.c
68 74 contrib/python-zstandard/zstd/dictBuilder/divsufsort.h
75 contrib/python-zstandard/zstd/dictBuilder/fastcover.c
69 76 contrib/python-zstandard/zstd/dictBuilder/zdict.c
70 77 contrib/python-zstandard/zstd/dictBuilder/zdict.h
71 78 contrib/python-zstandard/zstd/zstd.h
72 79 hgext/fsmonitor/pywatchman/bser.c
73 80 mercurial/thirdparty/xdiff/xdiff.h
74 81 mercurial/thirdparty/xdiff/xdiffi.c
75 82 mercurial/thirdparty/xdiff/xdiffi.h
76 83 mercurial/thirdparty/xdiff/xemit.c
77 84 mercurial/thirdparty/xdiff/xemit.h
78 85 mercurial/thirdparty/xdiff/xhistogram.c
79 86 mercurial/thirdparty/xdiff/xinclude.h
80 87 mercurial/thirdparty/xdiff/xmacros.h
81 88 mercurial/thirdparty/xdiff/xmerge.c
82 89 mercurial/thirdparty/xdiff/xpatience.c
83 90 mercurial/thirdparty/xdiff/xprepare.c
84 91 mercurial/thirdparty/xdiff/xprepare.h
85 92 mercurial/thirdparty/xdiff/xtypes.h
86 93 mercurial/thirdparty/xdiff/xutils.c
87 94 mercurial/thirdparty/xdiff/xutils.h
88 95 mercurial/thirdparty/zope/interface/_zope_interface_coptimizations.c
@@ -1,7 +1,10 b''
1 1 graft c-ext
2 graft debian
2 3 graft zstd
3 4 graft tests
4 5 include make_cffi.py
5 6 include setup_zstd.py
6 7 include zstd.c
8 include zstd_cffi.py
7 9 include LICENSE
10 include NEWS.rst
@@ -1,338 +1,456 b''
1 1 ===============
2 2 Version History
3 3 ===============
4 4
5 5 1.0.0 (not yet released)
6 6 ========================
7 7
8 8 Actions Blocking Release
9 9 ------------------------
10 10
11 11 * compression and decompression APIs that support ``io.rawIOBase`` interface
12 12 (#13).
13 13 * Refactor module names so C and CFFI extensions live under ``zstandard``
14 14 package.
15 15 * Overall API design review.
16 16 * Use Python allocator where possible.
17 17 * Figure out what to do about experimental APIs not implemented by CFFI.
18 18 * APIs for auto adjusting compression parameters based on input size. e.g.
19 19 clamping the window log so it isn't too large for input.
20 20 * Consider allowing compressor and decompressor instances to be thread safe,
21 21 support concurrent operations. Or track when an operation is in progress and
22 22 refuse to let concurrent operations use the same instance.
23 23 * Support for magic-less frames for all decompression operations (``decompress()``
24 24 doesn't work due to sniffing the content size and the lack of a ZSTD API to
25 25 sniff magic-less frames - this should be fixed in 1.3.5.).
26 26 * Audit for complete flushing when ending compression streams.
27 27 * Deprecate legacy APIs.
28 28 * Audit for ability to control read/write sizes on all APIs.
29 29 * Detect memory leaks via bench.py.
30 30 * Remove low-level compression parameters from ``ZstdCompressor.__init__`` and
31 31 require use of ``CompressionParameters``.
32 32 * Expose ``ZSTD_getFrameProgression()`` from more compressor types.
33 * Support modifying compression parameters mid operation when supported by
34 zstd API.
35 * Expose ``ZSTD_CLEVEL_DEFAULT`` constant.
36 * Support ``ZSTD_p_forceAttachDict`` compression parameter.
37 * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving
38 compression parameters.
39 * Consider exposing ``ZSTDMT_toFlushNow()``.
40 * Expose ``ZDICT_trainFromBuffer_fastCover()``,
41 ``ZDICT_optimizeTrainFromBuffer_fastCover``.
42 * Expose and enforce ``ZSTD_minCLevel()`` for minimum compression level.
43 * Consider a ``chunker()`` API for decompression.
44 * Consider stats for ``chunker()`` API, including finding the last consumed
45 offset of input data.
33 46
34 47 Other Actions Not Blocking Release
35 48 ---------------------------------------
36 49
37 50 * Support for block compression APIs.
38 51 * API for ensuring max memory ceiling isn't exceeded.
39 52 * Move off nose for testing.
40 53
54 0.10.1 (released 2018-10-08)
55 ============================
56
57 Backwards Compatibility Notes
58 -----------------------------
59
60 * ``ZstdCompressor.stream_reader().closed`` is now a property instead of a
61 method (#58).
62 * ``ZstdDecompressor.stream_reader().closed`` is now a property instead of a
63 method (#58).
64
65 Changes
66 -------
67
68 * Stop attempting to package Python 3.6 for Miniconda. The latest version of
69 Miniconda is using Python 3.7. The Python 3.6 Miniconda packages were a lie
70 since they were built against Python 3.7.
71 * ``ZstdCompressor.stream_reader()``'s and ``ZstdDecompressor.stream_reader()``'s
72 ``closed`` attribute is now a read-only property instead of a method. This now
73 properly matches the ``IOBase`` API and allows instances to be used in more
74 places that accept ``IOBase`` instances.
75
76 0.10.0 (released 2018-10-08)
77 ============================
78
79 Backwards Compatibility Notes
80 -----------------------------
81
82 * ``ZstdDecompressor.stream_reader().read()`` now consistently requires an
83 argument in both the C and CFFI backends. Before, the CFFI implementation
84 would assume a default value of ``-1``, which was later rejected.
85 * The ``compress_literals`` argument and attribute has been removed from
86 ``zstd.ZstdCompressionParameters`` because it was removed by the zstd 1.3.5
87 API.
88 * ``ZSTD_CCtx_setParametersUsingCCtxParams()`` is no longer called on every
89 operation performed against ``ZstdCompressor`` instances. The reason for this
90 change is that the zstd 1.3.5 API no longer allows this without calling
91 ``ZSTD_CCtx_resetParameters()`` first. But if we called
92 ``ZSTD_CCtx_resetParameters()`` on every operation, we'd have to redo
93 potentially expensive setup when using dictionaries. We now call
94 ``ZSTD_CCtx_reset()`` on every operation and don't attempt to change
95 compression parameters.
96 * Objects returned by ``ZstdCompressor.stream_reader()`` no longer need to be
97 used as a context manager. The context manager interface still exists and its
98 behavior is unchanged.
99 * Objects returned by ``ZstdDecompressor.stream_reader()`` no longer need to be
100 used as a context manager. The context manager interface still exists and its
101 behavior is unchanged.
102
103 Bug Fixes
104 ---------
105
106 * ``ZstdDecompressor.decompressobj().decompress()`` should now return all data
107 from internal buffers in more scenarios. Before, it was possible for data to
108 remain in internal buffers. This data would be emitted on a subsequent call
109 to ``decompress()``. The overall output stream would still be valid. But if
110 callers were expecting input data to exactly map to output data (say the
111 producer had used ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` and was attempting to
112 map input chunks to output chunks), then the previous behavior would be
113 wrong. The new behavior is such that output from
114 ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` fed into ``decompressobj().decompress()``
115 should produce all available compressed input.
116 * ``ZstdDecompressor.stream_reader().read()`` should no longer segfault after
117 a previous context manager resulted in error (#56).
118 * ``ZstdCompressor.compressobj().flush(COMPRESSOBJ_FLUSH_BLOCK)`` now returns
119 all data necessary to flush a block. Before, it was possible for the
120 ``flush()`` to not emit all data necessary to fully represent a block. This
121 would mean decompressors wouldn't be able to decompress all data that had been
122 fed into the compressor and ``flush()``ed. (#55).
123
124 New Features
125 ------------
126
127 * New module constants ``BLOCKSIZELOG_MAX``, ``BLOCKSIZE_MAX``,
128 ``TARGETLENGTH_MAX`` that expose constants from libzstd.
129 * New ``ZstdCompressor.chunker()`` API for manually feeding data into a
130 compressor and emitting chunks of a fixed size. Like ``compressobj()``, the
131 API doesn't impose restrictions on the input or output types for the
132 data streams. Unlike ``compressobj()``, it ensures output chunks are of a
133 fixed size. This makes this API useful when the compressed output is being
134 fed into an I/O layer, where uniform write sizes are useful.
135 * ``ZstdCompressor.stream_reader()`` no longer needs to be used as a context
136 manager (#34).
137 * ``ZstdDecompressor.stream_reader()`` no longer needs to be used as a context
138 manager (#34).
139 * Bundled zstandard library upgraded from 1.3.4 to 1.3.6.
140
141 Changes
142 -------
143
144 * Added ``zstd_cffi.py`` and ``NEWS.rst`` to ``MANIFEST.in``.
145 * ``zstandard.__version__`` is now defined (#50).
146 * Upgrade pip, setuptools, wheel, and cibuildwheel packages to latest versions.
147 * Upgrade various packages used in CI to latest versions. Notably tox (in
148 order to support Python 3.7).
149 * Use relative paths in setup.py to appease Python 3.7 (#51).
150 * Added CI for Python 3.7.
151
152 0.9.1 (released 2018-06-04)
153 ===========================
154
155 * Debian packaging support.
156 * Fix typo in setup.py (#44).
157 * Support building with mingw compiler (#46).
158
41 159 0.9.0 (released 2018-04-08)
42 160 ===========================
43 161
44 162 Backwards Compatibility Notes
45 163 -----------------------------
46 164
47 165 * CFFI 1.11 or newer is now required (previous requirement was 1.8).
48 166 * The primary module is now ``zstandard``. Please change imports of ``zstd``
49 167 and ``zstd_cffi`` to ``import zstandard``. See the README for more. Support
50 168 for importing the old names will be dropped in the next release.
51 169 * ``ZstdCompressor.read_from()`` and ``ZstdDecompressor.read_from()`` have
52 170 been renamed to ``read_to_iter()``. ``read_from()`` is aliased to the new
53 171 name and will be deleted in a future release.
54 172 * Support for Python 2.6 has been removed.
55 173 * Support for Python 3.3 has been removed.
56 174 * The ``selectivity`` argument to ``train_dictionary()`` has been removed, as
57 175 the feature disappeared from zstd 1.3.
58 176 * Support for legacy dictionaries has been removed. Cover dictionaries are now
59 177 the default. ``train_cover_dictionary()`` has effectively been renamed to
60 178 ``train_dictionary()``.
61 179 * The ``allow_empty`` argument from ``ZstdCompressor.compress()`` has been
62 180 deleted and the method now allows empty inputs to be compressed by default.
63 181 * ``estimate_compression_context_size()`` has been removed. Use
64 182 ``CompressionParameters.estimated_compression_context_size()`` instead.
65 183 * ``get_compression_parameters()`` has been removed. Use
66 184 ``CompressionParameters.from_level()`` instead.
67 185 * The arguments to ``CompressionParameters.__init__()`` have changed. If you
68 186 were using positional arguments before, the positions now map to different
69 187 arguments. It is recommended to use keyword arguments to construct
70 188 ``CompressionParameters`` instances.
71 189 * ``TARGETLENGTH_MAX`` constant has been removed (it disappeared from zstandard
72 190 1.3.4).
73 191 * ``ZstdCompressor.write_to()`` and ``ZstdDecompressor.write_to()`` have been
74 192 renamed to ``ZstdCompressor.stream_writer()`` and
75 193 ``ZstdDecompressor.stream_writer()``, respectively. The old names are still
76 194 aliased, but will be removed in the next major release.
77 195 * Content sizes are written into frame headers by default
78 196 (``ZstdCompressor(write_content_size=True)`` is now the default).
79 197 * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``
80 198 for consistency with other types. The old name is an alias and will be removed
81 199 in the next major release.
82 200
83 201 Bug Fixes
84 202 ---------
85 203
86 204 * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40) (from 0.8.2).
87 205 * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35) (from 0.8.2).
88 206 * Fixed memory leak of ``ZSTD_DDict`` instances in CFFI's ``ZstdDecompressor``.
89 207
90 208 New Features
91 209 ------------
92 210
93 * Bundlded zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various
211 * Bundled zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various
94 212 bug fixes and performance improvements. It also gives us access to newer
95 213 features.
96 214 * Support for negative compression levels.
97 215 * Support for *long distance matching* (facilitates compression ratios that approach
98 216 LZMA).
99 217 * Support for reading empty zstandard frames (with an embedded content size
100 218 of 0).
101 219 * Support for writing and partial support for reading zstandard frames without a
102 220 magic header.
103 221 * New ``stream_reader()`` API that exposes the ``io.RawIOBase`` interface (allows
104 222 you to ``.read()`` from a file-like object).
105 223 * Several minor features, bug fixes, and performance enhancements.
106 224 * Wheels for Linux and macOS are now provided with releases.
107 225
108 226 Changes
109 227 -------
110 228
111 229 * Functions accepting bytes data now use the buffer protocol and can accept
112 230 more types (like ``memoryview`` and ``bytearray``) (#26).
113 231 * Add #includes so compilation on OS X and BSDs works (#20).
114 232 * New ``ZstdDecompressor.stream_reader()`` API to obtain a read-only i/o stream
115 233 of decompressed data for a source.
116 234 * New ``ZstdCompressor.stream_reader()`` API to obtain a read-only i/o stream of
117 235 compressed data for a source.
118 236 * Renamed ``ZstdDecompressor.read_from()`` to ``ZstdDecompressor.read_to_iter()``.
119 237 The old name is still available.
120 238 * Renamed ``ZstdCompressor.read_from()`` to ``ZstdCompressor.read_to_iter()``.
121 239 ``read_from()`` is still available at its old location.
122 240 * Introduce the ``zstandard`` module to import and re-export the C or CFFI
123 241 *backend* as appropriate. Behavior can be controlled via the
124 242 ``PYTHON_ZSTANDARD_IMPORT_POLICY`` environment variable. See README for
125 243 usage info.
126 244 * Vendored version of zstd upgraded to 1.3.4.
127 245 * Added module constants ``CONTENTSIZE_UNKNOWN`` and ``CONTENTSIZE_ERROR``.
128 246 * Add ``STRATEGY_BTULTRA`` compression strategy constant.
129 247 * Switch from deprecated ``ZSTD_getDecompressedSize()`` to
130 248 ``ZSTD_getFrameContentSize()`` replacement.
131 249 * ``ZstdCompressor.compress()`` can now compress empty inputs without requiring
132 250 special handling.
133 251 * ``ZstdCompressor`` and ``ZstdDecompressor`` now have a ``memory_size()``
134 252 method for determining the current memory utilization of the underlying zstd
135 253 primitive.
136 254 * ``train_dictionary()`` has new arguments and functionality for trying multiple
137 255 variations of COVER parameters and selecting the best one.
138 256 * Added module constants ``LDM_MINMATCH_MIN``, ``LDM_MINMATCH_MAX``, and
139 257 ``LDM_BUCKETSIZELOG_MAX``.
140 258 * Converted all consumers to the zstandard *new advanced API*, which uses
141 259 ``ZSTD_compress_generic()``
142 260 * ``CompressionParameters.__init__`` now accepts several more arguments,
143 261 including support for *long distance matching*.
144 262 * ``ZstdCompressionDict.__init__`` now accepts a ``dict_type`` argument that
145 263 controls how the dictionary should be interpreted. This can be used to
146 264 force the use of *content-only* dictionaries or to require the presence
147 265 of the dictionary magic header.
148 266 * ``ZstdCompressionDict.precompute_compress()`` can be used to precompute the
149 267 compression dictionary so it can efficiently be used with multiple
150 268 ``ZstdCompressor`` instances.
151 269 * Digested dictionaries are now stored in ``ZstdCompressionDict`` instances,
152 270 created automatically on first use, and automatically reused by all
153 271 ``ZstdDecompressor`` instances bound to that dictionary.
154 272 * All meaningful functions now accept keyword arguments.
155 273 * ``ZstdDecompressor.decompressobj()`` now accepts a ``write_size`` argument
156 274 to control how much work to perform on every decompressor invocation.
157 275 * ``ZstdCompressor.write_to()`` now exposes a ``tell()``, which exposes the
158 276 total number of bytes written so far.
159 277 * ``ZstdDecompressor.stream_reader()`` now supports ``seek()`` when moving
160 278 forward in the stream.
161 279 * Removed ``TARGETLENGTH_MAX`` constant.
162 280 * Added ``frame_header_size(data)`` function.
163 281 * Added ``frame_content_size(data)`` function.
164 282 * Consumers of ``ZSTD_decompress*`` have been switched to the new *advanced
165 283 decompression* API.
166 284 * ``ZstdCompressor`` and ``ZstdCompressionParams`` can now be constructed with
167 285 negative compression levels.
168 286 * ``ZstdDecompressor`` now accepts a ``max_window_size`` argument to limit the
169 287 amount of memory required for decompression operations.
170 288 * ``FORMAT_ZSTD1`` and ``FORMAT_ZSTD1_MAGICLESS`` constants to be used with
171 289 the ``format`` compression parameter to control whether the frame magic
172 290 header is written.
173 291 * ``ZstdDecompressor`` now accepts a ``format`` argument to control the
174 292 expected frame format.
175 293 * ``ZstdCompressor`` now has a ``frame_progression()`` method to return
176 294 information about the current compression operation.
177 295 * Error messages in CFFI no longer have ``b''`` literals.
178 296 * Compiler warnings and underlying overflow issues on 32-bit platforms have been
179 297 fixed.
180 298 * Builds in CI now build with compiler warnings as errors. This should hopefully
181 299 fix new compiler warnings from being introduced.
182 300 * Make ``ZstdCompressor(write_content_size=True)`` and
183 301 ``CompressionParameters(write_content_size=True)`` the default.
184 302 * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``.
185 303
186 304 0.8.2 (released 2018-02-22)
187 305 ---------------------------
188 306
189 307 * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40).
190 308 * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35).
191 309
192 310 0.8.1 (released 2017-04-08)
193 311 ---------------------------
194 312
195 313 * Add #includes so compilation on OS X and BSDs works (#20).
196 314
197 315 0.8.0 (released 2017-03-08)
198 316 ===========================
199 317
200 318 * CompressionParameters now has an estimated_compression_context_size() method.
201 319 zstd.estimate_compression_context_size() is now deprecated and slated for
202 320 removal.
203 321 * Implemented a lot of fuzzing tests.
204 322 * CompressionParameters instances now perform extra validation by calling
205 323 ZSTD_checkCParams() at construction time.
206 324 * multi_compress_to_buffer() API for compressing multiple inputs as a
207 325 single operation, as efficiently as possible.
208 326 * ZSTD_CStream instances are now used across multiple operations on
209 327 ZstdCompressor instances, resulting in much better performance for
210 328 APIs that do streaming.
211 329 * ZSTD_DStream instances are now used across multiple operations on
212 330 ZstdDecompressor instances, resulting in much better performance for
213 331 APIs that do streaming.
214 332 * train_dictionary() now releases the GIL.
215 333 * Support for training dictionaries using the COVER algorithm.
216 334 * multi_decompress_to_buffer() API for decompressing multiple frames as a
217 335 single operation, as efficiently as possible.
218 336 * Support for multi-threaded compression.
219 337 * Disable deprecation warnings when compiling CFFI module.
220 338 * Fixed memory leak in train_dictionary().
221 339 * Removed DictParameters type.
222 340 * train_dictionary() now accepts keyword arguments instead of a
223 341 DictParameters instance to control dictionary generation.
224 342
225 343 0.7.0 (released 2017-02-07)
226 344 ===========================
227 345
228 346 * Added zstd.get_frame_parameters() to obtain info about a zstd frame.
229 347 * Added ZstdDecompressor.decompress_content_dict_chain() for efficient
230 348 decompression of *content-only dictionary chains*.
231 349 * CFFI module fully implemented; all tests run against both C extension and
232 350 CFFI implementation.
233 351 * Vendored version of zstd updated to 1.1.3.
234 352 * ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference()
235 353 to avoid extra memory allocation of dict data.
236 354 * Add function names to error messages (by using ":name" in PyArg_Parse*
237 355 functions).
238 356 * Reuse decompression context across operations. Previously, we created a
239 357 new ZSTD_DCtx for each decompress(). This was measured to slow down
240 358 decompression by 40-200MB/s. The API guarantees say ZstdDecompressor
241 359 is not thread safe. So we reuse the ZSTD_DCtx across operations and make
242 360 things faster in the process.
243 361 * ZstdCompressor.write_to()'s compress() and flush() methods now return number
244 362 of bytes written.
245 363 * ZstdDecompressor.write_to()'s write() method now returns the number of bytes
246 364 written to the underlying output object.
247 365 * CompressionParameters instances now expose their values as attributes.
248 366 * CompressionParameters instances no longer are subscriptable nor behave
249 367 as tuples (backwards incompatible). Use attributes to obtain values.
250 368 * DictParameters instances now expose their values as attributes.
251 369
252 370 0.6.0 (released 2017-01-14)
253 371 ===========================
254 372
255 373 * Support for legacy zstd protocols (build time opt in feature).
256 374 * Automation improvements to test against Python 3.6, latest versions
257 375 of Tox, more deterministic AppVeyor behavior.
258 376 * CFFI "parser" improved to use a compiler preprocessor instead of rewriting
259 377 source code manually.
260 378 * Vendored version of zstd updated to 1.1.2.
261 379 * Documentation improvements.
262 380 * Introduce a bench.py script for performing (crude) benchmarks.
263 381 * ZSTD_CCtx instances are now reused across multiple compress() operations.
264 382 * ZstdCompressor.write_to() now has a flush() method.
265 383 * ZstdCompressor.compressobj()'s flush() method now accepts an argument to
266 384 flush a block (as opposed to ending the stream).
267 385 * Disallow compress(b'') when writing content sizes by default (issue #11).
268 386
269 387 0.5.2 (released 2016-11-12)
270 388 ===========================
271 389
272 390 * more packaging fixes for source distribution
273 391
274 392 0.5.1 (released 2016-11-12)
275 393 ===========================
276 394
277 395 * setup_zstd.py is included in the source distribution
278 396
279 397 0.5.0 (released 2016-11-10)
280 398 ===========================
281 399
282 400 * Vendored version of zstd updated to 1.1.1.
283 401 * Continuous integration for Python 3.6 and 3.7
284 402 * Continuous integration for Conda
285 403 * Added compression and decompression APIs providing similar interfaces
286 404 to the standard library ``zlib`` and ``bz2`` modules. This allows
287 405 coding to a common interface.
288 406 * ``zstd.__version__`` is now defined.
289 407 * ``read_from()`` on various APIs now accepts objects implementing the buffer
290 408 protocol.
291 409 * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers
292 410 to pass in an existing buffer with a header without having to create a
293 411 slice or a new object.
294 412 * Implemented ``ZstdCompressionDict.as_bytes()``.
295 413 * Python's memory allocator is now used instead of ``malloc()``.
296 414 * Low-level zstd data structures are reused in more instances, cutting down
297 415 on overhead for certain operations.
298 416 * ``distutils`` boilerplate for obtaining an ``Extension`` instance
299 417 has now been refactored into a standalone ``setup_zstd.py`` file. This
300 418 allows other projects with ``setup.py`` files to reuse the
301 419 ``distutils`` code for this project without copying code.
302 420 * The monolithic ``zstd.c`` file has been split into a header file defining
303 421 types and separate ``.c`` source files for the implementation.
304 422
305 423 Older History
306 424 =============
307 425
308 426 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a
309 427 Python extension for use by the Mercurial project. A very hacky prototype
310 428 is sent to the mercurial-devel list for RFC.
311 429
312 430 2016-09-03 - Most functionality from Zstandard C API implemented. Source
313 431 code published on https://github.com/indygreg/python-zstandard. Travis-CI
314 432 automation configured. 0.0.1 release on PyPI.
315 433
316 434 2016-09-05 - After the API was rounded out a bit and support for Python
317 435 2.6 and 2.7 was added, version 0.1 was released to PyPI.
318 436
319 437 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2
320 438 was released to PyPI.
321 439
322 440 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor
323 441 now accepts arguments controlling frame parameters. The source size can now
324 442 be declared when performing streaming compression. ZstdDecompressor.decompress()
325 443 is implemented. Compression dictionaries are now cached when using the simple
326 444 compression and decompression APIs. Memory size APIs added.
327 445 ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been
328 446 implemented. This rounds out the major compression/decompression APIs planned
329 447 by the author.
330 448
331 449 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully
332 450 decoding a zstd frame (issue #2).
333 451
334 452 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and
335 453 write buffer sizes, and a few bug fixes involving failure to read/write
336 454 all data when buffer sizes were too small to hold remaining data.
337 455
338 456 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements.
@@ -1,1420 +1,1495 b''
1 1 ================
2 2 python-zstandard
3 3 ================
4 4
5 5 This project provides Python bindings for interfacing with the
6 6 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
7 7 and CFFI interface are provided.
8 8
9 9 The primary goal of the project is to provide a rich interface to the
10 10 underlying C API through a Pythonic interface while not sacrificing
11 11 performance. This means exposing most of the features and flexibility
12 12 of the C API while not sacrificing usability or safety that Python provides.
13 13
14 14 The canonical home for this project lives in a Mercurial repository run by
15 15 the author. For convenience, that repository is frequently synchronized to
16 16 https://github.com/indygreg/python-zstandard.
17 17
18 18 | |ci-status| |win-ci-status|
19 19
20 20 Requirements
21 21 ============
22 22
23 23 This extension is designed to run with Python 2.7, 3.4, 3.5, and 3.6
24 24 on common platforms (Linux, Windows, and OS X). x86 and x86_64 are well-tested
25 25 on Windows. Only x86_64 is well-tested on Linux and macOS.
26 26
27 27 Installing
28 28 ==========
29 29
30 30 This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard.
31 31 So, to install this package::
32 32
33 33 $ pip install zstandard
34 34
35 35 Binary wheels are made available for some platforms. If you need to
36 36 install from a source distribution, all you should need is a working C
37 37 compiler and the Python development headers/libraries. On many Linux
38 38 distributions, you can install a ``python-dev`` or ``python-devel``
39 39 package to provide these dependencies.
40 40
41 41 Packages are also uploaded to Anaconda Cloud at
42 42 https://anaconda.org/indygreg/zstandard. See that URL for how to install
43 43 this package with ``conda``.
44 44
45 45 Performance
46 46 ===========
47 47
48 48 zstandard is a highly tunable compression algorithm. In its default settings
49 49 (compression level 3), it will be faster at compression and decompression and
50 50 will have better compression ratios than zlib on most data sets. When tuned
51 51 for speed, it approaches lz4's speed and ratios. When tuned for compression
52 52 ratio, it approaches lzma ratios and compression speed, but decompression
53 53 speed is much faster. See the official zstandard documentation for more.
54 54
55 55 zstandard and this library support multi-threaded compression. There is a
56 56 mechanism to compress large inputs using multiple threads.
57 57
58 58 The performance of this library is usually very similar to what the zstandard
59 59 C API can deliver. Overhead in this library is due to general Python overhead
60 60 and can't easily be avoided by *any* zstandard Python binding. This library
61 61 exposes multiple APIs for performing compression and decompression so callers
62 62 can pick an API suitable for their need. Contrast with the compression
63 63 modules in Python's standard library (like ``zlib``), which only offer limited
64 64 mechanisms for performing operations. The API flexibility means consumers can
65 65 choose to use APIs that facilitate zero copying or minimize Python object
66 66 creation and garbage collection overhead.
67 67
68 68 This library is capable of single-threaded throughputs well over 1 GB/s. For
69 69 exact numbers, measure yourself. The source code repository has a ``bench.py``
70 70 script that can be used to measure things.
71 71
72 72 API
73 73 ===
74 74
75 75 To interface with Zstandard, simply import the ``zstandard`` module::
76 76
77 77 import zstandard
78 78
79 79 It is a popular convention to alias the module as a different name for
80 80 brevity::
81 81
82 82 import zstandard as zstd
83 83
84 84 This module attempts to import and use either the C extension or CFFI
85 85 implementation. On Python platforms known to support C extensions (like
86 86 CPython), it raises an ImportError if the C extension cannot be imported.
87 87 On Python platforms known to not support C extensions (like PyPy), it only
88 88 attempts to import the CFFI implementation and raises ImportError if that
89 89 can't be done. On other platforms, it first tries to import the C extension
90 90 then falls back to CFFI if that fails and raises ImportError if CFFI fails.
91 91
92 92 To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY``
93 93 environment variable can be set. The following values are accepted:
94 94
95 95 default
96 96 The behavior described above.
97 97 cffi_fallback
98 98 Always try to import the C extension then fall back to CFFI if that
99 99 fails.
100 100 cext
101 101 Only attempt to import the C extension.
102 102 cffi
103 103 Only attempt to import the CFFI implementation.
104 104
105 105 In addition, the ``zstandard`` module exports a ``backend`` attribute
106 106 containing the string name of the backend being used. It will be one
107 107 of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively).
108 108
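
A minimal sketch of influencing and inspecting backend selection (the policy
value shown is one of the accepted values listed above; the environment
variable presumably needs to be set before the module is first imported)::

    import os

    # Must be in the environment before the first import of zstandard.
    os.environ['PYTHON_ZSTANDARD_IMPORT_POLICY'] = 'cffi_fallback'

    import zstandard as zstd

    # One of 'cext' or 'cffi', depending on which backend was loaded.
    print(zstd.backend)
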
109 109 The types, functions, and attributes exposed by the ``zstandard`` module
110 110 are documented in the sections below.
111 111
112 112 .. note::
113 113
114 114 The documentation in this section makes references to various zstd
115 115 concepts and functionality. The source repository contains a
116 116 ``docs/concepts.rst`` file explaining these in more detail.
117 117
118 118 ZstdCompressor
119 119 --------------
120 120
121 121 The ``ZstdCompressor`` class provides an interface for performing
122 122 compression operations. Each instance is essentially a wrapper around a
123 123 ``ZSTD_CCtx`` from the C API.
124 124
125 125 Each instance is associated with parameters that control compression
126 126 behavior. These come from the following named arguments (all optional):
127 127
128 128 level
129 129 Integer compression level. Valid values are between 1 and 22.
130 130 dict_data
131 131 Compression dictionary to use.
132 132
133 133 Note: When using dictionary data and ``compress()`` is called multiple
134 134 times, the ``ZstdCompressionParameters`` derived from an integer
135 135 compression ``level`` and the first compressed data's size will be reused
136 136 for all subsequent operations. This may not be desirable if source data
137 137 size varies significantly.
138 138 compression_params
139 139 A ``ZstdCompressionParameters`` instance defining compression settings.
140 140 write_checksum
141 141 Whether a 4 byte checksum should be written with the compressed data.
142 142 Defaults to False. If True, the decompressor can verify that decompressed
143 143 data matches the original input data.
144 144 write_content_size
145 145 Whether the size of the uncompressed data will be written into the
146 146 header of compressed data. Defaults to True. The data will only be
147 147 written if the compressor knows the size of the input data. This is
148 148 often not true for streaming compression.
149 149 write_dict_id
150 150 Whether to write the dictionary ID into the compressed data.
151 151 Defaults to True. The dictionary ID is only written if a dictionary
152 152 is being used.
153 153 threads
154 154 Enables and sets the number of threads to use for multi-threaded compression
155 155 operations. Defaults to 0, which means to use single-threaded compression.
156 156 Negative values will resolve to the number of logical CPUs in the system.
157 157 Read below for more info on multi-threaded compression. This argument only
158 158 controls thread count for operations that operate on individual pieces of
159 159 data. APIs that spawn multiple threads for working on multiple pieces of
160 160 data have their own ``threads`` argument.
161 161
162 162 ``compression_params`` is mutually exclusive with ``level``, ``write_checksum``,
163 163 ``write_content_size``, ``write_dict_id``, and ``threads``.
164 164
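For example, a compressor configured via these arguments might look like this
(a minimal sketch; the particular values are illustrative)::

    import zstandard as zstd

    # Level 10, embed a content checksum, and resolve the thread count from
    # the number of logical CPUs (negative values do that, per the above).
    cctx = zstd.ZstdCompressor(level=10, write_checksum=True, threads=-1)
    compressed = cctx.compress(b'data to compress')
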
165 165 Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
166 166 instances can be called from multiple Python threads simultaneously. In other
167 167 words, assume instances are not thread safe unless stated otherwise.
168 168
169 169 Utility Methods
170 170 ^^^^^^^^^^^^^^^
171 171
172 172 ``frame_progression()`` returns a 3-tuple containing the number of bytes
173 173 ingested, consumed, and produced by the current compression operation.
174 174
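For example (a minimal sketch; the counters reflect the current compression
operation)::

    cctx = zstd.ZstdCompressor()
    ingested, consumed, produced = cctx.frame_progression()
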
175 175 ``memory_size()`` obtains the memory utilization of the underlying zstd
176 176 compression context, in bytes.::
177 177
178 178 cctx = zstd.ZstdCompressor()
179 179 memory = cctx.memory_size()
180 180
181 181 Simple API
182 182 ^^^^^^^^^^
183 183
184 184 ``compress(data)`` compresses and returns data as a one-shot operation.::
185 185
186 186 cctx = zstd.ZstdCompressor()
187 187 compressed = cctx.compress(b'data to compress')
188 188
189 189 The ``data`` argument can be any object that implements the *buffer protocol*.
190 190
191 191 Stream Reader API
192 192 ^^^^^^^^^^^^^^^^^
193 193
194 194 ``stream_reader(source)`` can be used to obtain an object conforming to the
195 195 ``io.RawIOBase`` interface for reading compressed output as a stream::
196 196
197 197 with open(path, 'rb') as fh:
198 198 cctx = zstd.ZstdCompressor()
199 reader = cctx.stream_reader(fh)
200 while True:
201 chunk = reader.read(16384)
202 if not chunk:
203 break
204
205 # Do something with compressed chunk.
206
207 Instances can also be used as context managers::
208
209 with open(path, 'rb') as fh:
199 210 with cctx.stream_reader(fh) as reader:
200 211 while True:
201 212 chunk = reader.read(16384)
202 213 if not chunk:
203 214 break
204 215
205 216 # Do something with compressed chunk.
206 217
207 The stream can only be read within a context manager. When the context
208 manager exits, the stream is closed and the underlying resource is
209 released and future operations against the compression stream will fail.
218 When the context manager exits or ``close()`` is called, the stream is closed,
219 underlying resources are released, and future operations against the compression
220 stream will fail.
210 221
211 222 The ``source`` argument to ``stream_reader()`` can be any object with a
212 223 ``read(size)`` method or any object implementing the *buffer protocol*.
213 224
214 225 ``stream_reader()`` accepts a ``size`` argument specifying how large the input
215 226 stream is. This is used to adjust compression parameters so they are
216 227 tailored to the source size.::
217 228
218 229 with open(path, 'rb') as fh:
219 230 cctx = zstd.ZstdCompressor()
220 231 with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader:
221 232 ...
222 233
223 234 If the ``source`` is a stream, you can specify how large ``read()`` requests
224 235 to that stream should be via the ``read_size`` argument. It defaults to
225 236 ``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.::
226 237
227 238 with open(path, 'rb') as fh:
228 239 cctx = zstd.ZstdCompressor()
229 240 # Will perform fh.read(8192) when obtaining data to feed into the
230 241 # compressor.
231 242 with cctx.stream_reader(fh, read_size=8192) as reader:
232 243 ...
233 244
234 245 The stream returned by ``stream_reader()`` is neither writable nor seekable
235 246 (even if the underlying source is seekable). ``readline()`` and
236 247 ``readlines()`` are not implemented because they don't make sense for
237 248 compressed data. ``tell()`` returns the number of compressed bytes
238 249 emitted so far.
239 250
240 251 Streaming Input API
241 252 ^^^^^^^^^^^^^^^^^^^
242 253
243 254 ``stream_writer(fh)`` (which behaves as a context manager) allows you to *stream*
244 255 data into a compressor.::
245 256
246 257 cctx = zstd.ZstdCompressor(level=10)
247 258 with cctx.stream_writer(fh) as compressor:
248 259 compressor.write(b'chunk 0')
249 260 compressor.write(b'chunk 1')
250 261 ...
251 262
252 263 The argument to ``stream_writer()`` must have a ``write(data)`` method. As
253 264 compressed data is available, ``write()`` will be called with the compressed
254 265 data as its argument. Many common Python types implement ``write()``, including
255 266 open file handles and ``io.BytesIO``.
256 267
257 268 ``stream_writer()`` returns an object representing a streaming compressor
258 269 instance. It **must** be used as a context manager. That object's
259 270 ``write(data)`` method is used to feed data into the compressor.
260 271
261 272 A ``flush()`` method can be called to evict whatever data remains within the
262 273 compressor's internal state into the output object. This may result in 0 or
263 274 more ``write()`` calls to the output object.
264 275
265 276 Both ``write()`` and ``flush()`` return the number of bytes written to the
266 277 object's ``write()``. In many cases, small inputs do not accumulate enough
267 278 data to cause a write and ``write()`` will return ``0``.
268 279
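A minimal sketch of flushing mid-stream (``fh`` is assumed to be a writable
file object)::

   cctx = zstd.ZstdCompressor()
   with cctx.stream_writer(fh) as compressor:
       count = compressor.write(b'chunk 0')
       # Force whatever compressed data is buffered out to ``fh``.
       count += compressor.flush()
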
269 280 If the size of the data being fed to this streaming compressor is known,
270 281 you can declare it before compression begins::
271 282
272 283 cctx = zstd.ZstdCompressor()
273 284 with cctx.stream_writer(fh, size=data_len) as compressor:
274 285 compressor.write(chunk0)
275 286 compressor.write(chunk1)
276 287 ...
277 288
278 289 Declaring the size of the source data allows compression parameters to
279 290 be tuned. And if ``write_content_size`` is used, it also results in the
280 291 content size being written into the frame header of the output data.
281 292
282 293 The size of the chunks passed to the destination's ``write()`` can be specified::
283 294
284 295 cctx = zstd.ZstdCompressor()
285 296 with cctx.stream_writer(fh, write_size=32768) as compressor:
286 297 ...
287 298
288 299 To see how much memory is being used by the streaming compressor::
289 300
290 301 cctx = zstd.ZstdCompressor()
291 302 with cctx.stream_writer(fh) as compressor:
292 303 ...
293 304 byte_size = compressor.memory_size()
294 305
295 306 The total number of bytes written so far is exposed via ``tell()``::
296 307
297 308 cctx = zstd.ZstdCompressor()
298 309 with cctx.stream_writer(fh) as compressor:
299 310 ...
300 311 total_written = compressor.tell()
301 312
302 313 Streaming Output API
303 314 ^^^^^^^^^^^^^^^^^^^^
304 315
305 316 ``read_to_iter(reader)`` provides a mechanism to stream data out of a
306 317 compressor as an iterator of data chunks.::
307 318
308 319 cctx = zstd.ZstdCompressor()
309 320 for chunk in cctx.read_to_iter(fh):
310 321 # Do something with emitted data.
311 322
312 323 ``read_to_iter()`` accepts an object that has a ``read(size)`` method or
313 324 conforms to the buffer protocol.
314 325
315 326 Uncompressed data is fetched from the source either by calling ``read(size)``
316 327 or by fetching a slice of data from the object directly (in the case where
317 328 the buffer protocol is being used). The returned iterator consists of chunks
318 329 of compressed data.
319 330
320 331 If reading from the source via ``read()``, ``read()`` will be called until
321 332 it raises or returns an empty bytes (``b''``). It is perfectly valid for
322 333 the source to deliver fewer bytes than were requested by ``read(size)``.
323 334
324 335 Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument
325 336 declaring the size of the input stream::
326 337
327 338 cctx = zstd.ZstdCompressor()
328 339 for chunk in cctx.read_to_iter(fh, size=some_int):
329 340 pass
330 341
331 342 You can also control the size that data is ``read()`` from the source and
332 343 the ideal size of output chunks::
333 344
334 345 cctx = zstd.ZstdCompressor()
335 346 for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192):
336 347 pass
337 348
338 349 Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control
339 350 over the sizes of chunks fed into the compressor. Instead, chunk sizes will
340 351 be whatever the object being read from delivers. These will often be of a
341 352 uniform size.
342 353
343 354 Stream Copying API
344 355 ^^^^^^^^^^^^^^^^^^
345 356
346 357 ``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while
347 358 compressing it.::
348 359
349 360 cctx = zstd.ZstdCompressor()
350 361 cctx.copy_stream(ifh, ofh)
351 362
352 363 For example, say you wish to compress a file::
353 364
354 365 cctx = zstd.ZstdCompressor()
355 366 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
356 367 cctx.copy_stream(ifh, ofh)
357 368
358 369 It is also possible to declare the size of the source stream::
359 370
360 371 cctx = zstd.ZstdCompressor()
361 372 cctx.copy_stream(ifh, ofh, size=len_of_input)
362 373
363 374 You can also specify how large the chunks ``read()`` from and ``write()`` to
364 375 the streams should be::
365 376
366 377 cctx = zstd.ZstdCompressor()
367 378 cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384)
368 379
369 380 The stream copier returns a 2-tuple of bytes read and written::
370 381
371 382 cctx = zstd.ZstdCompressor()
372 383 read_count, write_count = cctx.copy_stream(ifh, ofh)
373 384
374 385 Compressor API
375 386 ^^^^^^^^^^^^^^
376 387
377 388 ``compressobj()`` returns an object that exposes ``compress(data)`` and
378 389 ``flush()`` methods. Each returns compressed data or an empty bytes.
379 390
380 391 The purpose of ``compressobj()`` is to provide an API-compatible interface
381 392 with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to
382 393 swap in different compressor objects while using the same API.
383 394
384 395 ``flush()`` accepts an optional argument indicating how to end the stream.
385 396 ``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream.
386 397 Once this type of flush is performed, ``compress()`` and ``flush()`` can
387 398 no longer be called. This type of flush **must** be called to end the
388 399 compression context. If not called, returned data may be incomplete.
389 400
390 401 A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a
391 402 zstd block. Flushes of this type can be performed multiple times. The next
392 403 call to ``compress()`` will begin a new zstd block.
393 404
394 405 Here is how this API should be used::
395 406
396 407 cctx = zstd.ZstdCompressor()
397 408 cobj = cctx.compressobj()
398 409 data = cobj.compress(b'raw input 0')
399 410 data = cobj.compress(b'raw input 1')
400 411 data = cobj.flush()
401 412
402 413 Or to flush blocks::
403 414
404 415 cctx = zstd.ZstdCompressor()
405 416 cobj = cctx.compressobj()
406 417 data = cobj.compress(b'chunk in first block')
407 418 data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
408 419 data = cobj.compress(b'chunk in second block')
409 420 data = cobj.flush()
410 421
411 422 For best performance, keep input chunks under 256KB. This avoids
412 423 extra allocations for a large output object.
413 424
414 425 It is possible to declare the input size of the data that will be fed into
415 426 the compressor::
416 427
417 428 cctx = zstd.ZstdCompressor()
418 429 cobj = cctx.compressobj(size=6)
419 430 data = cobj.compress(b'foobar')
420 431 data = cobj.flush()
421 432
433 Chunker API
434 ^^^^^^^^^^^
435
436 ``chunker(size=None, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE)`` returns
437 an object that can be used to iteratively feed chunks of data into a compressor
438 and produce output chunks of a uniform size.
439
440 The object returned by ``chunker()`` exposes the following methods:
441
442 ``compress(data)``
443 Feeds new input data into the compressor.
444
445 ``flush()``
446 Flushes all data currently in the compressor.
447
448 ``finish()``
449 Signals the end of input data. No new data can be compressed after this
450 method is called.
451
452 ``compress()``, ``flush()``, and ``finish()`` all return an iterator of
453 ``bytes`` instances holding compressed data. The iterator may be empty. Callers
454 MUST iterate through all elements of the returned iterator before performing
455 another operation on the object.
456
457 All chunks emitted by ``compress()`` will have a length of ``chunk_size``.
458
459 ``flush()`` and ``finish()`` may return a final chunk smaller than
460 ``chunk_size``.
461
462 Here is how the API should be used::
463
464 cctx = zstd.ZstdCompressor()
465 chunker = cctx.chunker(chunk_size=32768)
466
467 with open(path, 'rb') as fh:
468 while True:
469 in_chunk = fh.read(32768)
470 if not in_chunk:
471 break
472
473 for out_chunk in chunker.compress(in_chunk):
474 # Do something with output chunk of size 32768.
475
476 for out_chunk in chunker.finish():
477 # Do something with output chunks that finalize the zstd frame.
478
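``flush()`` can be used mid-stream to bound latency, e.g. when sending
compressed data over a network. A minimal sketch, where ``send()`` is a
hypothetical function delivering bytes to a peer::

   for out_chunk in chunker.compress(in_chunk):
       send(out_chunk)

   # Force out whatever data the compressor is currently holding. The final
   # chunk may be smaller than chunk_size.
   for out_chunk in chunker.flush():
       send(out_chunk)
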
479 The ``chunker()`` API is often a better alternative to ``compressobj()``.
480
481 ``compressobj()`` will emit output data as it is available. This results in a
482 *stream* of output chunks of varying sizes. The consistency of the output chunk
483 size with ``chunker()`` is more appropriate for many usages, such as sending
484 compressed data to a socket.
485
486 ``compressobj()`` may also perform extra memory reallocations in order to
487 dynamically adjust the sizes of the output chunks. Since ``chunker()`` output
488 chunks are all the same size (except for flushed or final chunks), there is
489 less memory allocation overhead.
490
422 491 Batch Compression API
423 492 ^^^^^^^^^^^^^^^^^^^^^
424 493
425 494 (Experimental. Not yet supported in CFFI bindings.)
426 495
427 496 ``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple
428 497 inputs as a single operation.
429 498
430 499 Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a
431 500 ``BufferWithSegments``, or a list containing byte like objects. Each element of
432 501 the container will be compressed individually using the configured parameters
433 502 on the ``ZstdCompressor`` instance.
434 503
435 504 The ``threads`` argument controls how many threads to use for compression. The
436 505 default is ``0`` which means to use a single thread. Negative values use the
437 506 number of logical CPUs in the machine.
438 507
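A minimal sketch::

   cctx = zstd.ZstdCompressor()
   results = cctx.multi_compress_to_buffer(
       [b'input 0', b'input 1', b'input 2'], threads=-1)
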
439 508 The function returns a ``BufferWithSegmentsCollection``. This type represents
440 509 N discrete memory allocations, each holding 1 or more compressed frames.
441 510
442 511 Output data is written to shared memory buffers. This means that unlike
443 512 regular Python objects, a reference to *any* object within the collection
444 513 keeps the shared buffer and therefore memory backing it alive. This can have
445 514 undesirable effects on process memory usage.
446 515
447 516 The API and behavior of this function is experimental and will likely change.
448 517 Known deficiencies include:
449 518
450 519 * If asked to use multiple threads, it will always spawn that many threads,
451 520 even if the input is too small to use them. It should automatically lower
452 521 the thread count when the extra threads would just add overhead.
453 522 * The buffer allocation strategy is fixed. There is room to make it dynamic,
454 523 perhaps even to allow one output buffer per input, facilitating a variation
455 524 of the API to return a list without the adverse effects of shared memory
456 525 buffers.
457 526
458 527 ZstdDecompressor
459 528 ----------------
460 529
461 530 The ``ZstdDecompressor`` class provides an interface for performing
462 531 decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from
463 532 the C API.
464 533
465 534 Each instance is associated with parameters that control decompression. These
466 535 come from the following named arguments (all optional):
467 536
468 537 dict_data
469 538 Compression dictionary to use.
470 539 max_window_size
471 540 Sets an upper limit on the window size for decompression operations in
472 541 kibibytes. This setting can be used to prevent large memory allocations
473 542 for inputs using large compression windows.
474 543 format
475 544 Set the format of data for the decoder. By default, this is
476 545 ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to
477 546 allow decoding frames without the 4 byte magic header. Not all decompression
478 547 APIs support this mode.
479 548
480 549 The interface of this class is very similar to ``ZstdCompressor`` (by design).
481 550
482 551 Unless specified otherwise, assume that no two methods of ``ZstdDecompressor``
483 552 instances can be called from multiple Python threads simultaneously. In other
484 553 words, assume instances are not thread safe unless stated otherwise.
485 554
486 555 Utility Methods
487 556 ^^^^^^^^^^^^^^^
488 557
489 558 ``memory_size()`` obtains the size of the underlying zstd decompression context,
490 559 in bytes.::
491 560
492 561 dctx = zstd.ZstdDecompressor()
493 562 size = dctx.memory_size()
494 563
495 564 Simple API
496 565 ^^^^^^^^^^
497 566
498 567 ``decompress(data)`` can be used to decompress an entire compressed zstd
499 568 frame in a single operation.::
500 569
501 570 dctx = zstd.ZstdDecompressor()
502 571 decompressed = dctx.decompress(data)
503 572
504 573 By default, ``decompress(data)`` will only work on data written with the content
505 574 size encoded in its header (this is the default behavior of
506 575 ``ZstdCompressor().compress()`` but may not be true for streaming compression). If
507 576 compressed data without an embedded content size is seen, ``zstd.ZstdError`` will
508 577 be raised.
509 578
510 579 If the compressed data doesn't have its content size embedded within it,
511 580 decompression can be attempted by specifying the ``max_output_size``
512 581 argument.::
513 582
514 583 dctx = zstd.ZstdDecompressor()
515 584 uncompressed = dctx.decompress(data, max_output_size=1048576)
516 585
517 586 Ideally, ``max_output_size`` will be identical to the decompressed output
518 587 size.
519 588
520 589 If ``max_output_size`` is too small to hold the decompressed data,
521 590 ``zstd.ZstdError`` will be raised.
522 591
523 592 If ``max_output_size`` is larger than the decompressed data, the allocated
524 593 output buffer will be resized to only use the space required.
525 594
526 595 Please note that an allocation of the requested ``max_output_size`` will be
527 596 performed every time the method is called. Setting to a very large value could
528 597 result in a lot of work for the memory allocator and may result in
529 598 ``MemoryError`` being raised if the allocation fails.
530 599
531 600 .. important::
532 601
533 602 If the exact size of decompressed data is unknown (not passed in explicitly
534 603 and not stored in the zstandard frame), for performance reasons it is
535 604 encouraged to use a streaming API.
536 605
537 606 Stream Reader API
538 607 ^^^^^^^^^^^^^^^^^
539 608
540 609 ``stream_reader(source)`` can be used to obtain an object conforming to the
541 610 ``io.RawIOBase`` interface for reading decompressed output as a stream::
542 611
543 612 with open(path, 'rb') as fh:
544 613 dctx = zstd.ZstdDecompressor()
545 with dctx.stream_reader(fh) as reader:
546 while True:
547 chunk = reader.read(16384)
548 if not chunk:
549 break
614 reader = dctx.stream_reader(fh)
615 while True:
616 chunk = reader.read(16384)
617 if not chunk:
618 break
619
620 # Do something with decompressed chunk.
550 621
551 # Do something with decompressed chunk.
622 The stream can also be used as a context manager::
552 623
553 The stream can only be read within a context manager. When the context
554 manager exits, the stream is closed and the underlying resource is
555 released and future operations against the stream will fail.
624 with open(path, 'rb') as fh:
625 dctx = zstd.ZstdDecompressor()
626 with dctx.stream_reader(fh) as reader:
627 ...
628
629 When used as a context manager, the stream is closed and the underlying
630 resources are released when the context manager exits. Future operations against
631 the stream will fail.
556 632
557 633 The ``source`` argument to ``stream_reader()`` can be any object with a
558 634 ``read(size)`` method or any object implementing the *buffer protocol*.
559 635
560 636 If the ``source`` is a stream, you can specify how large ``read()`` requests
561 637 to that stream should be via the ``read_size`` argument. It defaults to
562 638 ``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.::
563 639
564 640 with open(path, 'rb') as fh:
565 641 dctx = zstd.ZstdDecompressor()
566 642 # Will perform fh.read(8192) when obtaining data for the decompressor.
567 643 with dctx.stream_reader(fh, read_size=8192) as reader:
568 644 ...
569 645
570 646 The stream returned by ``stream_reader()`` is not writable.
571 647
572 648 The stream returned by ``stream_reader()`` is *partially* seekable.
573 649 Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward
574 650 of the current position are allowed. Offsets behind the current read
575 651 position and offsets relative to the end of stream are not allowed and
576 652 will raise ``ValueError`` if attempted.
577 653
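For example, a minimal sketch of seeking forward (assumes ``os`` is imported
and ``path`` names a file containing a zstd frame)::

   with open(path, 'rb') as fh:
       dctx = zstd.ZstdDecompressor()
       reader = dctx.stream_reader(fh)
       # Skip the first 4096 decompressed bytes, then read.
       reader.seek(4096, os.SEEK_CUR)
       chunk = reader.read(16384)
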
578 654 ``tell()`` returns the number of decompressed bytes read so far.
579 655
580 656 Not all I/O methods are implemented. Notably missing is support for
581 657 ``readline()``, ``readlines()``, and linewise iteration support. Support for
582 658 these is planned for a future release.
583 659
584 660 Streaming Input API
585 661 ^^^^^^^^^^^^^^^^^^^
586 662
587 663 ``stream_writer(fh)`` can be used to incrementally send compressed data to a
588 664 decompressor.::
589 665
590 666 dctx = zstd.ZstdDecompressor()
591 667 with dctx.stream_writer(fh) as decompressor:
592 668 decompressor.write(compressed_data)
593 669
594 670 This behaves similarly to ``zstd.ZstdCompressor``: compressed data is written to
595 671 the decompressor by calling ``write(data)`` and decompressed output is written
596 672 to the output object by calling its ``write(data)`` method.
597 673
598 674 Calls to ``write()`` will return the number of bytes written to the output
599 675 object. Not all inputs will result in bytes being written, so return values
600 676 of ``0`` are possible.
601 677
602 678 The size of the chunks passed to the destination's ``write()`` can be specified::
603 679
604 680 dctx = zstd.ZstdDecompressor()
605 681 with dctx.stream_writer(fh, write_size=16384) as decompressor:
606 682 pass
607 683
608 684 You can see how much memory is being used by the decompressor::
609 685
610 686 dctx = zstd.ZstdDecompressor()
611 687 with dctx.stream_writer(fh) as decompressor:
612 688 byte_size = decompressor.memory_size()
613 689
614 690 Streaming Output API
615 691 ^^^^^^^^^^^^^^^^^^^^
616 692
617 693 ``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a
618 694 compressed source as an iterator of data chunks.::
619 695
620 696 dctx = zstd.ZstdDecompressor()
621 697 for chunk in dctx.read_to_iter(fh):
622 698 # Do something with original data.
623 699
624 700 ``read_to_iter()`` accepts an object with a ``read(size)`` method that will
625 701 return compressed bytes or an object conforming to the buffer protocol that
626 702 can expose its data as a contiguous range of bytes.
627 703
628 704 ``read_to_iter()`` returns an iterator whose elements are chunks of the
629 705 decompressed data.
630 706
631 707 The size of requested ``read()`` from the source can be specified::
632 708
633 709 dctx = zstd.ZstdDecompressor()
634 710 for chunk in dctx.read_to_iter(fh, read_size=16384):
635 711 pass
636 712
637 713 It is also possible to skip leading bytes in the input data::
638 714
639 715 dctx = zstd.ZstdDecompressor()
640 716 for chunk in dctx.read_to_iter(fh, skip_bytes=1):
641 717 pass
642 718
643 719 .. tip::
644 720
645 721 Skipping leading bytes is useful if the source data contains extra
646 722 *header* data. Traditionally, you would need to create a slice or
647 723 ``memoryview`` of the data you want to decompress. This would create
648 724 overhead. It is more efficient to pass the offset into this API.
649 725
650 726 Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator
651 727 controls when data is decompressed. If the iterator isn't consumed,
652 728 decompression is put on hold.
653 729
654 730 When ``read_to_iter()`` is passed an object conforming to the buffer protocol,
655 731 the behavior may seem similar to what occurs when the simple decompression
656 732 API is used. However, this API works when the decompressed size is unknown.
657 733 Furthermore, if feeding large inputs, the decompressor will work in chunks
658 734 instead of performing a single operation.
659 735
660 736 Stream Copying API
661 737 ^^^^^^^^^^^^^^^^^^
662 738
663 739 ``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while
664 740 performing decompression.::
665 741
666 742 dctx = zstd.ZstdDecompressor()
667 743 dctx.copy_stream(ifh, ofh)
668 744
669 745 e.g. to decompress a file to another file::
670 746
671 747 dctx = zstd.ZstdDecompressor()
672 748 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
673 749 dctx.copy_stream(ifh, ofh)
674 750
675 751 The size of the chunks ``read()`` from and ``write()`` to the streams can be
676 752 specified::
677 753
678 754 dctx = zstd.ZstdDecompressor()
679 755 dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384)
680 756
681 757 Decompressor API
682 758 ^^^^^^^^^^^^^^^^
683 759
684 760 ``decompressobj()`` returns an object that exposes a ``decompress(data)``
685 761 method. Compressed data chunks are fed into ``decompress(data)`` and
686 762 uncompressed output (or an empty bytes) is returned. Output from subsequent
687 763 calls needs to be concatenated to reassemble the full decompressed byte
688 764 sequence.
689 765
690 766 The purpose of ``decompressobj()`` is to provide an API-compatible interface
691 767 with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers
692 768 to swap in different decompressor objects while using the same API.
693 769
694 770 Each object is single use: once an input frame is decoded, ``decompress()``
695 771 can no longer be called.
696 772
697 773 Here is how this API should be used::
698 774
699 775 dctx = zstd.ZstdDecompressor()
700 776 dobj = dctx.decompressobj()
701 777 data = dobj.decompress(compressed_chunk_0)
702 778 data = dobj.decompress(compressed_chunk_1)
703 779
704 780 By default, calls to ``decompress()`` write output data in chunks of size
705 781 ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated
706 782 before being returned to the caller. It is possible to define the size of
707 783 these temporary chunks by passing ``write_size`` to ``decompressobj()``::
708 784
709 785 dctx = zstd.ZstdDecompressor()
710 786 dobj = dctx.decompressobj(write_size=1048576)
711 787
712 788 .. note::
713 789
714 790 Because calls to ``decompress()`` may need to perform multiple
715 791 memory (re)allocations, this streaming decompression API isn't as
716 792 efficient as other APIs.
717 793
718 794 Batch Decompression API
719 795 ^^^^^^^^^^^^^^^^^^^^^^^
720 796
721 797 (Experimental. Not yet supported in CFFI bindings.)
722 798
723 799 ``multi_decompress_to_buffer()`` performs decompression of multiple
724 800 frames as a single operation and returns a ``BufferWithSegmentsCollection``
725 801 containing decompressed data for all inputs.
726 802
727 803 Compressed frames can be passed to the function as a ``BufferWithSegments``,
728 804 a ``BufferWithSegmentsCollection``, or as a list containing objects that
729 805 conform to the buffer protocol. For best performance, pass a
730 806 ``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as
731 807 minimal input validation will be done for that type. If calling from
732 808 Python (as opposed to C), constructing one of these instances may add
733 809 overhead, cancelling out the benefit of the reduced validation for list
734 810 inputs.::
735 811
736 812 dctx = zstd.ZstdDecompressor()
737 813 results = dctx.multi_decompress_to_buffer([b'...', b'...'])
738 814
739 815 The decompressed size of each frame MUST be discoverable. It can either be
740 816 embedded within the zstd frame (``write_content_size=True`` argument to
741 817 ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument.
742 818
743 819 The ``decompressed_sizes`` argument is an object conforming to the buffer
744 820 protocol which holds an array of 64-bit unsigned integers in the machine's
745 821 native format defining the decompressed sizes of each frame. If this argument
746 822 is passed, it avoids having to scan each frame for its decompressed size.
747 823 This frame scanning can add noticeable overhead in some scenarios.::
748 824
749 825 frames = [...]
750 826 sizes = struct.pack('=QQQQ', len0, len1, len2, len3)
751 827
752 828 dctx = zstd.ZstdDecompressor()
753 829 results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
754 830
755 831 The ``threads`` argument controls the number of threads to use to perform
756 832 decompression operations. The default (``0``) or the value ``1`` means to
757 833 use a single thread. Negative values use the number of logical CPUs in the
758 834 machine.
759 835
760 836 .. note::
761 837
762 838 It is possible to pass a ``mmap.mmap()`` instance into this function by
763 839 wrapping it with a ``BufferWithSegments`` instance (which will define the
764 840 offsets of frames within the memory mapped region).
765 841
766 842 This function is logically equivalent to performing ``dctx.decompress()``
767 843 on each input frame and returning the result.
768 844
769 845 This function exists to perform decompression on multiple frames as fast
770 846 as possible by having as little overhead as possible. Since decompression is
771 847 performed as a single operation and since the decompressed output is stored in
772 848 a single buffer, extra memory allocations, Python objects, and Python function
773 849 calls are avoided. This is ideal for scenarios where callers know up front that
774 850 they need to access data for multiple frames, such as when *delta chains* are
775 851 being used.
776 852
777 853 Currently, the implementation always spawns multiple threads when requested,
778 854 even if the amount of work to do is small. In the future, it will be smarter
779 855 about avoiding threads and their associated overhead when the amount of
780 856 work to do is small.
781 857
782 858 Prefix Dictionary Chain Decompression
783 859 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
784 860
785 861 ``decompress_content_dict_chain(frames)`` performs decompression of a list of
786 862 zstd frames produced using chained *prefix* dictionary compression. Such
787 863 a list of frames is produced by compressing discrete inputs where each
788 864 non-initial input is compressed with a *prefix* dictionary consisting of the
789 865 content of the previous input.
790 866
791 867 For example, say you have the following inputs::
792 868
793 869 inputs = [b'input 1', b'input 2', b'input 3']
794 870
795 871 The zstd frame chain consists of:
796 872
797 873 1. ``b'input 1'`` compressed in standalone/discrete mode
798 874 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary
799 875 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary
800 876
801 877 Each zstd frame **must** have the content size written.
802 878
803 879 The following Python code can be used to produce a *prefix dictionary chain*::
804 880
805 881 def make_chain(inputs):
806 882 frames = []
807 883
808 884 # First frame is compressed in standalone/discrete mode.
809 885 zctx = zstd.ZstdCompressor()
810 886 frames.append(zctx.compress(inputs[0]))
811 887
812 888 # Subsequent frames use the previous fulltext as a prefix dictionary
813 889 for i, raw in enumerate(inputs[1:]):
814 890 dict_data = zstd.ZstdCompressionDict(
815 891 inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT)
816 892 zctx = zstd.ZstdCompressor(dict_data=dict_data)
817 893 frames.append(zctx.compress(raw))
818 894
819 895 return frames
820 896
821 897 ``decompress_content_dict_chain()`` returns the uncompressed data of the last
822 898 element in the input chain.
823 899
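Given the ``make_chain()`` helper above, decompressing the chain back to the
final input is a single call (a minimal sketch)::

   frames = make_chain(inputs)

   dctx = zstd.ZstdDecompressor()
   last = dctx.decompress_content_dict_chain(frames)
   assert last == inputs[-1]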
824 900
825 901 .. note::
826 902
827 903 It is possible to implement *prefix dictionary chain* decompression
828 904 on top of other APIs. However, this function will likely be faster -
829 905 especially for long input chains - as it avoids the overhead of instantiating
830 906 and passing around intermediate objects between C and Python.
831 907
832 908 Multi-Threaded Compression
833 909 --------------------------
834 910
835 911 ``ZstdCompressor`` accepts a ``threads`` argument that controls the number
836 912 of threads to use for compression. The way this works is that input is split
837 913 into segments and each segment is fed into a worker pool for compression. Once
838 914 a segment is compressed, it is flushed/appended to the output.
839 915
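Multi-threaded compression is requested via the ``threads`` argument. A
minimal sketch, where ``data`` is assumed to be a large ``bytes`` instance::

   # Negative values resolve to the number of logical CPUs.
   cctx = zstd.ZstdCompressor(threads=-1)
   compressed = cctx.compress(data)
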
840 916 .. note::
841 917
842 918 These threads are created at the C layer and are not Python threads. So they
843 919 work outside the GIL. It is therefore possible to CPU saturate multiple cores
844 920 from Python.
845 921
846 922 The segment size for multi-threaded compression is chosen from the window size
847 923 of the compressor. This is derived from the ``window_log`` attribute of a
848 924 ``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB
849 925 range.
850 926
851 927 If multi-threaded compression is requested and the input is smaller than the
852 928 configured segment size, only a single compression thread will be used. If the
853 929 input is smaller than the segment size multiplied by the thread pool size or
854 930 if data cannot be delivered to the compressor fast enough, not all requested
855 931 compressor threads may be active simultaneously.
856 932
857 933 Compared to non-multi-threaded compression, multi-threaded compression has
858 934 higher per-operation overhead. This includes extra memory operations,
859 935 thread creation, lock acquisition, etc.
860 936
861 937 Due to the nature of multi-threaded compression using *N* compression
862 938 *states*, the output from multi-threaded compression will likely be larger
863 939 than non-multi-threaded compression. The difference is usually small. But
864 940 there is a CPU/wall time versus size trade off that may warrant investigation.
865 941
866 942 Output from multi-threaded compression does not require any special handling
867 943 on the decompression side. To the decompressor, data generated with single
868 944 threaded compressor looks the same as data generated by a multi-threaded
869 945 compressor and does not require any special handling or additional resource
870 946 requirements.
871 947
872 948 Dictionary Creation and Management
873 949 ----------------------------------
874 950
875 951 Compression dictionaries are represented with the ``ZstdCompressionDict`` type.
876 952
877 953 Instances can be constructed from bytes::
878 954
879 955 dict_data = zstd.ZstdCompressionDict(data)
880 956
881 957 It is possible to construct a dictionary from *any* data. If the data doesn't
882 958 begin with a magic header, it will be treated as a *prefix* dictionary.
883 959 *Prefix* dictionaries allow compression operations to reference raw data
884 960 within the dictionary.
885 961
886 962 It is possible to force the use of *prefix* dictionaries or to require a
887 963 dictionary header::
888 964
889 965 dict_data = zstd.ZstdCompressionDict(data,
890 966 dict_type=zstd.DICT_TYPE_RAWCONTENT)
891 967
892 968 dict_data = zstd.ZstdCompressionDict(data,
893 969 dict_type=zstd.DICT_TYPE_FULLDICT)
894 970
895 971 You can see how many bytes are in the dictionary by calling ``len()``::
896 972
897 973 dict_data = zstd.train_dictionary(size, samples)
898 974 dict_size = len(dict_data) # will not be larger than ``size``
899 975
900 976 Once you have a dictionary, you can pass it to the objects performing
901 977 compression and decompression::
902 978
903 979 dict_data = zstd.train_dictionary(131072, samples)
904 980
905 981 cctx = zstd.ZstdCompressor(dict_data=dict_data)
906 982 for source_data in input_data:
907 983 compressed = cctx.compress(source_data)
908 984 # Do something with compressed data.
909 985
910 986 dctx = zstd.ZstdDecompressor(dict_data=dict_data)
911 987 for compressed_data in input_data:
912 988 buffer = io.BytesIO()
913 989 with dctx.stream_writer(buffer) as decompressor:
914 990 decompressor.write(compressed_data)
915 991 # Do something with raw data in ``buffer``.
916 992
917 993 Dictionaries have unique integer IDs. You can retrieve this ID via::
918 994
919 995 dict_id = zstd.dictionary_id(dict_data)
920 996
921 997 You can obtain the raw data in the dict (useful for persisting and constructing
922 998 a ``ZstdCompressionDict`` later) via ``as_bytes()``::
923 999
924 1000 dict_data = zstd.train_dictionary(size, samples)
925 1001 raw_data = dict_data.as_bytes()
926 1002
927 1003 By default, when a ``ZstdCompressionDict`` is *attached* to a
928 1004 ``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the
929 1005 dictionary for use. This is fine if only 1 compression operation is being
930 1006 performed or if the ``ZstdCompressor`` is being reused for multiple operations.
931 1007 But if multiple ``ZstdCompressor`` instances are being used with the dictionary,
932 1008 this can add overhead.
933 1009
934 1010 It is possible to *precompute* the dictionary so it can readily be consumed
935 1011 by multiple ``ZstdCompressor`` instances::
936 1012
937 1013 d = zstd.ZstdCompressionDict(data)
938 1014
939 1015 # Precompute for compression level 3.
940 1016 d.precompute_compress(level=3)
941 1017
942 1018 # Precompute with specific compression parameters.
943 1019 params = zstd.ZstdCompressionParameters(...)
944 1020 d.precompute_compress(compression_params=params)
945 1021
946 1022 .. note::
947 1023
948 1024 When a dictionary is precomputed, the compression parameters used to
949 1025 precompute the dictionary overwrite some of the compression parameters
950 1026 specified to ``ZstdCompressor.__init__``.
951 1027
952 1028 Training Dictionaries
953 1029 ^^^^^^^^^^^^^^^^^^^^^
954 1030
955 1031 Unless using *prefix* dictionaries, dictionary data is produced by *training*
956 1032 on existing data::
957 1033
958 1034 dict_data = zstd.train_dictionary(size, samples)
959 1035
960 1036 This takes a target dictionary size and list of bytes instances and creates and
961 1037 returns a ``ZstdCompressionDict``.
962 1038
963 1039 The dictionary training mechanism is known as *cover*. More details about it are
964 1040 available in the paper *Effective Construction of Relative Lempel-Ziv
965 1041 Dictionaries* (authors: Liao, Petri, Moffat, Wirth).
966 1042
967 1043 The cover algorithm takes parameters ``k`` and ``d``. These are the
968 1044 *segment size* and *dmer size*, respectively. The returned dictionary
969 1045 instance created by this function has ``k`` and ``d`` attributes
970 1046 containing the values for these parameters. If a ``ZstdCompressionDict``
971 1047 is constructed from raw bytes data (a content-only dictionary), the
972 1048 ``k`` and ``d`` attributes will be ``0``.
973 1049
974 1050 The segment and dmer size parameters to the cover algorithm can either be
975 1051 specified manually or ``train_dictionary()`` can try multiple values
976 1052 and pick the best one, where *best* means the smallest compressed data size.
977 1053 This latter mode is called *optimization* mode.
978 1054
979 1055 If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``,
980 1056 or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t``
981 1057 struct) are defined, *optimization* mode is used with default parameter
982 1058 values.
983 1059
984 1060 If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged
985 1061 with explicit control over those parameters. Specifying ``threads=0`` or
986 1062 ``threads=1`` can be used to engage *optimization* mode if other parameters
987 1063 are not defined.
988 1064
989 1065 Otherwise, non-*optimization* mode is used with the parameters specified.
990 1066
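For example, a minimal sketch explicitly engaging *optimization* mode
(``samples`` is assumed to be a list of ``bytes`` instances)::

   dict_data = zstd.train_dictionary(
       131072,
       samples,
       threads=-1,  # Try parameter variations using all logical CPUs.
       level=3,
   )
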
991 1067 This function takes the following arguments:
992 1068
993 1069 dict_size
994 1070 Target size in bytes of the dictionary to generate.
995 1071 samples
996 1072 A list of bytes holding samples the dictionary will be trained from.
997 1073 k
998 1074 Parameter to cover algorithm defining the segment size. A reasonable range
999 1075 is [16, 2048+].
1000 1076 d
1001 1077 Parameter to cover algorithm defining the dmer size. A reasonable range is
1002 1078 [6, 16]. ``d`` must be less than or equal to ``k``.
1003 1079 dict_id
1004 1080 Integer dictionary ID for the produced dictionary. Default is 0, which uses
1005 1081 a random value.
1006 1082 steps
1007 1083 Number of steps through ``k`` values to perform when trying parameter
1008 1084 variations.
1009 1085 threads
1010 1086 Number of threads to use when trying parameter variations. Default is 0,
1011 1087 which means to use a single thread. A negative value can be specified to
1012 1088 use as many threads as there are detected logical CPUs.
1013 1089 level
1014 1090 Integer target compression level when trying parameter variations.
1015 1091 notifications
1016 1092 Controls writing of informational messages to ``stderr``. ``0`` (the
1017 1093 default) means to write nothing. ``1`` writes errors. ``2`` writes
1018 1094 progression info. ``3`` writes more details. And ``4`` writes all info.
1019 1095
1020 1096 Explicit Compression Parameters
1021 1097 -------------------------------
1022 1098
1023 1099 Zstandard offers a high-level *compression level* that maps to lower-level
1024 1100 compression parameters. For many consumers, this numeric level is the only
1025 1101 compression setting you'll need to touch.
1026 1102
1027 1103 But for advanced use cases, it might be desirable to tweak these lower-level
1028 1104 settings.
1029 1105
1030 1106 The ``ZstdCompressionParameters`` type represents these low-level compression
1031 1107 settings.
1032 1108
1033 1109 Instances of this type can be constructed from a myriad of keyword arguments
1034 1110 (defined below) for complete low-level control over each adjustable
1035 1111 compression setting.
1036 1112
1037 1113 From a higher level, one can construct a ``ZstdCompressionParameters`` instance
1038 1114 given a desired compression level and target input and dictionary size
1039 1115 using ``ZstdCompressionParameters.from_level()``. e.g.::
1040 1116
1041 1117 # Derive compression settings for compression level 7.
1042 1118 params = zstd.ZstdCompressionParameters.from_level(7)
1043 1119
1044 1120 # With an input size of 1MB
1045 1121 params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576)
1046 1122
1047 1123 Using ``from_level()``, it is also possible to override individual compression
1048 1124 parameters or to define additional settings that aren't automatically derived.
1049 1125 e.g.::
1050 1126
1051 1127 params = zstd.ZstdCompressionParameters.from_level(4, window_log=10)
1052 1128 params = zstd.ZstdCompressionParameters.from_level(5, threads=4)
1053 1129
1054 1130 Or you can define low-level compression settings directly::
1055 1131
1056 1132 params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True)
1057 1133
1058 1134 Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to
1059 1135 configure a compressor::
1060 1136
1061 1137 cctx = zstd.ZstdCompressor(compression_params=params)
1062 1138
1063 1139 The named arguments and attributes of ``ZstdCompressionParameters`` are as
1064 1140 follows:
1065 1141
1066 1142 * format
1067 1143 * compression_level
1068 1144 * window_log
1069 1145 * hash_log
1070 1146 * chain_log
1071 1147 * search_log
1072 1148 * min_match
1073 1149 * target_length
1074 1150 * compression_strategy
1075 1151 * write_content_size
1076 1152 * write_checksum
1077 1153 * write_dict_id
1078 1154 * job_size
1079 1155 * overlap_size_log
1080 * compress_literals
1081 1156 * force_max_window
1082 1157 * enable_ldm
1083 1158 * ldm_hash_log
1084 1159 * ldm_min_match
1085 1160 * ldm_bucket_size_log
1086 1161 * ldm_hash_every_log
1087 1162 * threads
1088 1163
1089 1164 Some of these are very low-level settings. It may help to consult the official
1090 1165 zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants
1091 1166 in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h).
1092 1167
1093 1168 Frame Inspection
1094 1169 ----------------
1095 1170
1096 1171 Data emitted from zstd compression is encapsulated in a *frame*. This frame
1097 1172 begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
1098 1173 the frame in more detail. For more info, see
1099 1174 https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
1100 1175
1101 1176 ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
1102 1177 instance and returns a ``FrameParameters`` object describing the frame.
1103 1178
1104 1179 Depending on which fields are present in the frame and their values, the
1105 1180 length of the frame parameters varies. If insufficient bytes are passed
1106 1181 in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
1107 1182 frame parameters can be parsed, pass in at least 18 bytes.
1108 1183
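For example::

   cctx = zstd.ZstdCompressor(write_checksum=True)
   frame = cctx.compress(b'data to compress')

   params = zstd.get_frame_parameters(frame)
   # params.content_size, params.window_size, params.dict_id, and
   # params.has_checksum are now available (see below).
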
1109 1184 ``FrameParameters`` instances have the following attributes:
1110 1185
1111 1186 content_size
1112 1187 Integer size of original, uncompressed content. This will be ``0`` if the
1113 1188 original content size isn't written to the frame (controlled with the
1114 1189 ``write_content_size`` argument to ``ZstdCompressor``) or if the input
1115 1190 content size was ``0``.
1116 1191
1117 1192 window_size
1118 1193 Integer size of maximum back-reference distance in compressed data.
1119 1194
1120 1195 dict_id
1121 1196 Integer of dictionary ID used for compression. ``0`` if no dictionary
1122 1197 ID was used or if the dictionary ID was ``0``.
1123 1198
1124 1199 has_checksum
1125 1200 Bool indicating whether a 4 byte content checksum is stored at the end
1126 1201 of the frame.
1127 1202
1128 1203 ``zstd.frame_header_size(data)`` returns the size of the zstandard frame
1129 1204 header.
1130 1205
1131 1206 ``zstd.frame_content_size(data)`` returns the content size as parsed from
1132 1207 the frame header. ``-1`` means the content size is unknown. ``0`` means
1133 1208 an empty frame. The content size is usually correct, but it is not
1134 1209 guaranteed to be accurate.
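
For example::

   frame = zstd.ZstdCompressor().compress(b'data to compress')

   header_size = zstd.frame_header_size(frame)
   content_size = zstd.frame_content_size(frame)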
1135 1210
1136 1211 Misc Functionality
1137 1212 ------------------
1138 1213
1139 1214 estimate_decompression_context_size()
1140 1215 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1141 1216
1142 1217 Estimate the memory size requirements for a decompressor instance.
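
For example::

   size = zstd.estimate_decompression_context_size()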
1143 1218
1144 1219 Constants
1145 1220 ---------
1146 1221
1147 1222 The following module constants/attributes are exposed:
1148 1223
1149 1224 ZSTD_VERSION
1150 1225 This module attribute exposes a 3-tuple of the Zstandard version. e.g.
1151 1226 ``(1, 0, 0)``
1152 1227 MAX_COMPRESSION_LEVEL
1153 1228 Integer max compression level accepted by compression functions
1154 1229 COMPRESSION_RECOMMENDED_INPUT_SIZE
1155 1230 Recommended chunk size to feed to compressor functions
1156 1231 COMPRESSION_RECOMMENDED_OUTPUT_SIZE
1157 1232 Recommended chunk size for compression output
1158 1233 DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1159 1234 Recommended chunk size to feed into decompressor functions
1160 1235 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
1161 1236 Recommended chunk size for decompression output
1162 1237
1163 1238 FRAME_HEADER
1164 1239 bytes containing header of the Zstandard frame
1165 1240 MAGIC_NUMBER
1166 1241 Frame header as an integer
1167 1242
1168 1243 CONTENTSIZE_UNKNOWN
1169 1244 Value for content size when the content size is unknown.
1170 1245 CONTENTSIZE_ERROR
1171 1246 Value for content size when content size couldn't be determined.
1172 1247
1173 1248 WINDOWLOG_MIN
1174 1249 Minimum value for compression parameter
1175 1250 WINDOWLOG_MAX
1176 1251 Maximum value for compression parameter
1177 1252 CHAINLOG_MIN
1178 1253 Minimum value for compression parameter
1179 1254 CHAINLOG_MAX
1180 1255 Maximum value for compression parameter
1181 1256 HASHLOG_MIN
1182 1257 Minimum value for compression parameter
1183 1258 HASHLOG_MAX
1184 1259 Maximum value for compression parameter
1185 1260 SEARCHLOG_MIN
1186 1261 Minimum value for compression parameter
1187 1262 SEARCHLOG_MAX
1188 1263 Maximum value for compression parameter
1189 1264 SEARCHLENGTH_MIN
1190 1265 Minimum value for compression parameter
1191 1266 SEARCHLENGTH_MAX
1192 1267 Maximum value for compression parameter
1193 1268 TARGETLENGTH_MIN
1194 1269 Minimum value for compression parameter
1195 1270 STRATEGY_FAST
1196 1271 Compression strategy
1197 1272 STRATEGY_DFAST
1198 1273 Compression strategy
1199 1274 STRATEGY_GREEDY
1200 1275 Compression strategy
1201 1276 STRATEGY_LAZY
1202 1277 Compression strategy
1203 1278 STRATEGY_LAZY2
1204 1279 Compression strategy
1205 1280 STRATEGY_BTLAZY2
1206 1281 Compression strategy
1207 1282 STRATEGY_BTOPT
1208 1283 Compression strategy
1209 1284 STRATEGY_BTULTRA
1210 1285 Compression strategy
1211 1286
1212 1287 FORMAT_ZSTD1
1213 1288 Zstandard frame format
1214 1289 FORMAT_ZSTD1_MAGICLESS
1215 1290 Zstandard frame format without magic header
1216 1291
1217 1292 Performance Considerations
1218 1293 --------------------------
1219 1294
1220 1295 The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
1221 1296 persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
1222 1297 or ``ZstdDecompressor`` instance for multiple operations is faster than
1223 1298 instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
1224 1299 operation. The differences are magnified as the size of data decreases. For
1225 1300 example, the difference between *context* reuse and non-reuse for 100,000
1226 1301 100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
1227 1302 whereas 10 100,000,000 byte inputs will be more similar in speed (because the
1228 1303 time spent doing compression dwarfs time spent creating new *contexts*).
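
For example, a minimal sketch preferring reuse (``inputs`` is assumed to be an
iterable of small ``bytes`` objects)::

   cctx = zstd.ZstdCompressor()
   compressed = [cctx.compress(chunk) for chunk in inputs]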
1229 1304
1230 1305 Buffer Types
1231 1306 ------------
1232 1307
1233 1308 The API exposes a handful of custom types for interfacing with memory buffers.
1234 1309 The primary goal of these types is to facilitate efficient multi-object
1235 1310 operations.
1236 1311
1237 1312 The essential idea is to have a single memory allocation provide backing
1238 1313 storage for multiple logical objects. This has 2 main advantages: fewer
1239 1314 allocations and optimal memory access patterns. This avoids having to allocate
1240 1315 a Python object for each logical object and furthermore ensures that access of
1241 1316 data for objects can be sequential (read: fast) in memory.
1242 1317
1243 1318 BufferWithSegments
1244 1319 ^^^^^^^^^^^^^^^^^^
1245 1320
1246 1321 The ``BufferWithSegments`` type represents a memory buffer containing N
1247 1322 discrete items of known lengths (segments). It is essentially a fixed size
1248 1323 memory address and an array of 2-tuples of ``(offset, length)`` 64-bit
1249 1324 unsigned native endian integers defining the byte offset and length of each
1250 1325 segment within the buffer.
1251 1326
1252 1327 Instances behave like containers.
1253 1328
1254 1329 ``len()`` returns the number of segments within the instance.
1255 1330
1256 1331 ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an
1257 1332 individual segment within the backing buffer. That returned object references
1258 1333 (not copies) memory. This means that iterating all objects doesn't copy
1259 1334 data within the buffer.
1260 1335
1261 1336 The ``.size`` attribute contains the total size in bytes of the backing
1262 1337 buffer.
1263 1338
1264 1339 Instances conform to the buffer protocol. So a reference to the backing bytes
1265 1340 can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also
1266 1341 be obtained via ``.tobytes()``.
1267 1342
1268 1343 The ``.segments`` attribute exposes the array of ``(offset, length)`` for
1269 1344 segments within the buffer. It is a ``BufferSegments`` type.
1270 1345
1271 1346 BufferSegment
1272 1347 ^^^^^^^^^^^^^
1273 1348
1274 1349 The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``.
1275 1350 It is essentially a reference to N bytes within a ``BufferWithSegments``.
1276 1351
1277 1352 ``len()`` returns the length of the segment in bytes.
1278 1353
1279 1354 ``.offset`` contains the byte offset of this segment within its parent
1280 1355 ``BufferWithSegments`` instance.
1281 1356
1282 1357 The object conforms to the buffer protocol. ``.tobytes()`` can be called to
1283 1358 obtain a ``bytes`` instance with a copy of the backing bytes.
1284 1359
1285 1360 BufferSegments
1286 1361 ^^^^^^^^^^^^^^
1287 1362
1288 1363 This type represents an array of ``(offset, length)`` integers defining segments
1289 1364 within a ``BufferWithSegments``.
1290 1365
1291 1366 The array members are 64-bit unsigned integers using host/native bit order.
1292 1367
1293 1368 Instances conform to the buffer protocol.
1294 1369
1295 1370 BufferWithSegmentsCollection
1296 1371 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1297 1372
1298 1373 The ``BufferWithSegmentsCollection`` type represents a virtual spanning view
1299 1374 of multiple ``BufferWithSegments`` instances.
1300 1375
1301 1376 Instances are constructed from 1 or more ``BufferWithSegments`` instances. The
1302 1377 resulting object behaves like an ordered sequence whose members are the
1303 1378 segments within each ``BufferWithSegments``.
1304 1379
1305 1380 ``len()`` returns the number of segments within all ``BufferWithSegments``
1306 1381 instances.
1307 1382
1308 1383 ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at
1309 1384 that offset as if all ``BufferWithSegments`` instances were a single
1310 1385 entity.
1311 1386
1312 1387 If the object is composed of 2 ``BufferWithSegments`` instances with the
1313 1388 first having 2 segments and the second having 3 segments, then ``b[0]``
1314 1389 and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``,
1315 1390 and ``b[4]`` access segments from the second.
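
For example, a minimal sketch iterating segments from a collection produced by
``multi_compress_to_buffer()``::

   cctx = zstd.ZstdCompressor()
   collection = cctx.multi_compress_to_buffer([b'input 0', b'input 1'])

   for i in range(len(collection)):
       segment = collection[i]
       # The segment references memory in a shared buffer; tobytes() copies it.
       frame = segment.tobytes()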
1316 1391
1317 1392 Choosing an API
1318 1393 ===============
1319 1394
1320 1395 There are multiple APIs for performing compression and decompression. This is
1321 1396 because different applications have different needs and the library wants to
1322 1397 facilitate optimal use in as many use cases as possible.
1323 1398
1324 1399 From a high-level, APIs are divided into *one-shot* and *streaming*: either you
1325 1400 are operating on all data at once or you operate on it piecemeal.
1326 1401
1327 1402 The *one-shot* APIs are useful for small data, where the input or output
1328 1403 size is known. (The size can come from a buffer length, file size, or
1329 1404 stored in the zstd frame header.) A limitation of the *one-shot* APIs is that
1330 1405 input and output must fit in memory simultaneously. For, say, a 4 GB input,
1331 1406 this is often not feasible.
1332 1407
1333 1408 The *one-shot* APIs also perform all work as a single operation. So, if you
1334 1409 feed it large input, it could take a long time for the function to return.
1335 1410
1336 1411 The streaming APIs do not have the limitations of the simple API. But the
1337 1412 price you pay for this flexibility is that they are more complex than a
1338 1413 single function call.
1339 1414
1340 1415 The streaming APIs put the caller in control of compression and decompression
1341 1416 behavior by allowing them to directly control either the input or output side
1342 1417 of the operation.
1343 1418
1344 1419 With the *streaming input*, *compressor*, and *decompressor* APIs, the caller
1345 1420 has full control over the input to the compression or decompression stream.
1346 1421 They can directly choose when new data is operated on.
1347 1422
1348 1423 With the *streaming output* APIs, the caller has full control over the output
1349 1424 of the compression or decompression stream. It can choose when to receive
1350 1425 new data.
1351 1426
1352 1427 When using the *streaming* APIs that operate on file-like or stream objects,
1353 1428 it is important to consider what happens in that object when I/O is requested.
1354 1429 There is potential for long pauses as data is read or written from the
1355 1430 underlying stream (say from interacting with a filesystem or network). This
1356 1431 could add considerable overhead.
1357 1432
1358 1433 Thread Safety
1359 1434 =============
1360 1435
1361 1436 ``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees
1362 1437 about thread safety. Do not operate on the same ``ZstdCompressor`` and
1363 1438 ``ZstdDecompressor`` instance simultaneously from different threads. It is
1364 1439 fine to have different threads call into a single instance, just not at the
1365 1440 same time.
1366 1441
1367 1442 Some operations require multiple function calls to complete. e.g. streaming
1368 1443 operations. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used
1369 1444 for simultaneously active operations. e.g. you must not start a streaming
1370 1445 operation when another streaming operation is already active.
1371 1446
1372 1447 The C extension releases the GIL during non-trivial calls into the zstd C
1373 1448 API. Non-trivial calls are notably compression and decompression. Trivial
1374 1449 calls are things like parsing frame parameters. Where the GIL is released
1375 1450 is considered an implementation detail and can change in any release.
1376 1451
1377 1452 APIs that accept bytes-like objects don't enforce that the underlying object
1378 1453 is read-only. However, it is assumed that the passed object is read-only for
1379 1454 the duration of the function call. It is possible to pass a mutable object
1380 1455 (like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL
1381 1456 released, and mutate the object from another thread. Such a race condition
1382 1457 is a bug in the consumer of python-zstandard. Most Python data types are
1383 1458 immutable, so unless you are doing something fancy, you don't need to
1384 1459 worry about this.
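
If a mutable buffer might be modified by another thread, one defensive option
(a sketch, not something the API requires) is to snapshot it into immutable
``bytes`` before handing it to the compressor::

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    buf = bytearray(b"x" * (1 << 20))

    # bytes(buf) copies the data, so later mutations of ``buf`` by other
    # threads cannot race with compress() while the GIL is released.
    frame = cctx.compress(bytes(buf))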
1385 1460
1386 1461 Note on Zstandard's *Experimental* API
1387 1462 ======================================
1388 1463
1389 1464 Many of the Zstandard APIs used by this module are marked as *experimental*
1390 1465 within the Zstandard project.
1391 1466
1392 1467 It is unclear how Zstandard's C API will evolve over time, especially with
1393 1468 regard to this *experimental* functionality. We will try to maintain
1394 1469 backwards compatibility at the Python API level. However, we cannot
1395 1470 guarantee this for things not under our control.
1396 1471
1397 1472 Since a copy of the Zstandard source code is distributed with this
1398 1473 module and since we compile against it, the behavior of a specific
1399 1474 version of this module should be constant for all of time. So if you
1400 1475 pin the version of this module used in your projects (which is a Python
1401 1476 best practice), you should be shielded from unwanted future changes.
1402 1477
1403 1478 Donate
1404 1479 ======
1405 1480
1406 1481 A lot of time has been invested in this project by the author.
1407 1482
1408 1483 If you find this project useful and would like to thank the author for
1409 1484 their work, consider donating some money. Any amount is appreciated.
1410 1485
1411 1486 .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif
1412 1487 :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted
1413 1488 :alt: Donate via PayPal
1414 1489
1415 1490 .. |ci-status| image:: https://travis-ci.org/indygreg/python-zstandard.svg?branch=master
1416 1491 :target: https://travis-ci.org/indygreg/python-zstandard
1417 1492
1418 1493 .. |win-ci-status| image:: https://ci.appveyor.com/api/projects/status/github/indygreg/python-zstandard?svg=true
1419 1494 :target: https://ci.appveyor.com/project/indygreg/python-zstandard
1420 1495 :alt: Windows build status
@@ -1,502 +1,477 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value) {
14 14 size_t zresult = ZSTD_CCtxParam_setParameter(params, param, value);
15 15 if (ZSTD_isError(zresult)) {
16 16 PyErr_Format(ZstdError, "unable to set compression context parameter: %s",
17 17 ZSTD_getErrorName(zresult));
18 18 return 1;
19 19 }
20 20
21 21 return 0;
22 22 }
23 23
24 24 #define TRY_SET_PARAMETER(params, param, value) if (set_parameter(params, param, value)) return -1;
25 25
26 26 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) {
27 27 TRY_SET_PARAMETER(params, ZSTD_p_format, obj->format);
28 28 TRY_SET_PARAMETER(params, ZSTD_p_compressionLevel, (unsigned)obj->compressionLevel);
29 29 TRY_SET_PARAMETER(params, ZSTD_p_windowLog, obj->windowLog);
30 30 TRY_SET_PARAMETER(params, ZSTD_p_hashLog, obj->hashLog);
31 31 TRY_SET_PARAMETER(params, ZSTD_p_chainLog, obj->chainLog);
32 32 TRY_SET_PARAMETER(params, ZSTD_p_searchLog, obj->searchLog);
33 33 TRY_SET_PARAMETER(params, ZSTD_p_minMatch, obj->minMatch);
34 34 TRY_SET_PARAMETER(params, ZSTD_p_targetLength, obj->targetLength);
35 35 TRY_SET_PARAMETER(params, ZSTD_p_compressionStrategy, obj->compressionStrategy);
36 36 TRY_SET_PARAMETER(params, ZSTD_p_contentSizeFlag, obj->contentSizeFlag);
37 37 TRY_SET_PARAMETER(params, ZSTD_p_checksumFlag, obj->checksumFlag);
38 38 TRY_SET_PARAMETER(params, ZSTD_p_dictIDFlag, obj->dictIDFlag);
39 39 TRY_SET_PARAMETER(params, ZSTD_p_nbWorkers, obj->threads);
40 40 TRY_SET_PARAMETER(params, ZSTD_p_jobSize, obj->jobSize);
41 41 TRY_SET_PARAMETER(params, ZSTD_p_overlapSizeLog, obj->overlapSizeLog);
42 TRY_SET_PARAMETER(params, ZSTD_p_compressLiterals, obj->compressLiterals);
43 42 TRY_SET_PARAMETER(params, ZSTD_p_forceMaxWindow, obj->forceMaxWindow);
44 43 TRY_SET_PARAMETER(params, ZSTD_p_enableLongDistanceMatching, obj->enableLongDistanceMatching);
45 44 TRY_SET_PARAMETER(params, ZSTD_p_ldmHashLog, obj->ldmHashLog);
46 45 TRY_SET_PARAMETER(params, ZSTD_p_ldmMinMatch, obj->ldmMinMatch);
47 46 TRY_SET_PARAMETER(params, ZSTD_p_ldmBucketSizeLog, obj->ldmBucketSizeLog);
48 47 TRY_SET_PARAMETER(params, ZSTD_p_ldmHashEveryLog, obj->ldmHashEveryLog);
49 48
50 49 return 0;
51 50 }
52 51
53 52 int reset_params(ZstdCompressionParametersObject* params) {
54 53 if (params->params) {
55 54 ZSTD_CCtxParams_reset(params->params);
56 55 }
57 56 else {
58 57 params->params = ZSTD_createCCtxParams();
59 58 if (!params->params) {
60 59 PyErr_NoMemory();
61 60 return 1;
62 61 }
63 62 }
64 63
65 64 return set_parameters(params->params, params);
66 65 }
67 66
68 67 static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
69 68 static char* kwlist[] = {
70 69 "format",
71 70 "compression_level",
72 71 "window_log",
73 72 "hash_log",
74 73 "chain_log",
75 74 "search_log",
76 75 "min_match",
77 76 "target_length",
78 77 "compression_strategy",
79 78 "write_content_size",
80 79 "write_checksum",
81 80 "write_dict_id",
82 81 "job_size",
83 82 "overlap_size_log",
84 83 "force_max_window",
85 84 "enable_ldm",
86 85 "ldm_hash_log",
87 86 "ldm_min_match",
88 87 "ldm_bucket_size_log",
89 88 "ldm_hash_every_log",
90 89 "threads",
91 "compress_literals",
92 90 NULL
93 91 };
94 92
95 93 unsigned format = 0;
96 94 int compressionLevel = 0;
97 95 unsigned windowLog = 0;
98 96 unsigned hashLog = 0;
99 97 unsigned chainLog = 0;
100 98 unsigned searchLog = 0;
101 99 unsigned minMatch = 0;
102 100 unsigned targetLength = 0;
103 101 unsigned compressionStrategy = 0;
104 102 unsigned contentSizeFlag = 1;
105 103 unsigned checksumFlag = 0;
106 104 unsigned dictIDFlag = 0;
107 105 unsigned jobSize = 0;
108 106 unsigned overlapSizeLog = 0;
109 107 unsigned forceMaxWindow = 0;
110 108 unsigned enableLDM = 0;
111 109 unsigned ldmHashLog = 0;
112 110 unsigned ldmMinMatch = 0;
113 111 unsigned ldmBucketSizeLog = 0;
114 112 unsigned ldmHashEveryLog = 0;
115 113 int threads = 0;
116 114
117 /* Setting value 0 has the effect of disabling. So we use -1 as a default
118 * to detect whether to set. Then we automatically derive the expected value
119 * based on the level, just like zstandard does itself. */
120 int compressLiterals = -1;
121
122 115 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
123 "|IiIIIIIIIIIIIIIIIIIIii:CompressionParameters",
116 "|IiIIIIIIIIIIIIIIIIIIi:CompressionParameters",
124 117 kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
125 118 &searchLog, &minMatch, &targetLength, &compressionStrategy,
126 119 &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapSizeLog,
127 120 &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch, &ldmBucketSizeLog,
128 &ldmHashEveryLog, &threads, &compressLiterals)) {
121 &ldmHashEveryLog, &threads)) {
129 122 return -1;
130 123 }
131 124
132 125 if (threads < 0) {
133 126 threads = cpu_count();
134 127 }
135 128
136 if (compressLiterals < 0) {
137 compressLiterals = compressionLevel >= 0;
138 }
139
140 129 self->format = format;
141 130 self->compressionLevel = compressionLevel;
142 131 self->windowLog = windowLog;
143 132 self->hashLog = hashLog;
144 133 self->chainLog = chainLog;
145 134 self->searchLog = searchLog;
146 135 self->minMatch = minMatch;
147 136 self->targetLength = targetLength;
148 137 self->compressionStrategy = compressionStrategy;
149 138 self->contentSizeFlag = contentSizeFlag;
150 139 self->checksumFlag = checksumFlag;
151 140 self->dictIDFlag = dictIDFlag;
152 141 self->threads = threads;
153 142 self->jobSize = jobSize;
154 143 self->overlapSizeLog = overlapSizeLog;
155 self->compressLiterals = compressLiterals;
156 144 self->forceMaxWindow = forceMaxWindow;
157 145 self->enableLongDistanceMatching = enableLDM;
158 146 self->ldmHashLog = ldmHashLog;
159 147 self->ldmMinMatch = ldmMinMatch;
160 148 self->ldmBucketSizeLog = ldmBucketSizeLog;
161 149 self->ldmHashEveryLog = ldmHashEveryLog;
162 150
163 151 if (reset_params(self)) {
164 152 return -1;
165 153 }
166 154
167 155 return 0;
168 156 }
169 157
170 158 PyDoc_STRVAR(ZstdCompressionParameters_from_level__doc__,
171 159 "Create a CompressionParameters from a compression level and target sizes\n"
172 160 );
173 161
174 162 ZstdCompressionParametersObject* CompressionParameters_from_level(PyObject* undef, PyObject* args, PyObject* kwargs) {
175 163 int managedKwargs = 0;
176 164 int level;
177 165 PyObject* sourceSize = NULL;
178 166 PyObject* dictSize = NULL;
179 167 unsigned PY_LONG_LONG iSourceSize = 0;
180 168 Py_ssize_t iDictSize = 0;
181 169 PyObject* val;
182 170 ZSTD_compressionParameters params;
183 171 ZstdCompressionParametersObject* result = NULL;
184 172 int res;
185 173
186 174 if (!PyArg_ParseTuple(args, "i:from_level",
187 175 &level)) {
188 176 return NULL;
189 177 }
190 178
191 179 if (!kwargs) {
192 180 kwargs = PyDict_New();
193 181 if (!kwargs) {
194 182 return NULL;
195 183 }
196 184 managedKwargs = 1;
197 185 }
198 186
199 187 sourceSize = PyDict_GetItemString(kwargs, "source_size");
200 188 if (sourceSize) {
201 189 #if PY_MAJOR_VERSION >= 3
202 190 iSourceSize = PyLong_AsUnsignedLongLong(sourceSize);
203 191 if (iSourceSize == (unsigned PY_LONG_LONG)(-1)) {
204 192 goto cleanup;
205 193 }
206 194 #else
207 195 iSourceSize = PyInt_AsUnsignedLongLongMask(sourceSize);
208 196 #endif
209 197
210 198 PyDict_DelItemString(kwargs, "source_size");
211 199 }
212 200
213 201 dictSize = PyDict_GetItemString(kwargs, "dict_size");
214 202 if (dictSize) {
215 203 #if PY_MAJOR_VERSION >= 3
216 204 iDictSize = PyLong_AsSsize_t(dictSize);
217 205 #else
218 206 iDictSize = PyInt_AsSsize_t(dictSize);
219 207 #endif
220 208 if (iDictSize == -1) {
221 209 goto cleanup;
222 210 }
223 211
224 212 PyDict_DelItemString(kwargs, "dict_size");
225 213 }
226 214
227 215
228 216 params = ZSTD_getCParams(level, iSourceSize, iDictSize);
229 217
230 218 /* Values derived from the input level and sizes are passed along to the
231 219 constructor. But only if a value doesn't already exist. */
232 220 val = PyDict_GetItemString(kwargs, "window_log");
233 221 if (!val) {
234 222 val = PyLong_FromUnsignedLong(params.windowLog);
235 223 if (!val) {
236 224 goto cleanup;
237 225 }
238 226 PyDict_SetItemString(kwargs, "window_log", val);
239 227 Py_DECREF(val);
240 228 }
241 229
242 230 val = PyDict_GetItemString(kwargs, "chain_log");
243 231 if (!val) {
244 232 val = PyLong_FromUnsignedLong(params.chainLog);
245 233 if (!val) {
246 234 goto cleanup;
247 235 }
248 236 PyDict_SetItemString(kwargs, "chain_log", val);
249 237 Py_DECREF(val);
250 238 }
251 239
252 240 val = PyDict_GetItemString(kwargs, "hash_log");
253 241 if (!val) {
254 242 val = PyLong_FromUnsignedLong(params.hashLog);
255 243 if (!val) {
256 244 goto cleanup;
257 245 }
258 246 PyDict_SetItemString(kwargs, "hash_log", val);
259 247 Py_DECREF(val);
260 248 }
261 249
262 250 val = PyDict_GetItemString(kwargs, "search_log");
263 251 if (!val) {
264 252 val = PyLong_FromUnsignedLong(params.searchLog);
265 253 if (!val) {
266 254 goto cleanup;
267 255 }
268 256 PyDict_SetItemString(kwargs, "search_log", val);
269 257 Py_DECREF(val);
270 258 }
271 259
272 260 val = PyDict_GetItemString(kwargs, "min_match");
273 261 if (!val) {
274 262 val = PyLong_FromUnsignedLong(params.searchLength);
275 263 if (!val) {
276 264 goto cleanup;
277 265 }
278 266 PyDict_SetItemString(kwargs, "min_match", val);
279 267 Py_DECREF(val);
280 268 }
281 269
282 270 val = PyDict_GetItemString(kwargs, "target_length");
283 271 if (!val) {
284 272 val = PyLong_FromUnsignedLong(params.targetLength);
285 273 if (!val) {
286 274 goto cleanup;
287 275 }
288 276 PyDict_SetItemString(kwargs, "target_length", val);
289 277 Py_DECREF(val);
290 278 }
291 279
292 280 val = PyDict_GetItemString(kwargs, "compression_strategy");
293 281 if (!val) {
294 282 val = PyLong_FromUnsignedLong(params.strategy);
295 283 if (!val) {
296 284 goto cleanup;
297 285 }
298 286 PyDict_SetItemString(kwargs, "compression_strategy", val);
299 287 Py_DECREF(val);
300 288 }
301 289
302 val = PyDict_GetItemString(kwargs, "compress_literals");
303 if (!val) {
304 val = PyLong_FromLong(level >= 0 ? 1 : 0);
305 if (!val) {
306 goto cleanup;
307 }
308 PyDict_SetItemString(kwargs, "compress_literals", val);
309 Py_DECREF(val);
310 }
311
312 290 result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType);
313 291 if (!result) {
314 292 goto cleanup;
315 293 }
316 294
317 295 result->params = NULL;
318 296
319 297 val = PyTuple_New(0);
320 298 if (!val) {
321 299 Py_CLEAR(result);
322 300 goto cleanup;
323 301 }
324 302
325 303 res = ZstdCompressionParameters_init(result, val, kwargs);
326 304 Py_DECREF(val);
327 305
328 306 if (res) {
329 307 Py_CLEAR(result);
330 308 goto cleanup;
331 309 }
332 310
333 311 cleanup:
334 312 if (managedKwargs) {
335 313 Py_DECREF(kwargs);
336 314 }
337 315
338 316 return result;
339 317 }
340 318
341 319 PyDoc_STRVAR(ZstdCompressionParameters_estimated_compression_context_size__doc__,
342 320 "Estimate the size in bytes of a compression context for compression parameters\n"
343 321 );
344 322
345 323 PyObject* ZstdCompressionParameters_estimated_compression_context_size(ZstdCompressionParametersObject* self) {
346 324 return PyLong_FromSize_t(ZSTD_estimateCCtxSize_usingCCtxParams(self->params));
347 325 }
348 326
349 327 PyDoc_STRVAR(ZstdCompressionParameters__doc__,
350 328 "ZstdCompressionParameters: low-level control over zstd compression");
351 329
352 330 static void ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject* self) {
353 331 if (self->params) {
354 332 ZSTD_freeCCtxParams(self->params);
355 333 self->params = NULL;
356 334 }
357 335
358 336 PyObject_Del(self);
359 337 }
360 338
361 339 static PyMethodDef ZstdCompressionParameters_methods[] = {
362 340 {
363 341 "from_level",
364 342 (PyCFunction)CompressionParameters_from_level,
365 343 METH_VARARGS | METH_KEYWORDS | METH_STATIC,
366 344 ZstdCompressionParameters_from_level__doc__
367 345 },
368 346 {
369 347 "estimated_compression_context_size",
370 348 (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size,
371 349 METH_NOARGS,
372 350 ZstdCompressionParameters_estimated_compression_context_size__doc__
373 351 },
374 352 { NULL, NULL }
375 353 };
376 354
377 355 static PyMemberDef ZstdCompressionParameters_members[] = {
378 356 { "format", T_UINT,
379 357 offsetof(ZstdCompressionParametersObject, format), READONLY,
380 358 "compression format" },
381 359 { "compression_level", T_INT,
382 360 offsetof(ZstdCompressionParametersObject, compressionLevel), READONLY,
383 361 "compression level" },
384 362 { "window_log", T_UINT,
385 363 offsetof(ZstdCompressionParametersObject, windowLog), READONLY,
386 364 "window log" },
387 365 { "hash_log", T_UINT,
388 366 offsetof(ZstdCompressionParametersObject, hashLog), READONLY,
389 367 "hash log" },
390 368 { "chain_log", T_UINT,
391 369 offsetof(ZstdCompressionParametersObject, chainLog), READONLY,
392 370 "chain log" },
393 371 { "search_log", T_UINT,
394 372 offsetof(ZstdCompressionParametersObject, searchLog), READONLY,
395 373 "search log" },
396 374 { "min_match", T_UINT,
397 375 offsetof(ZstdCompressionParametersObject, minMatch), READONLY,
398 376 "search length" },
399 377 { "target_length", T_UINT,
400 378 offsetof(ZstdCompressionParametersObject, targetLength), READONLY,
401 379 "target length" },
402 380 { "compression_strategy", T_UINT,
403 381 offsetof(ZstdCompressionParametersObject, compressionStrategy), READONLY,
404 382 "compression strategy" },
405 383 { "write_content_size", T_UINT,
406 384 offsetof(ZstdCompressionParametersObject, contentSizeFlag), READONLY,
407 385 "whether to write content size in frames" },
408 386 { "write_checksum", T_UINT,
409 387 offsetof(ZstdCompressionParametersObject, checksumFlag), READONLY,
410 388 "whether to write checksum in frames" },
411 389 { "write_dict_id", T_UINT,
412 390 offsetof(ZstdCompressionParametersObject, dictIDFlag), READONLY,
413 391 "whether to write dictionary ID in frames" },
414 392 { "threads", T_UINT,
415 393 offsetof(ZstdCompressionParametersObject, threads), READONLY,
416 394 "number of threads to use" },
417 395 { "job_size", T_UINT,
418 396 offsetof(ZstdCompressionParametersObject, jobSize), READONLY,
419 397 "size of compression job when using multiple threads" },
420 398 { "overlap_size_log", T_UINT,
421 399 offsetof(ZstdCompressionParametersObject, overlapSizeLog), READONLY,
422 400 "Size of previous input reloaded at the beginning of each job" },
423 { "compress_literals", T_UINT,
424 offsetof(ZstdCompressionParametersObject, compressLiterals), READONLY,
425 "whether Huffman compression of literals is in use" },
426 401 { "force_max_window", T_UINT,
427 402 offsetof(ZstdCompressionParametersObject, forceMaxWindow), READONLY,
428 403 "force back references to remain smaller than window size" },
429 404 { "enable_ldm", T_UINT,
430 405 offsetof(ZstdCompressionParametersObject, enableLongDistanceMatching), READONLY,
431 406 "whether to enable long distance matching" },
432 407 { "ldm_hash_log", T_UINT,
433 408 offsetof(ZstdCompressionParametersObject, ldmHashLog), READONLY,
434 409 "Size of the table for long distance matching, as a power of 2" },
435 410 { "ldm_min_match", T_UINT,
436 411 offsetof(ZstdCompressionParametersObject, ldmMinMatch), READONLY,
437 412 "minimum size of searched matches for long distance matcher" },
438 413 { "ldm_bucket_size_log", T_UINT,
439 414 offsetof(ZstdCompressionParametersObject, ldmBucketSizeLog), READONLY,
440 415 "log size of each bucket in the LDM hash table for collision resolution" },
441 416 { "ldm_hash_every_log", T_UINT,
442 417 offsetof(ZstdCompressionParametersObject, ldmHashEveryLog), READONLY,
443 418 "frequency of inserting/looking up entries in the LDM hash table" },
444 419 { NULL }
445 420 };
446 421
447 422 PyTypeObject ZstdCompressionParametersType = {
448 423 PyVarObject_HEAD_INIT(NULL, 0)
449 424 "ZstdCompressionParameters", /* tp_name */
450 425 sizeof(ZstdCompressionParametersObject), /* tp_basicsize */
451 426 0, /* tp_itemsize */
452 427 (destructor)ZstdCompressionParameters_dealloc, /* tp_dealloc */
453 428 0, /* tp_print */
454 429 0, /* tp_getattr */
455 430 0, /* tp_setattr */
456 431 0, /* tp_compare */
457 432 0, /* tp_repr */
458 433 0, /* tp_as_number */
459 434 0, /* tp_as_sequence */
460 435 0, /* tp_as_mapping */
461 436 0, /* tp_hash */
462 437 0, /* tp_call */
463 438 0, /* tp_str */
464 439 0, /* tp_getattro */
465 440 0, /* tp_setattro */
466 441 0, /* tp_as_buffer */
467 442 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
468 443 ZstdCompressionParameters__doc__, /* tp_doc */
469 444 0, /* tp_traverse */
470 445 0, /* tp_clear */
471 446 0, /* tp_richcompare */
472 447 0, /* tp_weaklistoffset */
473 448 0, /* tp_iter */
474 449 0, /* tp_iternext */
475 450 ZstdCompressionParameters_methods, /* tp_methods */
476 451 ZstdCompressionParameters_members, /* tp_members */
477 452 0, /* tp_getset */
478 453 0, /* tp_base */
479 454 0, /* tp_dict */
480 455 0, /* tp_descr_get */
481 456 0, /* tp_descr_set */
482 457 0, /* tp_dictoffset */
483 458 (initproc)ZstdCompressionParameters_init, /* tp_init */
484 459 0, /* tp_alloc */
485 460 PyType_GenericNew, /* tp_new */
486 461 };
487 462
488 463 void compressionparams_module_init(PyObject* mod) {
489 464 Py_TYPE(&ZstdCompressionParametersType) = &PyType_Type;
490 465 if (PyType_Ready(&ZstdCompressionParametersType) < 0) {
491 466 return;
492 467 }
493 468
494 469 Py_INCREF(&ZstdCompressionParametersType);
495 470 PyModule_AddObject(mod, "ZstdCompressionParameters",
496 471 (PyObject*)&ZstdCompressionParametersType);
497 472
498 473 /* TODO remove deprecated alias. */
499 474 Py_INCREF(&ZstdCompressionParametersType);
500 475 PyModule_AddObject(mod, "CompressionParameters",
501 476 (PyObject*)&ZstdCompressionParametersType);
502 477 }
@@ -1,405 +1,386 b''
1 1 /**
2 2 * Copyright (c) 2017-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 static void set_unsupported_operation(void) {
14 14 PyObject* iomod;
15 15 PyObject* exc;
16 16
17 17 iomod = PyImport_ImportModule("io");
18 18 if (NULL == iomod) {
19 19 return;
20 20 }
21 21
22 22 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
23 23 if (NULL == exc) {
24 24 Py_DECREF(iomod);
25 25 return;
26 26 }
27 27
28 28 PyErr_SetNone(exc);
29 29 Py_DECREF(exc);
30 30 Py_DECREF(iomod);
31 31 }
32 32
33 33 static void reader_dealloc(ZstdCompressionReader* self) {
34 34 Py_XDECREF(self->compressor);
35 35 Py_XDECREF(self->reader);
36 36
37 37 if (self->buffer.buf) {
38 38 PyBuffer_Release(&self->buffer);
39 39 memset(&self->buffer, 0, sizeof(self->buffer));
40 40 }
41 41
42 42 PyObject_Del(self);
43 43 }
44 44
45 45 static ZstdCompressionReader* reader_enter(ZstdCompressionReader* self) {
46 size_t zresult;
47
48 46 if (self->entered) {
49 47 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
50 48 return NULL;
51 49 }
52 50
53 zresult = ZSTD_CCtx_setPledgedSrcSize(self->compressor->cctx, self->sourceSize);
54 if (ZSTD_isError(zresult)) {
55 PyErr_Format(ZstdError, "error setting source size: %s",
56 ZSTD_getErrorName(zresult));
57 return NULL;
58 }
59
60 51 self->entered = 1;
61 52
62 53 Py_INCREF(self);
63 54 return self;
64 55 }
65 56
66 57 static PyObject* reader_exit(ZstdCompressionReader* self, PyObject* args) {
67 58 PyObject* exc_type;
68 59 PyObject* exc_value;
69 60 PyObject* exc_tb;
70 61
71 62 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
72 63 return NULL;
73 64 }
74 65
75 66 self->entered = 0;
76 67 self->closed = 1;
77 68
78 69 /* Release resources associated with source. */
79 70 Py_CLEAR(self->reader);
80 71 if (self->buffer.buf) {
81 72 PyBuffer_Release(&self->buffer);
82 73 memset(&self->buffer, 0, sizeof(self->buffer));
83 74 }
84 75
85 76 Py_CLEAR(self->compressor);
86 77
87 78 Py_RETURN_FALSE;
88 79 }
89 80
90 81 static PyObject* reader_readable(ZstdCompressionReader* self) {
91 82 Py_RETURN_TRUE;
92 83 }
93 84
94 85 static PyObject* reader_writable(ZstdCompressionReader* self) {
95 86 Py_RETURN_FALSE;
96 87 }
97 88
98 89 static PyObject* reader_seekable(ZstdCompressionReader* self) {
99 90 Py_RETURN_FALSE;
100 91 }
101 92
102 93 static PyObject* reader_readline(PyObject* self, PyObject* args) {
103 94 set_unsupported_operation();
104 95 return NULL;
105 96 }
106 97
107 98 static PyObject* reader_readlines(PyObject* self, PyObject* args) {
108 99 set_unsupported_operation();
109 100 return NULL;
110 101 }
111 102
112 103 static PyObject* reader_write(PyObject* self, PyObject* args) {
113 104 PyErr_SetString(PyExc_OSError, "stream is not writable");
114 105 return NULL;
115 106 }
116 107
117 108 static PyObject* reader_writelines(PyObject* self, PyObject* args) {
118 109 PyErr_SetString(PyExc_OSError, "stream is not writable");
119 110 return NULL;
120 111 }
121 112
122 113 static PyObject* reader_isatty(PyObject* self) {
123 114 Py_RETURN_FALSE;
124 115 }
125 116
126 117 static PyObject* reader_flush(PyObject* self) {
127 118 Py_RETURN_NONE;
128 119 }
129 120
130 121 static PyObject* reader_close(ZstdCompressionReader* self) {
131 122 self->closed = 1;
132 123 Py_RETURN_NONE;
133 124 }
134 125
135 static PyObject* reader_closed(ZstdCompressionReader* self) {
136 if (self->closed) {
137 Py_RETURN_TRUE;
138 }
139 else {
140 Py_RETURN_FALSE;
141 }
142 }
143
144 126 static PyObject* reader_tell(ZstdCompressionReader* self) {
145 127 /* TODO should this raise OSError since stream isn't seekable? */
146 128 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
147 129 }
148 130
149 131 static PyObject* reader_read(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
150 132 static char* kwlist[] = {
151 133 "size",
152 134 NULL
153 135 };
154 136
155 137 Py_ssize_t size = -1;
156 138 PyObject* result = NULL;
157 139 char* resultBuffer;
158 140 Py_ssize_t resultSize;
159 141 size_t zresult;
160 142 size_t oldPos;
161 143
162 if (!self->entered) {
163 PyErr_SetString(ZstdError, "read() must be called from an active context manager");
164 return NULL;
165 }
166
167 144 if (self->closed) {
168 145 PyErr_SetString(PyExc_ValueError, "stream is closed");
169 146 return NULL;
170 147 }
171 148
172 149 if (self->finishedOutput) {
173 150 return PyBytes_FromStringAndSize("", 0);
174 151 }
175 152
176 153 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "n", kwlist, &size)) {
177 154 return NULL;
178 155 }
179 156
180 157 if (size < 1) {
181 158 PyErr_SetString(PyExc_ValueError, "cannot read negative or size 0 amounts");
182 159 return NULL;
183 160 }
184 161
185 162 result = PyBytes_FromStringAndSize(NULL, size);
186 163 if (NULL == result) {
187 164 return NULL;
188 165 }
189 166
190 167 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
191 168
192 169 self->output.dst = resultBuffer;
193 170 self->output.size = resultSize;
194 171 self->output.pos = 0;
195 172
196 173 readinput:
197 174
198 175 /* If we have data left over, consume it. */
199 176 if (self->input.pos < self->input.size) {
200 177 oldPos = self->output.pos;
201 178
202 179 Py_BEGIN_ALLOW_THREADS
203 180 zresult = ZSTD_compress_generic(self->compressor->cctx,
204 181 &self->output, &self->input, ZSTD_e_continue);
205 182
206 183 Py_END_ALLOW_THREADS
207 184
208 185 self->bytesCompressed += self->output.pos - oldPos;
209 186
210 187 /* Input exhausted. Clear out state tracking. */
211 188 if (self->input.pos == self->input.size) {
212 189 memset(&self->input, 0, sizeof(self->input));
213 190 Py_CLEAR(self->readResult);
214 191
215 192 if (self->buffer.buf) {
216 193 self->finishedInput = 1;
217 194 }
218 195 }
219 196
220 197 if (ZSTD_isError(zresult)) {
221 198 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
222 199 return NULL;
223 200 }
224 201
225 202 if (self->output.pos) {
226 203 /* If no more room in output, emit it. */
227 204 if (self->output.pos == self->output.size) {
228 205 memset(&self->output, 0, sizeof(self->output));
229 206 return result;
230 207 }
231 208
232 209 /*
233 210 * There is room in the output. We fall through to below, which will either
234 211 * get more input for us or will attempt to end the stream.
235 212 */
236 213 }
237 214
238 215 /* Fall through to gather more input. */
239 216 }
240 217
241 218 if (!self->finishedInput) {
242 219 if (self->reader) {
243 220 Py_buffer buffer;
244 221
245 222 assert(self->readResult == NULL);
246 223 self->readResult = PyObject_CallMethod(self->reader, "read",
247 224 "k", self->readSize);
248 225 if (self->readResult == NULL) {
249 226 return NULL;
250 227 }
251 228
252 229 memset(&buffer, 0, sizeof(buffer));
253 230
254 231 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
255 232 return NULL;
256 233 }
257 234
258 235 /* EOF */
259 236 if (0 == buffer.len) {
260 237 self->finishedInput = 1;
261 238 Py_CLEAR(self->readResult);
262 239 }
263 240 else {
264 241 self->input.src = buffer.buf;
265 242 self->input.size = buffer.len;
266 243 self->input.pos = 0;
267 244 }
268 245
269 246 PyBuffer_Release(&buffer);
270 247 }
271 248 else {
272 249 assert(self->buffer.buf);
273 250
274 251 self->input.src = self->buffer.buf;
275 252 self->input.size = self->buffer.len;
276 253 self->input.pos = 0;
277 254 }
278 255 }
279 256
280 257 if (self->input.size) {
281 258 goto readinput;
282 259 }
283 260
284 261 /* Else EOF */
285 262 oldPos = self->output.pos;
286 263
287 264 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
288 265 &self->input, ZSTD_e_end);
289 266
290 267 self->bytesCompressed += self->output.pos - oldPos;
291 268
292 269 if (ZSTD_isError(zresult)) {
293 270 PyErr_Format(ZstdError, "error ending compression stream: %s",
294 271 ZSTD_getErrorName(zresult));
295 272 return NULL;
296 273 }
297 274
298 275 assert(self->output.pos);
299 276
300 277 if (0 == zresult) {
301 278 self->finishedOutput = 1;
302 279 }
303 280
304 281 if (safe_pybytes_resize(&result, self->output.pos)) {
305 282 Py_XDECREF(result);
306 283 return NULL;
307 284 }
308 285
309 286 memset(&self->output, 0, sizeof(self->output));
310 287
311 288 return result;
312 289 }
313 290
314 291 static PyObject* reader_readall(PyObject* self) {
315 292 PyErr_SetNone(PyExc_NotImplementedError);
316 293 return NULL;
317 294 }
318 295
319 296 static PyObject* reader_iter(PyObject* self) {
320 297 set_unsupported_operation();
321 298 return NULL;
322 299 }
323 300
324 301 static PyObject* reader_iternext(PyObject* self) {
325 302 set_unsupported_operation();
326 303 return NULL;
327 304 }
328 305
329 306 static PyMethodDef reader_methods[] = {
330 307 { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
331 308 PyDoc_STR("Enter a compression context") },
332 309 { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
333 310 PyDoc_STR("Exit a compression context") },
334 311 { "close", (PyCFunction)reader_close, METH_NOARGS,
335 312 PyDoc_STR("Close the stream so it cannot perform any more operations") },
336 { "closed", (PyCFunction)reader_closed, METH_NOARGS,
337 PyDoc_STR("Whether stream is closed") },
338 313 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
339 314 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
340 315 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
341 316 PyDoc_STR("Returns True") },
342 317 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data") },
343 318 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
344 319 { "readline", (PyCFunction)reader_readline, METH_VARARGS, PyDoc_STR("Not implemented") },
345 320 { "readlines", (PyCFunction)reader_readlines, METH_VARARGS, PyDoc_STR("Not implemented") },
346 321 { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
347 322 PyDoc_STR("Returns False") },
348 323 { "tell", (PyCFunction)reader_tell, METH_NOARGS,
349 324 PyDoc_STR("Returns current number of bytes compressed") },
350 325 { "writable", (PyCFunction)reader_writable, METH_NOARGS,
351 326 PyDoc_STR("Returns False") },
352 327 { "write", reader_write, METH_VARARGS, PyDoc_STR("Raises OSError") },
353 328 { "writelines", reader_writelines, METH_VARARGS, PyDoc_STR("Not implemented") },
354 329 { NULL, NULL }
355 330 };
356 331
332 static PyMemberDef reader_members[] = {
333 { "closed", T_BOOL, offsetof(ZstdCompressionReader, closed),
334 READONLY, "whether stream is closed" },
335 { NULL }
336 };
337
357 338 PyTypeObject ZstdCompressionReaderType = {
358 339 PyVarObject_HEAD_INIT(NULL, 0)
359 340 "zstd.ZstdCompressionReader", /* tp_name */
360 341 sizeof(ZstdCompressionReader), /* tp_basicsize */
361 342 0, /* tp_itemsize */
362 343 (destructor)reader_dealloc, /* tp_dealloc */
363 344 0, /* tp_print */
364 345 0, /* tp_getattr */
365 346 0, /* tp_setattr */
366 347 0, /* tp_compare */
367 348 0, /* tp_repr */
368 349 0, /* tp_as_number */
369 350 0, /* tp_as_sequence */
370 351 0, /* tp_as_mapping */
371 352 0, /* tp_hash */
372 353 0, /* tp_call */
373 354 0, /* tp_str */
374 355 0, /* tp_getattro */
375 356 0, /* tp_setattro */
376 357 0, /* tp_as_buffer */
377 358 Py_TPFLAGS_DEFAULT, /* tp_flags */
378 359 0, /* tp_doc */
379 360 0, /* tp_traverse */
380 361 0, /* tp_clear */
381 362 0, /* tp_richcompare */
382 363 0, /* tp_weaklistoffset */
383 364 reader_iter, /* tp_iter */
384 365 reader_iternext, /* tp_iternext */
385 366 reader_methods, /* tp_methods */
386 0, /* tp_members */
367 reader_members, /* tp_members */
387 368 0, /* tp_getset */
388 369 0, /* tp_base */
389 370 0, /* tp_dict */
390 371 0, /* tp_descr_get */
391 372 0, /* tp_descr_set */
392 373 0, /* tp_dictoffset */
393 374 0, /* tp_init */
394 375 0, /* tp_alloc */
395 376 PyType_GenericNew, /* tp_new */
396 377 };
397 378
398 379 void compressionreader_module_init(PyObject* mod) {
399 380 /* TODO make reader a sub-class of io.RawIOBase */
400 381
401 382 Py_TYPE(&ZstdCompressionReaderType) = &PyType_Type;
402 383 if (PyType_Ready(&ZstdCompressionReaderType) < 0) {
403 384 return;
404 385 }
405 386 }
@@ -1,315 +1,316 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 PyDoc_STRVAR(ZstdCompresssionWriter__doc__,
14 14 """A context manager used for writing compressed output to a writer.\n"
15 15 );
16 16
17 17 static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* self) {
18 18 Py_XDECREF(self->compressor);
19 19 Py_XDECREF(self->writer);
20 20
21 21 PyObject_Del(self);
22 22 }
23 23
24 24 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
25 25 size_t zresult;
26 26
27 27 if (self->entered) {
28 28 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
29 29 return NULL;
30 30 }
31 31
32 32 zresult = ZSTD_CCtx_setPledgedSrcSize(self->compressor->cctx, self->sourceSize);
33 33 if (ZSTD_isError(zresult)) {
34 34 PyErr_Format(ZstdError, "error setting source size: %s",
35 35 ZSTD_getErrorName(zresult));
36 36 return NULL;
37 37 }
38 38
39 39 self->entered = 1;
40 40
41 41 Py_INCREF(self);
42 42 return (PyObject*)self;
43 43 }
44 44
45 45 static PyObject* ZstdCompressionWriter_exit(ZstdCompressionWriter* self, PyObject* args) {
46 46 PyObject* exc_type;
47 47 PyObject* exc_value;
48 48 PyObject* exc_tb;
49 49 size_t zresult;
50 50
51 51 ZSTD_outBuffer output;
52 52 PyObject* res;
53 53
54 54 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
55 55 return NULL;
56 56 }
57 57
58 58 self->entered = 0;
59 59
60 60 if (exc_type == Py_None && exc_value == Py_None && exc_tb == Py_None) {
61 61 ZSTD_inBuffer inBuffer;
62 62
63 63 inBuffer.src = NULL;
64 64 inBuffer.size = 0;
65 65 inBuffer.pos = 0;
66 66
67 67 output.dst = PyMem_Malloc(self->outSize);
68 68 if (!output.dst) {
69 69 return PyErr_NoMemory();
70 70 }
71 71 output.size = self->outSize;
72 72 output.pos = 0;
73 73
74 74 while (1) {
75 75 zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &inBuffer, ZSTD_e_end);
76 76 if (ZSTD_isError(zresult)) {
77 77 PyErr_Format(ZstdError, "error ending compression stream: %s",
78 78 ZSTD_getErrorName(zresult));
79 79 PyMem_Free(output.dst);
80 80 return NULL;
81 81 }
82 82
83 83 if (output.pos) {
84 84 #if PY_MAJOR_VERSION >= 3
85 85 res = PyObject_CallMethod(self->writer, "write", "y#",
86 86 #else
87 87 res = PyObject_CallMethod(self->writer, "write", "s#",
88 88 #endif
89 89 output.dst, output.pos);
90 90 Py_XDECREF(res);
91 91 }
92 92
93 93 if (!zresult) {
94 94 break;
95 95 }
96 96
97 97 output.pos = 0;
98 98 }
99 99
100 100 PyMem_Free(output.dst);
101 101 }
102 102
103 103 Py_RETURN_FALSE;
104 104 }
105 105
106 106 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
107 107 return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->compressor->cctx));
108 108 }
109 109
110 110 static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
111 111 static char* kwlist[] = {
112 112 "data",
113 113 NULL
114 114 };
115 115
116 116 PyObject* result = NULL;
117 117 Py_buffer source;
118 118 size_t zresult;
119 119 ZSTD_inBuffer input;
120 120 ZSTD_outBuffer output;
121 121 PyObject* res;
122 122 Py_ssize_t totalWrite = 0;
123 123
124 124 #if PY_MAJOR_VERSION >= 3
125 125 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write",
126 126 #else
127 127 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write",
128 128 #endif
129 129 kwlist, &source)) {
130 130 return NULL;
131 131 }
132 132
133 133 if (!self->entered) {
134 134 PyErr_SetString(ZstdError, "compress must be called from an active context manager");
135 135 goto finally;
136 136 }
137 137
138 138 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
139 139 PyErr_SetString(PyExc_ValueError,
140 140 "data buffer should be contiguous and have at most one dimension");
141 141 goto finally;
142 142 }
143 143
144 144 output.dst = PyMem_Malloc(self->outSize);
145 145 if (!output.dst) {
146 146 PyErr_NoMemory();
147 147 goto finally;
148 148 }
149 149 output.size = self->outSize;
150 150 output.pos = 0;
151 151
152 152 input.src = source.buf;
153 153 input.size = source.len;
154 154 input.pos = 0;
155 155
156 156 while ((ssize_t)input.pos < source.len) {
157 157 Py_BEGIN_ALLOW_THREADS
158 158 zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &input, ZSTD_e_continue);
159 159 Py_END_ALLOW_THREADS
160 160
161 161 if (ZSTD_isError(zresult)) {
162 162 PyMem_Free(output.dst);
163 163 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
164 164 goto finally;
165 165 }
166 166
167 167 /* Copy data from output buffer to writer. */
168 168 if (output.pos) {
169 169 #if PY_MAJOR_VERSION >= 3
170 170 res = PyObject_CallMethod(self->writer, "write", "y#",
171 171 #else
172 172 res = PyObject_CallMethod(self->writer, "write", "s#",
173 173 #endif
174 174 output.dst, output.pos);
175 175 Py_XDECREF(res);
176 176 totalWrite += output.pos;
177 177 self->bytesCompressed += output.pos;
178 178 }
179 179 output.pos = 0;
180 180 }
181 181
182 182 PyMem_Free(output.dst);
183 183
184 184 result = PyLong_FromSsize_t(totalWrite);
185 185
186 186 finally:
187 187 PyBuffer_Release(&source);
188 188 return result;
189 189 }
190 190
191 191 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
192 192 size_t zresult;
193 193 ZSTD_outBuffer output;
194 194 ZSTD_inBuffer input;
195 195 PyObject* res;
196 196 Py_ssize_t totalWrite = 0;
197 197
198 198 if (!self->entered) {
199 199 PyErr_SetString(ZstdError, "flush must be called from an active context manager");
200 200 return NULL;
201 201 }
202 202
203 203 input.src = NULL;
204 204 input.size = 0;
205 205 input.pos = 0;
206 206
207 207 output.dst = PyMem_Malloc(self->outSize);
208 208 if (!output.dst) {
209 209 return PyErr_NoMemory();
210 210 }
211 211 output.size = self->outSize;
212 212 output.pos = 0;
213 213
214 214 while (1) {
215 215 Py_BEGIN_ALLOW_THREADS
216 216 zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &input, ZSTD_e_flush);
217 217 Py_END_ALLOW_THREADS
218 218
219 219 if (ZSTD_isError(zresult)) {
220 220 PyMem_Free(output.dst);
221 221 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
222 222 return NULL;
223 223 }
224 224
225 if (!output.pos) {
226 break;
227 }
228
229 225 /* Copy data from output buffer to writer. */
230 226 if (output.pos) {
231 227 #if PY_MAJOR_VERSION >= 3
232 228 res = PyObject_CallMethod(self->writer, "write", "y#",
233 229 #else
234 230 res = PyObject_CallMethod(self->writer, "write", "s#",
235 231 #endif
236 232 output.dst, output.pos);
237 233 Py_XDECREF(res);
238 234 totalWrite += output.pos;
239 235 self->bytesCompressed += output.pos;
240 236 }
237
241 238 output.pos = 0;
239
240 if (!zresult) {
241 break;
242 }
242 243 }
243 244
244 245 PyMem_Free(output.dst);
245 246
246 247 return PyLong_FromSsize_t(totalWrite);
247 248 }
248 249
249 250 static PyObject* ZstdCompressionWriter_tell(ZstdCompressionWriter* self) {
250 251 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
251 252 }
252 253
253 254 static PyMethodDef ZstdCompressionWriter_methods[] = {
254 255 { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
255 256 PyDoc_STR("Enter a compression context.") },
256 257 { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS,
257 258 PyDoc_STR("Exit a compression context.") },
258 259 { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
259 260 PyDoc_STR("Obtain the memory size of the underlying compressor") },
260 261 { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
261 262 PyDoc_STR("Compress data") },
262 263 { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_NOARGS,
263 264 PyDoc_STR("Flush data and finish a zstd frame") },
264 265 { "tell", (PyCFunction)ZstdCompressionWriter_tell, METH_NOARGS,
265 266 PyDoc_STR("Returns current number of bytes compressed") },
266 267 { NULL, NULL }
267 268 };
268 269
269 270 PyTypeObject ZstdCompressionWriterType = {
270 271 PyVarObject_HEAD_INIT(NULL, 0)
271 272 "zstd.ZstdCompressionWriter", /* tp_name */
272 273 sizeof(ZstdCompressionWriter), /* tp_basicsize */
273 274 0, /* tp_itemsize */
274 275 (destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */
275 276 0, /* tp_print */
276 277 0, /* tp_getattr */
277 278 0, /* tp_setattr */
278 279 0, /* tp_compare */
279 280 0, /* tp_repr */
280 281 0, /* tp_as_number */
281 282 0, /* tp_as_sequence */
282 283 0, /* tp_as_mapping */
283 284 0, /* tp_hash */
284 285 0, /* tp_call */
285 286 0, /* tp_str */
286 287 0, /* tp_getattro */
287 288 0, /* tp_setattro */
288 289 0, /* tp_as_buffer */
289 290 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
290 291 ZstdCompresssionWriter__doc__, /* tp_doc */
291 292 0, /* tp_traverse */
292 293 0, /* tp_clear */
293 294 0, /* tp_richcompare */
294 295 0, /* tp_weaklistoffset */
295 296 0, /* tp_iter */
296 297 0, /* tp_iternext */
297 298 ZstdCompressionWriter_methods, /* tp_methods */
298 299 0, /* tp_members */
299 300 0, /* tp_getset */
300 301 0, /* tp_base */
301 302 0, /* tp_dict */
302 303 0, /* tp_descr_get */
303 304 0, /* tp_descr_set */
304 305 0, /* tp_dictoffset */
305 306 0, /* tp_init */
306 307 0, /* tp_alloc */
307 308 PyType_GenericNew, /* tp_new */
308 309 };
309 310
310 311 void compressionwriter_module_init(PyObject* mod) {
311 312 Py_TYPE(&ZstdCompressionWriterType) = &PyType_Type;
312 313 if (PyType_Ready(&ZstdCompressionWriterType) < 0) {
313 314 return;
314 315 }
315 316 }
@@ -1,273 +1,256 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 PyDoc_STRVAR(ZstdCompressionObj__doc__,
14 14 "Perform compression using a standard library compatible API.\n"
15 15 );
16 16
17 17 static void ZstdCompressionObj_dealloc(ZstdCompressionObj* self) {
18 18 PyMem_Free(self->output.dst);
19 19 self->output.dst = NULL;
20 20
21 21 Py_XDECREF(self->compressor);
22 22
23 23 PyObject_Del(self);
24 24 }
25 25
26 26 static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
27 27 static char* kwlist[] = {
28 28 "data",
29 29 NULL
30 30 };
31 31
32 32 Py_buffer source;
33 33 ZSTD_inBuffer input;
34 34 size_t zresult;
35 35 PyObject* result = NULL;
36 36 Py_ssize_t resultSize = 0;
37 37
38 38 if (self->finished) {
39 39 PyErr_SetString(ZstdError, "cannot call compress() after compressor finished");
40 40 return NULL;
41 41 }
42 42
43 43 #if PY_MAJOR_VERSION >= 3
44 44 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
45 45 #else
46 46 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
47 47 #endif
48 48 kwlist, &source)) {
49 49 return NULL;
50 50 }
51 51
52 52 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
53 53 PyErr_SetString(PyExc_ValueError,
54 54 "data buffer should be contiguous and have at most one dimension");
55 55 goto finally;
56 56 }
57 57
58 58 input.src = source.buf;
59 59 input.size = source.len;
60 60 input.pos = 0;
61 61
62 62 while ((ssize_t)input.pos < source.len) {
63 63 Py_BEGIN_ALLOW_THREADS
64 64 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
65 65 &input, ZSTD_e_continue);
66 66 Py_END_ALLOW_THREADS
67 67
68 68 if (ZSTD_isError(zresult)) {
69 69 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
70 70 Py_CLEAR(result);
71 71 goto finally;
72 72 }
73 73
74 74 if (self->output.pos) {
75 75 if (result) {
76 76 resultSize = PyBytes_GET_SIZE(result);
77 77
78 78 if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
79 79 Py_CLEAR(result);
80 80 goto finally;
81 81 }
82 82
83 83 memcpy(PyBytes_AS_STRING(result) + resultSize,
84 84 self->output.dst, self->output.pos);
85 85 }
86 86 else {
87 87 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
88 88 if (!result) {
89 89 goto finally;
90 90 }
91 91 }
92 92
93 93 self->output.pos = 0;
94 94 }
95 95 }
96 96
97 97 if (NULL == result) {
98 98 result = PyBytes_FromString("");
99 99 }
100 100
101 101 finally:
102 102 PyBuffer_Release(&source);
103 103
104 104 return result;
105 105 }
106 106
107 107 static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
108 108 static char* kwlist[] = {
109 109 "flush_mode",
110 110 NULL
111 111 };
112 112
113 113 int flushMode = compressorobj_flush_finish;
114 114 size_t zresult;
115 115 PyObject* result = NULL;
116 116 Py_ssize_t resultSize = 0;
117 117 ZSTD_inBuffer input;
118 ZSTD_EndDirective zFlushMode;
118 119
119 120 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist, &flushMode)) {
120 121 return NULL;
121 122 }
122 123
123 124 if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) {
124 125 PyErr_SetString(PyExc_ValueError, "flush mode not recognized");
125 126 return NULL;
126 127 }
127 128
128 129 if (self->finished) {
129 130 PyErr_SetString(ZstdError, "compressor object already finished");
130 131 return NULL;
131 132 }
132 133
134 switch (flushMode) {
135 case compressorobj_flush_block:
136 zFlushMode = ZSTD_e_flush;
137 break;
138
139 case compressorobj_flush_finish:
140 zFlushMode = ZSTD_e_end;
141 self->finished = 1;
142 break;
143
144 default:
145 PyErr_SetString(ZstdError, "unhandled flush mode");
146 return NULL;
147 }
148
133 149 assert(self->output.pos == 0);
134 150
135 151 input.src = NULL;
136 152 input.size = 0;
137 153 input.pos = 0;
138 154
139 if (flushMode == compressorobj_flush_block) {
140 /* The output buffer is of size ZSTD_CStreamOutSize(), which is
141 guaranteed to hold a full block. */
155 while (1) {
142 156 Py_BEGIN_ALLOW_THREADS
143 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
144 &input, ZSTD_e_flush);
157 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
158 &input, zFlushMode);
145 159 Py_END_ALLOW_THREADS
146 160
147 161 if (ZSTD_isError(zresult)) {
148 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
149 return NULL;
150 }
151
152 /* Output buffer is guaranteed to hold full block. */
153 assert(zresult == 0);
154
155 if (self->output.pos) {
156 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
157 if (!result) {
158 return NULL;
159 }
160 }
161
162 self->output.pos = 0;
163
164 if (result) {
165 return result;
166 }
167 else {
168 return PyBytes_FromString("");
169 }
170 }
171
172 assert(flushMode == compressorobj_flush_finish);
173 self->finished = 1;
174
175 while (1) {
176 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
177 &input, ZSTD_e_end);
178 if (ZSTD_isError(zresult)) {
179 162 PyErr_Format(ZstdError, "error ending compression stream: %s",
180 163 ZSTD_getErrorName(zresult));
181 164 return NULL;
182 165 }
183 166
184 167 if (self->output.pos) {
185 168 if (result) {
186 169 resultSize = PyBytes_GET_SIZE(result);
187 170
188 171 if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
189 172 Py_XDECREF(result);
190 173 return NULL;
191 174 }
192 175
193 176 memcpy(PyBytes_AS_STRING(result) + resultSize,
194 177 self->output.dst, self->output.pos);
195 178 }
196 179 else {
197 180 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
198 181 if (!result) {
199 182 return NULL;
200 183 }
201 184 }
202 185
203 186 self->output.pos = 0;
204 187 }
205 188
206 189 if (!zresult) {
207 190 break;
208 191 }
209 192 }
210 193
211 194 if (result) {
212 195 return result;
213 196 }
214 197 else {
215 198 return PyBytes_FromString("");
216 199 }
217 200 }
218 201
219 202 static PyMethodDef ZstdCompressionObj_methods[] = {
220 203 { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS | METH_KEYWORDS,
221 204 PyDoc_STR("compress data") },
222 205 { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS | METH_KEYWORDS,
223 206 PyDoc_STR("finish compression operation") },
224 207 { NULL, NULL }
225 208 };
226 209
227 210 PyTypeObject ZstdCompressionObjType = {
228 211 PyVarObject_HEAD_INIT(NULL, 0)
229 212 "zstd.ZstdCompressionObj", /* tp_name */
230 213 sizeof(ZstdCompressionObj), /* tp_basicsize */
231 214 0, /* tp_itemsize */
232 215 (destructor)ZstdCompressionObj_dealloc, /* tp_dealloc */
233 216 0, /* tp_print */
234 217 0, /* tp_getattr */
235 218 0, /* tp_setattr */
236 219 0, /* tp_compare */
237 220 0, /* tp_repr */
238 221 0, /* tp_as_number */
239 222 0, /* tp_as_sequence */
240 223 0, /* tp_as_mapping */
241 224 0, /* tp_hash */
242 225 0, /* tp_call */
243 226 0, /* tp_str */
244 227 0, /* tp_getattro */
245 228 0, /* tp_setattro */
246 229 0, /* tp_as_buffer */
247 230 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
248 231 ZstdCompressionObj__doc__, /* tp_doc */
249 232 0, /* tp_traverse */
250 233 0, /* tp_clear */
251 234 0, /* tp_richcompare */
252 235 0, /* tp_weaklistoffset */
253 236 0, /* tp_iter */
254 237 0, /* tp_iternext */
255 238 ZstdCompressionObj_methods, /* tp_methods */
256 239 0, /* tp_members */
257 240 0, /* tp_getset */
258 241 0, /* tp_base */
259 242 0, /* tp_dict */
260 243 0, /* tp_descr_get */
261 244 0, /* tp_descr_set */
262 245 0, /* tp_dictoffset */
263 246 0, /* tp_init */
264 247 0, /* tp_alloc */
265 248 PyType_GenericNew, /* tp_new */
266 249 };
267 250
268 251 void compressobj_module_init(PyObject* module) {
269 252 Py_TYPE(&ZstdCompressionObjType) = &PyType_Type;
270 253 if (PyType_Ready(&ZstdCompressionObjType) < 0) {
271 254 return;
272 255 }
273 256 }
@@ -1,1604 +1,1651 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10 #include "pool.h"
11 11
12 12 extern PyObject* ZstdError;
13 13
14 int ensure_cctx(ZstdCompressor* compressor) {
14 int setup_cctx(ZstdCompressor* compressor) {
15 15 size_t zresult;
16 16
17 17 assert(compressor);
18 18 assert(compressor->cctx);
19 19 assert(compressor->params);
20 20
21 ZSTD_CCtx_reset(compressor->cctx);
22
23 21 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, compressor->params);
24 22 if (ZSTD_isError(zresult)) {
25 23 PyErr_Format(ZstdError, "could not set compression parameters: %s",
26 24 ZSTD_getErrorName(zresult));
27 25 return 1;
28 26 }
29 27
30 28 if (compressor->dict) {
31 29 if (compressor->dict->cdict) {
32 30 zresult = ZSTD_CCtx_refCDict(compressor->cctx, compressor->dict->cdict);
33 31 }
34 32 else {
35 33 zresult = ZSTD_CCtx_loadDictionary_advanced(compressor->cctx,
36 34 compressor->dict->dictData, compressor->dict->dictSize,
37 35 ZSTD_dlm_byRef, compressor->dict->dictType);
38 36 }
39 37 if (ZSTD_isError(zresult)) {
40 38 PyErr_Format(ZstdError, "could not load compression dictionary: %s",
41 39 ZSTD_getErrorName(zresult));
42 40 return 1;
43 41 }
44 42 }
45 43
46 44 return 0;
47 45 }
48 46
49 47 static PyObject* frame_progression(ZSTD_CCtx* cctx) {
50 48 PyObject* result = NULL;
51 49 PyObject* value;
52 50 ZSTD_frameProgression progression;
53 51
54 52 result = PyTuple_New(3);
55 53 if (!result) {
56 54 return NULL;
57 55 }
58 56
59 57 progression = ZSTD_getFrameProgression(cctx);
60 58
61 59 value = PyLong_FromUnsignedLongLong(progression.ingested);
62 60 if (!value) {
63 61 Py_DECREF(result);
64 62 return NULL;
65 63 }
66 64
67 65 PyTuple_SET_ITEM(result, 0, value);
68 66
69 67 value = PyLong_FromUnsignedLongLong(progression.consumed);
70 68 if (!value) {
71 69 Py_DECREF(result);
72 70 return NULL;
73 71 }
74 72
75 73 PyTuple_SET_ITEM(result, 1, value);
76 74
77 75 value = PyLong_FromUnsignedLongLong(progression.produced);
78 76 if (!value) {
79 77 Py_DECREF(result);
80 78 return NULL;
81 79 }
82 80
83 81 PyTuple_SET_ITEM(result, 2, value);
84 82
85 83 return result;
86 84 }
87 85
88 86 PyDoc_STRVAR(ZstdCompressor__doc__,
89 87 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n"
90 88 "\n"
91 89 "Create an object used to perform Zstandard compression.\n"
92 90 "\n"
93 91 "An instance can compress data various ways. Instances can be used multiple\n"
94 92 "times. Each compression operation will use the compression parameters\n"
95 93 "defined at construction time.\n"
96 94 "\n"
97 95 "Compression can be configured via the following names arguments:\n"
98 96 "\n"
99 97 "level\n"
100 98 " Integer compression level.\n"
101 99 "dict_data\n"
102 100 " A ``ZstdCompressionDict`` to be used to compress with dictionary data.\n"
103 101 "compression_params\n"
104 102 " A ``CompressionParameters`` instance defining low-level compression"
105 103 " parameters. If defined, this will overwrite the ``level`` argument.\n"
106 104 "write_checksum\n"
107 105 " If True, a 4 byte content checksum will be written with the compressed\n"
108 106 " data, allowing the decompressor to perform content verification.\n"
109 107 "write_content_size\n"
110 108 " If True (the default), the decompressed content size will be included in\n"
111 109 " the header of the compressed data. This data will only be written if the\n"
112 110 " compressor knows the size of the input data.\n"
113 111 "write_dict_id\n"
114 112 " Determines whether the dictionary ID will be written into the compressed\n"
115 113 " data. Defaults to True. Only adds content to the compressed data if\n"
116 114 " a dictionary is being used.\n"
117 115 "threads\n"
118 116 " Number of threads to use to compress data concurrently. When set,\n"
119 117 " compression operations are performed on multiple threads. The default\n"
120 118 " value (0) disables multi-threaded compression. A value of ``-1`` means to\n"
121 119 " set the number of threads to the number of detected logical CPUs.\n"
122 120 );
123 121
124 122 static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
125 123 static char* kwlist[] = {
126 124 "level",
127 125 "dict_data",
128 126 "compression_params",
129 127 "write_checksum",
130 128 "write_content_size",
131 129 "write_dict_id",
132 130 "threads",
133 131 NULL
134 132 };
135 133
136 134 int level = 3;
137 135 ZstdCompressionDict* dict = NULL;
138 136 ZstdCompressionParametersObject* params = NULL;
139 137 PyObject* writeChecksum = NULL;
140 138 PyObject* writeContentSize = NULL;
141 139 PyObject* writeDictID = NULL;
142 140 int threads = 0;
143 141
144 142 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor",
145 143 kwlist, &level, &ZstdCompressionDictType, &dict,
146 144 &ZstdCompressionParametersType, &params,
147 145 &writeChecksum, &writeContentSize, &writeDictID, &threads)) {
148 146 return -1;
149 147 }
150 148
151 149 if (level > ZSTD_maxCLevel()) {
152 150 PyErr_Format(PyExc_ValueError, "level must be less than %d",
153 151 ZSTD_maxCLevel() + 1);
154 152 return -1;
155 153 }
156 154
157 155 if (threads < 0) {
158 156 threads = cpu_count();
159 157 }
160 158
161 159 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
162 160 overhead of each compression operation. */
163 161 self->cctx = ZSTD_createCCtx();
164 162 if (!self->cctx) {
165 163 PyErr_NoMemory();
166 164 return -1;
167 165 }
168 166
169 167 /* TODO stuff the original parameters away somewhere so we can reset later. This
170 168 will allow us to do things like automatically adjust cparams based on input
171 169 size (assuming zstd isn't doing that internally). */
172 170
173 171 self->params = ZSTD_createCCtxParams();
174 172 if (!self->params) {
175 173 PyErr_NoMemory();
176 174 return -1;
177 175 }
178 176
179 177 if (params && writeChecksum) {
180 178 PyErr_SetString(PyExc_ValueError,
181 179 "cannot define compression_params and write_checksum");
182 180 return -1;
183 181 }
184 182
185 183 if (params && writeContentSize) {
186 184 PyErr_SetString(PyExc_ValueError,
187 185 "cannot define compression_params and write_content_size");
188 186 return -1;
189 187 }
190 188
191 189 if (params && writeDictID) {
192 190 PyErr_SetString(PyExc_ValueError,
193 191 "cannot define compression_params and write_dict_id");
194 192 return -1;
195 193 }
196 194
197 195 if (params && threads) {
198 196 PyErr_SetString(PyExc_ValueError,
199 197 "cannot define compression_params and threads");
200 198 return -1;
201 199 }
202 200
203 201 if (params) {
204 202 if (set_parameters(self->params, params)) {
205 203 return -1;
206 204 }
207 205 }
208 206 else {
209 207 if (set_parameter(self->params, ZSTD_p_compressionLevel, level)) {
210 208 return -1;
211 209 }
212 210
213 211 if (set_parameter(self->params, ZSTD_p_contentSizeFlag,
214 212 writeContentSize ? PyObject_IsTrue(writeContentSize) : 1)) {
215 213 return -1;
216 214 }
217 215
218 216 if (set_parameter(self->params, ZSTD_p_checksumFlag,
219 217 writeChecksum ? PyObject_IsTrue(writeChecksum) : 0)) {
220 218 return -1;
221 219 }
222 220
223 221 if (set_parameter(self->params, ZSTD_p_dictIDFlag,
224 222 writeDictID ? PyObject_IsTrue(writeDictID) : 1)) {
225 223 return -1;
226 224 }
227 225
228 226 if (threads) {
229 227 if (set_parameter(self->params, ZSTD_p_nbWorkers, threads)) {
230 228 return -1;
231 229 }
232 230 }
233 231 }
234 232
235 233 if (dict) {
236 234 self->dict = dict;
237 235 Py_INCREF(dict);
238 236 }
239 237
240 if (ensure_cctx(self)) {
241 return -1;
242 }
238 if (setup_cctx(self)) {
239 return -1;
240 }
243 241
244 242 return 0;
245 243 }
246 244
247 245 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
248 246 if (self->cctx) {
249 247 ZSTD_freeCCtx(self->cctx);
250 248 self->cctx = NULL;
251 249 }
252 250
253 251 if (self->params) {
254 252 ZSTD_freeCCtxParams(self->params);
255 253 self->params = NULL;
256 254 }
257 255
258 256 Py_XDECREF(self->dict);
259 257 PyObject_Del(self);
260 258 }
261 259
262 260 PyDoc_STRVAR(ZstdCompressor_memory_size__doc__,
263 261 "memory_size()\n"
264 262 "\n"
265 263 "Obtain the memory usage of this compressor, in bytes.\n"
266 264 );
267 265
268 266 static PyObject* ZstdCompressor_memory_size(ZstdCompressor* self) {
269 267 if (self->cctx) {
270 268 return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->cctx));
271 269 }
272 270 else {
273 271 PyErr_SetString(ZstdError, "no compressor context found; this should never happen");
274 272 return NULL;
275 273 }
276 274 }
277 275
278 276 PyDoc_STRVAR(ZstdCompressor_frame_progression__doc__,
279 277 "frame_progression()\n"
280 278 "\n"
281 279 "Return information on how much work the compressor has done.\n"
282 280 "\n"
283 281 "Returns a 3-tuple of (ingested, consumed, produced).\n"
284 282 );
285 283
286 284 static PyObject* ZstdCompressor_frame_progression(ZstdCompressor* self) {
287 285 return frame_progression(self->cctx);
288 286 }
289 287
290 288 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__,
291 289 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n"
292 290 "compress data between streams\n"
293 291 "\n"
294 292 "Data will be read from ``ifh``, compressed, and written to ``ofh``.\n"
295 293 "``ifh`` must have a ``read(size)`` method. ``ofh`` must have a ``write(data)``\n"
296 294 "method.\n"
297 295 "\n"
298 296 "An optional ``size`` argument specifies the size of the source stream.\n"
299 297 "If defined, compression parameters will be tuned based on the size.\n"
300 298 "\n"
301 299 "Optional arguments ``read_size`` and ``write_size`` define the chunk sizes\n"
302 300 "of ``read()`` and ``write()`` operations, respectively. By default, they use\n"
303 301 "the default compression stream input and output sizes, respectively.\n"
304 302 );
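A usage sketch matching the docstring above; ``io.BytesIO`` stands in for arbitrary stream objects, and the return value is the (read, written) byte totals assembled by this function:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    source = io.BytesIO(b"data to compress" * 1024)
    dest = io.BytesIO()
    bytes_read, bytes_written = cctx.copy_stream(source, dest)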
305 303
306 304 static PyObject* ZstdCompressor_copy_stream(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
307 305 static char* kwlist[] = {
308 306 "ifh",
309 307 "ofh",
310 308 "size",
311 309 "read_size",
312 310 "write_size",
313 311 NULL
314 312 };
315 313
316 314 PyObject* source;
317 315 PyObject* dest;
318 316 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
319 317 size_t inSize = ZSTD_CStreamInSize();
320 318 size_t outSize = ZSTD_CStreamOutSize();
321 319 ZSTD_inBuffer input;
322 320 ZSTD_outBuffer output;
323 321 Py_ssize_t totalRead = 0;
324 322 Py_ssize_t totalWrite = 0;
325 323 char* readBuffer;
326 324 Py_ssize_t readSize;
327 325 PyObject* readResult = NULL;
328 326 PyObject* res = NULL;
329 327 size_t zresult;
330 328 PyObject* writeResult;
331 329 PyObject* totalReadPy;
332 330 PyObject* totalWritePy;
333 331
334 332 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|Kkk:copy_stream", kwlist,
335 333 &source, &dest, &sourceSize, &inSize, &outSize)) {
336 334 return NULL;
337 335 }
338 336
339 337 if (!PyObject_HasAttrString(source, "read")) {
340 338 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
341 339 return NULL;
342 340 }
343 341
344 342 if (!PyObject_HasAttrString(dest, "write")) {
345 343 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
346 344 return NULL;
347 345 }
348 346
349 if (ensure_cctx(self)) {
350 return NULL;
351 }
347 ZSTD_CCtx_reset(self->cctx);
352 348
353 349 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
354 350 if (ZSTD_isError(zresult)) {
355 351 PyErr_Format(ZstdError, "error setting source size: %s",
356 352 ZSTD_getErrorName(zresult));
357 353 return NULL;
358 354 }
359 355
360 356 /* Prevent free on uninitialized memory in finally. */
361 357 output.dst = PyMem_Malloc(outSize);
362 358 if (!output.dst) {
363 359 PyErr_NoMemory();
364 360 res = NULL;
365 361 goto finally;
366 362 }
367 363 output.size = outSize;
368 364 output.pos = 0;
369 365
370 366 input.src = NULL;
371 367 input.size = 0;
372 368 input.pos = 0;
373 369
374 370 while (1) {
375 371 /* Try to read from source stream. */
376 372 readResult = PyObject_CallMethod(source, "read", "n", inSize);
377 373 if (!readResult) {
378 374 PyErr_SetString(ZstdError, "could not read() from source");
379 375 goto finally;
380 376 }
381 377
382 378 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
383 379
384 380 /* If no data was read, we're at EOF. */
385 381 if (0 == readSize) {
386 382 break;
387 383 }
388 384
389 385 totalRead += readSize;
390 386
391 387 /* Send data to compressor */
392 388 input.src = readBuffer;
393 389 input.size = readSize;
394 390 input.pos = 0;
395 391
396 392 while (input.pos < input.size) {
397 393 Py_BEGIN_ALLOW_THREADS
398 394 zresult = ZSTD_compress_generic(self->cctx, &output, &input, ZSTD_e_continue);
399 395 Py_END_ALLOW_THREADS
400 396
401 397 if (ZSTD_isError(zresult)) {
402 398 res = NULL;
403 399 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
404 400 goto finally;
405 401 }
406 402
407 403 if (output.pos) {
408 404 #if PY_MAJOR_VERSION >= 3
409 405 writeResult = PyObject_CallMethod(dest, "write", "y#",
410 406 #else
411 407 writeResult = PyObject_CallMethod(dest, "write", "s#",
412 408 #endif
413 409 output.dst, output.pos);
414 410 Py_XDECREF(writeResult);
415 411 totalWrite += output.pos;
416 412 output.pos = 0;
417 413 }
418 414 }
419 415
420 416 Py_CLEAR(readResult);
421 417 }
422 418
423 419 /* We've finished reading. Now flush the compressor stream. */
424 420 assert(input.pos == input.size);
425 421
426 422 while (1) {
427 423 Py_BEGIN_ALLOW_THREADS
428 424 zresult = ZSTD_compress_generic(self->cctx, &output, &input, ZSTD_e_end);
429 425 Py_END_ALLOW_THREADS
430 426
431 427 if (ZSTD_isError(zresult)) {
432 428 PyErr_Format(ZstdError, "error ending compression stream: %s",
433 429 ZSTD_getErrorName(zresult));
434 430 res = NULL;
435 431 goto finally;
436 432 }
437 433
438 434 if (output.pos) {
439 435 #if PY_MAJOR_VERSION >= 3
440 436 writeResult = PyObject_CallMethod(dest, "write", "y#",
441 437 #else
442 438 writeResult = PyObject_CallMethod(dest, "write", "s#",
443 439 #endif
444 440 output.dst, output.pos);
445 441 totalWrite += output.pos;
446 442 Py_XDECREF(writeResult);
447 443 output.pos = 0;
448 444 }
449 445
450 446 if (!zresult) {
451 447 break;
452 448 }
453 449 }
454 450
455 451 totalReadPy = PyLong_FromSsize_t(totalRead);
456 452 totalWritePy = PyLong_FromSsize_t(totalWrite);
457 453 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
458 454 Py_DECREF(totalReadPy);
459 455 Py_DECREF(totalWritePy);
460 456
461 457 finally:
462 458 if (output.dst) {
463 459 PyMem_Free(output.dst);
464 460 }
465 461
466 462 Py_XDECREF(readResult);
467 463
468 464 return res;
469 465 }
470 466
471 467 PyDoc_STRVAR(ZstdCompressor_stream_reader__doc__,
472 468 "stream_reader(source, [size=0])\n"
473 469 "\n"
474 470 "Obtain an object that behaves like an I/O stream.\n"
475 471 "\n"
476 472 "The source object can be any object with a ``read(size)`` method\n"
477 473 "or an object that conforms to the buffer protocol.\n"
478 474 );
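An illustrative sketch of the documented behavior (assuming the returned reader is driven as a context manager, as in earlier releases); ``read()`` yields compressed bytes:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    source = io.BytesIO(b"data to compress" * 1024)
    with cctx.stream_reader(source) as reader:
        while True:
            chunk = reader.read(16384)
            if not chunk:
                break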
479 475
480 476 static ZstdCompressionReader* ZstdCompressor_stream_reader(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
481 477 static char* kwlist[] = {
482 478 "source",
483 479 "size",
484 480 "read_size",
485 481 NULL
486 482 };
487 483
488 484 PyObject* source;
489 485 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
490 486 size_t readSize = ZSTD_CStreamInSize();
491 487 ZstdCompressionReader* result = NULL;
488 size_t zresult;
492 489
493 490 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist,
494 491 &source, &sourceSize, &readSize)) {
495 492 return NULL;
496 493 }
497 494
498 495 result = (ZstdCompressionReader*)PyObject_CallObject((PyObject*)&ZstdCompressionReaderType, NULL);
499 496 if (!result) {
500 497 return NULL;
501 498 }
502 499
503 500 if (PyObject_HasAttrString(source, "read")) {
504 501 result->reader = source;
505 502 Py_INCREF(source);
506 503 result->readSize = readSize;
507 504 }
508 505 else if (1 == PyObject_CheckBuffer(source)) {
509 506 if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
510 507 goto except;
511 508 }
512 509
513 510 assert(result->buffer.len >= 0);
514 511
515 512 sourceSize = result->buffer.len;
516 513 }
517 514 else {
518 515 PyErr_SetString(PyExc_TypeError,
519 516 "must pass an object with a read() method or that conforms to the buffer protocol");
520 517 goto except;
521 518 }
522 519
523 if (ensure_cctx(self)) {
520 ZSTD_CCtx_reset(self->cctx);
521
522 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
523 if (ZSTD_isError(zresult)) {
524 PyErr_Format(ZstdError, "error setting source size: %s",
525 ZSTD_getErrorName(zresult));
524 526 goto except;
525 527 }
526 528
527 529 result->compressor = self;
528 530 Py_INCREF(self);
529 result->sourceSize = sourceSize;
530 531
531 532 return result;
532 533
533 534 except:
534 535 Py_CLEAR(result);
535 536
536 537 return NULL;
537 538 }
538 539
539 540 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
540 541 "compress(data)\n"
541 542 "\n"
542 543 "Compress data in a single operation.\n"
543 544 "\n"
544 545 "This is the simplest mechanism to perform compression: simply pass in a\n"
545 546 "value and get a compressed value back. It is almost the most prone to abuse.\n"
546 547 "The input and output values must fit in memory, so passing in very large\n"
547 548 "values can result in excessive memory usage. For this reason, one of the\n"
548 549 "streaming based APIs is preferred for larger values.\n"
549 550 );
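The one-shot call described above, as a sketch:

    import zstandard as zstd

    cctx = zstd.ZstdCompressor(level=3)
    compressed = cctx.compress(b"data to compress")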
550 551
551 552 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
552 553 static char* kwlist[] = {
553 554 "data",
554 555 NULL
555 556 };
556 557
557 558 Py_buffer source;
558 559 size_t destSize;
559 560 PyObject* output = NULL;
560 561 size_t zresult;
561 562 ZSTD_outBuffer outBuffer;
562 563 ZSTD_inBuffer inBuffer;
563 564
564 565 #if PY_MAJOR_VERSION >= 3
565 566 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|O:compress",
566 567 #else
567 568 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|O:compress",
568 569 #endif
569 570 kwlist, &source)) {
570 571 return NULL;
571 572 }
572 573
573 574 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
574 575 PyErr_SetString(PyExc_ValueError,
575 576 "data buffer should be contiguous and have at most one dimension");
576 577 goto finally;
577 578 }
578 579
579 if (ensure_cctx(self)) {
580 goto finally;
581 }
580 ZSTD_CCtx_reset(self->cctx);
582 581
583 582 destSize = ZSTD_compressBound(source.len);
584 583 output = PyBytes_FromStringAndSize(NULL, destSize);
585 584 if (!output) {
586 585 goto finally;
587 586 }
588 587
589 588 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, source.len);
590 589 if (ZSTD_isError(zresult)) {
591 590 PyErr_Format(ZstdError, "error setting source size: %s",
592 591 ZSTD_getErrorName(zresult));
593 592 Py_CLEAR(output);
594 593 goto finally;
595 594 }
596 595
597 596 inBuffer.src = source.buf;
598 597 inBuffer.size = source.len;
599 598 inBuffer.pos = 0;
600 599
601 600 outBuffer.dst = PyBytes_AsString(output);
602 601 outBuffer.size = destSize;
603 602 outBuffer.pos = 0;
604 603
605 604 Py_BEGIN_ALLOW_THREADS
606 605 /* By avoiding ZSTD_compress(), we don't necessarily write out content
607 606 size. This means the argument to ZstdCompressor to control frame
608 607 parameters is honored. */
609 608 zresult = ZSTD_compress_generic(self->cctx, &outBuffer, &inBuffer, ZSTD_e_end);
610 609 Py_END_ALLOW_THREADS
611 610
612 611 if (ZSTD_isError(zresult)) {
613 612 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
614 613 Py_CLEAR(output);
615 614 goto finally;
616 615 }
617 616 else if (zresult) {
618 617 PyErr_SetString(ZstdError, "unexpected partial frame flush");
619 618 Py_CLEAR(output);
620 619 goto finally;
621 620 }
622 621
623 622 Py_SIZE(output) = outBuffer.pos;
624 623
625 624 finally:
626 625 PyBuffer_Release(&source);
627 626 return output;
628 627 }
629 628
630 629 PyDoc_STRVAR(ZstdCompressionObj__doc__,
631 630 "compressobj()\n"
632 631 "\n"
633 632 "Return an object exposing ``compress(data)`` and ``flush()`` methods.\n"
634 633 "\n"
635 634 "The returned object exposes an API similar to ``zlib.compressobj`` and\n"
636 635 "``bz2.BZ2Compressor`` so that callers can swap in the zstd compressor\n"
637 636 "without changing how compression is performed.\n"
638 637 );
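A sketch of the ``zlib``-style usage described above:

    import zstandard as zstd

    cobj = zstd.ZstdCompressor().compressobj()
    frame = cobj.compress(b"chunk 1") + cobj.compress(b"chunk 2") + cobj.flush()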
639 638
640 639 static ZstdCompressionObj* ZstdCompressor_compressobj(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
641 640 static char* kwlist[] = {
642 641 "size",
643 642 NULL
644 643 };
645 644
646 645 unsigned long long inSize = ZSTD_CONTENTSIZE_UNKNOWN;
647 646 size_t outSize = ZSTD_CStreamOutSize();
648 647 ZstdCompressionObj* result = NULL;
649 648 size_t zresult;
650 649
651 650 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|K:compressobj", kwlist, &inSize)) {
652 651 return NULL;
653 652 }
654 653
655 if (ensure_cctx(self)) {
656 return NULL;
657 }
654 ZSTD_CCtx_reset(self->cctx);
658 655
659 656 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize);
660 657 if (ZSTD_isError(zresult)) {
661 658 PyErr_Format(ZstdError, "error setting source size: %s",
662 659 ZSTD_getErrorName(zresult));
663 660 return NULL;
664 661 }
665 662
666 663 result = (ZstdCompressionObj*)PyObject_CallObject((PyObject*)&ZstdCompressionObjType, NULL);
667 664 if (!result) {
668 665 return NULL;
669 666 }
670 667
671 668 result->output.dst = PyMem_Malloc(outSize);
672 669 if (!result->output.dst) {
673 670 PyErr_NoMemory();
674 671 Py_DECREF(result);
675 672 return NULL;
676 673 }
677 674 result->output.size = outSize;
678 675 result->compressor = self;
679 676 Py_INCREF(result->compressor);
680 677
681 678 return result;
682 679 }
683 680
684 681 PyDoc_STRVAR(ZstdCompressor_read_to_iter__doc__,
685 682 "read_to_iter(reader, [size=0, read_size=default, write_size=default])\n"
686 683 "Read uncompressed data from a reader and return an iterator\n"
687 684 "\n"
688 685 "Returns an iterator of compressed data produced from reading from ``reader``.\n"
689 686 "\n"
690 687 "Uncompressed data will be obtained from ``reader`` by calling the\n"
691 688 "``read(size)`` method of it. The source data will be streamed into a\n"
692 689 "compressor. As compressed data is available, it will be exposed to the\n"
693 690 "iterator.\n"
694 691 "\n"
695 692 "Data is read from the source in chunks of ``read_size``. Compressed chunks\n"
696 693 "are at most ``write_size`` bytes. Both values default to the zstd input and\n"
697 694 "and output defaults, respectively.\n"
698 695 "\n"
699 696 "The caller is partially in control of how fast data is fed into the\n"
700 697 "compressor by how it consumes the returned iterator. The compressor will\n"
701 698 "not consume from the reader unless the caller consumes from the iterator.\n"
702 699 );
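A sketch of the pull-based iteration described above (``io.BytesIO`` stands in for any object with a ``read(size)`` method):

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    source = io.BytesIO(b"data to compress" * 1024)
    compressed = b"".join(cctx.read_to_iter(source, read_size=16384))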
703 700
704 701 static ZstdCompressorIterator* ZstdCompressor_read_to_iter(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
705 702 static char* kwlist[] = {
706 703 "reader",
707 704 "size",
708 705 "read_size",
709 706 "write_size",
710 707 NULL
711 708 };
712 709
713 710 PyObject* reader;
714 711 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
715 712 size_t inSize = ZSTD_CStreamInSize();
716 713 size_t outSize = ZSTD_CStreamOutSize();
717 714 ZstdCompressorIterator* result;
718 715 size_t zresult;
719 716
720 717 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kkk:read_to_iter", kwlist,
721 718 &reader, &sourceSize, &inSize, &outSize)) {
722 719 return NULL;
723 720 }
724 721
725 722 result = (ZstdCompressorIterator*)PyObject_CallObject((PyObject*)&ZstdCompressorIteratorType, NULL);
726 723 if (!result) {
727 724 return NULL;
728 725 }
729 726 if (PyObject_HasAttrString(reader, "read")) {
730 727 result->reader = reader;
731 728 Py_INCREF(result->reader);
732 729 }
733 730 else if (1 == PyObject_CheckBuffer(reader)) {
734 731 if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
735 732 goto except;
736 733 }
737 734
738 735 sourceSize = result->buffer.len;
739 736 }
740 737 else {
741 738 PyErr_SetString(PyExc_ValueError,
742 739 "must pass an object with a read() method or conforms to buffer protocol");
743 740 goto except;
744 741 }
745 742
746 if (ensure_cctx(self)) {
747 return NULL;
748 }
743 ZSTD_CCtx_reset(self->cctx);
749 744
750 745 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
751 746 if (ZSTD_isError(zresult)) {
752 747 PyErr_Format(ZstdError, "error setting source size: %s",
753 748 ZSTD_getErrorName(zresult));
754 749 return NULL;
755 750 }
756 751
757 752 result->compressor = self;
758 753 Py_INCREF(result->compressor);
759 754
760 755 result->inSize = inSize;
761 756 result->outSize = outSize;
762 757
763 758 result->output.dst = PyMem_Malloc(outSize);
764 759 if (!result->output.dst) {
765 760 PyErr_NoMemory();
766 761 goto except;
767 762 }
768 763 result->output.size = outSize;
769 764
770 765 goto finally;
771 766
772 767 except:
773 768 Py_CLEAR(result);
774 769
775 770 finally:
776 771 return result;
777 772 }
778 773
779 774 PyDoc_STRVAR(ZstdCompressor_stream_writer___doc__,
780 775 "Create a context manager to write compressed data to an object.\n"
781 776 "\n"
782 777 "The passed object must have a ``write()`` method.\n"
783 778 "\n"
784 779 "The caller feeds input data to the object by calling ``compress(data)``.\n"
785 780 "Compressed data is written to the argument given to this function.\n"
786 781 "\n"
787 782 "The function takes an optional ``size`` argument indicating the total size\n"
788 783 "of the eventual input. If specified, the size will influence compression\n"
789 784 "parameter tuning and could result in the size being written into the\n"
790 785 "header of the compressed data.\n"
791 786 "\n"
792 787 "An optional ``write_size`` argument is also accepted. It defines the maximum\n"
793 788 "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n"
794 789 "for a compressor output stream.\n"
795 790 );
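A sketch of the context-manager usage described above; the frame is finalized when the ``with`` block exits:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    dest = io.BytesIO()
    with cctx.stream_writer(dest) as compressor:
        compressor.write(b"data to compress")
    frame = dest.getvalue()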
796 791
797 792 static ZstdCompressionWriter* ZstdCompressor_stream_writer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
798 793 static char* kwlist[] = {
799 794 "writer",
800 795 "size",
801 796 "write_size",
802 797 NULL
803 798 };
804 799
805 800 PyObject* writer;
806 801 ZstdCompressionWriter* result;
807 802 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
808 803 size_t outSize = ZSTD_CStreamOutSize();
809 804
810 805 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_writer", kwlist,
811 806 &writer, &sourceSize, &outSize)) {
812 807 return NULL;
813 808 }
814 809
815 810 if (!PyObject_HasAttrString(writer, "write")) {
816 811 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
817 812 return NULL;
818 813 }
819 814
820 if (ensure_cctx(self)) {
821 return NULL;
822 }
815 ZSTD_CCtx_reset(self->cctx);
823 816
824 817 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
825 818 if (!result) {
826 819 return NULL;
827 820 }
828 821
829 822 result->compressor = self;
830 823 Py_INCREF(result->compressor);
831 824
832 825 result->writer = writer;
833 826 Py_INCREF(result->writer);
834 827
835 828 result->sourceSize = sourceSize;
836 829 result->outSize = outSize;
837 830 result->bytesCompressed = 0;
838 831
839 832 return result;
840 833 }
841 834
835 PyDoc_STRVAR(ZstdCompressor_chunker__doc__,
836 "Create an object for iterative compressing to same-sized chunks.\n"
837 );
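The chunker added here is pull-based: ``compress()``, ``flush()`` and ``finish()`` each return an iterator of ``chunk_size``-byte chunks (the final chunk may be smaller). A sketch, with arbitrary input data:

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    chunker = cctx.chunker(chunk_size=32768)

    chunks = []
    for data in (b"first block", b"second block"):
        chunks.extend(chunker.compress(data))
    chunks.extend(chunker.finish())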
838
839 static ZstdCompressionChunker* ZstdCompressor_chunker(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
840 static char* kwlist[] = {
841 "size",
842 "chunk_size",
843 NULL
844 };
845
846 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
847 size_t chunkSize = ZSTD_CStreamOutSize();
848 ZstdCompressionChunker* chunker;
849 size_t zresult;
850
851 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|Kk:chunker", kwlist,
852 &sourceSize, &chunkSize)) {
853 return NULL;
854 }
855
856 ZSTD_CCtx_reset(self->cctx);
857
858 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
859 if (ZSTD_isError(zresult)) {
860 PyErr_Format(ZstdError, "error setting source size: %s",
861 ZSTD_getErrorName(zresult));
862 return NULL;
863 }
864
865 chunker = (ZstdCompressionChunker*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerType, NULL);
866 if (!chunker) {
867 return NULL;
868 }
869
870 chunker->output.dst = PyMem_Malloc(chunkSize);
871 if (!chunker->output.dst) {
872 PyErr_NoMemory();
873 Py_DECREF(chunker);
874 return NULL;
875 }
876 chunker->output.size = chunkSize;
877 chunker->output.pos = 0;
878
879 chunker->compressor = self;
880 Py_INCREF(chunker->compressor);
881
882 chunker->chunkSize = chunkSize;
883
884 return chunker;
885 }
886
842 887 typedef struct {
843 888 void* sourceData;
844 889 size_t sourceSize;
845 890 } DataSource;
846 891
847 892 typedef struct {
848 893 DataSource* sources;
849 894 Py_ssize_t sourcesSize;
850 895 unsigned long long totalSourceSize;
851 896 } DataSources;
852 897
853 898 typedef struct {
854 899 void* dest;
855 900 Py_ssize_t destSize;
856 901 BufferSegment* segments;
857 902 Py_ssize_t segmentsSize;
858 903 } DestBuffer;
859 904
860 905 typedef enum {
861 906 WorkerError_none = 0,
862 907 WorkerError_zstd = 1,
863 908 WorkerError_no_memory = 2,
864 909 WorkerError_nospace = 3,
865 910 } WorkerError;
866 911
867 912 /**
868 913 * Holds state for an individual worker performing multi_compress_to_buffer work.
869 914 */
870 915 typedef struct {
871 916 /* Used for compression. */
872 917 ZSTD_CCtx* cctx;
873 918
874 919 /* What to compress. */
875 920 DataSource* sources;
876 921 Py_ssize_t sourcesSize;
877 922 Py_ssize_t startOffset;
878 923 Py_ssize_t endOffset;
879 924 unsigned long long totalSourceSize;
880 925
881 926 /* Result storage. */
882 927 DestBuffer* destBuffers;
883 928 Py_ssize_t destCount;
884 929
885 930 /* Error tracking. */
886 931 WorkerError error;
887 932 size_t zresult;
888 933 Py_ssize_t errorOffset;
889 934 } WorkerState;
890 935
891 936 static void compress_worker(WorkerState* state) {
892 937 Py_ssize_t inputOffset = state->startOffset;
893 938 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
894 939 Py_ssize_t currentBufferStartOffset = state->startOffset;
895 940 size_t zresult;
896 941 void* newDest;
897 942 size_t allocationSize;
898 943 size_t boundSize;
899 944 Py_ssize_t destOffset = 0;
900 945 DataSource* sources = state->sources;
901 946 DestBuffer* destBuffer;
902 947
903 948 assert(!state->destBuffers);
904 949 assert(0 == state->destCount);
905 950
906 951 /*
907 952 * The total size of the compressed data is unknown until we actually
908 953 * compress data. That means we can't pre-allocate the exact size we need.
909 954 *
910 955 * There is a cost to every allocation and reallocation. So, it is in our
911 956 * interest to minimize the number of allocations.
912 957 *
913 958 * There is also a cost to too few allocations. If allocations are too
914 959 * large they may fail. If buffers are shared and the inputs fall out of
915 960 * use at different times, then a reference to one segment
916 961 * in the buffer will keep the entire buffer alive. This leads to excessive
917 962 * memory usage.
918 963 *
919 964 * Our current strategy is to assume a compression ratio of 16:1 and
920 965 * allocate buffers of that size, rounded up to the nearest power of 2
921 966 * (because computers like round numbers). That ratio is greater than what
922 967 * most inputs achieve. This is by design: we don't want to over-allocate.
923 968 * But we don't want to under-allocate and lead to too many buffers either.
924 969 */
925 970
926 971 state->destCount = 1;
927 972
928 973 state->destBuffers = calloc(1, sizeof(DestBuffer));
929 974 if (NULL == state->destBuffers) {
930 975 state->error = WorkerError_no_memory;
931 976 return;
932 977 }
933 978
934 979 destBuffer = &state->destBuffers[state->destCount - 1];
935 980
936 981 /*
937 982 * Rather than track bounds and grow the segments buffer, allocate space
938 983 * to hold remaining items then truncate when we're done with it.
939 984 */
940 985 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
941 986 if (NULL == destBuffer->segments) {
942 987 state->error = WorkerError_no_memory;
943 988 return;
944 989 }
945 990
946 991 destBuffer->segmentsSize = remainingItems;
947 992
948 993 assert(state->totalSourceSize <= SIZE_MAX);
949 994 allocationSize = roundpow2((size_t)state->totalSourceSize >> 4);
950 995
951 996 /* If the maximum size of the output is larger than that, round up. */
952 997 boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize);
953 998
954 999 if (boundSize > allocationSize) {
955 1000 allocationSize = roundpow2(boundSize);
956 1001 }
957 1002
958 1003 destBuffer->dest = malloc(allocationSize);
959 1004 if (NULL == destBuffer->dest) {
960 1005 state->error = WorkerError_no_memory;
961 1006 return;
962 1007 }
963 1008
964 1009 destBuffer->destSize = allocationSize;
965 1010
966 1011 for (inputOffset = state->startOffset; inputOffset <= state->endOffset; inputOffset++) {
967 1012 void* source = sources[inputOffset].sourceData;
968 1013 size_t sourceSize = sources[inputOffset].sourceSize;
969 1014 size_t destAvailable;
970 1015 void* dest;
971 1016 ZSTD_outBuffer opOutBuffer;
972 1017 ZSTD_inBuffer opInBuffer;
973 1018
974 1019 destAvailable = destBuffer->destSize - destOffset;
975 1020 boundSize = ZSTD_compressBound(sourceSize);
976 1021
977 1022 /*
978 1023 * Not enough space in current buffer to hold largest compressed output.
979 1024 * So allocate and switch to a new output buffer.
980 1025 */
981 1026 if (boundSize > destAvailable) {
982 1027 /*
983 1028 * The downsizing of the existing buffer is optional. It should be cheap
984 1029 * (unlike growing). So we just do it.
985 1030 */
986 1031 if (destAvailable) {
987 1032 newDest = realloc(destBuffer->dest, destOffset);
988 1033 if (NULL == newDest) {
989 1034 state->error = WorkerError_no_memory;
990 1035 return;
991 1036 }
992 1037
993 1038 destBuffer->dest = newDest;
994 1039 destBuffer->destSize = destOffset;
995 1040 }
996 1041
997 1042 /* Truncate segments buffer. */
998 1043 newDest = realloc(destBuffer->segments,
999 1044 (inputOffset - currentBufferStartOffset + 1) * sizeof(BufferSegment));
1000 1045 if (NULL == newDest) {
1001 1046 state->error = WorkerError_no_memory;
1002 1047 return;
1003 1048 }
1004 1049
1005 1050 destBuffer->segments = newDest;
1006 1051 destBuffer->segmentsSize = inputOffset - currentBufferStartOffset;
1007 1052
1008 1053 /* Grow space for new struct. */
1009 1054 /* TODO consider over-allocating so we don't do this every time. */
1010 1055 newDest = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
1011 1056 if (NULL == newDest) {
1012 1057 state->error = WorkerError_no_memory;
1013 1058 return;
1014 1059 }
1015 1060
1016 1061 state->destBuffers = newDest;
1017 1062 state->destCount++;
1018 1063
1019 1064 destBuffer = &state->destBuffers[state->destCount - 1];
1020 1065
1021 1066 /* Don't take any chances with non-NULL pointers. */
1022 1067 memset(destBuffer, 0, sizeof(DestBuffer));
1023 1068
1024 1069 /**
1025 1070 * We could dynamically update allocation size based on work done so far.
1026 1071 * For now, keep it simple.
1027 1072 */
1028 1073 assert(state->totalSourceSize <= SIZE_MAX);
1029 1074 allocationSize = roundpow2((size_t)state->totalSourceSize >> 4);
1030 1075
1031 1076 if (boundSize > allocationSize) {
1032 1077 allocationSize = roundpow2(boundSize);
1033 1078 }
1034 1079
1035 1080 destBuffer->dest = malloc(allocationSize);
1036 1081 if (NULL == destBuffer->dest) {
1037 1082 state->error = WorkerError_no_memory;
1038 1083 return;
1039 1084 }
1040 1085
1041 1086 destBuffer->destSize = allocationSize;
1042 1087 destAvailable = allocationSize;
1043 1088 destOffset = 0;
1044 1089
1045 1090 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1046 1091 if (NULL == destBuffer->segments) {
1047 1092 state->error = WorkerError_no_memory;
1048 1093 return;
1049 1094 }
1050 1095
1051 1096 destBuffer->segmentsSize = remainingItems;
1052 1097 currentBufferStartOffset = inputOffset;
1053 1098 }
1054 1099
1055 1100 dest = (char*)destBuffer->dest + destOffset;
1056 1101
1057 1102 opInBuffer.src = source;
1058 1103 opInBuffer.size = sourceSize;
1059 1104 opInBuffer.pos = 0;
1060 1105
1061 1106 opOutBuffer.dst = dest;
1062 1107 opOutBuffer.size = destAvailable;
1063 1108 opOutBuffer.pos = 0;
1064 1109
1065 1110 zresult = ZSTD_CCtx_setPledgedSrcSize(state->cctx, sourceSize);
1066 1111 if (ZSTD_isError(zresult)) {
1067 1112 state->error = WorkerError_zstd;
1068 1113 state->zresult = zresult;
1069 1114 state->errorOffset = inputOffset;
1070 1115 break;
1071 1116 }
1072 1117
1073 1118 zresult = ZSTD_compress_generic(state->cctx, &opOutBuffer, &opInBuffer, ZSTD_e_end);
1074 1119 if (ZSTD_isError(zresult)) {
1075 1120 state->error = WorkerError_zstd;
1076 1121 state->zresult = zresult;
1077 1122 state->errorOffset = inputOffset;
1078 1123 break;
1079 1124 }
1080 1125 else if (zresult) {
1081 1126 state->error = WorkerError_nospace;
1082 1127 state->errorOffset = inputOffset;
1083 1128 break;
1084 1129 }
1085 1130
1086 1131 destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset;
1087 1132 destBuffer->segments[inputOffset - currentBufferStartOffset].length = opOutBuffer.pos;
1088 1133
1089 1134 destOffset += opOutBuffer.pos;
1090 1135 remainingItems--;
1091 1136 }
1092 1137
1093 1138 if (destBuffer->destSize > destOffset) {
1094 1139 newDest = realloc(destBuffer->dest, destOffset);
1095 1140 if (NULL == newDest) {
1096 1141 state->error = WorkerError_no_memory;
1097 1142 return;
1098 1143 }
1099 1144
1100 1145 destBuffer->dest = newDest;
1101 1146 destBuffer->destSize = destOffset;
1102 1147 }
1103 1148 }
1104 1149
1105 1150 ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor,
1106 1151 DataSources* sources, Py_ssize_t threadCount) {
1107 1152 unsigned long long bytesPerWorker;
1108 1153 POOL_ctx* pool = NULL;
1109 1154 WorkerState* workerStates = NULL;
1110 1155 Py_ssize_t i;
1111 1156 unsigned long long workerBytes = 0;
1112 1157 Py_ssize_t workerStartOffset = 0;
1113 1158 Py_ssize_t currentThread = 0;
1114 1159 int errored = 0;
1115 1160 Py_ssize_t segmentsCount = 0;
1116 1161 Py_ssize_t segmentIndex;
1117 1162 PyObject* segmentsArg = NULL;
1118 1163 ZstdBufferWithSegments* buffer;
1119 1164 ZstdBufferWithSegmentsCollection* result = NULL;
1120 1165
1121 1166 assert(sources->sourcesSize > 0);
1122 1167 assert(sources->totalSourceSize > 0);
1123 1168 assert(threadCount >= 1);
1124 1169
1125 1170 /* More threads than inputs makes no sense. */
1126 1171 threadCount = sources->sourcesSize < threadCount ? sources->sourcesSize
1127 1172 : threadCount;
1128 1173
1129 1174 /* TODO lower thread count when input size is too small and threads would add
1130 1175 overhead. */
1131 1176
1132 1177 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1133 1178 if (NULL == workerStates) {
1134 1179 PyErr_NoMemory();
1135 1180 goto finally;
1136 1181 }
1137 1182
1138 1183 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1139 1184
1140 1185 if (threadCount > 1) {
1141 1186 pool = POOL_create(threadCount, 1);
1142 1187 if (NULL == pool) {
1143 1188 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1144 1189 goto finally;
1145 1190 }
1146 1191 }
1147 1192
1148 1193 bytesPerWorker = sources->totalSourceSize / threadCount;
1149 1194
1150 1195 for (i = 0; i < threadCount; i++) {
1151 1196 size_t zresult;
1152 1197
1153 1198 workerStates[i].cctx = ZSTD_createCCtx();
1154 1199 if (!workerStates[i].cctx) {
1155 1200 PyErr_NoMemory();
1156 1201 goto finally;
1157 1202 }
1158 1203
1159 1204 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(workerStates[i].cctx,
1160 1205 compressor->params);
1161 1206 if (ZSTD_isError(zresult)) {
1162 1207 PyErr_Format(ZstdError, "could not set compression parameters: %s",
1163 1208 ZSTD_getErrorName(zresult));
1164 1209 goto finally;
1165 1210 }
1166 1211
1167 1212 if (compressor->dict) {
1168 1213 if (compressor->dict->cdict) {
1169 1214 zresult = ZSTD_CCtx_refCDict(workerStates[i].cctx, compressor->dict->cdict);
1170 1215 }
1171 1216 else {
1172 1217 zresult = ZSTD_CCtx_loadDictionary_advanced(
1173 1218 workerStates[i].cctx,
1174 1219 compressor->dict->dictData,
1175 1220 compressor->dict->dictSize,
1176 1221 ZSTD_dlm_byRef,
1177 1222 compressor->dict->dictType);
1178 1223 }
1179 1224
1180 1225 if (ZSTD_isError(zresult)) {
1181 1226 PyErr_Format(ZstdError, "could not load compression dictionary: %s",
1182 1227 ZSTD_getErrorName(zresult));
1183 1228 goto finally;
1184 1229 }
1185 1230
1186 1231 }
1187 1232
1188 1233 workerStates[i].sources = sources->sources;
1189 1234 workerStates[i].sourcesSize = sources->sourcesSize;
1190 1235 }
1191 1236
1192 1237 Py_BEGIN_ALLOW_THREADS
1193 1238 for (i = 0; i < sources->sourcesSize; i++) {
1194 1239 workerBytes += sources->sources[i].sourceSize;
1195 1240
1196 1241 /*
1197 1242 * The last worker/thread needs to handle all remaining work. Don't
1198 1243 * trigger it prematurely. Defer to the block outside of the loop
1199 1244 * to run the last worker/thread. But do still process this loop
1200 1245 * so workerBytes is correct.
1201 1246 */
1202 1247 if (currentThread == threadCount - 1) {
1203 1248 continue;
1204 1249 }
1205 1250
1206 1251 if (workerBytes >= bytesPerWorker) {
1207 1252 assert(currentThread < threadCount);
1208 1253 workerStates[currentThread].totalSourceSize = workerBytes;
1209 1254 workerStates[currentThread].startOffset = workerStartOffset;
1210 1255 workerStates[currentThread].endOffset = i;
1211 1256
1212 1257 if (threadCount > 1) {
1213 1258 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1214 1259 }
1215 1260 else {
1216 1261 compress_worker(&workerStates[currentThread]);
1217 1262 }
1218 1263
1219 1264 currentThread++;
1220 1265 workerStartOffset = i + 1;
1221 1266 workerBytes = 0;
1222 1267 }
1223 1268 }
1224 1269
1225 1270 if (workerBytes) {
1226 1271 assert(currentThread < threadCount);
1227 1272 workerStates[currentThread].totalSourceSize = workerBytes;
1228 1273 workerStates[currentThread].startOffset = workerStartOffset;
1229 1274 workerStates[currentThread].endOffset = sources->sourcesSize - 1;
1230 1275
1231 1276 if (threadCount > 1) {
1232 1277 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1233 1278 }
1234 1279 else {
1235 1280 compress_worker(&workerStates[currentThread]);
1236 1281 }
1237 1282 }
1238 1283
1239 1284 if (threadCount > 1) {
1240 1285 POOL_free(pool);
1241 1286 pool = NULL;
1242 1287 }
1243 1288
1244 1289 Py_END_ALLOW_THREADS
1245 1290
1246 1291 for (i = 0; i < threadCount; i++) {
1247 1292 switch (workerStates[i].error) {
1248 1293 case WorkerError_no_memory:
1249 1294 PyErr_NoMemory();
1250 1295 errored = 1;
1251 1296 break;
1252 1297
1253 1298 case WorkerError_zstd:
1254 1299 PyErr_Format(ZstdError, "error compressing item %zd: %s",
1255 1300 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1256 1301 errored = 1;
1257 1302 break;
1258 1303
1259 1304 case WorkerError_nospace:
1260 1305 PyErr_Format(ZstdError, "error compressing item %zd: not enough space in output",
1261 1306 workerStates[i].errorOffset);
1262 1307 errored = 1;
1263 1308 break;
1264 1309
1265 1310 default:
1266 1311 ;
1267 1312 }
1268 1313
1269 1314 if (errored) {
1270 1315 break;
1271 1316 }
1272 1317
1273 1318 }
1274 1319
1275 1320 if (errored) {
1276 1321 goto finally;
1277 1322 }
1278 1323
1279 1324 segmentsCount = 0;
1280 1325 for (i = 0; i < threadCount; i++) {
1281 1326 WorkerState* state = &workerStates[i];
1282 1327 segmentsCount += state->destCount;
1283 1328 }
1284 1329
1285 1330 segmentsArg = PyTuple_New(segmentsCount);
1286 1331 if (NULL == segmentsArg) {
1287 1332 goto finally;
1288 1333 }
1289 1334
1290 1335 segmentIndex = 0;
1291 1336
1292 1337 for (i = 0; i < threadCount; i++) {
1293 1338 Py_ssize_t j;
1294 1339 WorkerState* state = &workerStates[i];
1295 1340
1296 1341 for (j = 0; j < state->destCount; j++) {
1297 1342 DestBuffer* destBuffer = &state->destBuffers[j];
1298 1343 buffer = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1299 1344 destBuffer->segments, destBuffer->segmentsSize);
1300 1345
1301 1346 if (NULL == buffer) {
1302 1347 goto finally;
1303 1348 }
1304 1349
1305 1350 /* Tell instance to use free() instead of PyMem_Free(). */
1306 1351 buffer->useFree = 1;
1307 1352
1308 1353 /*
1309 1354 * BufferWithSegments_FromMemory takes ownership of the backing memory.
1310 1355 * Unset it here so it doesn't get freed below.
1311 1356 */
1312 1357 destBuffer->dest = NULL;
1313 1358 destBuffer->segments = NULL;
1314 1359
1315 1360 PyTuple_SET_ITEM(segmentsArg, segmentIndex++, (PyObject*)buffer);
1316 1361 }
1317 1362 }
1318 1363
1319 1364 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1320 1365 (PyObject*)&ZstdBufferWithSegmentsCollectionType, segmentsArg);
1321 1366
1322 1367 finally:
1323 1368 Py_CLEAR(segmentsArg);
1324 1369
1325 1370 if (pool) {
1326 1371 POOL_free(pool);
1327 1372 }
1328 1373
1329 1374 if (workerStates) {
1330 1375 Py_ssize_t j;
1331 1376
1332 1377 for (i = 0; i < threadCount; i++) {
1333 1378 WorkerState state = workerStates[i];
1334 1379
1335 1380 if (state.cctx) {
1336 1381 ZSTD_freeCCtx(state.cctx);
1337 1382 }
1338 1383
1339 1384 /* malloc() is used in worker thread. */
1340 1385
1341 1386 for (j = 0; j < state.destCount; j++) {
1342 1387 if (state.destBuffers) {
1343 1388 free(state.destBuffers[j].dest);
1344 1389 free(state.destBuffers[j].segments);
1345 1390 }
1346 1391 }
1347 1392
1348 1393
1349 1394 free(state.destBuffers);
1350 1395 }
1351 1396
1352 1397 PyMem_Free(workerStates);
1353 1398 }
1354 1399
1355 1400 return result;
1356 1401 }
1357 1402
1358 1403 PyDoc_STRVAR(ZstdCompressor_multi_compress_to_buffer__doc__,
1359 1404 "Compress multiple pieces of data as a single operation\n"
1360 1405 "\n"
1361 1406 "Receives a ``BufferWithSegmentsCollection``, a ``BufferWithSegments``, or\n"
1362 1407 "a list of bytes like objects holding data to compress.\n"
1363 1408 "\n"
1364 1409 "Returns a ``BufferWithSegmentsCollection`` holding compressed data.\n"
1365 1410 "\n"
1366 1411 "This function is optimized to perform multiple compression operations as\n"
1367 1412 "as possible with as little overhead as possbile.\n"
1368 1413 );
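A sketch of the documented call; item access assumes the ``BufferWithSegments`` indexing semantics defined elsewhere in the package (each item is one compressed frame):

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    collection = cctx.multi_compress_to_buffer([b"foo" * 100, b"bar" * 100], threads=2)
    frames = [collection[i].tobytes() for i in range(len(collection))]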
1369 1414
1370 1415 static ZstdBufferWithSegmentsCollection* ZstdCompressor_multi_compress_to_buffer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
1371 1416 static char* kwlist[] = {
1372 1417 "data",
1373 1418 "threads",
1374 1419 NULL
1375 1420 };
1376 1421
1377 1422 PyObject* data;
1378 1423 int threads = 0;
1379 1424 Py_buffer* dataBuffers = NULL;
1380 1425 DataSources sources;
1381 1426 Py_ssize_t i;
1382 1427 Py_ssize_t sourceCount = 0;
1383 1428 ZstdBufferWithSegmentsCollection* result = NULL;
1384 1429
1385 1430 memset(&sources, 0, sizeof(sources));
1386 1431
1387 1432 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist,
1388 1433 &data, &threads)) {
1389 1434 return NULL;
1390 1435 }
1391 1436
1392 1437 if (threads < 0) {
1393 1438 threads = cpu_count();
1394 1439 }
1395 1440
1396 1441 if (threads < 2) {
1397 1442 threads = 1;
1398 1443 }
1399 1444
1400 1445 if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsType)) {
1401 1446 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)data;
1402 1447
1403 1448 sources.sources = PyMem_Malloc(buffer->segmentCount * sizeof(DataSource));
1404 1449 if (NULL == sources.sources) {
1405 1450 PyErr_NoMemory();
1406 1451 goto finally;
1407 1452 }
1408 1453
1409 1454 for (i = 0; i < buffer->segmentCount; i++) {
1410 1455 if (buffer->segments[i].length > SIZE_MAX) {
1411 1456 PyErr_Format(PyExc_ValueError,
1412 1457 "buffer segment %zd is too large for this platform", i);
1413 1458 goto finally;
1414 1459 }
1415 1460
1416 1461 sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset;
1417 1462 sources.sources[i].sourceSize = (size_t)buffer->segments[i].length;
1418 1463 sources.totalSourceSize += buffer->segments[i].length;
1419 1464 }
1420 1465
1421 1466 sources.sourcesSize = buffer->segmentCount;
1422 1467 }
1423 1468 else if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsCollectionType)) {
1424 1469 Py_ssize_t j;
1425 1470 Py_ssize_t offset = 0;
1426 1471 ZstdBufferWithSegments* buffer;
1427 1472 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)data;
1428 1473
1429 1474 sourceCount = BufferWithSegmentsCollection_length(collection);
1430 1475
1431 1476 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1432 1477 if (NULL == sources.sources) {
1433 1478 PyErr_NoMemory();
1434 1479 goto finally;
1435 1480 }
1436 1481
1437 1482 for (i = 0; i < collection->bufferCount; i++) {
1438 1483 buffer = collection->buffers[i];
1439 1484
1440 1485 for (j = 0; j < buffer->segmentCount; j++) {
1441 1486 if (buffer->segments[j].length > SIZE_MAX) {
1442 1487 PyErr_Format(PyExc_ValueError,
1443 1488 "buffer segment %zd in buffer %zd is too large for this platform",
1444 1489 j, i);
1445 1490 goto finally;
1446 1491 }
1447 1492
1448 1493 sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset;
1449 1494 sources.sources[offset].sourceSize = (size_t)buffer->segments[j].length;
1450 1495 sources.totalSourceSize += buffer->segments[j].length;
1451 1496
1452 1497 offset++;
1453 1498 }
1454 1499 }
1455 1500
1456 1501 sources.sourcesSize = sourceCount;
1457 1502 }
1458 1503 else if (PyList_Check(data)) {
1459 1504 sourceCount = PyList_GET_SIZE(data);
1460 1505
1461 1506 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1462 1507 if (NULL == sources.sources) {
1463 1508 PyErr_NoMemory();
1464 1509 goto finally;
1465 1510 }
1466 1511
1467 1512 dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer));
1468 1513 if (NULL == dataBuffers) {
1469 1514 PyErr_NoMemory();
1470 1515 goto finally;
1471 1516 }
1472 1517
1473 1518 memset(dataBuffers, 0, sourceCount * sizeof(Py_buffer));
1474 1519
1475 1520 for (i = 0; i < sourceCount; i++) {
1476 1521 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(data, i),
1477 1522 &dataBuffers[i], PyBUF_CONTIG_RO)) {
1478 1523 PyErr_Clear();
1479 1524 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1480 1525 goto finally;
1481 1526 }
1482 1527
1483 1528 sources.sources[i].sourceData = dataBuffers[i].buf;
1484 1529 sources.sources[i].sourceSize = dataBuffers[i].len;
1485 1530 sources.totalSourceSize += dataBuffers[i].len;
1486 1531 }
1487 1532
1488 1533 sources.sourcesSize = sourceCount;
1489 1534 }
1490 1535 else {
1491 1536 PyErr_SetString(PyExc_TypeError, "argument must be list of BufferWithSegments");
1492 1537 goto finally;
1493 1538 }
1494 1539
1495 1540 if (0 == sources.sourcesSize) {
1496 1541 PyErr_SetString(PyExc_ValueError, "no source elements found");
1497 1542 goto finally;
1498 1543 }
1499 1544
1500 1545 if (0 == sources.totalSourceSize) {
1501 1546 PyErr_SetString(PyExc_ValueError, "source elements are empty");
1502 1547 goto finally;
1503 1548 }
1504 1549
1505 1550 if (sources.totalSourceSize > SIZE_MAX) {
1506 1551 PyErr_SetString(PyExc_ValueError, "sources are too large for this platform");
1507 1552 goto finally;
1508 1553 }
1509 1554
1510 1555 result = compress_from_datasources(self, &sources, threads);
1511 1556
1512 1557 finally:
1513 1558 PyMem_Free(sources.sources);
1514 1559
1515 1560 if (dataBuffers) {
1516 1561 for (i = 0; i < sourceCount; i++) {
1517 1562 PyBuffer_Release(&dataBuffers[i]);
1518 1563 }
1519 1564
1520 1565 PyMem_Free(dataBuffers);
1521 1566 }
1522 1567
1523 1568 return result;
1524 1569 }
1525 1570
1526 1571 static PyMethodDef ZstdCompressor_methods[] = {
1572 { "chunker", (PyCFunction)ZstdCompressor_chunker,
1573 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_chunker__doc__ },
1527 1574 { "compress", (PyCFunction)ZstdCompressor_compress,
1528 1575 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
1529 1576 { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
1530 1577 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
1531 1578 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
1532 1579 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
1533 1580 { "stream_reader", (PyCFunction)ZstdCompressor_stream_reader,
1534 1581 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_reader__doc__ },
1535 1582 { "stream_writer", (PyCFunction)ZstdCompressor_stream_writer,
1536 1583 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
1537 1584 { "read_to_iter", (PyCFunction)ZstdCompressor_read_to_iter,
1538 1585 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
1539 1586 /* TODO Remove deprecated API */
1540 1587 { "read_from", (PyCFunction)ZstdCompressor_read_to_iter,
1541 1588 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
1542 1589 /* TODO remove deprecated API */
1543 1590 { "write_to", (PyCFunction)ZstdCompressor_stream_writer,
1544 1591 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
1545 1592 { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer,
1546 1593 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ },
1547 1594 { "memory_size", (PyCFunction)ZstdCompressor_memory_size,
1548 1595 METH_NOARGS, ZstdCompressor_memory_size__doc__ },
1549 1596 { "frame_progression", (PyCFunction)ZstdCompressor_frame_progression,
1550 1597 METH_NOARGS, ZstdCompressor_frame_progression__doc__ },
1551 1598 { NULL, NULL }
1552 1599 };
1553 1600
1554 1601 PyTypeObject ZstdCompressorType = {
1555 1602 PyVarObject_HEAD_INIT(NULL, 0)
1556 1603 "zstd.ZstdCompressor", /* tp_name */
1557 1604 sizeof(ZstdCompressor), /* tp_basicsize */
1558 1605 0, /* tp_itemsize */
1559 1606 (destructor)ZstdCompressor_dealloc, /* tp_dealloc */
1560 1607 0, /* tp_print */
1561 1608 0, /* tp_getattr */
1562 1609 0, /* tp_setattr */
1563 1610 0, /* tp_compare */
1564 1611 0, /* tp_repr */
1565 1612 0, /* tp_as_number */
1566 1613 0, /* tp_as_sequence */
1567 1614 0, /* tp_as_mapping */
1568 1615 0, /* tp_hash */
1569 1616 0, /* tp_call */
1570 1617 0, /* tp_str */
1571 1618 0, /* tp_getattro */
1572 1619 0, /* tp_setattro */
1573 1620 0, /* tp_as_buffer */
1574 1621 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1575 1622 ZstdCompressor__doc__, /* tp_doc */
1576 1623 0, /* tp_traverse */
1577 1624 0, /* tp_clear */
1578 1625 0, /* tp_richcompare */
1579 1626 0, /* tp_weaklistoffset */
1580 1627 0, /* tp_iter */
1581 1628 0, /* tp_iternext */
1582 1629 ZstdCompressor_methods, /* tp_methods */
1583 1630 0, /* tp_members */
1584 1631 0, /* tp_getset */
1585 1632 0, /* tp_base */
1586 1633 0, /* tp_dict */
1587 1634 0, /* tp_descr_get */
1588 1635 0, /* tp_descr_set */
1589 1636 0, /* tp_dictoffset */
1590 1637 (initproc)ZstdCompressor_init, /* tp_init */
1591 1638 0, /* tp_alloc */
1592 1639 PyType_GenericNew, /* tp_new */
1593 1640 };
1594 1641
1595 1642 void compressor_module_init(PyObject* mod) {
1596 1643 Py_TYPE(&ZstdCompressorType) = &PyType_Type;
1597 1644 if (PyType_Ready(&ZstdCompressorType) < 0) {
1598 1645 return;
1599 1646 }
1600 1647
1601 1648 Py_INCREF((PyObject*)&ZstdCompressorType);
1602 1649 PyModule_AddObject(mod, "ZstdCompressor",
1603 1650 (PyObject*)&ZstdCompressorType);
1604 1651 }
@@ -1,102 +1,103 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 static char frame_header[] = {
14 14 '\x28',
15 15 '\xb5',
16 16 '\x2f',
17 17 '\xfd',
18 18 };
19 19
20 20 void constants_module_init(PyObject* mod) {
21 21 PyObject* version;
22 22 PyObject* zstdVersion;
23 23 PyObject* frameHeader;
24 24
25 25 #if PY_MAJOR_VERSION >= 3
26 26 version = PyUnicode_FromString(PYTHON_ZSTANDARD_VERSION);
27 27 #else
28 28 version = PyString_FromString(PYTHON_ZSTANDARD_VERSION);
29 29 #endif
30 Py_INCREF(version);
31 30 PyModule_AddObject(mod, "__version__", version);
32 31
33 32 ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL);
34 33 PyModule_AddObject(mod, "ZstdError", ZstdError);
35 34
36 35 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_FINISH", compressorobj_flush_finish);
37 36 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_BLOCK", compressorobj_flush_block);
38 37
39 38 /* For now, the version is a simple tuple instead of a dedicated type. */
40 39 zstdVersion = PyTuple_New(3);
41 40 PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR));
42 41 PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR));
43 42 PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE));
44 Py_INCREF(zstdVersion);
45 43 PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion);
46 44
47 45 frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header));
48 46 if (frameHeader) {
49 47 PyModule_AddObject(mod, "FRAME_HEADER", frameHeader);
50 48 }
51 49 else {
52 50 PyErr_Format(PyExc_ValueError, "could not create frame header object");
53 51 }
54 52
55 53 PyModule_AddObject(mod, "CONTENTSIZE_UNKNOWN",
56 54 PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_UNKNOWN));
57 55 PyModule_AddObject(mod, "CONTENTSIZE_ERROR",
58 56 PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_ERROR));
59 57
60 58 PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel());
61 59 PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE",
62 60 (long)ZSTD_CStreamInSize());
63 61 PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_OUTPUT_SIZE",
64 62 (long)ZSTD_CStreamOutSize());
65 63 PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_INPUT_SIZE",
66 64 (long)ZSTD_DStreamInSize());
67 65 PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE",
68 66 (long)ZSTD_DStreamOutSize());
69 67
70 68 PyModule_AddIntConstant(mod, "MAGIC_NUMBER", ZSTD_MAGICNUMBER);
69 PyModule_AddIntConstant(mod, "BLOCKSIZELOG_MAX", ZSTD_BLOCKSIZELOG_MAX);
70 PyModule_AddIntConstant(mod, "BLOCKSIZE_MAX", ZSTD_BLOCKSIZE_MAX);
71 71 PyModule_AddIntConstant(mod, "WINDOWLOG_MIN", ZSTD_WINDOWLOG_MIN);
72 72 PyModule_AddIntConstant(mod, "WINDOWLOG_MAX", ZSTD_WINDOWLOG_MAX);
73 73 PyModule_AddIntConstant(mod, "CHAINLOG_MIN", ZSTD_CHAINLOG_MIN);
74 74 PyModule_AddIntConstant(mod, "CHAINLOG_MAX", ZSTD_CHAINLOG_MAX);
75 75 PyModule_AddIntConstant(mod, "HASHLOG_MIN", ZSTD_HASHLOG_MIN);
76 76 PyModule_AddIntConstant(mod, "HASHLOG_MAX", ZSTD_HASHLOG_MAX);
77 77 PyModule_AddIntConstant(mod, "HASHLOG3_MAX", ZSTD_HASHLOG3_MAX);
78 78 PyModule_AddIntConstant(mod, "SEARCHLOG_MIN", ZSTD_SEARCHLOG_MIN);
79 79 PyModule_AddIntConstant(mod, "SEARCHLOG_MAX", ZSTD_SEARCHLOG_MAX);
80 80 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN);
81 81 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX);
82 82 PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN);
83 PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX);
83 84 PyModule_AddIntConstant(mod, "LDM_MINMATCH_MIN", ZSTD_LDM_MINMATCH_MIN);
84 85 PyModule_AddIntConstant(mod, "LDM_MINMATCH_MAX", ZSTD_LDM_MINMATCH_MAX);
85 86 PyModule_AddIntConstant(mod, "LDM_BUCKETSIZELOG_MAX", ZSTD_LDM_BUCKETSIZELOG_MAX);
86 87
87 88 PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast);
88 89 PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast);
89 90 PyModule_AddIntConstant(mod, "STRATEGY_GREEDY", ZSTD_greedy);
90 91 PyModule_AddIntConstant(mod, "STRATEGY_LAZY", ZSTD_lazy);
91 92 PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2);
92 93 PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2);
93 94 PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt);
94 95 PyModule_AddIntConstant(mod, "STRATEGY_BTULTRA", ZSTD_btultra);
95 96
96 97 PyModule_AddIntConstant(mod, "DICT_TYPE_AUTO", ZSTD_dct_auto);
97 98 PyModule_AddIntConstant(mod, "DICT_TYPE_RAWCONTENT", ZSTD_dct_rawContent);
98 99 PyModule_AddIntConstant(mod, "DICT_TYPE_FULLDICT", ZSTD_dct_fullDict);
99 100
100 101 PyModule_AddIntConstant(mod, "FORMAT_ZSTD1", ZSTD_f_zstd1);
101 102 PyModule_AddIntConstant(mod, "FORMAT_ZSTD1_MAGICLESS", ZSTD_f_zstd1_magicless);
102 103 }
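From Python, the values registered here surface as module-level attributes, for example (illustrative):

    import zstandard as zstd

    assert zstd.FRAME_HEADER == b"\x28\xb5\x2f\xfd"
    max_level = zstd.MAX_COMPRESSION_LEVEL
    in_size = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE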
@@ -1,459 +1,440 b''
1 1 /**
2 2 * Copyright (c) 2017-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 static void set_unsupported_operation(void) {
14 14 PyObject* iomod;
15 15 PyObject* exc;
16 16
17 17 iomod = PyImport_ImportModule("io");
18 18 if (NULL == iomod) {
19 19 return;
20 20 }
21 21
22 22 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
23 23 if (NULL == exc) {
24 24 Py_DECREF(iomod);
25 25 return;
26 26 }
27 27
28 28 PyErr_SetNone(exc);
29 29 Py_DECREF(exc);
30 30 Py_DECREF(iomod);
31 31 }
32 32
33 33 static void reader_dealloc(ZstdDecompressionReader* self) {
34 34 Py_XDECREF(self->decompressor);
35 35 Py_XDECREF(self->reader);
36 36
37 37 if (self->buffer.buf) {
38 38 PyBuffer_Release(&self->buffer);
39 39 }
40 40
41 41 PyObject_Del(self);
42 42 }
43 43
44 44 static ZstdDecompressionReader* reader_enter(ZstdDecompressionReader* self) {
45 45 if (self->entered) {
46 46 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
47 47 return NULL;
48 48 }
49 49
50 if (ensure_dctx(self->decompressor, 1)) {
51 return NULL;
52 }
53
54 50 self->entered = 1;
55 51
56 52 Py_INCREF(self);
57 53 return self;
58 54 }
59 55
60 56 static PyObject* reader_exit(ZstdDecompressionReader* self, PyObject* args) {
61 57 PyObject* exc_type;
62 58 PyObject* exc_value;
63 59 PyObject* exc_tb;
64 60
65 61 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
66 62 return NULL;
67 63 }
68 64
69 65 self->entered = 0;
70 66 self->closed = 1;
71 67
72 68 /* Release resources. */
73 69 Py_CLEAR(self->reader);
74 70 if (self->buffer.buf) {
75 71 PyBuffer_Release(&self->buffer);
76 72 memset(&self->buffer, 0, sizeof(self->buffer));
77 73 }
78 74
79 75 Py_CLEAR(self->decompressor);
80 76
81 77 Py_RETURN_FALSE;
82 78 }
83 79
84 80 static PyObject* reader_readable(PyObject* self) {
85 81 Py_RETURN_TRUE;
86 82 }
87 83
88 84 static PyObject* reader_writable(PyObject* self) {
89 85 Py_RETURN_FALSE;
90 86 }
91 87
92 88 static PyObject* reader_seekable(PyObject* self) {
93 89 Py_RETURN_TRUE;
94 90 }
95 91
96 92 static PyObject* reader_close(ZstdDecompressionReader* self) {
97 93 self->closed = 1;
98 94 Py_RETURN_NONE;
99 95 }
100 96
101 static PyObject* reader_closed(ZstdDecompressionReader* self) {
102 if (self->closed) {
103 Py_RETURN_TRUE;
104 }
105 else {
106 Py_RETURN_FALSE;
107 }
108 }
109
110 97 static PyObject* reader_flush(PyObject* self) {
111 98 Py_RETURN_NONE;
112 99 }
113 100
114 101 static PyObject* reader_isatty(PyObject* self) {
115 102 Py_RETURN_FALSE;
116 103 }
117 104
118 105 static PyObject* reader_read(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
119 106 static char* kwlist[] = {
120 107 "size",
121 108 NULL
122 109 };
123 110
124 111 Py_ssize_t size = -1;
125 112 PyObject* result = NULL;
126 113 char* resultBuffer;
127 114 Py_ssize_t resultSize;
128 115 ZSTD_outBuffer output;
129 116 size_t zresult;
130 117
131 if (!self->entered) {
132 PyErr_SetString(ZstdError, "read() must be called from an active context manager");
133 return NULL;
134 }
135
136 118 if (self->closed) {
137 119 PyErr_SetString(PyExc_ValueError, "stream is closed");
138 120 return NULL;
139 121 }
140 122
141 123 if (self->finishedOutput) {
142 124 return PyBytes_FromStringAndSize("", 0);
143 125 }
144 126
145 127 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "n", kwlist, &size)) {
146 128 return NULL;
147 129 }
148 130
149 131 if (size < 1) {
150 132 PyErr_SetString(PyExc_ValueError, "cannot read negative or size 0 amounts");
151 133 return NULL;
152 134 }
153 135
154 136 result = PyBytes_FromStringAndSize(NULL, size);
155 137 if (NULL == result) {
156 138 return NULL;
157 139 }
158 140
159 141 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
160 142
161 143 output.dst = resultBuffer;
162 144 output.size = resultSize;
163 145 output.pos = 0;
164 146
165 147 readinput:
166 148
167 149 /* Consume input data left over from last time. */
168 150 if (self->input.pos < self->input.size) {
169 151 Py_BEGIN_ALLOW_THREADS
170 152 zresult = ZSTD_decompress_generic(self->decompressor->dctx,
171 153 &output, &self->input);
172 154 Py_END_ALLOW_THREADS
173 155
174 156 /* Input exhausted. Clear our state tracking. */
175 157 if (self->input.pos == self->input.size) {
176 158 memset(&self->input, 0, sizeof(self->input));
177 159 Py_CLEAR(self->readResult);
178 160
179 161 if (self->buffer.buf) {
180 162 self->finishedInput = 1;
181 163 }
182 164 }
183 165
184 166 if (ZSTD_isError(zresult)) {
185 167 PyErr_Format(ZstdError, "zstd decompress error: %s", ZSTD_getErrorName(zresult));
186 168 return NULL;
187 169 }
188 170 else if (0 == zresult) {
189 171 self->finishedOutput = 1;
190 172 }
191 173
192 174 /* We fulfilled the full read request. Emit it. */
193 175 if (output.pos && output.pos == output.size) {
194 176 self->bytesDecompressed += output.size;
195 177 return result;
196 178 }
197 179
198 180 /*
199 181 * There is more room in the output. Fall through to try to collect
200 182 * more data so we can try to fill the output.
201 183 */
202 184 }
203 185
204 186 if (!self->finishedInput) {
205 187 if (self->reader) {
206 188 Py_buffer buffer;
207 189
208 190 assert(self->readResult == NULL);
209 191 self->readResult = PyObject_CallMethod(self->reader, "read",
210 192 "k", self->readSize);
211 193 if (NULL == self->readResult) {
212 194 return NULL;
213 195 }
214 196
215 197 memset(&buffer, 0, sizeof(buffer));
216 198
217 199 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
218 200 return NULL;
219 201 }
220 202
221 203 /* EOF */
222 204 if (0 == buffer.len) {
223 205 self->finishedInput = 1;
224 206 Py_CLEAR(self->readResult);
225 207 }
226 208 else {
227 209 self->input.src = buffer.buf;
228 210 self->input.size = buffer.len;
229 211 self->input.pos = 0;
230 212 }
231 213
232 214 PyBuffer_Release(&buffer);
233 215 }
234 216 else {
235 217 assert(self->buffer.buf);
236 218 /*
236 218 * We should only get here once since the above block will exhaust
237 219 * the source buffer until finishedInput is set.
239 221 */
240 222 assert(self->input.src == NULL);
241 223
242 224 self->input.src = self->buffer.buf;
243 225 self->input.size = self->buffer.len;
244 226 self->input.pos = 0;
245 227 }
246 228 }
247 229
248 230 if (self->input.size) {
249 231 goto readinput;
250 232 }
251 233
252 234 /* EOF */
253 235 self->bytesDecompressed += output.pos;
254 236
255 237 if (safe_pybytes_resize(&result, output.pos)) {
256 238 Py_XDECREF(result);
257 239 return NULL;
258 240 }
259 241
260 242 return result;
261 243 }
262 244
263 245 static PyObject* reader_readall(PyObject* self) {
264 246 PyErr_SetNone(PyExc_NotImplementedError);
265 247 return NULL;
266 248 }
267 249
268 250 static PyObject* reader_readline(PyObject* self) {
269 251 PyErr_SetNone(PyExc_NotImplementedError);
270 252 return NULL;
271 253 }
272 254
273 255 static PyObject* reader_readlines(PyObject* self) {
274 256 PyErr_SetNone(PyExc_NotImplementedError);
275 257 return NULL;
276 258 }
277 259
278 260 static PyObject* reader_seek(ZstdDecompressionReader* self, PyObject* args) {
279 261 Py_ssize_t pos;
280 262 int whence = 0;
281 263 unsigned long long readAmount = 0;
282 264 size_t defaultOutSize = ZSTD_DStreamOutSize();
283 265
284 if (!self->entered) {
285 PyErr_SetString(ZstdError, "seek() must be called from an active context manager");
286 return NULL;
287 }
288
289 266 if (self->closed) {
290 267 PyErr_SetString(PyExc_ValueError, "stream is closed");
291 268 return NULL;
292 269 }
293 270
294 271 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &whence)) {
295 272 return NULL;
296 273 }
297 274
298 275 if (whence == SEEK_SET) {
299 276 if (pos < 0) {
300 277 PyErr_SetString(PyExc_ValueError,
301 278 "cannot seek to negative position with SEEK_SET");
302 279 return NULL;
303 280 }
304 281
305 282 if ((unsigned long long)pos < self->bytesDecompressed) {
306 283 PyErr_SetString(PyExc_ValueError,
307 284 "cannot seek zstd decompression stream backwards");
308 285 return NULL;
309 286 }
310 287
311 288 readAmount = pos - self->bytesDecompressed;
312 289 }
313 290 else if (whence == SEEK_CUR) {
314 291 if (pos < 0) {
315 292 PyErr_SetString(PyExc_ValueError,
316 293 "cannot seek zstd decompression stream backwards");
317 294 return NULL;
318 295 }
319 296
320 297 readAmount = pos;
321 298 }
322 299 else if (whence == SEEK_END) {
323 300 /* We /could/ support this with pos==0. But let's not do that until someone
324 301 needs it. */
325 302 PyErr_SetString(PyExc_ValueError,
326 303 "zstd decompression streams cannot be seeked with SEEK_END");
327 304 return NULL;
328 305 }
329 306
330 307 /* It is a bit inefficient to do this via the Python API. But since there
331 308 is a bit of state tracking involved to read from this type, it is the
332 309 easiest to implement. */
333 310 while (readAmount) {
334 311 Py_ssize_t readSize;
335 312 PyObject* readResult = PyObject_CallMethod((PyObject*)self, "read", "K",
336 313 readAmount < defaultOutSize ? readAmount : defaultOutSize);
337 314
338 315 if (!readResult) {
339 316 return NULL;
340 317 }
341 318
342 319 readSize = PyBytes_GET_SIZE(readResult);
343 320
344 321 /* Empty read means EOF. */
345 322 if (!readSize) {
346 323 break;
347 324 }
348 325
349 326 readAmount -= readSize;
350 327 }
351 328
352 329 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
353 330 }
354 331
355 332 static PyObject* reader_tell(ZstdDecompressionReader* self) {
356 333 /* TODO should this raise OSError since stream isn't seekable? */
357 334 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
358 335 }
359 336
360 337 static PyObject* reader_write(PyObject* self, PyObject* args) {
361 338 set_unsupported_operation();
362 339 return NULL;
363 340 }
364 341
365 342 static PyObject* reader_writelines(PyObject* self, PyObject* args) {
366 343 set_unsupported_operation();
367 344 return NULL;
368 345 }
369 346
370 347 static PyObject* reader_iter(PyObject* self) {
371 348 PyErr_SetNone(PyExc_NotImplementedError);
372 349 return NULL;
373 350 }
374 351
375 352 static PyObject* reader_iternext(PyObject* self) {
376 353 PyErr_SetNone(PyExc_NotImplementedError);
377 354 return NULL;
378 355 }
379 356
380 357 static PyMethodDef reader_methods[] = {
381 358 { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
382 359 PyDoc_STR("Enter a compression context") },
383 360 { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
384 361 PyDoc_STR("Exit a compression context") },
385 362 { "close", (PyCFunction)reader_close, METH_NOARGS,
386 363 PyDoc_STR("Close the stream so it cannot perform any more operations") },
387 { "closed", (PyCFunction)reader_closed, METH_NOARGS,
388 PyDoc_STR("Whether stream is closed") },
389 364 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
390 365 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
391 366 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
392 367 PyDoc_STR("Returns True") },
393 368 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS,
394 369 PyDoc_STR("read compressed data") },
395 370 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
396 371 { "readline", (PyCFunction)reader_readline, METH_NOARGS, PyDoc_STR("Not implemented") },
397 372 { "readlines", (PyCFunction)reader_readlines, METH_NOARGS, PyDoc_STR("Not implemented") },
398 373 { "seek", (PyCFunction)reader_seek, METH_VARARGS, PyDoc_STR("Seek the stream") },
399 374 { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
400 375 PyDoc_STR("Returns True") },
401 376 { "tell", (PyCFunction)reader_tell, METH_NOARGS,
402 377 PyDoc_STR("Returns current number of bytes compressed") },
403 378 { "writable", (PyCFunction)reader_writable, METH_NOARGS,
404 379 PyDoc_STR("Returns False") },
405 380 { "write", (PyCFunction)reader_write, METH_VARARGS, PyDoc_STR("unsupported operation") },
406 381 { "writelines", (PyCFunction)reader_writelines, METH_VARARGS, PyDoc_STR("unsupported operation") },
407 382 { NULL, NULL }
408 383 };
409 384
385 static PyMemberDef reader_members[] = {
386 { "closed", T_BOOL, offsetof(ZstdDecompressionReader, closed),
387 READONLY, "whether stream is closed" },
388 { NULL }
389 };
390
410 391 PyTypeObject ZstdDecompressionReaderType = {
411 392 PyVarObject_HEAD_INIT(NULL, 0)
412 393 "zstd.ZstdDecompressionReader", /* tp_name */
413 394 sizeof(ZstdDecompressionReader), /* tp_basicsize */
414 395 0, /* tp_itemsize */
415 396 (destructor)reader_dealloc, /* tp_dealloc */
416 397 0, /* tp_print */
417 398 0, /* tp_getattr */
418 399 0, /* tp_setattr */
419 400 0, /* tp_compare */
420 401 0, /* tp_repr */
421 402 0, /* tp_as_number */
422 403 0, /* tp_as_sequence */
423 404 0, /* tp_as_mapping */
424 405 0, /* tp_hash */
425 406 0, /* tp_call */
426 407 0, /* tp_str */
427 408 0, /* tp_getattro */
428 409 0, /* tp_setattro */
429 410 0, /* tp_as_buffer */
430 411 Py_TPFLAGS_DEFAULT, /* tp_flags */
431 412 0, /* tp_doc */
432 413 0, /* tp_traverse */
433 414 0, /* tp_clear */
434 415 0, /* tp_richcompare */
435 416 0, /* tp_weaklistoffset */
436 417 reader_iter, /* tp_iter */
437 418 reader_iternext, /* tp_iternext */
438 419 reader_methods, /* tp_methods */
439 0, /* tp_members */
420 reader_members, /* tp_members */
440 421 0, /* tp_getset */
441 422 0, /* tp_base */
442 423 0, /* tp_dict */
443 424 0, /* tp_descr_get */
444 425 0, /* tp_descr_set */
445 426 0, /* tp_dictoffset */
446 427 0, /* tp_init */
447 428 0, /* tp_alloc */
448 429 PyType_GenericNew, /* tp_new */
449 430 };
450 431
451 432
452 433 void decompressionreader_module_init(PyObject* mod) {
453 434 /* TODO make reader a sub-class of io.RawIOBase */
454 435
455 436 Py_TYPE(&ZstdDecompressionReaderType) = &PyType_Type;
456 437 if (PyType_Ready(&ZstdDecompressionReaderType) < 0) {
457 438 return;
458 439 }
459 440 }
@@ -1,174 +1,185 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 PyDoc_STRVAR(DecompressionObj__doc__,
14 14 "Perform decompression using a standard library compatible API.\n"
15 15 );
16 16
17 17 static void DecompressionObj_dealloc(ZstdDecompressionObj* self) {
18 18 Py_XDECREF(self->decompressor);
19 19
20 20 PyObject_Del(self);
21 21 }
22 22
23 23 static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args, PyObject* kwargs) {
24 24 static char* kwlist[] = {
25 25 "data",
26 26 NULL
27 27 };
28 28
29 29 Py_buffer source;
30 30 size_t zresult;
31 31 ZSTD_inBuffer input;
32 32 ZSTD_outBuffer output;
33 33 PyObject* result = NULL;
34 34 Py_ssize_t resultSize = 0;
35 35
36 output.dst = NULL;
37
36 38 if (self->finished) {
37 39 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
38 40 return NULL;
39 41 }
40 42
41 43 #if PY_MAJOR_VERSION >= 3
42 44 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:decompress",
43 45 #else
44 46 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:decompress",
45 47 #endif
46 48 kwlist, &source)) {
47 49 return NULL;
48 50 }
49 51
50 52 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
51 53 PyErr_SetString(PyExc_ValueError,
52 54 "data buffer should be contiguous and have at most one dimension");
53 55 goto finally;
54 56 }
55 57
58 /* Special case of empty input. Output will always be empty. */
59 if (source.len == 0) {
60 result = PyBytes_FromString("");
61 goto finally;
62 }
63
56 64 input.src = source.buf;
57 65 input.size = source.len;
58 66 input.pos = 0;
59 67
60 68 output.dst = PyMem_Malloc(self->outSize);
61 69 if (!output.dst) {
62 70 PyErr_NoMemory();
63 71 goto except;
64 72 }
65 73 output.size = self->outSize;
66 74 output.pos = 0;
67 75
68 /* Read input until exhausted. */
69 while (input.pos < input.size) {
76 while (1) {
70 77 Py_BEGIN_ALLOW_THREADS
71 78 zresult = ZSTD_decompress_generic(self->decompressor->dctx, &output, &input);
72 79 Py_END_ALLOW_THREADS
73 80
74 81 if (ZSTD_isError(zresult)) {
75 82 PyErr_Format(ZstdError, "zstd decompressor error: %s",
76 83 ZSTD_getErrorName(zresult));
77 84 goto except;
78 85 }
79 86
80 87 if (0 == zresult) {
81 88 self->finished = 1;
82 89 }
83 90
84 91 if (output.pos) {
85 92 if (result) {
86 93 resultSize = PyBytes_GET_SIZE(result);
87 94 if (-1 == safe_pybytes_resize(&result, resultSize + output.pos)) {
88 95 Py_XDECREF(result);
89 96 goto except;
90 97 }
91 98
92 99 memcpy(PyBytes_AS_STRING(result) + resultSize,
93 100 output.dst, output.pos);
94 101 }
95 102 else {
96 103 result = PyBytes_FromStringAndSize(output.dst, output.pos);
97 104 if (!result) {
98 105 goto except;
99 106 }
100 107 }
108 }
101 109
102 output.pos = 0;
110 if (zresult == 0 || (input.pos == input.size && output.pos == 0)) {
111 break;
103 112 }
113
114 output.pos = 0;
104 115 }
105 116
106 117 if (!result) {
107 118 result = PyBytes_FromString("");
108 119 }
109 120
110 121 goto finally;
111 122
112 123 except:
113 124 Py_CLEAR(result);
114 125
115 126 finally:
116 127 PyMem_Free(output.dst);
117 128 PyBuffer_Release(&source);
118 129
119 130 return result;
120 131 }
121 132
122 133 static PyMethodDef DecompressionObj_methods[] = {
123 134 { "decompress", (PyCFunction)DecompressionObj_decompress,
124 135 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("decompress data") },
125 136 { NULL, NULL }
126 137 };
127 138
128 139 PyTypeObject ZstdDecompressionObjType = {
129 140 PyVarObject_HEAD_INIT(NULL, 0)
130 141 "zstd.ZstdDecompressionObj", /* tp_name */
131 142 sizeof(ZstdDecompressionObj), /* tp_basicsize */
132 143 0, /* tp_itemsize */
133 144 (destructor)DecompressionObj_dealloc, /* tp_dealloc */
134 145 0, /* tp_print */
135 146 0, /* tp_getattr */
136 147 0, /* tp_setattr */
137 148 0, /* tp_compare */
138 149 0, /* tp_repr */
139 150 0, /* tp_as_number */
140 151 0, /* tp_as_sequence */
141 152 0, /* tp_as_mapping */
142 153 0, /* tp_hash */
143 154 0, /* tp_call */
144 155 0, /* tp_str */
145 156 0, /* tp_getattro */
146 157 0, /* tp_setattro */
147 158 0, /* tp_as_buffer */
148 159 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
149 160 DecompressionObj__doc__, /* tp_doc */
150 161 0, /* tp_traverse */
151 162 0, /* tp_clear */
152 163 0, /* tp_richcompare */
153 164 0, /* tp_weaklistoffset */
154 165 0, /* tp_iter */
155 166 0, /* tp_iternext */
156 167 DecompressionObj_methods, /* tp_methods */
157 168 0, /* tp_members */
158 169 0, /* tp_getset */
159 170 0, /* tp_base */
160 171 0, /* tp_dict */
161 172 0, /* tp_descr_get */
162 173 0, /* tp_descr_set */
163 174 0, /* tp_dictoffset */
164 175 0, /* tp_init */
165 176 0, /* tp_alloc */
166 177 PyType_GenericNew, /* tp_new */
167 178 };
168 179
169 180 void decompressobj_module_init(PyObject* module) {
170 181 Py_TYPE(&ZstdDecompressionObjType) = &PyType_Type;
171 182 if (PyType_Ready(&ZstdDecompressionObjType) < 0) {
172 183 return;
173 184 }
174 185 }
@@ -1,1803 +1,1807 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10 #include "pool.h"
11 11
12 12 extern PyObject* ZstdError;
13 13
14 14 /**
15 15 * Ensure the ZSTD_DCtx on a decompressor is initiated and ready for a new operation.
16 16 */
17 17 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict) {
18 18 size_t zresult;
19 19
20 20 ZSTD_DCtx_reset(decompressor->dctx);
21 21
22 22 if (decompressor->maxWindowSize) {
23 23 zresult = ZSTD_DCtx_setMaxWindowSize(decompressor->dctx, decompressor->maxWindowSize);
24 24 if (ZSTD_isError(zresult)) {
25 25 PyErr_Format(ZstdError, "unable to set max window size: %s",
26 26 ZSTD_getErrorName(zresult));
27 27 return 1;
28 28 }
29 29 }
30 30
31 31 zresult = ZSTD_DCtx_setFormat(decompressor->dctx, decompressor->format);
32 32 if (ZSTD_isError(zresult)) {
33 33 PyErr_Format(ZstdError, "unable to set decoding format: %s",
34 34 ZSTD_getErrorName(zresult));
35 35 return 1;
36 36 }
37 37
38 38 if (loadDict && decompressor->dict) {
39 39 if (ensure_ddict(decompressor->dict)) {
40 40 return 1;
41 41 }
42 42
43 43 zresult = ZSTD_DCtx_refDDict(decompressor->dctx, decompressor->dict->ddict);
44 44 if (ZSTD_isError(zresult)) {
45 45 PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
46 46 ZSTD_getErrorName(zresult));
47 47 return 1;
48 48 }
49 49 }
50 50
51 51 return 0;
52 52 }
53 53
54 54 PyDoc_STRVAR(Decompressor__doc__,
55 55 "ZstdDecompressor(dict_data=None)\n"
56 56 "\n"
57 57 "Create an object used to perform Zstandard decompression.\n"
58 58 "\n"
59 59 "An instance can perform multiple decompression operations."
60 60 );
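/* Illustrative Python usage (editor's sketch, not part of the vendored file;
 * assumes the module is imported as ``zstd`` and ``frame`` holds a complete
 * zstd frame):
 *
 *     dctx = zstd.ZstdDecompressor()
 *     data = dctx.decompress(frame)
 *     # The same instance can then be reused, e.g. via dctx.stream_reader()
 *     # or dctx.decompressobj().
 */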
61 61
62 62 static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
63 63 static char* kwlist[] = {
64 64 "dict_data",
65 65 "max_window_size",
66 66 "format",
67 67 NULL
68 68 };
69 69
70 70 ZstdCompressionDict* dict = NULL;
71 71 size_t maxWindowSize = 0;
72 72 ZSTD_format_e format = ZSTD_f_zstd1;
73 73
74 74 self->dctx = NULL;
75 75 self->dict = NULL;
76 76
77 77 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!II:ZstdDecompressor", kwlist,
78 78 &ZstdCompressionDictType, &dict, &maxWindowSize, &format)) {
79 79 return -1;
80 80 }
81 81
82 82 self->dctx = ZSTD_createDCtx();
83 83 if (!self->dctx) {
84 84 PyErr_NoMemory();
85 85 goto except;
86 86 }
87 87
88 88 self->maxWindowSize = maxWindowSize;
89 89 self->format = format;
90 90
91 91 if (dict) {
92 92 self->dict = dict;
93 93 Py_INCREF(dict);
94 94 }
95 95
96 96 if (ensure_dctx(self, 1)) {
97 97 goto except;
98 98 }
99 99
100 100 return 0;
101 101
102 102 except:
103 103 Py_CLEAR(self->dict);
104 104
105 105 if (self->dctx) {
106 106 ZSTD_freeDCtx(self->dctx);
107 107 self->dctx = NULL;
108 108 }
109 109
110 110 return -1;
111 111 }
112 112
113 113 static void Decompressor_dealloc(ZstdDecompressor* self) {
114 114 Py_CLEAR(self->dict);
115 115
116 116 if (self->dctx) {
117 117 ZSTD_freeDCtx(self->dctx);
118 118 self->dctx = NULL;
119 119 }
120 120
121 121 PyObject_Del(self);
122 122 }
123 123
124 124 PyDoc_STRVAR(Decompressor_memory_size__doc__,
125 125 "memory_size() -- Size of decompression context, in bytes\n"
126 126 );
127 127
128 128 static PyObject* Decompressor_memory_size(ZstdDecompressor* self) {
129 129 if (self->dctx) {
130 130 return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->dctx));
131 131 }
132 132 else {
133 133 PyErr_SetString(ZstdError, "no decompressor context found; this should never happen");
134 134 return NULL;
135 135 }
136 136 }
137 137
138 138 PyDoc_STRVAR(Decompressor_copy_stream__doc__,
139 139 "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n"
140 140 "\n"
141 141 "Compressed data will be read from ``ifh``, decompressed, and written to\n"
142 142 "``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n"
143 143 "``write(data)`` method.\n"
144 144 "\n"
145 145 "The optional ``read_size`` and ``write_size`` arguments control the chunk\n"
146 146 "size of data that is ``read()`` and ``write()`` between streams. They default\n"
147 147 "to the default input and output sizes of zstd decompressor streams.\n"
148 148 );
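/* Illustrative Python usage of copy_stream() (editor's sketch; ``src`` and
 * ``dst`` are assumed to be binary file objects, with ``src`` positioned at
 * the start of a zstd frame):
 *
 *     dctx = zstd.ZstdDecompressor()
 *     bytes_read, bytes_written = dctx.copy_stream(src, dst)
 */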
149 149
150 150 static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
151 151 static char* kwlist[] = {
152 152 "ifh",
153 153 "ofh",
154 154 "read_size",
155 155 "write_size",
156 156 NULL
157 157 };
158 158
159 159 PyObject* source;
160 160 PyObject* dest;
161 161 size_t inSize = ZSTD_DStreamInSize();
162 162 size_t outSize = ZSTD_DStreamOutSize();
163 163 ZSTD_inBuffer input;
164 164 ZSTD_outBuffer output;
165 165 Py_ssize_t totalRead = 0;
166 166 Py_ssize_t totalWrite = 0;
167 167 char* readBuffer;
168 168 Py_ssize_t readSize;
169 169 PyObject* readResult = NULL;
170 170 PyObject* res = NULL;
171 171 size_t zresult = 0;
172 172 PyObject* writeResult;
173 173 PyObject* totalReadPy;
174 174 PyObject* totalWritePy;
175 175
176 176 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
177 177 &source, &dest, &inSize, &outSize)) {
178 178 return NULL;
179 179 }
180 180
181 181 if (!PyObject_HasAttrString(source, "read")) {
182 182 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
183 183 return NULL;
184 184 }
185 185
186 186 if (!PyObject_HasAttrString(dest, "write")) {
187 187 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
188 188 return NULL;
189 189 }
190 190
191 191 /* Prevent free on uninitialized memory in finally. */
192 192 output.dst = NULL;
193 193
194 194 if (ensure_dctx(self, 1)) {
195 195 res = NULL;
196 196 goto finally;
197 197 }
198 198
199 199 output.dst = PyMem_Malloc(outSize);
200 200 if (!output.dst) {
201 201 PyErr_NoMemory();
202 202 res = NULL;
203 203 goto finally;
204 204 }
205 205 output.size = outSize;
206 206 output.pos = 0;
207 207
208 208 /* Read source stream until EOF */
209 209 while (1) {
210 210 readResult = PyObject_CallMethod(source, "read", "n", inSize);
211 211 if (!readResult) {
212 212 PyErr_SetString(ZstdError, "could not read() from source");
213 213 goto finally;
214 214 }
215 215
216 216 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
217 217
218 218 /* If no data was read, we're at EOF. */
219 219 if (0 == readSize) {
220 220 break;
221 221 }
222 222
223 223 totalRead += readSize;
224 224
225 225 /* Send data to decompressor */
226 226 input.src = readBuffer;
227 227 input.size = readSize;
228 228 input.pos = 0;
229 229
230 230 while (input.pos < input.size) {
231 231 Py_BEGIN_ALLOW_THREADS
232 232 zresult = ZSTD_decompress_generic(self->dctx, &output, &input);
233 233 Py_END_ALLOW_THREADS
234 234
235 235 if (ZSTD_isError(zresult)) {
236 236 PyErr_Format(ZstdError, "zstd decompressor error: %s",
237 237 ZSTD_getErrorName(zresult));
238 238 res = NULL;
239 239 goto finally;
240 240 }
241 241
242 242 if (output.pos) {
243 243 #if PY_MAJOR_VERSION >= 3
244 244 writeResult = PyObject_CallMethod(dest, "write", "y#",
245 245 #else
246 246 writeResult = PyObject_CallMethod(dest, "write", "s#",
247 247 #endif
248 248 output.dst, output.pos);
249 249
250 250 Py_XDECREF(writeResult);
251 251 totalWrite += output.pos;
252 252 output.pos = 0;
253 253 }
254 254 }
255 255
256 256 Py_CLEAR(readResult);
257 257 }
258 258
259 259 /* Source stream is exhausted. Finish up. */
260 260
261 261 totalReadPy = PyLong_FromSsize_t(totalRead);
262 262 totalWritePy = PyLong_FromSsize_t(totalWrite);
263 263 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
264 264 Py_DECREF(totalReadPy);
265 265 Py_DECREF(totalWritePy);
266 266
267 267 finally:
268 268 if (output.dst) {
269 269 PyMem_Free(output.dst);
270 270 }
271 271
272 272 Py_XDECREF(readResult);
273 273
274 274 return res;
275 275 }
276 276
277 277 PyDoc_STRVAR(Decompressor_decompress__doc__,
278 278 "decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n"
279 279 "\n"
280 280 "This method will decompress the entirety of the argument and return the\n"
281 281 "result.\n"
282 282 "\n"
283 283 "The input bytes are expected to contain a full Zstandard frame (something\n"
284 284 "compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n"
285 285 "not contain a full frame, an exception will be raised.\n"
286 286 "\n"
287 287 "If the frame header of the compressed data does not contain the content size\n"
288 288 "``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n"
289 289 "allocation of size ``max_output_size`` will be performed and an attempt will\n"
290 290 "be made to perform decompression into that buffer. If the buffer is too\n"
291 291 "small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n"
292 292 "be resized if it is too large.\n"
293 293 "\n"
294 294 "Uncompressed data could be much larger than compressed data. As a result,\n"
295 295 "calling this function could result in a very large memory allocation being\n"
296 296 "performed to hold the uncompressed data. Therefore it is **highly**\n"
297 297 "recommended to use a streaming decompression method instead of this one.\n"
298 298 );
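/* Illustrative Python usage of decompress() (editor's sketch; ``frame`` is
 * assumed to hold a complete zstd frame):
 *
 *     dctx = zstd.ZstdDecompressor()
 *     data = dctx.decompress(frame)
 *
 *     # If the frame header does not record the content size, an upper
 *     # bound on the output must be supplied:
 *     data = dctx.decompress(frame, max_output_size=1048576)
 */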
299 299
300 300 PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
301 301 static char* kwlist[] = {
302 302 "data",
303 303 "max_output_size",
304 304 NULL
305 305 };
306 306
307 307 Py_buffer source;
308 308 Py_ssize_t maxOutputSize = 0;
309 309 unsigned long long decompressedSize;
310 310 size_t destCapacity;
311 311 PyObject* result = NULL;
312 312 size_t zresult;
313 313 ZSTD_outBuffer outBuffer;
314 314 ZSTD_inBuffer inBuffer;
315 315
316 316 #if PY_MAJOR_VERSION >= 3
317 317 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|n:decompress",
318 318 #else
319 319 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|n:decompress",
320 320 #endif
321 321 kwlist, &source, &maxOutputSize)) {
322 322 return NULL;
323 323 }
324 324
325 325 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
326 326 PyErr_SetString(PyExc_ValueError,
327 327 "data buffer should be contiguous and have at most one dimension");
328 328 goto finally;
329 329 }
330 330
331 331 if (ensure_dctx(self, 1)) {
332 332 goto finally;
333 333 }
334 334
335 335 decompressedSize = ZSTD_getFrameContentSize(source.buf, source.len);
336 336
337 337 if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
338 338 PyErr_SetString(ZstdError, "error determining content size from frame header");
339 339 goto finally;
340 340 }
341 341 /* Special case of empty frame. */
342 342 else if (0 == decompressedSize) {
343 343 result = PyBytes_FromStringAndSize("", 0);
344 344 goto finally;
345 345 }
346 346 /* Missing content size in frame header. */
347 347 if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) {
348 348 if (0 == maxOutputSize) {
349 349 PyErr_SetString(ZstdError, "could not determine content size in frame header");
350 350 goto finally;
351 351 }
352 352
353 353 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
354 354 destCapacity = maxOutputSize;
355 355 decompressedSize = 0;
356 356 }
357 357 /* Size is recorded in frame header. */
358 358 else {
359 359 assert(SIZE_MAX >= PY_SSIZE_T_MAX);
360 360 if (decompressedSize > PY_SSIZE_T_MAX) {
361 361 PyErr_SetString(ZstdError, "frame is too large to decompress on this platform");
362 362 goto finally;
363 363 }
364 364
365 365 result = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)decompressedSize);
366 366 destCapacity = (size_t)decompressedSize;
367 367 }
368 368
369 369 if (!result) {
370 370 goto finally;
371 371 }
372 372
373 373 outBuffer.dst = PyBytes_AsString(result);
374 374 outBuffer.size = destCapacity;
375 375 outBuffer.pos = 0;
376 376
377 377 inBuffer.src = source.buf;
378 378 inBuffer.size = source.len;
379 379 inBuffer.pos = 0;
380 380
381 381 Py_BEGIN_ALLOW_THREADS
382 382 zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer);
383 383 Py_END_ALLOW_THREADS
384 384
385 385 if (ZSTD_isError(zresult)) {
386 386 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
387 387 Py_CLEAR(result);
388 388 goto finally;
389 389 }
390 390 else if (zresult) {
391 391 PyErr_Format(ZstdError, "decompression error: did not decompress full frame");
392 392 Py_CLEAR(result);
393 393 goto finally;
394 394 }
395 395 else if (decompressedSize && outBuffer.pos != decompressedSize) {
396 396 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
397 397 outBuffer.pos, decompressedSize);
398 398 Py_CLEAR(result);
399 399 goto finally;
400 400 }
401 401 else if (outBuffer.pos < destCapacity) {
402 402 if (safe_pybytes_resize(&result, outBuffer.pos)) {
403 403 Py_CLEAR(result);
404 404 goto finally;
405 405 }
406 406 }
407 407
408 408 finally:
409 409 PyBuffer_Release(&source);
410 410 return result;
411 411 }
412 412
413 413 PyDoc_STRVAR(Decompressor_decompressobj__doc__,
414 414 "decompressobj([write_size=default])\n"
415 415 "\n"
416 416 "Incrementally feed data into a decompressor.\n"
417 417 "\n"
418 418 "The returned object exposes a ``decompress(data)`` method. This makes it\n"
419 419 "compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n"
420 420 "callers can swap in the zstd decompressor while using the same API.\n"
421 421 );
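/* Illustrative Python usage of decompressobj() (editor's sketch; ``chunks``
 * is assumed to be an iterable of bytes that together form one zstd frame):
 *
 *     dctx = zstd.ZstdDecompressor()
 *     dobj = dctx.decompressobj()
 *     data = b"".join(dobj.decompress(chunk) for chunk in chunks)
 */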
422 422
423 423 static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
424 424 static char* kwlist[] = {
425 425 "write_size",
426 426 NULL
427 427 };
428 428
429 429 ZstdDecompressionObj* result = NULL;
430 430 size_t outSize = ZSTD_DStreamOutSize();
431 431
432 432 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|k:decompressobj", kwlist, &outSize)) {
433 433 return NULL;
434 434 }
435 435
436 436 if (!outSize) {
437 437 PyErr_SetString(PyExc_ValueError, "write_size must be positive");
438 438 return NULL;
439 439 }
440 440
441 441 result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL);
442 442 if (!result) {
443 443 return NULL;
444 444 }
445 445
446 446 if (ensure_dctx(self, 1)) {
447 447 Py_DECREF(result);
448 448 return NULL;
449 449 }
450 450
451 451 result->decompressor = self;
452 452 Py_INCREF(result->decompressor);
453 453 result->outSize = outSize;
454 454
455 455 return result;
456 456 }
457 457
458 458 PyDoc_STRVAR(Decompressor_read_to_iter__doc__,
459 459 "read_to_iter(reader[, read_size=default, write_size=default, skip_bytes=0])\n"
460 460 "Read compressed data and return an iterator\n"
461 461 "\n"
462 462 "Returns an iterator of decompressed data chunks produced from reading from\n"
463 463 "the ``reader``.\n"
464 464 "\n"
465 465 "Compressed data will be obtained from ``reader`` by calling the\n"
466 466 "``read(size)`` method of it. The source data will be streamed into a\n"
467 467 "decompressor. As decompressed data is available, it will be exposed to the\n"
468 468 "returned iterator.\n"
469 469 "\n"
470 470 "Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n"
471 471 "iterator in chunks of size ``write_size``. The default values are the input\n"
472 472 "and output sizes for a zstd streaming decompressor.\n"
473 473 "\n"
474 474 "There is also support for skipping the first ``skip_bytes`` of data from\n"
475 475 "the source.\n"
476 476 );
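/* Illustrative Python usage of read_to_iter() (editor's sketch; ``fh`` is
 * assumed to be a binary file object positioned at a zstd frame and
 * ``process`` a placeholder for caller code):
 *
 *     dctx = zstd.ZstdDecompressor()
 *     for chunk in dctx.read_to_iter(fh):
 *         process(chunk)
 */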
477 477
478 478 static ZstdDecompressorIterator* Decompressor_read_to_iter(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
479 479 static char* kwlist[] = {
480 480 "reader",
481 481 "read_size",
482 482 "write_size",
483 483 "skip_bytes",
484 484 NULL
485 485 };
486 486
487 487 PyObject* reader;
488 488 size_t inSize = ZSTD_DStreamInSize();
489 489 size_t outSize = ZSTD_DStreamOutSize();
490 490 ZstdDecompressorIterator* result;
491 491 size_t skipBytes = 0;
492 492
493 493 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_to_iter", kwlist,
494 494 &reader, &inSize, &outSize, &skipBytes)) {
495 495 return NULL;
496 496 }
497 497
498 498 if (skipBytes >= inSize) {
499 499 PyErr_SetString(PyExc_ValueError,
500 500 "skip_bytes must be smaller than read_size");
501 501 return NULL;
502 502 }
503 503
504 504 result = (ZstdDecompressorIterator*)PyObject_CallObject((PyObject*)&ZstdDecompressorIteratorType, NULL);
505 505 if (!result) {
506 506 return NULL;
507 507 }
508 508
509 509 if (PyObject_HasAttrString(reader, "read")) {
510 510 result->reader = reader;
511 511 Py_INCREF(result->reader);
512 512 }
513 513 else if (1 == PyObject_CheckBuffer(reader)) {
514 514 /* Object claims it is a buffer. Try to get a handle to it. */
515 515 if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
516 516 goto except;
517 517 }
518 518 }
519 519 else {
520 520 PyErr_SetString(PyExc_ValueError,
521 521 "must pass an object with a read() method or conforms to buffer protocol");
522 522 goto except;
523 523 }
524 524
525 525 result->decompressor = self;
526 526 Py_INCREF(result->decompressor);
527 527
528 528 result->inSize = inSize;
529 529 result->outSize = outSize;
530 530 result->skipBytes = skipBytes;
531 531
532 532 if (ensure_dctx(self, 1)) {
533 533 goto except;
534 534 }
535 535
536 536 result->input.src = PyMem_Malloc(inSize);
537 537 if (!result->input.src) {
538 538 PyErr_NoMemory();
539 539 goto except;
540 540 }
541 541
542 542 goto finally;
543 543
544 544 except:
545 545 Py_CLEAR(result);
546 546
547 547 finally:
548 548
549 549 return result;
550 550 }
551 551
552 552 PyDoc_STRVAR(Decompressor_stream_reader__doc__,
553 553 "stream_reader(source, [read_size=default])\n"
554 554 "\n"
555 555 "Obtain an object that behaves like an I/O stream that can be used for\n"
556 556 "reading decompressed output from an object.\n"
557 557 "\n"
558 558 "The source object can be any object with a ``read(size)`` method or that\n"
559 559 "conforms to the buffer protocol.\n"
560 560 );
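/* Illustrative Python usage of stream_reader() (editor's sketch; ``fh`` is
 * assumed to be a binary file object containing a zstd frame):
 *
 *     dctx = zstd.ZstdDecompressor()
 *     with dctx.stream_reader(fh) as reader:
 *         while True:
 *             chunk = reader.read(16384)
 *             if not chunk:
 *                 break
 */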
561 561
562 562 static ZstdDecompressionReader* Decompressor_stream_reader(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
563 563 static char* kwlist[] = {
564 564 "source",
565 565 "read_size",
566 566 NULL
567 567 };
568 568
569 569 PyObject* source;
570 570 size_t readSize = ZSTD_DStreamInSize();
571 571 ZstdDecompressionReader* result;
572 572
573 573 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:stream_reader", kwlist,
574 574 &source, &readSize)) {
575 575 return NULL;
576 576 }
577 577
578 if (ensure_dctx(self, 1)) {
579 return NULL;
580 }
581
578 582 result = (ZstdDecompressionReader*)PyObject_CallObject((PyObject*)&ZstdDecompressionReaderType, NULL);
579 583 if (NULL == result) {
580 584 return NULL;
581 585 }
582 586
583 587 if (PyObject_HasAttrString(source, "read")) {
584 588 result->reader = source;
585 589 Py_INCREF(source);
586 590 result->readSize = readSize;
587 591 }
588 592 else if (1 == PyObject_CheckBuffer(source)) {
589 593 if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
590 594 Py_CLEAR(result);
591 595 return NULL;
592 596 }
593 597 }
594 598 else {
595 599 PyErr_SetString(PyExc_TypeError,
596 600 "must pass an object with a read() method or that conforms to the buffer protocol");
597 601 Py_CLEAR(result);
598 602 return NULL;
599 603 }
600 604
601 605 result->decompressor = self;
602 606 Py_INCREF(self);
603 607
604 608 return result;
605 609 }
606 610
607 611 PyDoc_STRVAR(Decompressor_stream_writer__doc__,
608 612 "Create a context manager to write decompressed data to an object.\n"
609 613 "\n"
610 614 "The passed object must have a ``write()`` method.\n"
611 615 "\n"
612 616 "The caller feeds intput data to the object by calling ``write(data)``.\n"
613 617 "Decompressed data is written to the argument given as it is decompressed.\n"
614 618 "\n"
615 619 "An optional ``write_size`` argument defines the size of chunks to\n"
616 620 "``write()`` to the writer. It defaults to the default output size for a zstd\n"
617 621 "streaming decompressor.\n"
618 622 );
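/* Illustrative Python usage of stream_writer() (editor's sketch; ``ofh`` is
 * assumed to be a writable binary file object and ``compressed_chunks`` an
 * iterable of bytes holding zstd-compressed data):
 *
 *     dctx = zstd.ZstdDecompressor()
 *     with dctx.stream_writer(ofh) as decompressor:
 *         for chunk in compressed_chunks:
 *             decompressor.write(chunk)
 */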
619 623
620 624 static ZstdDecompressionWriter* Decompressor_stream_writer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
621 625 static char* kwlist[] = {
622 626 "writer",
623 627 "write_size",
624 628 NULL
625 629 };
626 630
627 631 PyObject* writer;
628 632 size_t outSize = ZSTD_DStreamOutSize();
629 633 ZstdDecompressionWriter* result;
630 634
631 635 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:stream_writer", kwlist,
632 636 &writer, &outSize)) {
633 637 return NULL;
634 638 }
635 639
636 640 if (!PyObject_HasAttrString(writer, "write")) {
637 641 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
638 642 return NULL;
639 643 }
640 644
641 645 result = (ZstdDecompressionWriter*)PyObject_CallObject((PyObject*)&ZstdDecompressionWriterType, NULL);
642 646 if (!result) {
643 647 return NULL;
644 648 }
645 649
646 650 result->decompressor = self;
647 651 Py_INCREF(result->decompressor);
648 652
649 653 result->writer = writer;
650 654 Py_INCREF(result->writer);
651 655
652 656 result->outSize = outSize;
653 657
654 658 return result;
655 659 }
656 660
657 661 PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
658 662 "Decompress a series of chunks using the content dictionary chaining technique\n"
659 663 );
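/* Illustrative Python usage of decompress_content_dict_chain() (editor's
 * sketch; ``frames`` is assumed to be a list of bytes in which frame N was
 * compressed using the decompressed output of frame N-1 as a prefix
 * dictionary):
 *
 *     dctx = zstd.ZstdDecompressor()
 *     last_fulltext = dctx.decompress_content_dict_chain(frames)
 */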
660 664
661 665 static PyObject* Decompressor_decompress_content_dict_chain(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
662 666 static char* kwlist[] = {
663 667 "frames",
664 668 NULL
665 669 };
666 670
667 671 PyObject* chunks;
668 672 Py_ssize_t chunksLen;
669 673 Py_ssize_t chunkIndex;
670 674 char parity = 0;
671 675 PyObject* chunk;
672 676 char* chunkData;
673 677 Py_ssize_t chunkSize;
674 678 size_t zresult;
675 679 ZSTD_frameHeader frameHeader;
676 680 void* buffer1 = NULL;
677 681 size_t buffer1Size = 0;
678 682 size_t buffer1ContentSize = 0;
679 683 void* buffer2 = NULL;
680 684 size_t buffer2Size = 0;
681 685 size_t buffer2ContentSize = 0;
682 686 void* destBuffer = NULL;
683 687 PyObject* result = NULL;
684 688 ZSTD_outBuffer outBuffer;
685 689 ZSTD_inBuffer inBuffer;
686 690
687 691 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
688 692 kwlist, &PyList_Type, &chunks)) {
689 693 return NULL;
690 694 }
691 695
692 696 chunksLen = PyList_Size(chunks);
693 697 if (!chunksLen) {
694 698 PyErr_SetString(PyExc_ValueError, "empty input chain");
695 699 return NULL;
696 700 }
697 701
698 702 /* The first chunk should not be using a dictionary. We handle it specially. */
699 703 chunk = PyList_GetItem(chunks, 0);
700 704 if (!PyBytes_Check(chunk)) {
701 705 PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes");
702 706 return NULL;
703 707 }
704 708
705 709 /* We require that all chunks be zstd frames and that they have content size set. */
706 710 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
707 711 zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize);
708 712 if (ZSTD_isError(zresult)) {
709 713 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
710 714 return NULL;
711 715 }
712 716 else if (zresult) {
713 717 PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame");
714 718 return NULL;
715 719 }
716 720
717 721 if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) {
718 722 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
719 723 return NULL;
720 724 }
721 725
722 726 assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize);
723 727
724 728 /* We check against PY_SSIZE_T_MAX here because we ultimately cast the
725 729 * result to a Python object and it's length can be no greater than
726 730 * Py_ssize_t. In theory, we could have an intermediate frame that is
727 731 * larger. But a) why would this API be used for frames that large b)
728 732 * it isn't worth the complexity to support. */
729 733 assert(SIZE_MAX >= PY_SSIZE_T_MAX);
730 734 if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) {
731 735 PyErr_SetString(PyExc_ValueError,
732 736 "chunk 0 is too large to decompress on this platform");
733 737 return NULL;
734 738 }
735 739
736 740 if (ensure_dctx(self, 0)) {
737 741 goto finally;
738 742 }
739 743
740 744 buffer1Size = (size_t)frameHeader.frameContentSize;
741 745 buffer1 = PyMem_Malloc(buffer1Size);
742 746 if (!buffer1) {
743 747 goto finally;
744 748 }
745 749
746 750 outBuffer.dst = buffer1;
747 751 outBuffer.size = buffer1Size;
748 752 outBuffer.pos = 0;
749 753
750 754 inBuffer.src = chunkData;
751 755 inBuffer.size = chunkSize;
752 756 inBuffer.pos = 0;
753 757
754 758 Py_BEGIN_ALLOW_THREADS
755 759 zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer);
756 760 Py_END_ALLOW_THREADS
757 761 if (ZSTD_isError(zresult)) {
758 762 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
759 763 goto finally;
760 764 }
761 765 else if (zresult) {
762 766 PyErr_Format(ZstdError, "chunk 0 did not decompress full frame");
763 767 goto finally;
764 768 }
765 769
766 770 buffer1ContentSize = outBuffer.pos;
767 771
768 772 /* Special case of a simple chain. */
769 773 if (1 == chunksLen) {
770 774 result = PyBytes_FromStringAndSize(buffer1, buffer1Size);
771 775 goto finally;
772 776 }
773 777
774 778 /* This should ideally look at next chunk. But this is slightly simpler. */
775 779 buffer2Size = (size_t)frameHeader.frameContentSize;
776 780 buffer2 = PyMem_Malloc(buffer2Size);
777 781 if (!buffer2) {
778 782 goto finally;
779 783 }
780 784
781 785 /* For each subsequent chunk, use the previous fulltext as a content dictionary.
782 786 Our strategy is to have 2 buffers. One holds the previous fulltext (to be
783 787 used as a content dictionary) and the other holds the new fulltext. The
784 788 buffers grow when needed but never decrease in size. This limits the
785 789 memory allocator overhead.
786 790 */
787 791 for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
788 792 chunk = PyList_GetItem(chunks, chunkIndex);
789 793 if (!PyBytes_Check(chunk)) {
790 794 PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
791 795 goto finally;
792 796 }
793 797
794 798 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
795 799 zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize);
796 800 if (ZSTD_isError(zresult)) {
797 801 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
798 802 goto finally;
799 803 }
800 804 else if (zresult) {
801 805 PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
802 806 goto finally;
803 807 }
804 808
805 809 if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) {
806 810 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
807 811 goto finally;
808 812 }
809 813
810 814 assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize);
811 815
812 816 if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) {
813 817 PyErr_Format(PyExc_ValueError,
814 818 "chunk %zd is too large to decompress on this platform", chunkIndex);
815 819 goto finally;
816 820 }
817 821
818 822 inBuffer.src = chunkData;
819 823 inBuffer.size = chunkSize;
820 824 inBuffer.pos = 0;
821 825
822 826 parity = chunkIndex % 2;
823 827
824 828 /* This could definitely be abstracted to reduce code duplication. */
825 829 if (parity) {
826 830 /* Resize destination buffer to hold larger content. */
827 831 if (buffer2Size < frameHeader.frameContentSize) {
828 832 buffer2Size = (size_t)frameHeader.frameContentSize;
829 833 destBuffer = PyMem_Realloc(buffer2, buffer2Size);
830 834 if (!destBuffer) {
831 835 goto finally;
832 836 }
833 837 buffer2 = destBuffer;
834 838 }
835 839
836 840 Py_BEGIN_ALLOW_THREADS
837 841 zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx,
838 842 buffer1, buffer1ContentSize, ZSTD_dct_rawContent);
839 843 Py_END_ALLOW_THREADS
840 844 if (ZSTD_isError(zresult)) {
841 845 PyErr_Format(ZstdError,
842 846 "failed to load prefix dictionary at chunk %zd", chunkIndex);
843 847 goto finally;
844 848 }
845 849
846 850 outBuffer.dst = buffer2;
847 851 outBuffer.size = buffer2Size;
848 852 outBuffer.pos = 0;
849 853
850 854 Py_BEGIN_ALLOW_THREADS
851 855 zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer);
852 856 Py_END_ALLOW_THREADS
853 857 if (ZSTD_isError(zresult)) {
854 858 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
855 859 chunkIndex, ZSTD_getErrorName(zresult));
856 860 goto finally;
857 861 }
858 862 else if (zresult) {
859 863 PyErr_Format(ZstdError, "chunk %zd did not decompress full frame",
860 864 chunkIndex);
861 865 goto finally;
862 866 }
863 867
864 868 buffer2ContentSize = outBuffer.pos;
865 869 }
866 870 else {
867 871 if (buffer1Size < frameHeader.frameContentSize) {
868 872 buffer1Size = (size_t)frameHeader.frameContentSize;
869 873 destBuffer = PyMem_Realloc(buffer1, buffer1Size);
870 874 if (!destBuffer) {
871 875 goto finally;
872 876 }
873 877 buffer1 = destBuffer;
874 878 }
875 879
876 880 Py_BEGIN_ALLOW_THREADS
877 881 zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx,
878 882 buffer2, buffer2ContentSize, ZSTD_dct_rawContent);
879 883 Py_END_ALLOW_THREADS
880 884 if (ZSTD_isError(zresult)) {
881 885 PyErr_Format(ZstdError,
882 886 "failed to load prefix dictionary at chunk %zd", chunkIndex);
883 887 goto finally;
884 888 }
885 889
886 890 outBuffer.dst = buffer1;
887 891 outBuffer.size = buffer1Size;
888 892 outBuffer.pos = 0;
889 893
890 894 Py_BEGIN_ALLOW_THREADS
891 895 zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer);
892 896 Py_END_ALLOW_THREADS
893 897 if (ZSTD_isError(zresult)) {
894 898 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
895 899 chunkIndex, ZSTD_getErrorName(zresult));
896 900 goto finally;
897 901 }
898 902 else if (zresult) {
899 903 PyErr_Format(ZstdError, "chunk %zd did not decompress full frame",
900 904 chunkIndex);
901 905 goto finally;
902 906 }
903 907
904 908 buffer1ContentSize = outBuffer.pos;
905 909 }
906 910 }
907 911
908 912 result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
909 913 parity ? buffer2ContentSize : buffer1ContentSize);
910 914
911 915 finally:
912 916 if (buffer2) {
913 917 PyMem_Free(buffer2);
914 918 }
915 919 if (buffer1) {
916 920 PyMem_Free(buffer1);
917 921 }
918 922
919 923 return result;
920 924 }
921 925
922 926 typedef struct {
923 927 void* sourceData;
924 928 size_t sourceSize;
925 929 size_t destSize;
926 930 } FramePointer;
927 931
928 932 typedef struct {
929 933 FramePointer* frames;
930 934 Py_ssize_t framesSize;
931 935 unsigned long long compressedSize;
932 936 } FrameSources;
933 937
934 938 typedef struct {
935 939 void* dest;
936 940 Py_ssize_t destSize;
937 941 BufferSegment* segments;
938 942 Py_ssize_t segmentsSize;
939 943 } DestBuffer;
940 944
941 945 typedef enum {
942 946 WorkerError_none = 0,
943 947 WorkerError_zstd = 1,
944 948 WorkerError_memory = 2,
945 949 WorkerError_sizeMismatch = 3,
946 950 WorkerError_unknownSize = 4,
947 951 } WorkerError;
948 952
949 953 typedef struct {
950 954 /* Source records and length */
951 955 FramePointer* framePointers;
952 956 /* Which records to process. */
953 957 Py_ssize_t startOffset;
954 958 Py_ssize_t endOffset;
955 959 unsigned long long totalSourceSize;
956 960
957 961 /* Compression state and settings. */
958 962 ZSTD_DCtx* dctx;
959 963 int requireOutputSizes;
960 964
961 965 /* Output storage. */
962 966 DestBuffer* destBuffers;
963 967 Py_ssize_t destCount;
964 968
965 969 /* Item that error occurred on. */
966 970 Py_ssize_t errorOffset;
967 971 /* If an error occurred. */
968 972 WorkerError error;
969 973 /* result from zstd decompression operation */
970 974 size_t zresult;
971 975 } WorkerState;
972 976
973 977 static void decompress_worker(WorkerState* state) {
974 978 size_t allocationSize;
975 979 DestBuffer* destBuffer;
976 980 Py_ssize_t frameIndex;
977 981 Py_ssize_t localOffset = 0;
978 982 Py_ssize_t currentBufferStartIndex = state->startOffset;
979 983 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
980 984 void* tmpBuf;
981 985 Py_ssize_t destOffset = 0;
982 986 FramePointer* framePointers = state->framePointers;
983 987 size_t zresult;
984 988 unsigned long long totalOutputSize = 0;
985 989
986 990 assert(NULL == state->destBuffers);
987 991 assert(0 == state->destCount);
988 992 assert(state->endOffset - state->startOffset >= 0);
989 993
990 994 /* We could get here due to the way work is allocated. Ideally we wouldn't
991 995 get here. But that would require a bit of a refactor in the caller. */
992 996 if (state->totalSourceSize > SIZE_MAX) {
993 997 state->error = WorkerError_memory;
994 998 state->errorOffset = 0;
995 999 return;
996 1000 }
997 1001
998 1002 /*
999 1003 * We need to allocate a buffer to hold decompressed data. How we do this
1000 1004 * depends on what we know about the output. The following scenarios are
1001 1005 * possible:
1002 1006 *
1003 1007 * 1. All structs defining frames declare the output size.
1004 1008 * 2. The decompressed size is embedded within the zstd frame.
1005 1009 * 3. The decompressed size is not stored anywhere.
1006 1010 *
1007 1011 * For now, we only support #1 and #2.
1008 1012 */
1009 1013
1010 1014 /* Resolve output segments. */
1011 1015 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
1012 1016 FramePointer* fp = &framePointers[frameIndex];
1013 1017 unsigned long long decompressedSize;
1014 1018
1015 1019 if (0 == fp->destSize) {
1016 1020 decompressedSize = ZSTD_getFrameContentSize(fp->sourceData, fp->sourceSize);
1017 1021
1018 1022 if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
1019 1023 state->error = WorkerError_unknownSize;
1020 1024 state->errorOffset = frameIndex;
1021 1025 return;
1022 1026 }
1023 1027 else if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) {
1024 1028 if (state->requireOutputSizes) {
1025 1029 state->error = WorkerError_unknownSize;
1026 1030 state->errorOffset = frameIndex;
1027 1031 return;
1028 1032 }
1029 1033
1030 1034 /* This will fail the assert for .destSize > 0 below. */
1031 1035 decompressedSize = 0;
1032 1036 }
1033 1037
1034 1038 if (decompressedSize > SIZE_MAX) {
1035 1039 state->error = WorkerError_memory;
1036 1040 state->errorOffset = frameIndex;
1037 1041 return;
1038 1042 }
1039 1043
1040 1044 fp->destSize = (size_t)decompressedSize;
1041 1045 }
1042 1046
1043 1047 totalOutputSize += fp->destSize;
1044 1048 }
1045 1049
1046 1050 state->destBuffers = calloc(1, sizeof(DestBuffer));
1047 1051 if (NULL == state->destBuffers) {
1048 1052 state->error = WorkerError_memory;
1049 1053 return;
1050 1054 }
1051 1055
1052 1056 state->destCount = 1;
1053 1057
1054 1058 destBuffer = &state->destBuffers[state->destCount - 1];
1055 1059
1056 1060 assert(framePointers[state->startOffset].destSize > 0); /* For now. */
1057 1061
1058 1062 allocationSize = roundpow2((size_t)state->totalSourceSize);
1059 1063
1060 1064 if (framePointers[state->startOffset].destSize > allocationSize) {
1061 1065 allocationSize = roundpow2(framePointers[state->startOffset].destSize);
1062 1066 }
1063 1067
1064 1068 destBuffer->dest = malloc(allocationSize);
1065 1069 if (NULL == destBuffer->dest) {
1066 1070 state->error = WorkerError_memory;
1067 1071 return;
1068 1072 }
1069 1073
1070 1074 destBuffer->destSize = allocationSize;
1071 1075
1072 1076 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1073 1077 if (NULL == destBuffer->segments) {
1074 1078 /* Caller will free state->dest as part of cleanup. */
1075 1079 state->error = WorkerError_memory;
1076 1080 return;
1077 1081 }
1078 1082
1079 1083 destBuffer->segmentsSize = remainingItems;
1080 1084
1081 1085 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
1082 1086 ZSTD_outBuffer outBuffer;
1083 1087 ZSTD_inBuffer inBuffer;
1084 1088 const void* source = framePointers[frameIndex].sourceData;
1085 1089 const size_t sourceSize = framePointers[frameIndex].sourceSize;
1086 1090 void* dest;
1087 1091 const size_t decompressedSize = framePointers[frameIndex].destSize;
1088 1092 size_t destAvailable = destBuffer->destSize - destOffset;
1089 1093
1090 1094 assert(decompressedSize > 0); /* For now. */
1091 1095
1092 1096 /*
1093 1097 * Not enough space in the current buffer. Finish the current buffer, then
1094 1098 * allocate and switch to a new one.
1095 1099 */
1096 1100 if (decompressedSize > destAvailable) {
1097 1101 /*
1098 1102 * Shrinking the destination buffer is optional. But it should be cheap,
1099 1103 * so we just do it.
1100 1104 */
1101 1105 if (destAvailable) {
1102 1106 tmpBuf = realloc(destBuffer->dest, destOffset);
1103 1107 if (NULL == tmpBuf) {
1104 1108 state->error = WorkerError_memory;
1105 1109 return;
1106 1110 }
1107 1111
1108 1112 destBuffer->dest = tmpBuf;
1109 1113 destBuffer->destSize = destOffset;
1110 1114 }
1111 1115
1112 1116 /* Truncate segments buffer. */
1113 1117 tmpBuf = realloc(destBuffer->segments,
1114 1118 (frameIndex - currentBufferStartIndex) * sizeof(BufferSegment));
1115 1119 if (NULL == tmpBuf) {
1116 1120 state->error = WorkerError_memory;
1117 1121 return;
1118 1122 }
1119 1123
1120 1124 destBuffer->segments = tmpBuf;
1121 1125 destBuffer->segmentsSize = frameIndex - currentBufferStartIndex;
1122 1126
1123 1127 /* Grow space for new DestBuffer. */
1124 1128 tmpBuf = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
1125 1129 if (NULL == tmpBuf) {
1126 1130 state->error = WorkerError_memory;
1127 1131 return;
1128 1132 }
1129 1133
1130 1134 state->destBuffers = tmpBuf;
1131 1135 state->destCount++;
1132 1136
1133 1137 destBuffer = &state->destBuffers[state->destCount - 1];
1134 1138
1135 1139 /* Don't take any chances with non-NULL pointers. */
1136 1140 memset(destBuffer, 0, sizeof(DestBuffer));
1137 1141
1138 1142 allocationSize = roundpow2((size_t)state->totalSourceSize);
1139 1143
1140 1144 if (decompressedSize > allocationSize) {
1141 1145 allocationSize = roundpow2(decompressedSize);
1142 1146 }
1143 1147
1144 1148 destBuffer->dest = malloc(allocationSize);
1145 1149 if (NULL == destBuffer->dest) {
1146 1150 state->error = WorkerError_memory;
1147 1151 return;
1148 1152 }
1149 1153
1150 1154 destBuffer->destSize = allocationSize;
1151 1155 destAvailable = allocationSize;
1152 1156 destOffset = 0;
1153 1157 localOffset = 0;
1154 1158
1155 1159 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1156 1160 if (NULL == destBuffer->segments) {
1157 1161 state->error = WorkerError_memory;
1158 1162 return;
1159 1163 }
1160 1164
1161 1165 destBuffer->segmentsSize = remainingItems;
1162 1166 currentBufferStartIndex = frameIndex;
1163 1167 }
1164 1168
1165 1169 dest = (char*)destBuffer->dest + destOffset;
1166 1170
1167 1171 outBuffer.dst = dest;
1168 1172 outBuffer.size = decompressedSize;
1169 1173 outBuffer.pos = 0;
1170 1174
1171 1175 inBuffer.src = source;
1172 1176 inBuffer.size = sourceSize;
1173 1177 inBuffer.pos = 0;
1174 1178
1175 1179 zresult = ZSTD_decompress_generic(state->dctx, &outBuffer, &inBuffer);
1176 1180 if (ZSTD_isError(zresult)) {
1177 1181 state->error = WorkerError_zstd;
1178 1182 state->zresult = zresult;
1179 1183 state->errorOffset = frameIndex;
1180 1184 return;
1181 1185 }
1182 1186 else if (zresult || outBuffer.pos != decompressedSize) {
1183 1187 state->error = WorkerError_sizeMismatch;
1184 1188 state->zresult = outBuffer.pos;
1185 1189 state->errorOffset = frameIndex;
1186 1190 return;
1187 1191 }
1188 1192
1189 1193 destBuffer->segments[localOffset].offset = destOffset;
1190 1194 destBuffer->segments[localOffset].length = outBuffer.pos;
1191 1195 destOffset += outBuffer.pos;
1192 1196 localOffset++;
1193 1197 remainingItems--;
1194 1198 }
1195 1199
1196 1200 if (destBuffer->destSize > destOffset) {
1197 1201 tmpBuf = realloc(destBuffer->dest, destOffset);
1198 1202 if (NULL == tmpBuf) {
1199 1203 state->error = WorkerError_memory;
1200 1204 return;
1201 1205 }
1202 1206
1203 1207 destBuffer->dest = tmpBuf;
1204 1208 destBuffer->destSize = destOffset;
1205 1209 }
1206 1210 }
1207 1211
1208 1212 ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames,
1209 1213 Py_ssize_t threadCount) {
1210 1214 Py_ssize_t i = 0;
1211 1215 int errored = 0;
1212 1216 Py_ssize_t segmentsCount;
1213 1217 ZstdBufferWithSegments* bws = NULL;
1214 1218 PyObject* resultArg = NULL;
1215 1219 Py_ssize_t resultIndex;
1216 1220 ZstdBufferWithSegmentsCollection* result = NULL;
1217 1221 FramePointer* framePointers = frames->frames;
1218 1222 unsigned long long workerBytes = 0;
1219 1223 Py_ssize_t currentThread = 0;
1220 1224 Py_ssize_t workerStartOffset = 0;
1221 1225 POOL_ctx* pool = NULL;
1222 1226 WorkerState* workerStates = NULL;
1223 1227 unsigned long long bytesPerWorker;
1224 1228
1225 1229 /* Caller should normalize 0 and negative values to 1 or larger. */
1226 1230 assert(threadCount >= 1);
1227 1231
1228 1232 /* More threads than inputs makes no sense under any conditions. */
1229 1233 threadCount = frames->framesSize < threadCount ? frames->framesSize
1230 1234 : threadCount;
1231 1235
1232 1236 /* TODO lower thread count if input size is too small and threads would just
1233 1237 add overhead. */
1234 1238
1235 1239 if (decompressor->dict) {
1236 1240 if (ensure_ddict(decompressor->dict)) {
1237 1241 return NULL;
1238 1242 }
1239 1243 }
1240 1244
1241 1245 /* If threadCount==1, we don't start a thread pool. But we do leverage the
1242 1246 same API for dispatching work. */
1243 1247 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1244 1248 if (NULL == workerStates) {
1245 1249 PyErr_NoMemory();
1246 1250 goto finally;
1247 1251 }
1248 1252
1249 1253 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1250 1254
1251 1255 if (threadCount > 1) {
1252 1256 pool = POOL_create(threadCount, 1);
1253 1257 if (NULL == pool) {
1254 1258 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1255 1259 goto finally;
1256 1260 }
1257 1261 }
1258 1262
1259 1263 bytesPerWorker = frames->compressedSize / threadCount;
1260 1264
1261 1265 if (bytesPerWorker > SIZE_MAX) {
1262 1266 PyErr_SetString(ZstdError, "too much data per worker for this platform");
1263 1267 goto finally;
1264 1268 }
1265 1269
1266 1270 for (i = 0; i < threadCount; i++) {
1267 1271 size_t zresult;
1268 1272
1269 1273 workerStates[i].dctx = ZSTD_createDCtx();
1270 1274 if (NULL == workerStates[i].dctx) {
1271 1275 PyErr_NoMemory();
1272 1276 goto finally;
1273 1277 }
1274 1278
1275 1279 ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx);
1276 1280
1277 1281 if (decompressor->dict) {
1278 1282 zresult = ZSTD_DCtx_refDDict(workerStates[i].dctx, decompressor->dict->ddict);
1279 1283 if (zresult) {
1280 1284 PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
1281 1285 ZSTD_getErrorName(zresult));
1282 1286 goto finally;
1283 1287 }
1284 1288 }
1285 1289
1286 1290 workerStates[i].framePointers = framePointers;
1287 1291 workerStates[i].requireOutputSizes = 1;
1288 1292 }
1289 1293
1290 1294 Py_BEGIN_ALLOW_THREADS
1291 1295 /* There are many ways to split work among workers.
1292 1296
1293 1297 For now, we take a simple approach of splitting work so each worker
1294 1298 gets roughly the same number of input bytes. This will result in more
1295 1299 starvation than running N>threadCount jobs. But it avoids complications
1296 1300 around state tracking, which could involve extra locking.
1297 1301 */
1298 1302 for (i = 0; i < frames->framesSize; i++) {
1299 1303 workerBytes += frames->frames[i].sourceSize;
1300 1304
1301 1305 /*
1302 1306 * The last worker/thread needs to handle all remaining work. Don't
1303 1307 * trigger it prematurely. Defer to the block outside of the loop.
1304 1308 * (But still process this loop so workerBytes is correct.)
1305 1309 */
1306 1310 if (currentThread == threadCount - 1) {
1307 1311 continue;
1308 1312 }
1309 1313
1310 1314 if (workerBytes >= bytesPerWorker) {
1311 1315 workerStates[currentThread].startOffset = workerStartOffset;
1312 1316 workerStates[currentThread].endOffset = i;
1313 1317 workerStates[currentThread].totalSourceSize = workerBytes;
1314 1318
1315 1319 if (threadCount > 1) {
1316 1320 POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
1317 1321 }
1318 1322 else {
1319 1323 decompress_worker(&workerStates[currentThread]);
1320 1324 }
1321 1325 currentThread++;
1322 1326 workerStartOffset = i + 1;
1323 1327 workerBytes = 0;
1324 1328 }
1325 1329 }
1326 1330
1327 1331 if (workerBytes) {
1328 1332 workerStates[currentThread].startOffset = workerStartOffset;
1329 1333 workerStates[currentThread].endOffset = frames->framesSize - 1;
1330 1334 workerStates[currentThread].totalSourceSize = workerBytes;
1331 1335
1332 1336 if (threadCount > 1) {
1333 1337 POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
1334 1338 }
1335 1339 else {
1336 1340 decompress_worker(&workerStates[currentThread]);
1337 1341 }
1338 1342 }
1339 1343
1340 1344 if (threadCount > 1) {
1341 1345 POOL_free(pool);
1342 1346 pool = NULL;
1343 1347 }
1344 1348 Py_END_ALLOW_THREADS
1345 1349
1346 1350 for (i = 0; i < threadCount; i++) {
1347 1351 switch (workerStates[i].error) {
1348 1352 case WorkerError_none:
1349 1353 break;
1350 1354
1351 1355 case WorkerError_zstd:
1352 1356 PyErr_Format(ZstdError, "error decompressing item %zd: %s",
1353 1357 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1354 1358 errored = 1;
1355 1359 break;
1356 1360
1357 1361 case WorkerError_memory:
1358 1362 PyErr_NoMemory();
1359 1363 errored = 1;
1360 1364 break;
1361 1365
1362 1366 case WorkerError_sizeMismatch:
1363 1367 PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %zu",
1364 1368 workerStates[i].errorOffset, workerStates[i].zresult,
1365 1369 framePointers[workerStates[i].errorOffset].destSize);
1366 1370 errored = 1;
1367 1371 break;
1368 1372
1369 1373 case WorkerError_unknownSize:
1370 1374 PyErr_Format(PyExc_ValueError, "could not determine decompressed size of item %zd",
1371 1375 workerStates[i].errorOffset);
1372 1376 errored = 1;
1373 1377 break;
1374 1378
1375 1379 default:
1376 1380 PyErr_Format(ZstdError, "unhandled error type: %d; this is a bug",
1377 1381 workerStates[i].error);
1378 1382 errored = 1;
1379 1383 break;
1380 1384 }
1381 1385
1382 1386 if (errored) {
1383 1387 break;
1384 1388 }
1385 1389 }
1386 1390
1387 1391 if (errored) {
1388 1392 goto finally;
1389 1393 }
1390 1394
1391 1395 segmentsCount = 0;
1392 1396 for (i = 0; i < threadCount; i++) {
1393 1397 segmentsCount += workerStates[i].destCount;
1394 1398 }
1395 1399
1396 1400 resultArg = PyTuple_New(segmentsCount);
1397 1401 if (NULL == resultArg) {
1398 1402 goto finally;
1399 1403 }
1400 1404
1401 1405 resultIndex = 0;
1402 1406
1403 1407 for (i = 0; i < threadCount; i++) {
1404 1408 Py_ssize_t bufferIndex;
1405 1409 WorkerState* state = &workerStates[i];
1406 1410
1407 1411 for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
1408 1412 DestBuffer* destBuffer = &state->destBuffers[bufferIndex];
1409 1413
1410 1414 bws = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1411 1415 destBuffer->segments, destBuffer->segmentsSize);
1412 1416 if (NULL == bws) {
1413 1417 goto finally;
1414 1418 }
1415 1419
1416 1420 /*
1417 1421 * Memory for buffer and segments was allocated using malloc() in worker
1418 1422 * and the memory is transferred to the BufferWithSegments instance. So
1419 1423 * tell instance to use free() and NULL the reference in the state struct
1420 1424 * so it isn't freed below.
1421 1425 */
1422 1426 bws->useFree = 1;
1423 1427 destBuffer->dest = NULL;
1424 1428 destBuffer->segments = NULL;
1425 1429
1426 1430 PyTuple_SET_ITEM(resultArg, resultIndex++, (PyObject*)bws);
1427 1431 }
1428 1432 }
1429 1433
1430 1434 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1431 1435 (PyObject*)&ZstdBufferWithSegmentsCollectionType, resultArg);
1432 1436
1433 1437 finally:
1434 1438 Py_CLEAR(resultArg);
1435 1439
1436 1440 if (workerStates) {
1437 1441 for (i = 0; i < threadCount; i++) {
1438 1442 Py_ssize_t bufferIndex;
1439 1443 WorkerState* state = &workerStates[i];
1440 1444
1441 1445 if (state->dctx) {
1442 1446 ZSTD_freeDCtx(state->dctx);
1443 1447 }
1444 1448
1445 1449 for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
1446 1450 if (state->destBuffers) {
1447 1451 /*
1448 1452 * Will be NULL if memory was transferred to a BufferWithSegments.
1449 1453 * Otherwise it is left over after an error occurred.
1450 1454 */
1451 1455 free(state->destBuffers[bufferIndex].dest);
1452 1456 free(state->destBuffers[bufferIndex].segments);
1453 1457 }
1454 1458 }
1455 1459
1456 1460 free(state->destBuffers);
1457 1461 }
1458 1462
1459 1463 PyMem_Free(workerStates);
1460 1464 }
1461 1465
1462 1466 POOL_free(pool);
1463 1467
1464 1468 return result;
1465 1469 }
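The comment in decompress_from_framesources() above describes splitting work so that each worker receives roughly the same number of input bytes. The following Python sketch only illustrates that partitioning strategy; the function name and its use are hypothetical and not part of the library:

    def split_frames_by_bytes(frame_sizes, thread_count):
        # Mirror of the C loop: accumulate frames until a worker holds at
        # least total/thread_count bytes, then start the next worker. The
        # final worker takes whatever remains.
        per_worker = sum(frame_sizes) // thread_count
        assignments = []  # inclusive (start_index, end_index) ranges
        start = 0
        worker_bytes = 0
        for i, size in enumerate(frame_sizes):
            worker_bytes += size
            # The last worker must handle all remaining work.
            if len(assignments) == thread_count - 1:
                continue
            if worker_bytes >= per_worker:
                assignments.append((start, i))
                start = i + 1
                worker_bytes = 0
        if worker_bytes:
            assignments.append((start, len(frame_sizes) - 1))
        return assignments

    # Example: six frames split across two workers.
    assert split_frames_by_bytes([100, 50, 200, 25, 25, 100], 2) == [(0, 2), (3, 5)]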
1466 1470
1467 1471 PyDoc_STRVAR(Decompressor_multi_decompress_to_buffer__doc__,
1468 1472 "Decompress multiple frames to output buffers\n"
1469 1473 "\n"
1470 1474 "Receives a ``BufferWithSegments``, a ``BufferWithSegmentsCollection`` or a\n"
1471 1475 "list of bytes-like objects. Each item in the passed collection should be a\n"
1472 1476 "compressed zstd frame.\n"
1473 1477 "\n"
1474 1478 "Unless ``decompressed_sizes`` is specified, the content size *must* be\n"
1475 1479 "written into the zstd frame header. If ``decompressed_sizes`` is specified,\n"
1476 1480 "it is an object conforming to the buffer protocol that represents an array\n"
1477 1481 "of 64-bit unsigned integers in the machine's native format. Specifying\n"
1478 1482 "``decompressed_sizes`` avoids a pre-scan of each frame to determine its\n"
1479 1483 "output size.\n"
1480 1484 "\n"
1481 1485 "Returns a ``BufferWithSegmentsCollection`` containing the decompressed\n"
1482 1486 "data. All decompressed data is allocated in a single memory buffer. The\n"
1483 1487 "``BufferWithSegments`` instance tracks which objects are at which offsets\n"
1484 1488 "and their respective lengths.\n"
1485 1489 "\n"
1486 1490 "The ``threads`` argument controls how many threads to use for operations.\n"
1487 1491 "Negative values will use the same number of threads as logical CPUs on the\n"
1488 1492 "machine.\n"
1489 1493 );
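A usage sketch for the API described by the docstring above. This is illustrative only; it assumes the module is imported as ``zstandard`` and that the frames were produced with content sizes written into their headers (the default):

    import struct
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    frames = [cctx.compress(b'foo' * 64), cctx.compress(b'bar' * 128)]

    dctx = zstd.ZstdDecompressor()

    # Content sizes are recorded in the frame headers, so the simplest call
    # just passes the list of frames.
    result = dctx.multi_decompress_to_buffer(frames)
    assert result[0].tobytes() == b'foo' * 64

    # Optionally pass decompressed_sizes as an array of native-format 64-bit
    # unsigned integers to skip scanning each frame header, and threads=-1 to
    # use one thread per logical CPU.
    sizes = struct.pack('=2Q', 3 * 64, 3 * 128)
    result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes,
                                             threads=-1)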
1490 1494
1491 1495 static ZstdBufferWithSegmentsCollection* Decompressor_multi_decompress_to_buffer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
1492 1496 static char* kwlist[] = {
1493 1497 "frames",
1494 1498 "decompressed_sizes",
1495 1499 "threads",
1496 1500 NULL
1497 1501 };
1498 1502
1499 1503 PyObject* frames;
1500 1504 Py_buffer frameSizes;
1501 1505 int threads = 0;
1502 1506 Py_ssize_t frameCount;
1503 1507 Py_buffer* frameBuffers = NULL;
1504 1508 FramePointer* framePointers = NULL;
1505 1509 unsigned long long* frameSizesP = NULL;
1506 1510 unsigned long long totalInputSize = 0;
1507 1511 FrameSources frameSources;
1508 1512 ZstdBufferWithSegmentsCollection* result = NULL;
1509 1513 Py_ssize_t i;
1510 1514
1511 1515 memset(&frameSizes, 0, sizeof(frameSizes));
1512 1516
1513 1517 #if PY_MAJOR_VERSION >= 3
1514 1518 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|y*i:multi_decompress_to_buffer",
1515 1519 #else
1516 1520 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s*i:multi_decompress_to_buffer",
1517 1521 #endif
1518 1522 kwlist, &frames, &frameSizes, &threads)) {
1519 1523 return NULL;
1520 1524 }
1521 1525
1522 1526 if (frameSizes.buf) {
1523 1527 if (!PyBuffer_IsContiguous(&frameSizes, 'C') || frameSizes.ndim > 1) {
1524 1528 PyErr_SetString(PyExc_ValueError, "decompressed_sizes buffer should be contiguous and have a single dimension");
1525 1529 goto finally;
1526 1530 }
1527 1531
1528 1532 frameSizesP = (unsigned long long*)frameSizes.buf;
1529 1533 }
1530 1534
1531 1535 if (threads < 0) {
1532 1536 threads = cpu_count();
1533 1537 }
1534 1538
1535 1539 if (threads < 2) {
1536 1540 threads = 1;
1537 1541 }
1538 1542
1539 1543 if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsType)) {
1540 1544 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)frames;
1541 1545 frameCount = buffer->segmentCount;
1542 1546
1543 1547 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1544 1548 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1545 1549 frameCount * sizeof(unsigned long long), frameSizes.len);
1546 1550 goto finally;
1547 1551 }
1548 1552
1549 1553 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1550 1554 if (!framePointers) {
1551 1555 PyErr_NoMemory();
1552 1556 goto finally;
1553 1557 }
1554 1558
1555 1559 for (i = 0; i < frameCount; i++) {
1556 1560 void* sourceData;
1557 1561 unsigned long long sourceSize;
1558 1562 unsigned long long decompressedSize = 0;
1559 1563
1560 1564 if (buffer->segments[i].offset + buffer->segments[i].length > buffer->dataSize) {
1561 1565 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", i);
1562 1566 goto finally;
1563 1567 }
1564 1568
1565 1569 sourceData = (char*)buffer->data + buffer->segments[i].offset;
1566 1570 sourceSize = buffer->segments[i].length;
1567 1571 totalInputSize += sourceSize;
1568 1572
1569 1573 if (frameSizesP) {
1570 1574 decompressedSize = frameSizesP[i];
1571 1575 }
1572 1576
1573 1577 if (sourceSize > SIZE_MAX) {
1574 1578 PyErr_Format(PyExc_ValueError,
1575 1579 "item %zd is too large for this platform", i);
1576 1580 goto finally;
1577 1581 }
1578 1582
1579 1583 if (decompressedSize > SIZE_MAX) {
1580 1584 PyErr_Format(PyExc_ValueError,
1581 1585 "decompressed size of item %zd is too large for this platform", i);
1582 1586 goto finally;
1583 1587 }
1584 1588
1585 1589 framePointers[i].sourceData = sourceData;
1586 1590 framePointers[i].sourceSize = (size_t)sourceSize;
1587 1591 framePointers[i].destSize = (size_t)decompressedSize;
1588 1592 }
1589 1593 }
1590 1594 else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) {
1591 1595 Py_ssize_t offset = 0;
1592 1596 ZstdBufferWithSegments* buffer;
1593 1597 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)frames;
1594 1598
1595 1599 frameCount = BufferWithSegmentsCollection_length(collection);
1596 1600
1597 1601 if (frameSizes.buf && frameSizes.len != frameCount) {
1598 1602 PyErr_Format(PyExc_ValueError,
1599 1603 "decompressed_sizes size mismatch; expected %zd; got %zd",
1600 1604 frameCount * sizeof(unsigned long long), frameSizes.len);
1601 1605 goto finally;
1602 1606 }
1603 1607
1604 1608 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1605 1609 if (NULL == framePointers) {
1606 1610 PyErr_NoMemory();
1607 1611 goto finally;
1608 1612 }
1609 1613
1610 1614 /* Iterate the data structure directly because it is faster. */
1611 1615 for (i = 0; i < collection->bufferCount; i++) {
1612 1616 Py_ssize_t segmentIndex;
1613 1617 buffer = collection->buffers[i];
1614 1618
1615 1619 for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) {
1616 1620 unsigned long long decompressedSize = frameSizesP ? frameSizesP[offset] : 0;
1617 1621
1618 1622 if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) {
1619 1623 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area",
1620 1624 offset);
1621 1625 goto finally;
1622 1626 }
1623 1627
1624 1628 if (buffer->segments[segmentIndex].length > SIZE_MAX) {
1625 1629 PyErr_Format(PyExc_ValueError,
1626 1630 "item %zd in buffer %zd is too large for this platform",
1627 1631 segmentIndex, i);
1628 1632 goto finally;
1629 1633 }
1630 1634
1631 1635 if (decompressedSize > SIZE_MAX) {
1632 1636 PyErr_Format(PyExc_ValueError,
1633 1637 "decompressed size of item %zd in buffer %zd is too large for this platform",
1634 1638 segmentIndex, i);
1635 1639 goto finally;
1636 1640 }
1637 1641
1638 1642 totalInputSize += buffer->segments[segmentIndex].length;
1639 1643
1640 1644 framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset;
1641 1645 framePointers[offset].sourceSize = (size_t)buffer->segments[segmentIndex].length;
1642 1646 framePointers[offset].destSize = (size_t)decompressedSize;
1643 1647
1644 1648 offset++;
1645 1649 }
1646 1650 }
1647 1651 }
1648 1652 else if (PyList_Check(frames)) {
1649 1653 frameCount = PyList_GET_SIZE(frames);
1650 1654
1651 1655 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1652 1656 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1653 1657 frameCount * sizeof(unsigned long long), frameSizes.len);
1654 1658 goto finally;
1655 1659 }
1656 1660
1657 1661 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1658 1662 if (!framePointers) {
1659 1663 PyErr_NoMemory();
1660 1664 goto finally;
1661 1665 }
1662 1666
1663 1667 frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer));
1664 1668 if (NULL == frameBuffers) {
1665 1669 PyErr_NoMemory();
1666 1670 goto finally;
1667 1671 }
1668 1672
1669 1673 memset(frameBuffers, 0, frameCount * sizeof(Py_buffer));
1670 1674
1671 1675 /* Do a pass to assemble info about our input buffers and output sizes. */
1672 1676 for (i = 0; i < frameCount; i++) {
1673 1677 unsigned long long decompressedSize = frameSizesP ? frameSizesP[i] : 0;
1674 1678
1675 1679 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i),
1676 1680 &frameBuffers[i], PyBUF_CONTIG_RO)) {
1677 1681 PyErr_Clear();
1678 1682 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1679 1683 goto finally;
1680 1684 }
1681 1685
1682 1686 if (decompressedSize > SIZE_MAX) {
1683 1687 PyErr_Format(PyExc_ValueError,
1684 1688 "decompressed size of item %zd is too large for this platform", i);
1685 1689 goto finally;
1686 1690 }
1687 1691
1688 1692 totalInputSize += frameBuffers[i].len;
1689 1693
1690 1694 framePointers[i].sourceData = frameBuffers[i].buf;
1691 1695 framePointers[i].sourceSize = frameBuffers[i].len;
1692 1696 framePointers[i].destSize = (size_t)decompressedSize;
1693 1697 }
1694 1698 }
1695 1699 else {
1696 1700 PyErr_SetString(PyExc_TypeError, "argument must be list or BufferWithSegments");
1697 1701 goto finally;
1698 1702 }
1699 1703
1700 1704 /* We now have an array with info about our inputs and outputs. Feed it into
1701 1705 our generic decompression function. */
1702 1706 frameSources.frames = framePointers;
1703 1707 frameSources.framesSize = frameCount;
1704 1708 frameSources.compressedSize = totalInputSize;
1705 1709
1706 1710 result = decompress_from_framesources(self, &frameSources, threads);
1707 1711
1708 1712 finally:
1709 1713 if (frameSizes.buf) {
1710 1714 PyBuffer_Release(&frameSizes);
1711 1715 }
1712 1716 PyMem_Free(framePointers);
1713 1717
1714 1718 if (frameBuffers) {
1715 1719 for (i = 0; i < frameCount; i++) {
1716 1720 PyBuffer_Release(&frameBuffers[i]);
1717 1721 }
1718 1722
1719 1723 PyMem_Free(frameBuffers);
1720 1724 }
1721 1725
1722 1726 return result;
1723 1727 }
1724 1728
1725 1729 static PyMethodDef Decompressor_methods[] = {
1726 1730 { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
1727 1731 Decompressor_copy_stream__doc__ },
1728 1732 { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS,
1729 1733 Decompressor_decompress__doc__ },
1730 1734 { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_VARARGS | METH_KEYWORDS,
1731 1735 Decompressor_decompressobj__doc__ },
1732 1736 { "read_to_iter", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS,
1733 1737 Decompressor_read_to_iter__doc__ },
1734 1738 /* TODO Remove deprecated API */
1735 1739 { "read_from", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS,
1736 1740 Decompressor_read_to_iter__doc__ },
1737 1741 { "stream_reader", (PyCFunction)Decompressor_stream_reader,
1738 1742 METH_VARARGS | METH_KEYWORDS, Decompressor_stream_reader__doc__ },
1739 1743 { "stream_writer", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS,
1740 1744 Decompressor_stream_writer__doc__ },
1741 1745 /* TODO remove deprecated API */
1742 1746 { "write_to", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS,
1743 1747 Decompressor_stream_writer__doc__ },
1744 1748 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
1745 1749 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
1746 1750 { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer,
1747 1751 METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ },
1748 1752 { "memory_size", (PyCFunction)Decompressor_memory_size, METH_NOARGS,
1749 1753 Decompressor_memory_size__doc__ },
1750 1754 { NULL, NULL }
1751 1755 };
1752 1756
1753 1757 PyTypeObject ZstdDecompressorType = {
1754 1758 PyVarObject_HEAD_INIT(NULL, 0)
1755 1759 "zstd.ZstdDecompressor", /* tp_name */
1756 1760 sizeof(ZstdDecompressor), /* tp_basicsize */
1757 1761 0, /* tp_itemsize */
1758 1762 (destructor)Decompressor_dealloc, /* tp_dealloc */
1759 1763 0, /* tp_print */
1760 1764 0, /* tp_getattr */
1761 1765 0, /* tp_setattr */
1762 1766 0, /* tp_compare */
1763 1767 0, /* tp_repr */
1764 1768 0, /* tp_as_number */
1765 1769 0, /* tp_as_sequence */
1766 1770 0, /* tp_as_mapping */
1767 1771 0, /* tp_hash */
1768 1772 0, /* tp_call */
1769 1773 0, /* tp_str */
1770 1774 0, /* tp_getattro */
1771 1775 0, /* tp_setattro */
1772 1776 0, /* tp_as_buffer */
1773 1777 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1774 1778 Decompressor__doc__, /* tp_doc */
1775 1779 0, /* tp_traverse */
1776 1780 0, /* tp_clear */
1777 1781 0, /* tp_richcompare */
1778 1782 0, /* tp_weaklistoffset */
1779 1783 0, /* tp_iter */
1780 1784 0, /* tp_iternext */
1781 1785 Decompressor_methods, /* tp_methods */
1782 1786 0, /* tp_members */
1783 1787 0, /* tp_getset */
1784 1788 0, /* tp_base */
1785 1789 0, /* tp_dict */
1786 1790 0, /* tp_descr_get */
1787 1791 0, /* tp_descr_set */
1788 1792 0, /* tp_dictoffset */
1789 1793 (initproc)Decompressor_init, /* tp_init */
1790 1794 0, /* tp_alloc */
1791 1795 PyType_GenericNew, /* tp_new */
1792 1796 };
1793 1797
1794 1798 void decompressor_module_init(PyObject* mod) {
1795 1799 Py_TYPE(&ZstdDecompressorType) = &PyType_Type;
1796 1800 if (PyType_Ready(&ZstdDecompressorType) < 0) {
1797 1801 return;
1798 1802 }
1799 1803
1800 1804 Py_INCREF((PyObject*)&ZstdDecompressorType);
1801 1805 PyModule_AddObject(mod, "ZstdDecompressor",
1802 1806 (PyObject*)&ZstdDecompressorType);
1803 1807 }
@@ -1,346 +1,373 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #define PY_SSIZE_T_CLEAN
10 10 #include <Python.h>
11 11 #include "structmember.h"
12 12
13 13 #define ZSTD_STATIC_LINKING_ONLY
14 14 #define ZDICT_STATIC_LINKING_ONLY
15 15 #include <zstd.h>
16 16 #include <zdict.h>
17 17
18 #define PYTHON_ZSTANDARD_VERSION "0.9.0"
18 /* Remember to change the string in zstandard/__init__ as well */
19 #define PYTHON_ZSTANDARD_VERSION "0.10.1"
19 20
20 21 typedef enum {
21 22 compressorobj_flush_finish,
22 23 compressorobj_flush_block,
23 24 } CompressorObj_Flush;
24 25
25 26 /*
26 27 Represents a ZstdCompressionParameters type.
27 28
28 29 This type holds all the low-level compression parameters that can be set.
29 30 */
30 31 typedef struct {
31 32 PyObject_HEAD
32 33 ZSTD_CCtx_params* params;
33 34 unsigned format;
34 35 int compressionLevel;
35 36 unsigned windowLog;
36 37 unsigned hashLog;
37 38 unsigned chainLog;
38 39 unsigned searchLog;
39 40 unsigned minMatch;
40 41 unsigned targetLength;
41 42 unsigned compressionStrategy;
42 43 unsigned contentSizeFlag;
43 44 unsigned checksumFlag;
44 45 unsigned dictIDFlag;
45 46 unsigned threads;
46 47 unsigned jobSize;
47 48 unsigned overlapSizeLog;
48 unsigned compressLiterals;
49 49 unsigned forceMaxWindow;
50 50 unsigned enableLongDistanceMatching;
51 51 unsigned ldmHashLog;
52 52 unsigned ldmMinMatch;
53 53 unsigned ldmBucketSizeLog;
54 54 unsigned ldmHashEveryLog;
55 55 } ZstdCompressionParametersObject;
56 56
57 57 extern PyTypeObject ZstdCompressionParametersType;
58 58
59 59 /*
60 60 Represents a FrameParameters type.
61 61
62 62 This type is basically a wrapper around ZSTD_frameParams.
63 63 */
64 64 typedef struct {
65 65 PyObject_HEAD
66 66 unsigned long long frameContentSize;
67 67 unsigned long long windowSize;
68 68 unsigned dictID;
69 69 char checksumFlag;
70 70 } FrameParametersObject;
71 71
72 72 extern PyTypeObject FrameParametersType;
73 73
74 74 /*
75 75 Represents a ZstdCompressionDict type.
76 76
77 77 Instances hold data used for a zstd compression dictionary.
78 78 */
79 79 typedef struct {
80 80 PyObject_HEAD
81 81
82 82 /* Pointer to dictionary data. Owned by self. */
83 83 void* dictData;
84 84 /* Size of dictionary data. */
85 85 size_t dictSize;
86 86 ZSTD_dictContentType_e dictType;
87 87 /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */
88 88 unsigned k;
89 89 /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */
90 90 unsigned d;
91 91 /* Digested dictionary, suitable for reuse. */
92 92 ZSTD_CDict* cdict;
93 93 ZSTD_DDict* ddict;
94 94 } ZstdCompressionDict;
95 95
96 96 extern PyTypeObject ZstdCompressionDictType;
97 97
98 98 /*
99 99 Represents a ZstdCompressor type.
100 100 */
101 101 typedef struct {
102 102 PyObject_HEAD
103 103
104 104 /* Number of threads to use for operations. */
105 105 unsigned int threads;
106 106 /* Pointer to compression dictionary to use. NULL if not using dictionary
107 107 compression. */
108 108 ZstdCompressionDict* dict;
109 109 /* Compression context to use. Populated during object construction. */
110 110 ZSTD_CCtx* cctx;
111 111 /* Compression parameters in use. */
112 112 ZSTD_CCtx_params* params;
113 113 } ZstdCompressor;
114 114
115 115 extern PyTypeObject ZstdCompressorType;
116 116
117 117 typedef struct {
118 118 PyObject_HEAD
119 119
120 120 ZstdCompressor* compressor;
121 121 ZSTD_outBuffer output;
122 122 int finished;
123 123 } ZstdCompressionObj;
124 124
125 125 extern PyTypeObject ZstdCompressionObjType;
126 126
127 127 typedef struct {
128 128 PyObject_HEAD
129 129
130 130 ZstdCompressor* compressor;
131 131 PyObject* writer;
132 132 unsigned long long sourceSize;
133 133 size_t outSize;
134 134 int entered;
135 135 unsigned long long bytesCompressed;
136 136 } ZstdCompressionWriter;
137 137
138 138 extern PyTypeObject ZstdCompressionWriterType;
139 139
140 140 typedef struct {
141 141 PyObject_HEAD
142 142
143 143 ZstdCompressor* compressor;
144 144 PyObject* reader;
145 145 Py_buffer buffer;
146 146 Py_ssize_t bufferOffset;
147 147 size_t inSize;
148 148 size_t outSize;
149 149
150 150 ZSTD_inBuffer input;
151 151 ZSTD_outBuffer output;
152 152 int finishedOutput;
153 153 int finishedInput;
154 154 PyObject* readResult;
155 155 } ZstdCompressorIterator;
156 156
157 157 extern PyTypeObject ZstdCompressorIteratorType;
158 158
159 159 typedef struct {
160 160 PyObject_HEAD
161 161
162 162 ZstdCompressor* compressor;
163 163 PyObject* reader;
164 164 Py_buffer buffer;
165 unsigned long long sourceSize;
166 165 size_t readSize;
167 166
168 167 int entered;
169 168 int closed;
170 169 unsigned long long bytesCompressed;
171 170
172 171 ZSTD_inBuffer input;
173 172 ZSTD_outBuffer output;
174 173 int finishedInput;
175 174 int finishedOutput;
176 175 PyObject* readResult;
177 176 } ZstdCompressionReader;
178 177
179 178 extern PyTypeObject ZstdCompressionReaderType;
180 179
181 180 typedef struct {
182 181 PyObject_HEAD
183 182
183 ZstdCompressor* compressor;
184 ZSTD_inBuffer input;
185 ZSTD_outBuffer output;
186 Py_buffer inBuffer;
187 int finished;
188 size_t chunkSize;
189 } ZstdCompressionChunker;
190
191 extern PyTypeObject ZstdCompressionChunkerType;
192
193 typedef enum {
194 compressionchunker_mode_normal,
195 compressionchunker_mode_flush,
196 compressionchunker_mode_finish,
197 } CompressionChunkerMode;
198
199 typedef struct {
200 PyObject_HEAD
201
202 ZstdCompressionChunker* chunker;
203 CompressionChunkerMode mode;
204 } ZstdCompressionChunkerIterator;
205
206 extern PyTypeObject ZstdCompressionChunkerIteratorType;
207
208 typedef struct {
209 PyObject_HEAD
210
184 211 ZSTD_DCtx* dctx;
185 212 ZstdCompressionDict* dict;
186 213 size_t maxWindowSize;
187 214 ZSTD_format_e format;
188 215 } ZstdDecompressor;
189 216
190 217 extern PyTypeObject ZstdDecompressorType;
191 218
192 219 typedef struct {
193 220 PyObject_HEAD
194 221
195 222 ZstdDecompressor* decompressor;
196 223 size_t outSize;
197 224 int finished;
198 225 } ZstdDecompressionObj;
199 226
200 227 extern PyTypeObject ZstdDecompressionObjType;
201 228
202 229 typedef struct {
203 230 PyObject_HEAD
204 231
205 232 /* Parent decompressor to which this object is associated. */
206 233 ZstdDecompressor* decompressor;
207 234 /* Object to read() from (if reading from a stream). */
208 235 PyObject* reader;
209 236 /* Size for read() operations on reader. */
210 237 size_t readSize;
211 238 /* Buffer to read from (if reading from a buffer). */
212 239 Py_buffer buffer;
213 240
214 241 /* Whether the context manager is active. */
215 242 int entered;
216 243 /* Whether we've closed the stream. */
217 244 int closed;
218 245
219 246 /* Number of bytes decompressed and returned to user. */
220 247 unsigned long long bytesDecompressed;
221 248
222 249 /* Tracks data going into decompressor. */
223 250 ZSTD_inBuffer input;
224 251
225 252 /* Holds output from read() operation on reader. */
226 253 PyObject* readResult;
227 254
228 255 /* Whether all input has been sent to the decompressor. */
229 256 int finishedInput;
230 257 /* Whether all output has been flushed from the decompressor. */
231 258 int finishedOutput;
232 259 } ZstdDecompressionReader;
233 260
234 261 extern PyTypeObject ZstdDecompressionReaderType;
235 262
236 263 typedef struct {
237 264 PyObject_HEAD
238 265
239 266 ZstdDecompressor* decompressor;
240 267 PyObject* writer;
241 268 size_t outSize;
242 269 int entered;
243 270 } ZstdDecompressionWriter;
244 271
245 272 extern PyTypeObject ZstdDecompressionWriterType;
246 273
247 274 typedef struct {
248 275 PyObject_HEAD
249 276
250 277 ZstdDecompressor* decompressor;
251 278 PyObject* reader;
252 279 Py_buffer buffer;
253 280 Py_ssize_t bufferOffset;
254 281 size_t inSize;
255 282 size_t outSize;
256 283 size_t skipBytes;
257 284 ZSTD_inBuffer input;
258 285 ZSTD_outBuffer output;
259 286 Py_ssize_t readCount;
260 287 int finishedInput;
261 288 int finishedOutput;
262 289 } ZstdDecompressorIterator;
263 290
264 291 extern PyTypeObject ZstdDecompressorIteratorType;
265 292
266 293 typedef struct {
267 294 int errored;
268 295 PyObject* chunk;
269 296 } DecompressorIteratorResult;
270 297
271 298 typedef struct {
272 299 /* The public API is that these are 64-bit unsigned integers. So these can't
273 300 * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may
274 301 * be nonsensical for this platform. */
275 302 unsigned long long offset;
276 303 unsigned long long length;
277 304 } BufferSegment;
278 305
279 306 typedef struct {
280 307 PyObject_HEAD
281 308
282 309 PyObject* parent;
283 310 BufferSegment* segments;
284 311 Py_ssize_t segmentCount;
285 312 } ZstdBufferSegments;
286 313
287 314 extern PyTypeObject ZstdBufferSegmentsType;
288 315
289 316 typedef struct {
290 317 PyObject_HEAD
291 318
292 319 PyObject* parent;
293 320 void* data;
294 321 Py_ssize_t dataSize;
295 322 unsigned long long offset;
296 323 } ZstdBufferSegment;
297 324
298 325 extern PyTypeObject ZstdBufferSegmentType;
299 326
300 327 typedef struct {
301 328 PyObject_HEAD
302 329
303 330 Py_buffer parent;
304 331 void* data;
305 332 unsigned long long dataSize;
306 333 BufferSegment* segments;
307 334 Py_ssize_t segmentCount;
308 335 int useFree;
309 336 } ZstdBufferWithSegments;
310 337
311 338 extern PyTypeObject ZstdBufferWithSegmentsType;
312 339
313 340 /**
314 341 * An ordered collection of BufferWithSegments exposed as a squashed collection.
315 342 *
316 343 * This type provides a virtual view spanning multiple BufferWithSegments
317 344 * instances. It allows multiple instances to be "chained" together and
318 345 * exposed as a single collection. e.g. if there are 2 buffers holding
319 346 * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer.
320 347 */
321 348 typedef struct {
322 349 PyObject_HEAD
323 350
324 351 /* An array of buffers that should be exposed through this instance. */
325 352 ZstdBufferWithSegments** buffers;
326 353 /* Number of elements in buffers array. */
327 354 Py_ssize_t bufferCount;
328 355 /* Array of first offset in each buffer instance. 0th entry corresponds
329 356 to number of elements in the 0th buffer. 1st entry corresponds to the
330 357 sum of elements in 0th and 1st buffers. */
331 358 Py_ssize_t* firstElements;
332 359 } ZstdBufferWithSegmentsCollection;
333 360
334 361 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
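The comment above explains how a flat index into the collection resolves to a segment inside one of the chained buffers, using the cumulative counts stored in firstElements. A Python sketch of that lookup (the helper is hypothetical and only illustrates the arithmetic; the C code may implement it differently):

    import bisect

    def resolve_index(first_elements, index):
        # first_elements[i] holds the total number of segments in buffers
        # 0..i, e.g. two buffers of 10 segments each give [10, 20].
        if index < 0 or index >= first_elements[-1]:
            raise IndexError(index)
        buffer_index = bisect.bisect_right(first_elements, index)
        preceding = first_elements[buffer_index - 1] if buffer_index else 0
        return buffer_index, index - preceding

    # Two buffers of 10 segments each: flat index 14 is the 5th segment
    # (index 4) of the 2nd buffer (index 1), matching the o[14] example.
    assert resolve_index([10, 20], 14) == (1, 4)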
335 362
336 363 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value);
337 364 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj);
338 365 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs);
339 366 int ensure_ddict(ZstdCompressionDict* dict);
340 367 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict);
341 368 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
342 369 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize);
343 370 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*);
344 371 int cpu_count(void);
345 372 size_t roundpow2(size_t);
346 373 int safe_pybytes_resize(PyObject** obj, Py_ssize_t size);
@@ -1,196 +1,199 b''
1 1 # Copyright (c) 2016-present, Gregory Szorc
2 2 # All rights reserved.
3 3 #
4 4 # This software may be modified and distributed under the terms
5 5 # of the BSD license. See the LICENSE file for details.
6 6
7 7 from __future__ import absolute_import
8 8
9 9 import cffi
10 10 import distutils.ccompiler
11 11 import os
12 12 import re
13 13 import subprocess
14 14 import tempfile
15 15
16 16
17 17 HERE = os.path.abspath(os.path.dirname(__file__))
18 18
19 19 SOURCES = ['zstd/%s' % p for p in (
20 'common/debug.c',
20 21 'common/entropy_common.c',
21 22 'common/error_private.c',
22 23 'common/fse_decompress.c',
23 24 'common/pool.c',
24 25 'common/threading.c',
25 26 'common/xxhash.c',
26 27 'common/zstd_common.c',
27 28 'compress/fse_compress.c',
29 'compress/hist.c',
28 30 'compress/huf_compress.c',
29 31 'compress/zstd_compress.c',
30 32 'compress/zstd_double_fast.c',
31 33 'compress/zstd_fast.c',
32 34 'compress/zstd_lazy.c',
33 35 'compress/zstd_ldm.c',
34 36 'compress/zstd_opt.c',
35 37 'compress/zstdmt_compress.c',
36 38 'decompress/huf_decompress.c',
37 39 'decompress/zstd_decompress.c',
38 40 'dictBuilder/cover.c',
41 'dictBuilder/fastcover.c',
39 42 'dictBuilder/divsufsort.c',
40 43 'dictBuilder/zdict.c',
41 44 )]
42 45
43 46 # Headers whose preprocessed output will be fed into cdef().
44 47 HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
45 48 ('zstd.h',),
46 49 ('dictBuilder', 'zdict.h'),
47 50 )]
48 51
49 52 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
50 53 'zstd',
51 54 'zstd/common',
52 55 'zstd/compress',
53 56 'zstd/decompress',
54 57 'zstd/dictBuilder',
55 58 )]
56 59
57 60 # cffi can't parse some of the primitives in zstd.h. So we invoke the
58 61 # preprocessor and feed its output into cffi.
59 62 compiler = distutils.ccompiler.new_compiler()
60 63
61 64 # Needed for MSVC.
62 65 if hasattr(compiler, 'initialize'):
63 66 compiler.initialize()
64 67
65 68 # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor
66 69 # manually.
67 70 if compiler.compiler_type == 'unix':
68 71 args = list(compiler.executables['compiler'])
69 72 args.extend([
70 73 '-E',
71 74 '-DZSTD_STATIC_LINKING_ONLY',
72 75 '-DZDICT_STATIC_LINKING_ONLY',
73 76 ])
74 77 elif compiler.compiler_type == 'msvc':
75 78 args = [compiler.cc]
76 79 args.extend([
77 80 '/EP',
78 81 '/DZSTD_STATIC_LINKING_ONLY',
79 82 '/DZDICT_STATIC_LINKING_ONLY',
80 83 ])
81 84 else:
82 85 raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
83 86
84 87 def preprocess(path):
85 88 with open(path, 'rb') as fh:
86 89 lines = []
87 90 it = iter(fh)
88 91
89 92 for l in it:
90 93 # zstd.h includes <stddef.h>, which is also included by cffi's
91 94 # boilerplate. This can lead to duplicate declarations. So we strip
92 95 # this include from the preprocessor invocation.
93 96 #
94 97 # The same thing happens for including zstd.h, so give it the same
95 98 # treatment.
96 99 #
97 100 # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline
98 101 # #define in zstdmt_compress.h and results in a compiler warning. So drop
99 102 # the inline #define.
100 103 if l.startswith((b'#include <stddef.h>',
101 104 b'#include "zstd.h"',
102 105 b'#define ZSTD_STATIC_LINKING_ONLY')):
103 106 continue
104 107
105 108 # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't
106 109 # important so just filter it out.
107 110 if l.startswith(b'ZSTDLIB_API'):
108 111 l = l[len(b'ZSTDLIB_API '):]
109 112
110 113 lines.append(l)
111 114
112 115 fd, input_file = tempfile.mkstemp(suffix='.h')
113 116 os.write(fd, b''.join(lines))
114 117 os.close(fd)
115 118
116 119 try:
117 120 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE)
118 121 output = process.communicate()[0]
119 122 ret = process.poll()
120 123 if ret:
121 124 raise Exception('preprocessor exited with error')
122 125
123 126 return output
124 127 finally:
125 128 os.unlink(input_file)
126 129
127 130
128 131 def normalize_output(output):
129 132 lines = []
130 133 for line in output.splitlines():
131 134 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
132 135 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
133 136 line = line[len(b'__attribute__ ((visibility ("default"))) '):]
134 137
135 138 if line.startswith(b'__attribute__((deprecated('):
136 139 continue
137 140 elif b'__declspec(deprecated(' in line:
138 141 continue
139 142
140 143 lines.append(line)
141 144
142 145 return b'\n'.join(lines)
143 146
144 147
145 148 ffi = cffi.FFI()
146 149 # zstd.h uses a possibly undefined MIN(). Define it until
147 150 # https://github.com/facebook/zstd/issues/976 is fixed.
148 151 # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning
149 152 # when cffi uses the function. Since we statically link against zstd, even
150 153 # if we use the deprecated functions it shouldn't be a huge problem.
151 154 ffi.set_source('_zstd_cffi', '''
152 155 #define MIN(a,b) ((a)<(b) ? (a) : (b))
153 156 #define ZSTD_STATIC_LINKING_ONLY
154 157 #include <zstd.h>
155 158 #define ZDICT_STATIC_LINKING_ONLY
156 159 #define ZDICT_DISABLE_DEPRECATE_WARNINGS
157 160 #include <zdict.h>
158 161 ''', sources=SOURCES,
159 162 include_dirs=INCLUDE_DIRS,
160 163 extra_compile_args=['-DZSTD_MULTITHREAD'])
161 164
162 165 DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
163 166
164 167 sources = []
165 168
166 169 # Feed normalized preprocessor output for headers into the cdef parser.
167 170 for header in HEADERS:
168 171 preprocessed = preprocess(header)
169 172 sources.append(normalize_output(preprocessed))
170 173
171 174 # #define's are effectively erased as part of going through the preprocessor.
172 175 # So perform a manual pass to re-add those to the cdef source.
173 176 with open(header, 'rb') as fh:
174 177 for line in fh:
175 178 line = line.strip()
176 179 m = DEFINE.match(line)
177 180 if not m:
178 181 continue
179 182
180 183 if m.group(1) == b'ZSTD_STATIC_LINKING_ONLY':
181 184 continue
182 185
183 186 # The parser doesn't like some constants with complex values.
184 187 if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'):
185 188 continue
186 189
187 190 # The ... is magic syntax by the cdef parser to resolve the
188 191 # value at compile time.
189 192 sources.append(m.group(0) + b' ...')
190 193
191 194 cdeflines = b'\n'.join(sources).splitlines()
192 195 cdeflines = [l for l in cdeflines if l.strip()]
193 196 ffi.cdef(b'\n'.join(cdeflines).decode('latin1'))
194 197
195 198 if __name__ == '__main__':
196 199 ffi.compile()
@@ -1,160 +1,188 b''
1 1 # Copyright (c) 2016-present, Gregory Szorc
2 2 # All rights reserved.
3 3 #
4 4 # This software may be modified and distributed under the terms
5 5 # of the BSD license. See the LICENSE file for details.
6 6
7 7 import distutils.ccompiler
8 8 import os
9 import sys
10 9
11 10 from distutils.extension import Extension
12 11
13 12
14 13 zstd_sources = ['zstd/%s' % p for p in (
14 'common/debug.c',
15 15 'common/entropy_common.c',
16 16 'common/error_private.c',
17 17 'common/fse_decompress.c',
18 18 'common/pool.c',
19 19 'common/threading.c',
20 20 'common/xxhash.c',
21 21 'common/zstd_common.c',
22 22 'compress/fse_compress.c',
23 'compress/hist.c',
23 24 'compress/huf_compress.c',
24 25 'compress/zstd_compress.c',
25 26 'compress/zstd_double_fast.c',
26 27 'compress/zstd_fast.c',
27 28 'compress/zstd_lazy.c',
28 29 'compress/zstd_ldm.c',
29 30 'compress/zstd_opt.c',
30 31 'compress/zstdmt_compress.c',
31 32 'decompress/huf_decompress.c',
32 33 'decompress/zstd_decompress.c',
33 34 'dictBuilder/cover.c',
34 35 'dictBuilder/divsufsort.c',
36 'dictBuilder/fastcover.c',
35 37 'dictBuilder/zdict.c',
36 38 )]
37 39
38 40 zstd_sources_legacy = ['zstd/%s' % p for p in (
39 41 'deprecated/zbuff_common.c',
40 42 'deprecated/zbuff_compress.c',
41 43 'deprecated/zbuff_decompress.c',
42 44 'legacy/zstd_v01.c',
43 45 'legacy/zstd_v02.c',
44 46 'legacy/zstd_v03.c',
45 47 'legacy/zstd_v04.c',
46 48 'legacy/zstd_v05.c',
47 49 'legacy/zstd_v06.c',
48 50 'legacy/zstd_v07.c'
49 51 )]
50 52
51 53 zstd_includes = [
52 54 'zstd',
53 55 'zstd/common',
54 56 'zstd/compress',
55 57 'zstd/decompress',
56 58 'zstd/dictBuilder',
57 59 ]
58 60
59 61 zstd_includes_legacy = [
60 62 'zstd/deprecated',
61 63 'zstd/legacy',
62 64 ]
63 65
64 66 ext_includes = [
65 67 'c-ext',
66 68 'zstd/common',
67 69 ]
68 70
69 71 ext_sources = [
70 72 'zstd/common/pool.c',
71 73 'zstd/common/threading.c',
72 74 'zstd.c',
73 75 'c-ext/bufferutil.c',
74 76 'c-ext/compressiondict.c',
75 77 'c-ext/compressobj.c',
76 78 'c-ext/compressor.c',
77 79 'c-ext/compressoriterator.c',
80 'c-ext/compressionchunker.c',
78 81 'c-ext/compressionparams.c',
79 82 'c-ext/compressionreader.c',
80 83 'c-ext/compressionwriter.c',
81 84 'c-ext/constants.c',
82 85 'c-ext/decompressobj.c',
83 86 'c-ext/decompressor.c',
84 87 'c-ext/decompressoriterator.c',
85 88 'c-ext/decompressionreader.c',
86 89 'c-ext/decompressionwriter.c',
87 90 'c-ext/frameparams.c',
88 91 ]
89 92
90 93 zstd_depends = [
91 94 'c-ext/python-zstandard.h',
92 95 ]
93 96
94 97
95 98 def get_c_extension(support_legacy=False, system_zstd=False, name='zstd',
96 warnings_as_errors=False):
97 """Obtain a distutils.extension.Extension for the C extension."""
98 root = os.path.abspath(os.path.dirname(__file__))
99 warnings_as_errors=False, root=None):
100 """Obtain a distutils.extension.Extension for the C extension.
101
102 ``support_legacy`` controls whether to compile in legacy zstd format support.
103
104 ``system_zstd`` controls whether to compile against the system zstd library.
105 For this to work, the system zstd library and headers must match what
106 python-zstandard is coded against exactly.
107
108 ``name`` is the module name of the C extension to produce.
109
110 ``warnings_as_errors`` controls whether compiler warnings are turned into
111 compiler errors.
99 112
100 sources = set([os.path.join(root, p) for p in ext_sources])
113 ``root`` defines a root path that source paths should be computed relative
114 to. This should be the directory containing the main ``setup.py`` that is
115 being invoked. If not defined, paths will be relative to this file.
116 """
117 actual_root = os.path.abspath(os.path.dirname(__file__))
118 root = root or actual_root
119
120 sources = set([os.path.join(actual_root, p) for p in ext_sources])
101 121 if not system_zstd:
102 sources.update([os.path.join(root, p) for p in zstd_sources])
122 sources.update([os.path.join(actual_root, p) for p in zstd_sources])
103 123 if support_legacy:
104 sources.update([os.path.join(root, p) for p in zstd_sources_legacy])
124 sources.update([os.path.join(actual_root, p)
125 for p in zstd_sources_legacy])
105 126 sources = list(sources)
106 127
107 include_dirs = set([os.path.join(root, d) for d in ext_includes])
128 include_dirs = set([os.path.join(actual_root, d) for d in ext_includes])
108 129 if not system_zstd:
109 include_dirs.update([os.path.join(root, d) for d in zstd_includes])
130 include_dirs.update([os.path.join(actual_root, d)
131 for d in zstd_includes])
110 132 if support_legacy:
111 include_dirs.update([os.path.join(root, d) for d in zstd_includes_legacy])
133 include_dirs.update([os.path.join(actual_root, d)
134 for d in zstd_includes_legacy])
112 135 include_dirs = list(include_dirs)
113 136
114 depends = [os.path.join(root, p) for p in zstd_depends]
137 depends = [os.path.join(actual_root, p) for p in zstd_depends]
115 138
116 139 compiler = distutils.ccompiler.new_compiler()
117 140
118 141 # Needed for MSVC.
119 142 if hasattr(compiler, 'initialize'):
120 143 compiler.initialize()
121 144
122 145 if compiler.compiler_type == 'unix':
123 146 compiler_type = 'unix'
124 147 elif compiler.compiler_type == 'msvc':
125 148 compiler_type = 'msvc'
126 149 elif compiler.compiler_type == 'mingw32':
127 150 compiler_type = 'mingw32'
128 151 else:
129 152 raise Exception('unhandled compiler type: %s' %
130 153 compiler.compiler_type)
131 154
132 155 extra_args = ['-DZSTD_MULTITHREAD']
133 156
134 157 if not system_zstd:
135 158 extra_args.append('-DZSTDLIB_VISIBILITY=')
136 159 extra_args.append('-DZDICTLIB_VISIBILITY=')
137 160 extra_args.append('-DZSTDERRORLIB_VISIBILITY=')
138 161
139 162 if compiler_type == 'unix':
140 163 extra_args.append('-fvisibility=hidden')
141 164
142 165 if not system_zstd and support_legacy:
143 166 extra_args.append('-DZSTD_LEGACY_SUPPORT=1')
144 167
145 168 if warnings_as_errors:
146 169 if compiler_type in ('unix', 'mingw32'):
147 170 extra_args.append('-Werror')
148 171 elif compiler_type == 'msvc':
149 172 extra_args.append('/WX')
150 173 else:
151 174 assert False
152 175
153 176 libraries = ['zstd'] if system_zstd else []
154 177
178 # Python 3.7 doesn't like absolute paths. So normalize to relative.
179 sources = [os.path.relpath(p, root) for p in sources]
180 include_dirs = [os.path.relpath(p, root) for p in include_dirs]
181 depends = [os.path.relpath(p, root) for p in depends]
182
155 183 # TODO compile with optimizations.
156 184 return Extension(name, sources,
157 185 include_dirs=include_dirs,
158 186 depends=depends,
159 187 extra_compile_args=extra_args,
160 188 libraries=libraries)
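A minimal sketch of how an embedding ``setup.py`` might consume get_c_extension(), assuming this module is importable as ``setup_zstd`` from the invoking directory; the package metadata shown is illustrative:

    import os
    from distutils.core import setup

    import setup_zstd

    ROOT = os.path.abspath(os.path.dirname(__file__))

    setup(
        name='example-zstd-bindings',
        version='0.0',
        ext_modules=[setup_zstd.get_c_extension(support_legacy=True,
                                                name='zstd',
                                                root=ROOT)],
    )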
@@ -1,1266 +1,1463 b''
1 1 import hashlib
2 2 import io
3 3 import struct
4 4 import sys
5 5 import tarfile
6 6 import unittest
7 7
8 8 import zstandard as zstd
9 9
10 10 from .common import (
11 11 make_cffi,
12 12 OpCountingBytesIO,
13 13 )
14 14
15 15
16 16 if sys.version_info[0] >= 3:
17 17 next = lambda it: it.__next__()
18 18 else:
19 19 next = lambda it: it.next()
20 20
21 21
22 22 def multithreaded_chunk_size(level, source_size=0):
23 23 params = zstd.ZstdCompressionParameters.from_level(level,
24 24 source_size=source_size)
25 25
26 26 return 1 << (params.window_log + 2)
27 27
28 28
29 29 @make_cffi
30 30 class TestCompressor(unittest.TestCase):
31 31 def test_level_bounds(self):
32 32 with self.assertRaises(ValueError):
33 33 zstd.ZstdCompressor(level=23)
34 34
35 35 def test_memory_size(self):
36 36 cctx = zstd.ZstdCompressor(level=1)
37 37 self.assertGreater(cctx.memory_size(), 100)
38 38
39 39
40 40 @make_cffi
41 41 class TestCompressor_compress(unittest.TestCase):
42 42 def test_compress_empty(self):
43 43 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
44 44 result = cctx.compress(b'')
45 45 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
46 46 params = zstd.get_frame_parameters(result)
47 47 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
48 48 self.assertEqual(params.window_size, 524288)
49 49 self.assertEqual(params.dict_id, 0)
50 50 self.assertFalse(params.has_checksum, 0)
51 51
52 52 cctx = zstd.ZstdCompressor()
53 53 result = cctx.compress(b'')
54 54 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00')
55 55 params = zstd.get_frame_parameters(result)
56 56 self.assertEqual(params.content_size, 0)
57 57
58 58 def test_input_types(self):
59 59 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
60 60 expected = b'\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f'
61 61
62 62 mutable_array = bytearray(3)
63 63 mutable_array[:] = b'foo'
64 64
65 65 sources = [
66 66 memoryview(b'foo'),
67 67 bytearray(b'foo'),
68 68 mutable_array,
69 69 ]
70 70
71 71 for source in sources:
72 72 self.assertEqual(cctx.compress(source), expected)
73 73
74 74 def test_compress_large(self):
75 75 chunks = []
76 76 for i in range(255):
77 77 chunks.append(struct.Struct('>B').pack(i) * 16384)
78 78
79 79 cctx = zstd.ZstdCompressor(level=3, write_content_size=False)
80 80 result = cctx.compress(b''.join(chunks))
81 81 self.assertEqual(len(result), 999)
82 82 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
83 83
84 84 # This matches the test for read_to_iter() below.
85 85 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
86 86 result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
87 87 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
88 88 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
89 89 b'\x02\x09\x00\x00\x6f')
90 90
91 91 def test_negative_level(self):
92 92 cctx = zstd.ZstdCompressor(level=-4)
93 93 result = cctx.compress(b'foo' * 256)
94 94
95 95 def test_no_magic(self):
96 96 params = zstd.ZstdCompressionParameters.from_level(
97 97 1, format=zstd.FORMAT_ZSTD1)
98 98 cctx = zstd.ZstdCompressor(compression_params=params)
99 99 magic = cctx.compress(b'foobar')
100 100
101 101 params = zstd.ZstdCompressionParameters.from_level(
102 102 1, format=zstd.FORMAT_ZSTD1_MAGICLESS)
103 103 cctx = zstd.ZstdCompressor(compression_params=params)
104 104 no_magic = cctx.compress(b'foobar')
105 105
106 106 self.assertEqual(magic[0:4], b'\x28\xb5\x2f\xfd')
107 107 self.assertEqual(magic[4:], no_magic)
108 108
109 109 def test_write_checksum(self):
110 110 cctx = zstd.ZstdCompressor(level=1)
111 111 no_checksum = cctx.compress(b'foobar')
112 112 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
113 113 with_checksum = cctx.compress(b'foobar')
114 114
115 115 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
116 116
117 117 no_params = zstd.get_frame_parameters(no_checksum)
118 118 with_params = zstd.get_frame_parameters(with_checksum)
119 119
120 120 self.assertFalse(no_params.has_checksum)
121 121 self.assertTrue(with_params.has_checksum)
122 122
123 123 def test_write_content_size(self):
124 124 cctx = zstd.ZstdCompressor(level=1)
125 125 with_size = cctx.compress(b'foobar' * 256)
126 126 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
127 127 no_size = cctx.compress(b'foobar' * 256)
128 128
129 129 self.assertEqual(len(with_size), len(no_size) + 1)
130 130
131 131 no_params = zstd.get_frame_parameters(no_size)
132 132 with_params = zstd.get_frame_parameters(with_size)
133 133 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
134 134 self.assertEqual(with_params.content_size, 1536)
135 135
136 136 def test_no_dict_id(self):
137 137 samples = []
138 138 for i in range(128):
139 139 samples.append(b'foo' * 64)
140 140 samples.append(b'bar' * 64)
141 141 samples.append(b'foobar' * 64)
142 142
143 143 d = zstd.train_dictionary(1024, samples)
144 144
145 145 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
146 146 with_dict_id = cctx.compress(b'foobarfoobar')
147 147
148 148 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
149 149 no_dict_id = cctx.compress(b'foobarfoobar')
150 150
151 151 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
152 152
153 153 no_params = zstd.get_frame_parameters(no_dict_id)
154 154 with_params = zstd.get_frame_parameters(with_dict_id)
155 155 self.assertEqual(no_params.dict_id, 0)
156 self.assertEqual(with_params.dict_id, 1387616518)
156 self.assertEqual(with_params.dict_id, 1880053135)
157 157
158 158 def test_compress_dict_multiple(self):
159 159 samples = []
160 160 for i in range(128):
161 161 samples.append(b'foo' * 64)
162 162 samples.append(b'bar' * 64)
163 163 samples.append(b'foobar' * 64)
164 164
165 165 d = zstd.train_dictionary(8192, samples)
166 166
167 167 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
168 168
169 169 for i in range(32):
170 170 cctx.compress(b'foo bar foobar foo bar foobar')
171 171
172 172 def test_dict_precompute(self):
173 173 samples = []
174 174 for i in range(128):
175 175 samples.append(b'foo' * 64)
176 176 samples.append(b'bar' * 64)
177 177 samples.append(b'foobar' * 64)
178 178
179 179 d = zstd.train_dictionary(8192, samples)
180 180 d.precompute_compress(level=1)
181 181
182 182 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
183 183
184 184 for i in range(32):
185 185 cctx.compress(b'foo bar foobar foo bar foobar')
186 186
187 187 def test_multithreaded(self):
188 188 chunk_size = multithreaded_chunk_size(1)
189 189 source = b''.join([b'x' * chunk_size, b'y' * chunk_size])
190 190
191 191 cctx = zstd.ZstdCompressor(level=1, threads=2)
192 192 compressed = cctx.compress(source)
193 193
194 194 params = zstd.get_frame_parameters(compressed)
195 195 self.assertEqual(params.content_size, chunk_size * 2)
196 196 self.assertEqual(params.dict_id, 0)
197 197 self.assertFalse(params.has_checksum)
198 198
199 199 dctx = zstd.ZstdDecompressor()
200 200 self.assertEqual(dctx.decompress(compressed), source)
201 201
202 202 def test_multithreaded_dict(self):
203 203 samples = []
204 204 for i in range(128):
205 205 samples.append(b'foo' * 64)
206 206 samples.append(b'bar' * 64)
207 207 samples.append(b'foobar' * 64)
208 208
209 209 d = zstd.train_dictionary(1024, samples)
210 210
211 211 cctx = zstd.ZstdCompressor(dict_data=d, threads=2)
212 212
213 213 result = cctx.compress(b'foo')
214 214 params = zstd.get_frame_parameters(result);
215 215 self.assertEqual(params.content_size, 3);
216 216 self.assertEqual(params.dict_id, d.dict_id())
217 217
218 218 self.assertEqual(result,
219 b'\x28\xb5\x2f\xfd\x23\x06\x59\xb5\x52\x03\x19\x00\x00'
219 b'\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00'
220 220 b'\x66\x6f\x6f')
221 221
222 222 def test_multithreaded_compression_params(self):
223 223 params = zstd.ZstdCompressionParameters.from_level(0, threads=2)
224 224 cctx = zstd.ZstdCompressor(compression_params=params)
225 225
226 226 result = cctx.compress(b'foo')
227 227 params = zstd.get_frame_parameters(result);
228 228 self.assertEqual(params.content_size, 3);
229 229
230 230 self.assertEqual(result,
231 231 b'\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f')
232 232
233 233
234 234 @make_cffi
235 235 class TestCompressor_compressobj(unittest.TestCase):
236 236 def test_compressobj_empty(self):
237 237 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
238 238 cobj = cctx.compressobj()
239 239 self.assertEqual(cobj.compress(b''), b'')
240 240 self.assertEqual(cobj.flush(),
241 241 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
242 242
243 243 def test_input_types(self):
244 244 expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f'
245 245 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
246 246
247 247 mutable_array = bytearray(3)
248 248 mutable_array[:] = b'foo'
249 249
250 250 sources = [
251 251 memoryview(b'foo'),
252 252 bytearray(b'foo'),
253 253 mutable_array,
254 254 ]
255 255
256 256 for source in sources:
257 257 cobj = cctx.compressobj()
258 258 self.assertEqual(cobj.compress(source), b'')
259 259 self.assertEqual(cobj.flush(), expected)
260 260
261 261 def test_compressobj_large(self):
262 262 chunks = []
263 263 for i in range(255):
264 264 chunks.append(struct.Struct('>B').pack(i) * 16384)
265 265
266 266 cctx = zstd.ZstdCompressor(level=3)
267 267 cobj = cctx.compressobj()
268 268
269 269 result = cobj.compress(b''.join(chunks)) + cobj.flush()
270 270 self.assertEqual(len(result), 999)
271 271 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
272 272
273 273 params = zstd.get_frame_parameters(result)
274 274 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
275 275 self.assertEqual(params.window_size, 1048576)
276 276 self.assertEqual(params.dict_id, 0)
277 277 self.assertFalse(params.has_checksum)
278 278
279 279 def test_write_checksum(self):
280 280 cctx = zstd.ZstdCompressor(level=1)
281 281 cobj = cctx.compressobj()
282 282 no_checksum = cobj.compress(b'foobar') + cobj.flush()
283 283 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
284 284 cobj = cctx.compressobj()
285 285 with_checksum = cobj.compress(b'foobar') + cobj.flush()
286 286
287 287 no_params = zstd.get_frame_parameters(no_checksum)
288 288 with_params = zstd.get_frame_parameters(with_checksum)
289 289 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
290 290 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
291 291 self.assertEqual(no_params.dict_id, 0)
292 292 self.assertEqual(with_params.dict_id, 0)
293 293 self.assertFalse(no_params.has_checksum)
294 294 self.assertTrue(with_params.has_checksum)
295 295
296 296 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
297 297
298 298 def test_write_content_size(self):
299 299 cctx = zstd.ZstdCompressor(level=1)
300 300 cobj = cctx.compressobj(size=len(b'foobar' * 256))
301 301 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
302 302 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
303 303 cobj = cctx.compressobj(size=len(b'foobar' * 256))
304 304 no_size = cobj.compress(b'foobar' * 256) + cobj.flush()
305 305
306 306 no_params = zstd.get_frame_parameters(no_size)
307 307 with_params = zstd.get_frame_parameters(with_size)
308 308 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
309 309 self.assertEqual(with_params.content_size, 1536)
310 310 self.assertEqual(no_params.dict_id, 0)
311 311 self.assertEqual(with_params.dict_id, 0)
312 312 self.assertFalse(no_params.has_checksum)
313 313 self.assertFalse(with_params.has_checksum)
314 314
315 315 self.assertEqual(len(with_size), len(no_size) + 1)
316 316
317 317 def test_compress_after_finished(self):
318 318 cctx = zstd.ZstdCompressor()
319 319 cobj = cctx.compressobj()
320 320
321 321 cobj.compress(b'foo')
322 322 cobj.flush()
323 323
324 324 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot call compress\(\) after compressor'):
325 325 cobj.compress(b'foo')
326 326
327 327 with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'):
328 328 cobj.flush()
329 329
330 330 def test_flush_block_repeated(self):
331 331 cctx = zstd.ZstdCompressor(level=1)
332 332 cobj = cctx.compressobj()
333 333
334 334 self.assertEqual(cobj.compress(b'foo'), b'')
335 335 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
336 336 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
337 337 self.assertEqual(cobj.compress(b'bar'), b'')
338 338 # 3 byte header plus content.
339 self.assertEqual(cobj.flush(), b'\x19\x00\x00bar')
339 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
340 b'\x18\x00\x00bar')
341 self.assertEqual(cobj.flush(), b'\x01\x00\x00')
340 342
341 343 def test_flush_empty_block(self):
342 344 cctx = zstd.ZstdCompressor(write_checksum=True)
343 345 cobj = cctx.compressobj()
344 346
345 347 cobj.compress(b'foobar')
346 348 cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
347 349 # No-op if no block is active (this is internal to zstd).
348 350 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'')
349 351
350 352 trailing = cobj.flush()
351 353 # 3 bytes block header + 4 bytes frame checksum
352 354 self.assertEqual(len(trailing), 7)
353 355 header = trailing[0:3]
354 356 self.assertEqual(header, b'\x01\x00\x00')
355 357
356 358 def test_multithreaded(self):
357 359 source = io.BytesIO()
358 360 source.write(b'a' * 1048576)
359 361 source.write(b'b' * 1048576)
360 362 source.write(b'c' * 1048576)
361 363 source.seek(0)
362 364
363 365 cctx = zstd.ZstdCompressor(level=1, threads=2)
364 366 cobj = cctx.compressobj()
365 367
366 368 chunks = []
367 369 while True:
368 370 d = source.read(8192)
369 371 if not d:
370 372 break
371 373
372 374 chunks.append(cobj.compress(d))
373 375
374 376 chunks.append(cobj.flush())
375 377
376 378 compressed = b''.join(chunks)
377 379
378 380 self.assertEqual(len(compressed), 295)
379 381
380 382 def test_frame_progression(self):
381 383 cctx = zstd.ZstdCompressor()
382 384
383 385 self.assertEqual(cctx.frame_progression(), (0, 0, 0))
384 386
385 387 cobj = cctx.compressobj()
386 388
387 389 cobj.compress(b'foobar')
388 390 self.assertEqual(cctx.frame_progression(), (6, 0, 0))
389 391
390 392 cobj.flush()
391 393 self.assertEqual(cctx.frame_progression(), (6, 6, 15))
392 394
393 395 def test_bad_size(self):
394 396 cctx = zstd.ZstdCompressor()
395 397
396 398 cobj = cctx.compressobj(size=2)
397 399 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
398 400 cobj.compress(b'foo')
399 401
400 402 # Try another operation on this instance.
401 403 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
402 404 cobj.compress(b'aa')
403 405
404 406 # Try another operation on the compressor.
405 407 cctx.compressobj(size=4)
406 408 cctx.compress(b'foobar')
407 409
408 410
409 411 @make_cffi
410 412 class TestCompressor_copy_stream(unittest.TestCase):
411 413 def test_no_read(self):
412 414 source = object()
413 415 dest = io.BytesIO()
414 416
415 417 cctx = zstd.ZstdCompressor()
416 418 with self.assertRaises(ValueError):
417 419 cctx.copy_stream(source, dest)
418 420
419 421 def test_no_write(self):
420 422 source = io.BytesIO()
421 423 dest = object()
422 424
423 425 cctx = zstd.ZstdCompressor()
424 426 with self.assertRaises(ValueError):
425 427 cctx.copy_stream(source, dest)
426 428
427 429 def test_empty(self):
428 430 source = io.BytesIO()
429 431 dest = io.BytesIO()
430 432
431 433 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
432 434 r, w = cctx.copy_stream(source, dest)
433 435 self.assertEqual(int(r), 0)
434 436 self.assertEqual(w, 9)
435 437
436 438 self.assertEqual(dest.getvalue(),
437 439 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
438 440
439 441 def test_large_data(self):
440 442 source = io.BytesIO()
441 443 for i in range(255):
442 444 source.write(struct.Struct('>B').pack(i) * 16384)
443 445 source.seek(0)
444 446
445 447 dest = io.BytesIO()
446 448 cctx = zstd.ZstdCompressor()
447 449 r, w = cctx.copy_stream(source, dest)
448 450
449 451 self.assertEqual(r, 255 * 16384)
450 452 self.assertEqual(w, 999)
451 453
452 454 params = zstd.get_frame_parameters(dest.getvalue())
453 455 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
454 456 self.assertEqual(params.window_size, 1048576)
455 457 self.assertEqual(params.dict_id, 0)
456 458 self.assertFalse(params.has_checksum)
457 459
458 460 def test_write_checksum(self):
459 461 source = io.BytesIO(b'foobar')
460 462 no_checksum = io.BytesIO()
461 463
462 464 cctx = zstd.ZstdCompressor(level=1)
463 465 cctx.copy_stream(source, no_checksum)
464 466
465 467 source.seek(0)
466 468 with_checksum = io.BytesIO()
467 469 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
468 470 cctx.copy_stream(source, with_checksum)
469 471
470 472 self.assertEqual(len(with_checksum.getvalue()),
471 473 len(no_checksum.getvalue()) + 4)
472 474
473 475 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
474 476 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
475 477 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
476 478 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
477 479 self.assertEqual(no_params.dict_id, 0)
478 480 self.assertEqual(with_params.dict_id, 0)
479 481 self.assertFalse(no_params.has_checksum)
480 482 self.assertTrue(with_params.has_checksum)
481 483
482 484 def test_write_content_size(self):
483 485 source = io.BytesIO(b'foobar' * 256)
484 486 no_size = io.BytesIO()
485 487
486 488 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
487 489 cctx.copy_stream(source, no_size)
488 490
489 491 source.seek(0)
490 492 with_size = io.BytesIO()
491 493 cctx = zstd.ZstdCompressor(level=1)
492 494 cctx.copy_stream(source, with_size)
493 495
494 496 # Source content size is unknown, so no content size written.
495 497 self.assertEqual(len(with_size.getvalue()),
496 498 len(no_size.getvalue()))
497 499
498 500 source.seek(0)
499 501 with_size = io.BytesIO()
500 502 cctx.copy_stream(source, with_size, size=len(source.getvalue()))
501 503
502 504 # We specified source size, so content size header is present.
503 505 self.assertEqual(len(with_size.getvalue()),
504 506 len(no_size.getvalue()) + 1)
505 507
506 508 no_params = zstd.get_frame_parameters(no_size.getvalue())
507 509 with_params = zstd.get_frame_parameters(with_size.getvalue())
508 510 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
509 511 self.assertEqual(with_params.content_size, 1536)
510 512 self.assertEqual(no_params.dict_id, 0)
511 513 self.assertEqual(with_params.dict_id, 0)
512 514 self.assertFalse(no_params.has_checksum)
513 515 self.assertFalse(with_params.has_checksum)
514 516
515 517 def test_read_write_size(self):
516 518 source = OpCountingBytesIO(b'foobarfoobar')
517 519 dest = OpCountingBytesIO()
518 520 cctx = zstd.ZstdCompressor()
519 521 r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1)
520 522
521 523 self.assertEqual(r, len(source.getvalue()))
522 524 self.assertEqual(w, 21)
523 525 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
524 526 self.assertEqual(dest._write_count, len(dest.getvalue()))
525 527
526 528 def test_multithreaded(self):
527 529 source = io.BytesIO()
528 530 source.write(b'a' * 1048576)
529 531 source.write(b'b' * 1048576)
530 532 source.write(b'c' * 1048576)
531 533 source.seek(0)
532 534
533 535 dest = io.BytesIO()
534 536 cctx = zstd.ZstdCompressor(threads=2, write_content_size=False)
535 537 r, w = cctx.copy_stream(source, dest)
536 538 self.assertEqual(r, 3145728)
537 539 self.assertEqual(w, 295)
538 540
539 541 params = zstd.get_frame_parameters(dest.getvalue())
540 542 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
541 543 self.assertEqual(params.dict_id, 0)
542 544 self.assertFalse(params.has_checksum)
543 545
544 546 # Writing content size and checksum works.
545 547 cctx = zstd.ZstdCompressor(threads=2, write_checksum=True)
546 548 dest = io.BytesIO()
547 549 source.seek(0)
548 550 cctx.copy_stream(source, dest, size=len(source.getvalue()))
549 551
550 552 params = zstd.get_frame_parameters(dest.getvalue())
551 553 self.assertEqual(params.content_size, 3145728)
552 554 self.assertEqual(params.dict_id, 0)
553 555 self.assertTrue(params.has_checksum)
554 556
555 557 def test_bad_size(self):
556 558 source = io.BytesIO()
557 559 source.write(b'a' * 32768)
558 560 source.write(b'b' * 32768)
559 561 source.seek(0)
560 562
561 563 dest = io.BytesIO()
562 564
563 565 cctx = zstd.ZstdCompressor()
564 566
565 567 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
566 568 cctx.copy_stream(source, dest, size=42)
567 569
568 570 # Try another operation on this compressor.
569 571 source.seek(0)
570 572 dest = io.BytesIO()
571 573 cctx.copy_stream(source, dest)
572 574
573 575
574 576 @make_cffi
575 577 class TestCompressor_stream_reader(unittest.TestCase):
576 578 def test_context_manager(self):
577 579 cctx = zstd.ZstdCompressor()
578 580
579 reader = cctx.stream_reader(b'foo' * 60)
580 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
581 reader.read(10)
582
583 581 with cctx.stream_reader(b'foo') as reader:
584 582 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
585 583 with reader as reader2:
586 584 pass
587 585
586 def test_no_context_manager(self):
587 cctx = zstd.ZstdCompressor()
588
589 reader = cctx.stream_reader(b'foo')
590 reader.read(4)
591 self.assertFalse(reader.closed)
592
593 reader.close()
594 self.assertTrue(reader.closed)
595 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
596 reader.read(1)
597
588 598 def test_not_implemented(self):
589 599 cctx = zstd.ZstdCompressor()
590 600
591 601 with cctx.stream_reader(b'foo' * 60) as reader:
592 602 with self.assertRaises(io.UnsupportedOperation):
593 603 reader.readline()
594 604
595 605 with self.assertRaises(io.UnsupportedOperation):
596 606 reader.readlines()
597 607
598 608 # This could probably be implemented someday.
599 609 with self.assertRaises(NotImplementedError):
600 610 reader.readall()
601 611
602 612 with self.assertRaises(io.UnsupportedOperation):
603 613 iter(reader)
604 614
605 615 with self.assertRaises(io.UnsupportedOperation):
606 616 next(reader)
607 617
608 618 with self.assertRaises(OSError):
609 619 reader.writelines([])
610 620
611 621 with self.assertRaises(OSError):
612 622 reader.write(b'foo')
613 623
614 624 def test_constant_methods(self):
615 625 cctx = zstd.ZstdCompressor()
616 626
617 627 with cctx.stream_reader(b'boo') as reader:
618 628 self.assertTrue(reader.readable())
619 629 self.assertFalse(reader.writable())
620 630 self.assertFalse(reader.seekable())
621 631 self.assertFalse(reader.isatty())
632 self.assertFalse(reader.closed)
622 633 self.assertIsNone(reader.flush())
634 self.assertFalse(reader.closed)
635
636 self.assertTrue(reader.closed)
623 637
624 638 def test_read_closed(self):
625 639 cctx = zstd.ZstdCompressor()
626 640
627 641 with cctx.stream_reader(b'foo' * 60) as reader:
628 642 reader.close()
643 self.assertTrue(reader.closed)
629 644 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
630 645 reader.read(10)
631 646
632 647 def test_read_bad_size(self):
633 648 cctx = zstd.ZstdCompressor()
634 649
635 650 with cctx.stream_reader(b'foo') as reader:
636 651 with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'):
637 652 reader.read(-1)
638 653
639 654 with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'):
640 655 reader.read(0)
641 656
642 657 def test_read_buffer(self):
643 658 cctx = zstd.ZstdCompressor()
644 659
645 660 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
646 661 frame = cctx.compress(source)
647 662
648 663 with cctx.stream_reader(source) as reader:
649 664 self.assertEqual(reader.tell(), 0)
650 665
651 666 # We should get entire frame in one read.
652 667 result = reader.read(8192)
653 668 self.assertEqual(result, frame)
654 669 self.assertEqual(reader.tell(), len(result))
655 670 self.assertEqual(reader.read(), b'')
656 671 self.assertEqual(reader.tell(), len(result))
657 672
658 673 def test_read_buffer_small_chunks(self):
659 674 cctx = zstd.ZstdCompressor()
660 675
661 676 source = b'foo' * 60
662 677 chunks = []
663 678
664 679 with cctx.stream_reader(source) as reader:
665 680 self.assertEqual(reader.tell(), 0)
666 681
667 682 while True:
668 683 chunk = reader.read(1)
669 684 if not chunk:
670 685 break
671 686
672 687 chunks.append(chunk)
673 688 self.assertEqual(reader.tell(), sum(map(len, chunks)))
674 689
675 690 self.assertEqual(b''.join(chunks), cctx.compress(source))
676 691
677 692 def test_read_stream(self):
678 693 cctx = zstd.ZstdCompressor()
679 694
680 695 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
681 696 frame = cctx.compress(source)
682 697
683 698 with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
684 699 self.assertEqual(reader.tell(), 0)
685 700
686 701 chunk = reader.read(8192)
687 702 self.assertEqual(chunk, frame)
688 703 self.assertEqual(reader.tell(), len(chunk))
689 704 self.assertEqual(reader.read(), b'')
690 705 self.assertEqual(reader.tell(), len(chunk))
691 706
692 707 def test_read_stream_small_chunks(self):
693 708 cctx = zstd.ZstdCompressor()
694 709
695 710 source = b'foo' * 60
696 711 chunks = []
697 712
698 713 with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
699 714 self.assertEqual(reader.tell(), 0)
700 715
701 716 while True:
702 717 chunk = reader.read(1)
703 718 if not chunk:
704 719 break
705 720
706 721 chunks.append(chunk)
707 722 self.assertEqual(reader.tell(), sum(map(len, chunks)))
708 723
709 724 self.assertEqual(b''.join(chunks), cctx.compress(source))
710 725
711 726 def test_read_after_exit(self):
712 727 cctx = zstd.ZstdCompressor()
713 728
714 729 with cctx.stream_reader(b'foo' * 60) as reader:
715 730 while reader.read(8192):
716 731 pass
717 732
718 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
733 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
719 734 reader.read(10)
720 735
721 736 def test_bad_size(self):
722 737 cctx = zstd.ZstdCompressor()
723 738
724 739 source = io.BytesIO(b'foobar')
725 740
726 741 with cctx.stream_reader(source, size=2) as reader:
727 742 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
728 743 reader.read(10)
729 744
730 745 # Try another compression operation.
731 746 with cctx.stream_reader(source, size=42):
732 747 pass
733 748
734 749
735 750 @make_cffi
736 751 class TestCompressor_stream_writer(unittest.TestCase):
737 752 def test_empty(self):
738 753 buffer = io.BytesIO()
739 754 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
740 755 with cctx.stream_writer(buffer) as compressor:
741 756 compressor.write(b'')
742 757
743 758 result = buffer.getvalue()
744 759 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
745 760
746 761 params = zstd.get_frame_parameters(result)
747 762 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
748 763 self.assertEqual(params.window_size, 524288)
749 764 self.assertEqual(params.dict_id, 0)
750 765 self.assertFalse(params.has_checksum)
751 766
752 767 def test_input_types(self):
753 768 expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f'
754 769 cctx = zstd.ZstdCompressor(level=1)
755 770
756 771 mutable_array = bytearray(3)
757 772 mutable_array[:] = b'foo'
758 773
759 774 sources = [
760 775 memoryview(b'foo'),
761 776 bytearray(b'foo'),
762 777 mutable_array,
763 778 ]
764 779
765 780 for source in sources:
766 781 buffer = io.BytesIO()
767 782 with cctx.stream_writer(buffer) as compressor:
768 783 compressor.write(source)
769 784
770 785 self.assertEqual(buffer.getvalue(), expected)
771 786
772 787 def test_multiple_compress(self):
773 788 buffer = io.BytesIO()
774 789 cctx = zstd.ZstdCompressor(level=5)
775 790 with cctx.stream_writer(buffer) as compressor:
776 791 self.assertEqual(compressor.write(b'foo'), 0)
777 792 self.assertEqual(compressor.write(b'bar'), 0)
778 793 self.assertEqual(compressor.write(b'x' * 8192), 0)
779 794
780 795 result = buffer.getvalue()
781 796 self.assertEqual(result,
782 797 b'\x28\xb5\x2f\xfd\x00\x50\x75\x00\x00\x38\x66\x6f'
783 798 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
784 799
785 800 def test_dictionary(self):
786 801 samples = []
787 802 for i in range(128):
788 803 samples.append(b'foo' * 64)
789 804 samples.append(b'bar' * 64)
790 805 samples.append(b'foobar' * 64)
791 806
792 807 d = zstd.train_dictionary(8192, samples)
793 808
794 809 h = hashlib.sha1(d.as_bytes()).hexdigest()
795 self.assertEqual(h, '3040faa0ddc37d50e71a4dd28052cb8db5d9d027')
810 self.assertEqual(h, '2b3b6428da5bf2c9cc9d4bb58ba0bc5990dd0e79')
796 811
797 812 buffer = io.BytesIO()
798 813 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
799 814 with cctx.stream_writer(buffer) as compressor:
800 815 self.assertEqual(compressor.write(b'foo'), 0)
801 816 self.assertEqual(compressor.write(b'bar'), 0)
802 817 self.assertEqual(compressor.write(b'foo' * 16384), 0)
803 818
804 819 compressed = buffer.getvalue()
805 820
806 821 params = zstd.get_frame_parameters(compressed)
807 822 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
808 823 self.assertEqual(params.window_size, 2097152)
809 824 self.assertEqual(params.dict_id, d.dict_id())
810 825 self.assertFalse(params.has_checksum)
811 self.assertEqual(compressed,
812 b'\x28\xb5\x2f\xfd\x03\x58\x06\x59\xb5\x52\x5d\x00'
813 b'\x00\x00\x02\xfc\x3d\x3f\xd9\xb0\x51\x03\x45\x89')
826
827 h = hashlib.sha1(compressed).hexdigest()
828 self.assertEqual(h, '23f88344263678478f5f82298e0a5d1833125786')
829
830 source = b'foo' + b'bar' + (b'foo' * 16384)
831
832 dctx = zstd.ZstdDecompressor(dict_data=d)
833
834 self.assertEqual(dctx.decompress(compressed, max_output_size=len(source)),
835 source)
814 836
815 837 def test_compression_params(self):
816 838 params = zstd.ZstdCompressionParameters(
817 839 window_log=20,
818 840 chain_log=6,
819 841 hash_log=12,
820 842 min_match=5,
821 843 search_log=4,
822 844 target_length=10,
823 845 compression_strategy=zstd.STRATEGY_FAST)
824 846
825 847 buffer = io.BytesIO()
826 848 cctx = zstd.ZstdCompressor(compression_params=params)
827 849 with cctx.stream_writer(buffer) as compressor:
828 850 self.assertEqual(compressor.write(b'foo'), 0)
829 851 self.assertEqual(compressor.write(b'bar'), 0)
830 852 self.assertEqual(compressor.write(b'foobar' * 16384), 0)
831 853
832 854 compressed = buffer.getvalue()
833 855
834 856 params = zstd.get_frame_parameters(compressed)
835 857 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
836 858 self.assertEqual(params.window_size, 1048576)
837 859 self.assertEqual(params.dict_id, 0)
838 860 self.assertFalse(params.has_checksum)
839 861
840 862 h = hashlib.sha1(compressed).hexdigest()
841 863 self.assertEqual(h, '2a8111d72eb5004cdcecbdac37da9f26720d30ef')
842 864
843 865 def test_write_checksum(self):
844 866 no_checksum = io.BytesIO()
845 867 cctx = zstd.ZstdCompressor(level=1)
846 868 with cctx.stream_writer(no_checksum) as compressor:
847 869 self.assertEqual(compressor.write(b'foobar'), 0)
848 870
849 871 with_checksum = io.BytesIO()
850 872 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
851 873 with cctx.stream_writer(with_checksum) as compressor:
852 874 self.assertEqual(compressor.write(b'foobar'), 0)
853 875
854 876 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
855 877 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
856 878 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
857 879 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
858 880 self.assertEqual(no_params.dict_id, 0)
859 881 self.assertEqual(with_params.dict_id, 0)
860 882 self.assertFalse(no_params.has_checksum)
861 883 self.assertTrue(with_params.has_checksum)
862 884
863 885 self.assertEqual(len(with_checksum.getvalue()),
864 886 len(no_checksum.getvalue()) + 4)
865 887
866 888 def test_write_content_size(self):
867 889 no_size = io.BytesIO()
868 890 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
869 891 with cctx.stream_writer(no_size) as compressor:
870 892 self.assertEqual(compressor.write(b'foobar' * 256), 0)
871 893
872 894 with_size = io.BytesIO()
873 895 cctx = zstd.ZstdCompressor(level=1)
874 896 with cctx.stream_writer(with_size) as compressor:
875 897 self.assertEqual(compressor.write(b'foobar' * 256), 0)
876 898
877 899 # Source size is not known in streaming mode, so header not
878 900 # written.
879 901 self.assertEqual(len(with_size.getvalue()),
880 902 len(no_size.getvalue()))
881 903
882 904 # Declaring size will write the header.
883 905 with_size = io.BytesIO()
884 906 with cctx.stream_writer(with_size, size=len(b'foobar' * 256)) as compressor:
885 907 self.assertEqual(compressor.write(b'foobar' * 256), 0)
886 908
887 909 no_params = zstd.get_frame_parameters(no_size.getvalue())
888 910 with_params = zstd.get_frame_parameters(with_size.getvalue())
889 911 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
890 912 self.assertEqual(with_params.content_size, 1536)
891 913 self.assertEqual(no_params.dict_id, 0)
892 914 self.assertEqual(with_params.dict_id, 0)
893 915 self.assertFalse(no_params.has_checksum)
894 916 self.assertFalse(with_params.has_checksum)
895 917
896 918 self.assertEqual(len(with_size.getvalue()),
897 919 len(no_size.getvalue()) + 1)
898 920
899 921 def test_no_dict_id(self):
900 922 samples = []
901 923 for i in range(128):
902 924 samples.append(b'foo' * 64)
903 925 samples.append(b'bar' * 64)
904 926 samples.append(b'foobar' * 64)
905 927
906 928 d = zstd.train_dictionary(1024, samples)
907 929
908 930 with_dict_id = io.BytesIO()
909 931 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
910 932 with cctx.stream_writer(with_dict_id) as compressor:
911 933 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
912 934
913 935 self.assertEqual(with_dict_id.getvalue()[4:5], b'\x03')
914 936
915 937 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
916 938 no_dict_id = io.BytesIO()
917 939 with cctx.stream_writer(no_dict_id) as compressor:
918 940 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
919 941
920 942 self.assertEqual(no_dict_id.getvalue()[4:5], b'\x00')
921 943
922 944 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
923 945 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
924 946 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
925 947 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
926 948 self.assertEqual(no_params.dict_id, 0)
927 949 self.assertEqual(with_params.dict_id, d.dict_id())
928 950 self.assertFalse(no_params.has_checksum)
929 951 self.assertFalse(with_params.has_checksum)
930 952
931 953 self.assertEqual(len(with_dict_id.getvalue()),
932 954 len(no_dict_id.getvalue()) + 4)
933 955
934 956 def test_memory_size(self):
935 957 cctx = zstd.ZstdCompressor(level=3)
936 958 buffer = io.BytesIO()
937 959 with cctx.stream_writer(buffer) as compressor:
938 960 compressor.write(b'foo')
939 961 size = compressor.memory_size()
940 962
941 963 self.assertGreater(size, 100000)
942 964
943 965 def test_write_size(self):
944 966 cctx = zstd.ZstdCompressor(level=3)
945 967 dest = OpCountingBytesIO()
946 968 with cctx.stream_writer(dest, write_size=1) as compressor:
947 969 self.assertEqual(compressor.write(b'foo'), 0)
948 970 self.assertEqual(compressor.write(b'bar'), 0)
949 971 self.assertEqual(compressor.write(b'foobar'), 0)
950 972
951 973 self.assertEqual(len(dest.getvalue()), dest._write_count)
952 974
953 975 def test_flush_repeated(self):
954 976 cctx = zstd.ZstdCompressor(level=3)
955 977 dest = OpCountingBytesIO()
956 978 with cctx.stream_writer(dest) as compressor:
957 979 self.assertEqual(compressor.write(b'foo'), 0)
958 980 self.assertEqual(dest._write_count, 0)
959 981 self.assertEqual(compressor.flush(), 12)
960 982 self.assertEqual(dest._write_count, 1)
961 983 self.assertEqual(compressor.write(b'bar'), 0)
962 984 self.assertEqual(dest._write_count, 1)
963 985 self.assertEqual(compressor.flush(), 6)
964 986 self.assertEqual(dest._write_count, 2)
965 987 self.assertEqual(compressor.write(b'baz'), 0)
966 988
967 989 self.assertEqual(dest._write_count, 3)
968 990
969 991 def test_flush_empty_block(self):
970 992 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
971 993 dest = OpCountingBytesIO()
972 994 with cctx.stream_writer(dest) as compressor:
973 995 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
974 996 count = dest._write_count
975 997 offset = dest.tell()
976 998 self.assertEqual(compressor.flush(), 23)
977 999 self.assertGreater(dest._write_count, count)
978 1000 self.assertGreater(dest.tell(), offset)
979 1001 offset = dest.tell()
980 1002 # Ending the write here should cause an empty block to be written
981 1003 # to denote end of frame.
982 1004
983 1005 trailing = dest.getvalue()[offset:]
984 1006 # 3 bytes block header + 4 bytes frame checksum
985 1007 self.assertEqual(len(trailing), 7)
986 1008
987 1009 header = trailing[0:3]
988 1010 self.assertEqual(header, b'\x01\x00\x00')
989 1011
990 1012 def test_multithreaded(self):
991 1013 dest = io.BytesIO()
992 1014 cctx = zstd.ZstdCompressor(threads=2)
993 1015 with cctx.stream_writer(dest) as compressor:
994 1016 compressor.write(b'a' * 1048576)
995 1017 compressor.write(b'b' * 1048576)
996 1018 compressor.write(b'c' * 1048576)
997 1019
998 1020 self.assertEqual(len(dest.getvalue()), 295)
999 1021
1000 1022 def test_tell(self):
1001 1023 dest = io.BytesIO()
1002 1024 cctx = zstd.ZstdCompressor()
1003 1025 with cctx.stream_writer(dest) as compressor:
1004 1026 self.assertEqual(compressor.tell(), 0)
1005 1027
1006 1028 for i in range(256):
1007 1029 compressor.write(b'foo' * (i + 1))
1008 1030 self.assertEqual(compressor.tell(), dest.tell())
1009 1031
1010 1032 def test_bad_size(self):
1011 1033 cctx = zstd.ZstdCompressor()
1012 1034
1013 1035 dest = io.BytesIO()
1014 1036
1015 1037 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
1016 1038 with cctx.stream_writer(dest, size=2) as compressor:
1017 1039 compressor.write(b'foo')
1018 1040
1019 1041 # Test another operation.
1020 1042 with cctx.stream_writer(dest, size=42):
1021 1043 pass
1022 1044
1023 1045 def test_tarfile_compat(self):
1024 1046 raise unittest.SkipTest('not yet fully working')
1025 1047
1026 1048 dest = io.BytesIO()
1027 1049 cctx = zstd.ZstdCompressor()
1028 1050 with cctx.stream_writer(dest) as compressor:
1029 1051 with tarfile.open('tf', mode='w', fileobj=compressor) as tf:
1030 1052 tf.add(__file__, 'test_compressor.py')
1031 1053
1032 1054 dest.seek(0)
1033 1055
1034 1056 dctx = zstd.ZstdDecompressor()
1035 1057 with dctx.stream_reader(dest) as reader:
1036 1058 with tarfile.open(mode='r:', fileobj=reader) as tf:
1037 1059 for member in tf:
1038 1060 self.assertEqual(member.name, 'test_compressor.py')
1039 1061
1040 1062 @make_cffi
1041 1063 class TestCompressor_read_to_iter(unittest.TestCase):
1042 1064 def test_type_validation(self):
1043 1065 cctx = zstd.ZstdCompressor()
1044 1066
1045 1067 # Object with read() works.
1046 1068 for chunk in cctx.read_to_iter(io.BytesIO()):
1047 1069 pass
1048 1070
1049 1071 # Buffer protocol works.
1050 1072 for chunk in cctx.read_to_iter(b'foobar'):
1051 1073 pass
1052 1074
1053 1075 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
1054 1076 for chunk in cctx.read_to_iter(True):
1055 1077 pass
1056 1078
1057 1079 def test_read_empty(self):
1058 1080 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1059 1081
1060 1082 source = io.BytesIO()
1061 1083 it = cctx.read_to_iter(source)
1062 1084 chunks = list(it)
1063 1085 self.assertEqual(len(chunks), 1)
1064 1086 compressed = b''.join(chunks)
1065 1087 self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
1066 1088
1067 1089 # And again with the buffer protocol.
1068 1090 it = cctx.read_to_iter(b'')
1069 1091 chunks = list(it)
1070 1092 self.assertEqual(len(chunks), 1)
1071 1093 compressed2 = b''.join(chunks)
1072 1094 self.assertEqual(compressed2, compressed)
1073 1095
1074 1096 def test_read_large(self):
1075 1097 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1076 1098
1077 1099 source = io.BytesIO()
1078 1100 source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
1079 1101 source.write(b'o')
1080 1102 source.seek(0)
1081 1103
1082 1104 # Creating an iterator should not perform any compression until
1083 1105 # first read.
1084 1106 it = cctx.read_to_iter(source, size=len(source.getvalue()))
1085 1107 self.assertEqual(source.tell(), 0)
1086 1108
1087 1109 # We should have exactly 2 output chunks.
1088 1110 chunks = []
1089 1111 chunk = next(it)
1090 1112 self.assertIsNotNone(chunk)
1091 1113 self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
1092 1114 chunks.append(chunk)
1093 1115 chunk = next(it)
1094 1116 self.assertIsNotNone(chunk)
1095 1117 chunks.append(chunk)
1096 1118
1097 1119 self.assertEqual(source.tell(), len(source.getvalue()))
1098 1120
1099 1121 with self.assertRaises(StopIteration):
1100 1122 next(it)
1101 1123
1102 1124 # And again for good measure.
1103 1125 with self.assertRaises(StopIteration):
1104 1126 next(it)
1105 1127
1106 1128 # We should get the same output as the one-shot compression mechanism.
1107 1129 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
1108 1130
1109 1131 params = zstd.get_frame_parameters(b''.join(chunks))
1110 1132 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1111 1133 self.assertEqual(params.window_size, 262144)
1112 1134 self.assertEqual(params.dict_id, 0)
1113 1135 self.assertFalse(params.has_checksum)
1114 1136
1115 1137 # Now check the buffer protocol.
1116 1138 it = cctx.read_to_iter(source.getvalue())
1117 1139 chunks = list(it)
1118 1140 self.assertEqual(len(chunks), 2)
1119 1141
1120 1142 params = zstd.get_frame_parameters(b''.join(chunks))
1121 1143 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1122 1144 #self.assertEqual(params.window_size, 262144)
1123 1145 self.assertEqual(params.dict_id, 0)
1124 1146 self.assertFalse(params.has_checksum)
1125 1147
1126 1148 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
1127 1149
1128 1150 def test_read_write_size(self):
1129 1151 source = OpCountingBytesIO(b'foobarfoobar')
1130 1152 cctx = zstd.ZstdCompressor(level=3)
1131 1153 for chunk in cctx.read_to_iter(source, read_size=1, write_size=1):
1132 1154 self.assertEqual(len(chunk), 1)
1133 1155
1134 1156 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
1135 1157
1136 1158 def test_multithreaded(self):
1137 1159 source = io.BytesIO()
1138 1160 source.write(b'a' * 1048576)
1139 1161 source.write(b'b' * 1048576)
1140 1162 source.write(b'c' * 1048576)
1141 1163 source.seek(0)
1142 1164
1143 1165 cctx = zstd.ZstdCompressor(threads=2)
1144 1166
1145 1167 compressed = b''.join(cctx.read_to_iter(source))
1146 1168 self.assertEqual(len(compressed), 295)
1147 1169
1148 1170 def test_bad_size(self):
1149 1171 cctx = zstd.ZstdCompressor()
1150 1172
1151 1173 source = io.BytesIO(b'a' * 42)
1152 1174
1153 1175 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
1154 1176 b''.join(cctx.read_to_iter(source, size=2))
1155 1177
1156 1178 # Test another operation on errored compressor.
1157 1179 b''.join(cctx.read_to_iter(source))
1158 1180
1159 1181
1182 @make_cffi
1183 class TestCompressor_chunker(unittest.TestCase):
1184 def test_empty(self):
1185 cctx = zstd.ZstdCompressor(write_content_size=False)
1186 chunker = cctx.chunker()
1187
1188 it = chunker.compress(b'')
1189
1190 with self.assertRaises(StopIteration):
1191 next(it)
1192
1193 it = chunker.finish()
1194
1195 self.assertEqual(next(it), b'\x28\xb5\x2f\xfd\x00\x50\x01\x00\x00')
1196
1197 with self.assertRaises(StopIteration):
1198 next(it)
1199
1200 def test_simple_input(self):
1201 cctx = zstd.ZstdCompressor()
1202 chunker = cctx.chunker()
1203
1204 it = chunker.compress(b'foobar')
1205
1206 with self.assertRaises(StopIteration):
1207 next(it)
1208
1209 it = chunker.compress(b'baz' * 30)
1210
1211 with self.assertRaises(StopIteration):
1212 next(it)
1213
1214 it = chunker.finish()
1215
1216 self.assertEqual(next(it),
1217 b'\x28\xb5\x2f\xfd\x00\x50\x7d\x00\x00\x48\x66\x6f'
1218 b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e')
1219
1220 with self.assertRaises(StopIteration):
1221 next(it)
1222
1223 def test_input_size(self):
1224 cctx = zstd.ZstdCompressor()
1225 chunker = cctx.chunker(size=1024)
1226
1227 it = chunker.compress(b'x' * 1000)
1228
1229 with self.assertRaises(StopIteration):
1230 next(it)
1231
1232 it = chunker.compress(b'y' * 24)
1233
1234 with self.assertRaises(StopIteration):
1235 next(it)
1236
1237 chunks = list(chunker.finish())
1238
1239 self.assertEqual(chunks, [
1240 b'\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00'
1241 b'\xa0\x16\xe3\x2b\x80\x05'
1242 ])
1243
1244 dctx = zstd.ZstdDecompressor()
1245
1246 self.assertEqual(dctx.decompress(b''.join(chunks)),
1247 (b'x' * 1000) + (b'y' * 24))
1248
1249 def test_small_chunk_size(self):
1250 cctx = zstd.ZstdCompressor()
1251 chunker = cctx.chunker(chunk_size=1)
1252
1253 chunks = list(chunker.compress(b'foo' * 1024))
1254 self.assertEqual(chunks, [])
1255
1256 chunks = list(chunker.finish())
1257 self.assertTrue(all(len(chunk) == 1 for chunk in chunks))
1258
1259 self.assertEqual(
1260 b''.join(chunks),
1261 b'\x28\xb5\x2f\xfd\x00\x50\x55\x00\x00\x18\x66\x6f\x6f\x01\x00'
1262 b'\xfa\xd3\x77\x43')
1263
1264 dctx = zstd.ZstdDecompressor()
1265 self.assertEqual(dctx.decompress(b''.join(chunks),
1266 max_output_size=10000),
1267 b'foo' * 1024)
1268
1269 def test_input_types(self):
1270 cctx = zstd.ZstdCompressor()
1271
1272 mutable_array = bytearray(3)
1273 mutable_array[:] = b'foo'
1274
1275 sources = [
1276 memoryview(b'foo'),
1277 bytearray(b'foo'),
1278 mutable_array,
1279 ]
1280
1281 for source in sources:
1282 chunker = cctx.chunker()
1283
1284 self.assertEqual(list(chunker.compress(source)), [])
1285 self.assertEqual(list(chunker.finish()), [
1286 b'\x28\xb5\x2f\xfd\x00\x50\x19\x00\x00\x66\x6f\x6f'
1287 ])
1288
1289 def test_flush(self):
1290 cctx = zstd.ZstdCompressor()
1291 chunker = cctx.chunker()
1292
1293 self.assertEqual(list(chunker.compress(b'foo' * 1024)), [])
1294 self.assertEqual(list(chunker.compress(b'bar' * 1024)), [])
1295
1296 chunks1 = list(chunker.flush())
1297
1298 self.assertEqual(chunks1, [
1299 b'\x28\xb5\x2f\xfd\x00\x50\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72'
1300 b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02'
1301 ])
1302
1303 self.assertEqual(list(chunker.flush()), [])
1304 self.assertEqual(list(chunker.flush()), [])
1305
1306 self.assertEqual(list(chunker.compress(b'baz' * 1024)), [])
1307
1308 chunks2 = list(chunker.flush())
1309 self.assertEqual(len(chunks2), 1)
1310
1311 chunks3 = list(chunker.finish())
1312 self.assertEqual(len(chunks3), 1)
1313
1314 dctx = zstd.ZstdDecompressor()
1315
1316 self.assertEqual(dctx.decompress(b''.join(chunks1 + chunks2 + chunks3),
1317 max_output_size=10000),
1318 (b'foo' * 1024) + (b'bar' * 1024) + (b'baz' * 1024))
1319
1320 def test_compress_after_finish(self):
1321 cctx = zstd.ZstdCompressor()
1322 chunker = cctx.chunker()
1323
1324 list(chunker.compress(b'foo'))
1325 list(chunker.finish())
1326
1327 with self.assertRaisesRegexp(
1328 zstd.ZstdError,
1329 'cannot call compress\(\) after compression finished'):
1330 list(chunker.compress(b'foo'))
1331
1332 def test_flush_after_finish(self):
1333 cctx = zstd.ZstdCompressor()
1334 chunker = cctx.chunker()
1335
1336 list(chunker.compress(b'foo'))
1337 list(chunker.finish())
1338
1339 with self.assertRaisesRegexp(
1340 zstd.ZstdError,
1341 'cannot call flush\(\) after compression finished'):
1342 list(chunker.flush())
1343
1344 def test_finish_after_finish(self):
1345 cctx = zstd.ZstdCompressor()
1346 chunker = cctx.chunker()
1347
1348 list(chunker.compress(b'foo'))
1349 list(chunker.finish())
1350
1351 with self.assertRaisesRegexp(
1352 zstd.ZstdError,
1353 'cannot call finish\(\) after compression finished'):
1354 list(chunker.finish())
1355
1356
1160 1357 class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
1161 1358 def test_invalid_inputs(self):
1162 1359 cctx = zstd.ZstdCompressor()
1163 1360
1164 1361 with self.assertRaises(TypeError):
1165 1362 cctx.multi_compress_to_buffer(True)
1166 1363
1167 1364 with self.assertRaises(TypeError):
1168 1365 cctx.multi_compress_to_buffer((1, 2))
1169 1366
1170 1367 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
1171 1368 cctx.multi_compress_to_buffer([u'foo'])
1172 1369
1173 1370 def test_empty_input(self):
1174 1371 cctx = zstd.ZstdCompressor()
1175 1372
1176 1373 with self.assertRaisesRegexp(ValueError, 'no source elements found'):
1177 1374 cctx.multi_compress_to_buffer([])
1178 1375
1179 1376 with self.assertRaisesRegexp(ValueError, 'source elements are empty'):
1180 1377 cctx.multi_compress_to_buffer([b'', b'', b''])
1181 1378
1182 1379 def test_list_input(self):
1183 1380 cctx = zstd.ZstdCompressor(write_checksum=True)
1184 1381
1185 1382 original = [b'foo' * 12, b'bar' * 6]
1186 1383 frames = [cctx.compress(c) for c in original]
1187 1384 b = cctx.multi_compress_to_buffer(original)
1188 1385
1189 1386 self.assertIsInstance(b, zstd.BufferWithSegmentsCollection)
1190 1387
1191 1388 self.assertEqual(len(b), 2)
1192 1389 self.assertEqual(b.size(), 44)
1193 1390
1194 1391 self.assertEqual(b[0].tobytes(), frames[0])
1195 1392 self.assertEqual(b[1].tobytes(), frames[1])
1196 1393
1197 1394 def test_buffer_with_segments_input(self):
1198 1395 cctx = zstd.ZstdCompressor(write_checksum=True)
1199 1396
1200 1397 original = [b'foo' * 4, b'bar' * 6]
1201 1398 frames = [cctx.compress(c) for c in original]
1202 1399
1203 1400 offsets = struct.pack('=QQQQ', 0, len(original[0]),
1204 1401 len(original[0]), len(original[1]))
1205 1402 segments = zstd.BufferWithSegments(b''.join(original), offsets)
1206 1403
1207 1404 result = cctx.multi_compress_to_buffer(segments)
1208 1405
1209 1406 self.assertEqual(len(result), 2)
1210 1407 self.assertEqual(result.size(), 47)
1211 1408
1212 1409 self.assertEqual(result[0].tobytes(), frames[0])
1213 1410 self.assertEqual(result[1].tobytes(), frames[1])
1214 1411
1215 1412 def test_buffer_with_segments_collection_input(self):
1216 1413 cctx = zstd.ZstdCompressor(write_checksum=True)
1217 1414
1218 1415 original = [
1219 1416 b'foo1',
1220 1417 b'foo2' * 2,
1221 1418 b'foo3' * 3,
1222 1419 b'foo4' * 4,
1223 1420 b'foo5' * 5,
1224 1421 ]
1225 1422
1226 1423 frames = [cctx.compress(c) for c in original]
1227 1424
1228 1425 b = b''.join([original[0], original[1]])
1229 1426 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
1230 1427 0, len(original[0]),
1231 1428 len(original[0]), len(original[1])))
1232 1429 b = b''.join([original[2], original[3], original[4]])
1233 1430 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
1234 1431 0, len(original[2]),
1235 1432 len(original[2]), len(original[3]),
1236 1433 len(original[2]) + len(original[3]), len(original[4])))
1237 1434
1238 1435 c = zstd.BufferWithSegmentsCollection(b1, b2)
1239 1436
1240 1437 result = cctx.multi_compress_to_buffer(c)
1241 1438
1242 1439 self.assertEqual(len(result), len(frames))
1243 1440
1244 1441 for i, frame in enumerate(frames):
1245 1442 self.assertEqual(result[i].tobytes(), frame)
1246 1443
1247 1444 def test_multiple_threads(self):
1248 1445 # threads argument will cause multi-threaded ZSTD APIs to be used, which will
1249 1446 # make output different.
1250 1447 refcctx = zstd.ZstdCompressor(write_checksum=True)
1251 1448 reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)]
1252 1449
1253 1450 cctx = zstd.ZstdCompressor(write_checksum=True)
1254 1451
1255 1452 frames = []
1256 1453 frames.extend(b'x' * 64 for i in range(256))
1257 1454 frames.extend(b'y' * 64 for i in range(256))
1258 1455
1259 1456 result = cctx.multi_compress_to_buffer(frames, threads=-1)
1260 1457
1261 1458 self.assertEqual(len(result), 512)
1262 1459 for i in range(512):
1263 1460 if i < 256:
1264 1461 self.assertEqual(result[i].tobytes(), reference[0])
1265 1462 else:
1266 1463 self.assertEqual(result[i].tobytes(), reference[1])
@@ -1,188 +1,320 @@
1 1 import io
2 2 import os
3 3 import unittest
4 4
5 5 try:
6 6 import hypothesis
7 7 import hypothesis.strategies as strategies
8 8 except ImportError:
9 9 raise unittest.SkipTest('hypothesis not available')
10 10
11 11 import zstandard as zstd
12 12
13 13 from . common import (
14 14 make_cffi,
15 15 random_input_data,
16 16 )
17 17
18 18
19 19 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
20 20 @make_cffi
21 21 class TestCompressor_stream_reader_fuzzing(unittest.TestCase):
22 22 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
23 23 level=strategies.integers(min_value=1, max_value=5),
24 24 source_read_size=strategies.integers(1, 16384),
25 25 read_sizes=strategies.data())
26 26 def test_stream_source_read_variance(self, original, level, source_read_size,
27 27 read_sizes):
28 28 refctx = zstd.ZstdCompressor(level=level)
29 29 ref_frame = refctx.compress(original)
30 30
31 31 cctx = zstd.ZstdCompressor(level=level)
32 32 with cctx.stream_reader(io.BytesIO(original), size=len(original),
33 33 read_size=source_read_size) as reader:
34 34 chunks = []
35 35 while True:
36 36 read_size = read_sizes.draw(strategies.integers(1, 16384))
37 37 chunk = reader.read(read_size)
38 38
39 39 if not chunk:
40 40 break
41 41 chunks.append(chunk)
42 42
43 43 self.assertEqual(b''.join(chunks), ref_frame)
44 44
45 45 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
46 46 level=strategies.integers(min_value=1, max_value=5),
47 47 source_read_size=strategies.integers(1, 16384),
48 48 read_sizes=strategies.data())
49 49 def test_buffer_source_read_variance(self, original, level, source_read_size,
50 50 read_sizes):
51 51
52 52 refctx = zstd.ZstdCompressor(level=level)
53 53 ref_frame = refctx.compress(original)
54 54
55 55 cctx = zstd.ZstdCompressor(level=level)
56 56 with cctx.stream_reader(original, size=len(original),
57 57 read_size=source_read_size) as reader:
58 58 chunks = []
59 59 while True:
60 60 read_size = read_sizes.draw(strategies.integers(1, 16384))
61 61 chunk = reader.read(read_size)
62 62 if not chunk:
63 63 break
64 64 chunks.append(chunk)
65 65
66 66 self.assertEqual(b''.join(chunks), ref_frame)
67 67
68 68
69 69 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
70 70 @make_cffi
71 71 class TestCompressor_stream_writer_fuzzing(unittest.TestCase):
72 72 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
73 73 level=strategies.integers(min_value=1, max_value=5),
74 74 write_size=strategies.integers(min_value=1, max_value=1048576))
75 75 def test_write_size_variance(self, original, level, write_size):
76 76 refctx = zstd.ZstdCompressor(level=level)
77 77 ref_frame = refctx.compress(original)
78 78
79 79 cctx = zstd.ZstdCompressor(level=level)
80 80 b = io.BytesIO()
81 81 with cctx.stream_writer(b, size=len(original), write_size=write_size) as compressor:
82 82 compressor.write(original)
83 83
84 84 self.assertEqual(b.getvalue(), ref_frame)
85 85
86 86
87 87 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
88 88 @make_cffi
89 89 class TestCompressor_copy_stream_fuzzing(unittest.TestCase):
90 90 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
91 91 level=strategies.integers(min_value=1, max_value=5),
92 92 read_size=strategies.integers(min_value=1, max_value=1048576),
93 93 write_size=strategies.integers(min_value=1, max_value=1048576))
94 94 def test_read_write_size_variance(self, original, level, read_size, write_size):
95 95 refctx = zstd.ZstdCompressor(level=level)
96 96 ref_frame = refctx.compress(original)
97 97
98 98 cctx = zstd.ZstdCompressor(level=level)
99 99 source = io.BytesIO(original)
100 100 dest = io.BytesIO()
101 101
102 102 cctx.copy_stream(source, dest, size=len(original), read_size=read_size,
103 103 write_size=write_size)
104 104
105 105 self.assertEqual(dest.getvalue(), ref_frame)
106 106
107 107
108 108 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
109 109 @make_cffi
110 110 class TestCompressor_compressobj_fuzzing(unittest.TestCase):
111 111 @hypothesis.settings(
112 112 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
113 113 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
114 114 level=strategies.integers(min_value=1, max_value=5),
115 115 chunk_sizes=strategies.data())
116 116 def test_random_input_sizes(self, original, level, chunk_sizes):
117 117 refctx = zstd.ZstdCompressor(level=level)
118 118 ref_frame = refctx.compress(original)
119 119
120 120 cctx = zstd.ZstdCompressor(level=level)
121 121 cobj = cctx.compressobj(size=len(original))
122 122
123 123 chunks = []
124 124 i = 0
125 125 while True:
126 126 chunk_size = chunk_sizes.draw(strategies.integers(1, 4096))
127 127 source = original[i:i + chunk_size]
128 128 if not source:
129 129 break
130 130
131 131 chunks.append(cobj.compress(source))
132 132 i += chunk_size
133 133
134 134 chunks.append(cobj.flush())
135 135
136 136 self.assertEqual(b''.join(chunks), ref_frame)
137 137
138 @hypothesis.settings(
139 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
140 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
141 level=strategies.integers(min_value=1, max_value=5),
142 chunk_sizes=strategies.data(),
143 flushes=strategies.data())
144 def test_flush_block(self, original, level, chunk_sizes, flushes):
145 cctx = zstd.ZstdCompressor(level=level)
146 cobj = cctx.compressobj()
147
148 dctx = zstd.ZstdDecompressor()
149 dobj = dctx.decompressobj()
150
151 compressed_chunks = []
152 decompressed_chunks = []
153 i = 0
154 while True:
155 input_size = chunk_sizes.draw(strategies.integers(1, 4096))
156 source = original[i:i + input_size]
157 if not source:
158 break
159
160 i += input_size
161
162 chunk = cobj.compress(source)
163 compressed_chunks.append(chunk)
164 decompressed_chunks.append(dobj.decompress(chunk))
165
166 if not flushes.draw(strategies.booleans()):
167 continue
168
169 chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
170 compressed_chunks.append(chunk)
171 decompressed_chunks.append(dobj.decompress(chunk))
172
173 self.assertEqual(b''.join(decompressed_chunks), original[0:i])
174
175 chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH)
176 compressed_chunks.append(chunk)
177 decompressed_chunks.append(dobj.decompress(chunk))
178
179 self.assertEqual(dctx.decompress(b''.join(compressed_chunks),
180 max_output_size=len(original)),
181 original)
182 self.assertEqual(b''.join(decompressed_chunks), original)
138 183
139 184 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
140 185 @make_cffi
141 186 class TestCompressor_read_to_iter_fuzzing(unittest.TestCase):
142 187 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
143 188 level=strategies.integers(min_value=1, max_value=5),
144 189 read_size=strategies.integers(min_value=1, max_value=4096),
145 190 write_size=strategies.integers(min_value=1, max_value=4096))
146 191 def test_read_write_size_variance(self, original, level, read_size, write_size):
147 192 refcctx = zstd.ZstdCompressor(level=level)
148 193 ref_frame = refcctx.compress(original)
149 194
150 195 source = io.BytesIO(original)
151 196
152 197 cctx = zstd.ZstdCompressor(level=level)
153 198 chunks = list(cctx.read_to_iter(source, size=len(original),
154 199 read_size=read_size,
155 200 write_size=write_size))
156 201
157 202 self.assertEqual(b''.join(chunks), ref_frame)
158 203
159 204
160 205 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
161 206 class TestCompressor_multi_compress_to_buffer_fuzzing(unittest.TestCase):
162 207 @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()),
163 208 min_size=1, max_size=1024),
164 209 threads=strategies.integers(min_value=1, max_value=8),
165 210 use_dict=strategies.booleans())
166 211 def test_data_equivalence(self, original, threads, use_dict):
167 212 kwargs = {}
168 213
169 214 # Use a content dictionary because it is cheap to create.
170 215 if use_dict:
171 216 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
172 217
173 218 cctx = zstd.ZstdCompressor(level=1,
174 219 write_checksum=True,
175 220 **kwargs)
176 221
177 222 result = cctx.multi_compress_to_buffer(original, threads=-1)
178 223
179 224 self.assertEqual(len(result), len(original))
180 225
181 226 # The frame produced via the batch APIs may not be bit identical to that
182 227 # produced by compress() because compression parameters are adjusted
183 228 # from the first input in batch mode. So the only thing we can do is
184 229 # verify the decompressed data matches the input.
185 230 dctx = zstd.ZstdDecompressor(**kwargs)
186 231
187 232 for i, frame in enumerate(result):
188 233 self.assertEqual(dctx.decompress(frame), original[i])
234
235
236 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
237 @make_cffi
238 class TestCompressor_chunker_fuzzing(unittest.TestCase):
239 @hypothesis.settings(
240 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
241 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
242 level=strategies.integers(min_value=1, max_value=5),
243 chunk_size=strategies.integers(
244 min_value=1,
245 max_value=32 * 1048576),
246 input_sizes=strategies.data())
247 def test_random_input_sizes(self, original, level, chunk_size, input_sizes):
248 cctx = zstd.ZstdCompressor(level=level)
249 chunker = cctx.chunker(chunk_size=chunk_size)
250
251 chunks = []
252 i = 0
253 while True:
254 input_size = input_sizes.draw(strategies.integers(1, 4096))
255 source = original[i:i + input_size]
256 if not source:
257 break
258
259 chunks.extend(chunker.compress(source))
260 i += input_size
261
262 chunks.extend(chunker.finish())
263
264 dctx = zstd.ZstdDecompressor()
265
266 self.assertEqual(dctx.decompress(b''.join(chunks),
267 max_output_size=len(original)),
268 original)
269
270 self.assertTrue(all(len(chunk) == chunk_size for chunk in chunks[:-1]))
271
272 @hypothesis.settings(
273 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
274 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
275 level=strategies.integers(min_value=1, max_value=5),
276 chunk_size=strategies.integers(
277 min_value=1,
278 max_value=32 * 1048576),
279 input_sizes=strategies.data(),
280 flushes=strategies.data())
281 def test_flush_block(self, original, level, chunk_size, input_sizes,
282 flushes):
283 cctx = zstd.ZstdCompressor(level=level)
284 chunker = cctx.chunker(chunk_size=chunk_size)
285
286 dctx = zstd.ZstdDecompressor()
287 dobj = dctx.decompressobj()
288
289 compressed_chunks = []
290 decompressed_chunks = []
291 i = 0
292 while True:
293 input_size = input_sizes.draw(strategies.integers(1, 4096))
294 source = original[i:i + input_size]
295 if not source:
296 break
297
298 i += input_size
299
300 chunks = list(chunker.compress(source))
301 compressed_chunks.extend(chunks)
302 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
303
304 if not flushes.draw(strategies.booleans()):
305 continue
306
307 chunks = list(chunker.flush())
308 compressed_chunks.extend(chunks)
309 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
310
311 self.assertEqual(b''.join(decompressed_chunks), original[0:i])
312
313 chunks = list(chunker.finish())
314 compressed_chunks.extend(chunks)
315 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
316
317 self.assertEqual(dctx.decompress(b''.join(compressed_chunks),
318 max_output_size=len(original)),
319 original)
320 self.assertEqual(b''.join(decompressed_chunks), original)
\ No newline at end of file
@@ -1,202 +1,194 @@
1 1 import sys
2 2 import unittest
3 3
4 4 import zstandard as zstd
5 5
6 6 from . common import (
7 7 make_cffi,
8 8 )
9 9
10 10
11 11 @make_cffi
12 12 class TestCompressionParameters(unittest.TestCase):
13 13 def test_bounds(self):
14 14 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MIN,
15 15 chain_log=zstd.CHAINLOG_MIN,
16 16 hash_log=zstd.HASHLOG_MIN,
17 17 search_log=zstd.SEARCHLOG_MIN,
18 18 min_match=zstd.SEARCHLENGTH_MIN + 1,
19 19 target_length=zstd.TARGETLENGTH_MIN,
20 20 compression_strategy=zstd.STRATEGY_FAST)
21 21
22 22 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MAX,
23 23 chain_log=zstd.CHAINLOG_MAX,
24 24 hash_log=zstd.HASHLOG_MAX,
25 25 search_log=zstd.SEARCHLOG_MAX,
26 26 min_match=zstd.SEARCHLENGTH_MAX - 1,
27 target_length=zstd.TARGETLENGTH_MAX,
27 28 compression_strategy=zstd.STRATEGY_BTULTRA)
28 29
29 30 def test_from_level(self):
30 31 p = zstd.ZstdCompressionParameters.from_level(1)
31 32 self.assertIsInstance(p, zstd.CompressionParameters)
32 33
33 34 self.assertEqual(p.window_log, 19)
34 35
35 36 p = zstd.ZstdCompressionParameters.from_level(-4)
36 37 self.assertEqual(p.window_log, 19)
37 self.assertEqual(p.compress_literals, 0)
38 38
39 39 def test_members(self):
40 40 p = zstd.ZstdCompressionParameters(window_log=10,
41 41 chain_log=6,
42 42 hash_log=7,
43 43 search_log=4,
44 44 min_match=5,
45 45 target_length=8,
46 46 compression_strategy=1)
47 47 self.assertEqual(p.window_log, 10)
48 48 self.assertEqual(p.chain_log, 6)
49 49 self.assertEqual(p.hash_log, 7)
50 50 self.assertEqual(p.search_log, 4)
51 51 self.assertEqual(p.min_match, 5)
52 52 self.assertEqual(p.target_length, 8)
53 53 self.assertEqual(p.compression_strategy, 1)
54 54
55 55 p = zstd.ZstdCompressionParameters(compression_level=2)
56 56 self.assertEqual(p.compression_level, 2)
57 57
58 58 p = zstd.ZstdCompressionParameters(threads=4)
59 59 self.assertEqual(p.threads, 4)
60 60
61 61 p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576,
62 62 overlap_size_log=6)
63 63 self.assertEqual(p.threads, 2)
64 64 self.assertEqual(p.job_size, 1048576)
65 65 self.assertEqual(p.overlap_size_log, 6)
66 66
67 p = zstd.ZstdCompressionParameters(compression_level=2)
68 self.assertEqual(p.compress_literals, 1)
69
70 p = zstd.ZstdCompressionParameters(compress_literals=False)
71 self.assertEqual(p.compress_literals, 0)
72
73 67 p = zstd.ZstdCompressionParameters(compression_level=-1)
74 68 self.assertEqual(p.compression_level, -1)
75 self.assertEqual(p.compress_literals, 0)
76 69
77 p = zstd.ZstdCompressionParameters(compression_level=-2, compress_literals=True)
70 p = zstd.ZstdCompressionParameters(compression_level=-2)
78 71 self.assertEqual(p.compression_level, -2)
79 self.assertEqual(p.compress_literals, 1)
80 72
81 73 p = zstd.ZstdCompressionParameters(force_max_window=True)
82 74 self.assertEqual(p.force_max_window, 1)
83 75
84 76 p = zstd.ZstdCompressionParameters(enable_ldm=True)
85 77 self.assertEqual(p.enable_ldm, 1)
86 78
87 79 p = zstd.ZstdCompressionParameters(ldm_hash_log=7)
88 80 self.assertEqual(p.ldm_hash_log, 7)
89 81
90 82 p = zstd.ZstdCompressionParameters(ldm_min_match=6)
91 83 self.assertEqual(p.ldm_min_match, 6)
92 84
93 85 p = zstd.ZstdCompressionParameters(ldm_bucket_size_log=7)
94 86 self.assertEqual(p.ldm_bucket_size_log, 7)
95 87
96 88 p = zstd.ZstdCompressionParameters(ldm_hash_every_log=8)
97 89 self.assertEqual(p.ldm_hash_every_log, 8)
98 90
99 91 def test_estimated_compression_context_size(self):
100 92 p = zstd.ZstdCompressionParameters(window_log=20,
101 93 chain_log=16,
102 94 hash_log=17,
103 95 search_log=1,
104 96 min_match=5,
105 97 target_length=16,
106 98 compression_strategy=zstd.STRATEGY_DFAST)
107 99
108 100 # 32-bit has slightly different values from 64-bit.
109 101 self.assertAlmostEqual(p.estimated_compression_context_size(), 1294072,
110 102 delta=250)
111 103
112 104
113 105 @make_cffi
114 106 class TestFrameParameters(unittest.TestCase):
115 107 def test_invalid_type(self):
116 108 with self.assertRaises(TypeError):
117 109 zstd.get_frame_parameters(None)
118 110
119 111 # Python 3 doesn't appear to convert unicode to Py_buffer.
120 112 if sys.version_info[0] >= 3:
121 113 with self.assertRaises(TypeError):
122 114 zstd.get_frame_parameters(u'foobarbaz')
123 115 else:
124 116 # CPython will convert unicode to Py_buffer. But CFFI won't.
125 117 if zstd.backend == 'cffi':
126 118 with self.assertRaises(TypeError):
127 119 zstd.get_frame_parameters(u'foobarbaz')
128 120 else:
129 121 with self.assertRaises(zstd.ZstdError):
130 122 zstd.get_frame_parameters(u'foobarbaz')
131 123
132 124 def test_invalid_input_sizes(self):
133 125 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
134 126 zstd.get_frame_parameters(b'')
135 127
136 128 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
137 129 zstd.get_frame_parameters(zstd.FRAME_HEADER)
138 130
139 131 def test_invalid_frame(self):
140 132 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
141 133 zstd.get_frame_parameters(b'foobarbaz')
142 134
143 135 def test_attributes(self):
144 136 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
145 137 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
146 138 self.assertEqual(params.window_size, 1024)
147 139 self.assertEqual(params.dict_id, 0)
148 140 self.assertFalse(params.has_checksum)
149 141
150 142 # Lowest 2 bits indicate the size of the dictionary ID field. Here, the dict id is 1 byte.
151 143 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
152 144 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
153 145 self.assertEqual(params.window_size, 1024)
154 146 self.assertEqual(params.dict_id, 255)
155 147 self.assertFalse(params.has_checksum)
156 148
157 149 # The 3rd lowest bit indicates whether a checksum is present.
158 150 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
159 151 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
160 152 self.assertEqual(params.window_size, 1024)
161 153 self.assertEqual(params.dict_id, 0)
162 154 self.assertTrue(params.has_checksum)
163 155
164 156 # Upper 2 bits indicate content size.
165 157 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00')
166 158 self.assertEqual(params.content_size, 511)
167 159 self.assertEqual(params.window_size, 1024)
168 160 self.assertEqual(params.dict_id, 0)
169 161 self.assertFalse(params.has_checksum)
170 162
171 163 # Window descriptor is 2nd byte after frame header.
172 164 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
173 165 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
174 166 self.assertEqual(params.window_size, 262144)
175 167 self.assertEqual(params.dict_id, 0)
176 168 self.assertFalse(params.has_checksum)
177 169
178 170 # Set multiple things.
179 171 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00')
180 172 self.assertEqual(params.content_size, 272)
181 173 self.assertEqual(params.window_size, 262144)
182 174 self.assertEqual(params.dict_id, 15)
183 175 self.assertTrue(params.has_checksum)
184 176
185 177 def test_input_types(self):
186 178 v = zstd.FRAME_HEADER + b'\x00\x00'
187 179
188 180 mutable_array = bytearray(len(v))
189 181 mutable_array[:] = v
190 182
191 183 sources = [
192 184 memoryview(v),
193 185 bytearray(v),
194 186 mutable_array,
195 187 ]
196 188
197 189 for source in sources:
198 190 params = zstd.get_frame_parameters(source)
199 191 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
200 192 self.assertEqual(params.window_size, 1024)
201 193 self.assertEqual(params.dict_id, 0)
202 194 self.assertFalse(params.has_checksum)
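
# A minimal sketch of inspecting a frame header with get_frame_parameters(),
# using only the APIs exercised above; a default compressor writes the content
# size, uses no dictionary and omits the checksum.
import zstandard as zstd

frame = zstd.ZstdCompressor().compress(b'data' * 64)
params = zstd.get_frame_parameters(frame)

assert params.content_size == 256   # content size is written by default
assert params.dict_id == 0          # no dictionary was used
assert not params.has_checksum      # checksums are off by default
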
@@ -1,75 +1,75 @@
1 1 import io
2 2 import os
3 3 import sys
4 4 import unittest
5 5
6 6 try:
7 7 import hypothesis
8 8 import hypothesis.strategies as strategies
9 9 except ImportError:
10 10 raise unittest.SkipTest('hypothesis not available')
11 11
12 12 import zstandard as zstd
13 13
14 14 from .common import (
15 15 make_cffi,
16 16 )
17 17
18 18
19 19 s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
20 20 max_value=zstd.WINDOWLOG_MAX)
21 21 s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN,
22 22 max_value=zstd.CHAINLOG_MAX)
23 23 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN,
24 24 max_value=zstd.HASHLOG_MAX)
25 25 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
26 26 max_value=zstd.SEARCHLOG_MAX)
27 27 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
28 28 max_value=zstd.SEARCHLENGTH_MAX)
29 29 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
30 max_value=2**32)
30 max_value=zstd.TARGETLENGTH_MAX)
31 31 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
32 32 zstd.STRATEGY_DFAST,
33 33 zstd.STRATEGY_GREEDY,
34 34 zstd.STRATEGY_LAZY,
35 35 zstd.STRATEGY_LAZY2,
36 36 zstd.STRATEGY_BTLAZY2,
37 37 zstd.STRATEGY_BTOPT,
38 38 zstd.STRATEGY_BTULTRA))
39 39
40 40
41 41 @make_cffi
42 42 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
43 43 class TestCompressionParametersHypothesis(unittest.TestCase):
44 44 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
45 45 s_searchlength, s_targetlength, s_strategy)
46 46 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
47 47 searchlength, targetlength, strategy):
48 48 zstd.ZstdCompressionParameters(window_log=windowlog,
49 49 chain_log=chainlog,
50 50 hash_log=hashlog,
51 51 search_log=searchlog,
52 52 min_match=searchlength,
53 53 target_length=targetlength,
54 54 compression_strategy=strategy)
55 55
56 56 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
57 57 s_searchlength, s_targetlength, s_strategy)
58 58 def test_estimated_compression_context_size(self, windowlog, chainlog,
59 59 hashlog, searchlog,
60 60 searchlength, targetlength,
61 61 strategy):
62 62 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
63 63 searchlength += 1
64 64 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
65 65 searchlength -= 1
66 66
67 67 p = zstd.ZstdCompressionParameters(window_log=windowlog,
68 68 chain_log=chainlog,
69 69 hash_log=hashlog,
70 70 search_log=searchlog,
71 71 min_match=searchlength,
72 72 target_length=targetlength,
73 73 compression_strategy=strategy)
74 74 size = p.estimated_compression_context_size()
75 75
@@ -1,1139 +1,1178 @@
1 1 import io
2 2 import os
3 3 import random
4 4 import struct
5 5 import sys
6 6 import unittest
7 7
8 8 import zstandard as zstd
9 9
10 10 from .common import (
11 11 generate_samples,
12 12 make_cffi,
13 13 OpCountingBytesIO,
14 14 )
15 15
16 16
17 17 if sys.version_info[0] >= 3:
18 18 next = lambda it: it.__next__()
19 19 else:
20 20 next = lambda it: it.next()
21 21
22 22
23 23 @make_cffi
24 24 class TestFrameHeaderSize(unittest.TestCase):
25 25 def test_empty(self):
26 26 with self.assertRaisesRegexp(
27 27 zstd.ZstdError, 'could not determine frame header size: Src size '
28 28 'is incorrect'):
29 29 zstd.frame_header_size(b'')
30 30
31 31 def test_too_small(self):
32 32 with self.assertRaisesRegexp(
33 33 zstd.ZstdError, 'could not determine frame header size: Src size '
34 34 'is incorrect'):
35 35 zstd.frame_header_size(b'foob')
36 36
37 37 def test_basic(self):
38 38 # It doesn't matter that it isn't a valid frame.
39 39 self.assertEqual(zstd.frame_header_size(b'long enough but no magic'), 6)
40 40
41 41
42 42 @make_cffi
43 43 class TestFrameContentSize(unittest.TestCase):
44 44 def test_empty(self):
45 45 with self.assertRaisesRegexp(zstd.ZstdError,
46 46 'error when determining content size'):
47 47 zstd.frame_content_size(b'')
48 48
49 49 def test_too_small(self):
50 50 with self.assertRaisesRegexp(zstd.ZstdError,
51 51 'error when determining content size'):
52 52 zstd.frame_content_size(b'foob')
53 53
54 54 def test_bad_frame(self):
55 55 with self.assertRaisesRegexp(zstd.ZstdError,
56 56 'error when determining content size'):
57 57 zstd.frame_content_size(b'invalid frame header')
58 58
59 59 def test_unknown(self):
60 60 cctx = zstd.ZstdCompressor(write_content_size=False)
61 61 frame = cctx.compress(b'foobar')
62 62
63 63 self.assertEqual(zstd.frame_content_size(frame), -1)
64 64
65 65 def test_empty_frame(self):
66 66 cctx = zstd.ZstdCompressor()
67 67 frame = cctx.compress(b'')
68 68
69 69 self.assertEqual(zstd.frame_content_size(frame), 0)
70 70
71 71 def test_basic(self):
72 72 cctx = zstd.ZstdCompressor()
73 73 frame = cctx.compress(b'foobar')
74 74
75 75 self.assertEqual(zstd.frame_content_size(frame), 6)
76 76
77 77
78 78 @make_cffi
79 79 class TestDecompressor(unittest.TestCase):
80 80 def test_memory_size(self):
81 81 dctx = zstd.ZstdDecompressor()
82 82
83 83 self.assertGreater(dctx.memory_size(), 100)
84 84
85 85
86 86 @make_cffi
87 87 class TestDecompressor_decompress(unittest.TestCase):
88 88 def test_empty_input(self):
89 89 dctx = zstd.ZstdDecompressor()
90 90
91 91 with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'):
92 92 dctx.decompress(b'')
93 93
94 94 def test_invalid_input(self):
95 95 dctx = zstd.ZstdDecompressor()
96 96
97 97 with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'):
98 98 dctx.decompress(b'foobar')
99 99
100 100 def test_input_types(self):
101 101 cctx = zstd.ZstdCompressor(level=1)
102 102 compressed = cctx.compress(b'foo')
103 103
104 104 mutable_array = bytearray(len(compressed))
105 105 mutable_array[:] = compressed
106 106
107 107 sources = [
108 108 memoryview(compressed),
109 109 bytearray(compressed),
110 110 mutable_array,
111 111 ]
112 112
113 113 dctx = zstd.ZstdDecompressor()
114 114 for source in sources:
115 115 self.assertEqual(dctx.decompress(source), b'foo')
116 116
117 117 def test_no_content_size_in_frame(self):
118 118 cctx = zstd.ZstdCompressor(write_content_size=False)
119 119 compressed = cctx.compress(b'foobar')
120 120
121 121 dctx = zstd.ZstdDecompressor()
122 122 with self.assertRaisesRegexp(zstd.ZstdError, 'could not determine content size in frame header'):
123 123 dctx.decompress(compressed)
124 124
125 125 def test_content_size_present(self):
126 126 cctx = zstd.ZstdCompressor()
127 127 compressed = cctx.compress(b'foobar')
128 128
129 129 dctx = zstd.ZstdDecompressor()
130 130 decompressed = dctx.decompress(compressed)
131 131 self.assertEqual(decompressed, b'foobar')
132 132
133 133 def test_empty_roundtrip(self):
134 134 cctx = zstd.ZstdCompressor()
135 135 compressed = cctx.compress(b'')
136 136
137 137 dctx = zstd.ZstdDecompressor()
138 138 decompressed = dctx.decompress(compressed)
139 139
140 140 self.assertEqual(decompressed, b'')
141 141
142 142 def test_max_output_size(self):
143 143 cctx = zstd.ZstdCompressor(write_content_size=False)
144 144 source = b'foobar' * 256
145 145 compressed = cctx.compress(source)
146 146
147 147 dctx = zstd.ZstdDecompressor()
148 148 # Will fit into buffer exactly the size of input.
149 149 decompressed = dctx.decompress(compressed, max_output_size=len(source))
150 150 self.assertEqual(decompressed, source)
151 151
152 152 # Input size - 1 fails
153 153 with self.assertRaisesRegexp(zstd.ZstdError,
154 154 'decompression error: did not decompress full frame'):
155 155 dctx.decompress(compressed, max_output_size=len(source) - 1)
156 156
157 157 # Input size + 1 works
158 158 decompressed = dctx.decompress(compressed, max_output_size=len(source) + 1)
159 159 self.assertEqual(decompressed, source)
160 160
161 161 # A much larger buffer works.
162 162 decompressed = dctx.decompress(compressed, max_output_size=len(source) * 64)
163 163 self.assertEqual(decompressed, source)
164 164
165 165 def test_stupidly_large_output_buffer(self):
166 166 cctx = zstd.ZstdCompressor(write_content_size=False)
167 167 compressed = cctx.compress(b'foobar' * 256)
168 168 dctx = zstd.ZstdDecompressor()
169 169
170 170 # Will get OverflowError on some Python distributions that can't
171 171 # handle really large integers.
172 172 with self.assertRaises((MemoryError, OverflowError)):
173 173 dctx.decompress(compressed, max_output_size=2**62)
174 174
175 175 def test_dictionary(self):
176 176 samples = []
177 177 for i in range(128):
178 178 samples.append(b'foo' * 64)
179 179 samples.append(b'bar' * 64)
180 180 samples.append(b'foobar' * 64)
181 181
182 182 d = zstd.train_dictionary(8192, samples)
183 183
184 184 orig = b'foobar' * 16384
185 185 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
186 186 compressed = cctx.compress(orig)
187 187
188 188 dctx = zstd.ZstdDecompressor(dict_data=d)
189 189 decompressed = dctx.decompress(compressed)
190 190
191 191 self.assertEqual(decompressed, orig)
192 192
193 193 def test_dictionary_multiple(self):
194 194 samples = []
195 195 for i in range(128):
196 196 samples.append(b'foo' * 64)
197 197 samples.append(b'bar' * 64)
198 198 samples.append(b'foobar' * 64)
199 199
200 200 d = zstd.train_dictionary(8192, samples)
201 201
202 202 sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192)
203 203 compressed = []
204 204 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
205 205 for source in sources:
206 206 compressed.append(cctx.compress(source))
207 207
208 208 dctx = zstd.ZstdDecompressor(dict_data=d)
209 209 for i in range(len(sources)):
210 210 decompressed = dctx.decompress(compressed[i])
211 211 self.assertEqual(decompressed, sources[i])
212 212
213 213 def test_max_window_size(self):
214 214 with open(__file__, 'rb') as fh:
215 215 source = fh.read()
216 216
217 217 # If we write a content size, the decompressor engages single pass
218 218 # mode and the window size doesn't come into play. So omit it here.
219 219 cctx = zstd.ZstdCompressor(write_content_size=False)
220 220 frame = cctx.compress(source)
221 221
222 222 dctx = zstd.ZstdDecompressor(max_window_size=1)
223 223
224 224 with self.assertRaisesRegexp(
225 225 zstd.ZstdError, 'decompression error: Frame requires too much memory'):
226 226 dctx.decompress(frame, max_output_size=len(source))
227 227
228 228
229 229 @make_cffi
230 230 class TestDecompressor_copy_stream(unittest.TestCase):
231 231 def test_no_read(self):
232 232 source = object()
233 233 dest = io.BytesIO()
234 234
235 235 dctx = zstd.ZstdDecompressor()
236 236 with self.assertRaises(ValueError):
237 237 dctx.copy_stream(source, dest)
238 238
239 239 def test_no_write(self):
240 240 source = io.BytesIO()
241 241 dest = object()
242 242
243 243 dctx = zstd.ZstdDecompressor()
244 244 with self.assertRaises(ValueError):
245 245 dctx.copy_stream(source, dest)
246 246
247 247 def test_empty(self):
248 248 source = io.BytesIO()
249 249 dest = io.BytesIO()
250 250
251 251 dctx = zstd.ZstdDecompressor()
252 252 # TODO should this raise an error?
253 253 r, w = dctx.copy_stream(source, dest)
254 254
255 255 self.assertEqual(r, 0)
256 256 self.assertEqual(w, 0)
257 257 self.assertEqual(dest.getvalue(), b'')
258 258
259 259 def test_large_data(self):
260 260 source = io.BytesIO()
261 261 for i in range(255):
262 262 source.write(struct.Struct('>B').pack(i) * 16384)
263 263 source.seek(0)
264 264
265 265 compressed = io.BytesIO()
266 266 cctx = zstd.ZstdCompressor()
267 267 cctx.copy_stream(source, compressed)
268 268
269 269 compressed.seek(0)
270 270 dest = io.BytesIO()
271 271 dctx = zstd.ZstdDecompressor()
272 272 r, w = dctx.copy_stream(compressed, dest)
273 273
274 274 self.assertEqual(r, len(compressed.getvalue()))
275 275 self.assertEqual(w, len(source.getvalue()))
276 276
277 277 def test_read_write_size(self):
278 278 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(
279 279 b'foobarfoobar'))
280 280
281 281 dest = OpCountingBytesIO()
282 282 dctx = zstd.ZstdDecompressor()
283 283 r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1)
284 284
285 285 self.assertEqual(r, len(source.getvalue()))
286 286 self.assertEqual(w, len(b'foobarfoobar'))
287 287 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
288 288 self.assertEqual(dest._write_count, len(dest.getvalue()))
289 289
290 290
291 291 @make_cffi
292 292 class TestDecompressor_stream_reader(unittest.TestCase):
293 293 def test_context_manager(self):
294 294 dctx = zstd.ZstdDecompressor()
295 295
296 reader = dctx.stream_reader(b'foo')
297 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
298 reader.read(1)
299
300 296 with dctx.stream_reader(b'foo') as reader:
301 297 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
302 298 with reader as reader2:
303 299 pass
304 300
305 301 def test_not_implemented(self):
306 302 dctx = zstd.ZstdDecompressor()
307 303
308 304 with dctx.stream_reader(b'foo') as reader:
309 305 with self.assertRaises(NotImplementedError):
310 306 reader.readline()
311 307
312 308 with self.assertRaises(NotImplementedError):
313 309 reader.readlines()
314 310
315 311 with self.assertRaises(NotImplementedError):
316 312 reader.readall()
317 313
318 314 with self.assertRaises(NotImplementedError):
319 315 iter(reader)
320 316
321 317 with self.assertRaises(NotImplementedError):
322 318 next(reader)
323 319
324 320 with self.assertRaises(io.UnsupportedOperation):
325 321 reader.write(b'foo')
326 322
327 323 with self.assertRaises(io.UnsupportedOperation):
328 324 reader.writelines([])
329 325
330 326 def test_constant_methods(self):
331 327 dctx = zstd.ZstdDecompressor()
332 328
333 329 with dctx.stream_reader(b'foo') as reader:
330 self.assertFalse(reader.closed)
334 331 self.assertTrue(reader.readable())
335 332 self.assertFalse(reader.writable())
336 333 self.assertTrue(reader.seekable())
337 334 self.assertFalse(reader.isatty())
335 self.assertFalse(reader.closed)
338 336 self.assertIsNone(reader.flush())
337 self.assertFalse(reader.closed)
338
339 self.assertTrue(reader.closed)
339 340
340 341 def test_read_closed(self):
341 342 dctx = zstd.ZstdDecompressor()
342 343
343 344 with dctx.stream_reader(b'foo') as reader:
344 345 reader.close()
346 self.assertTrue(reader.closed)
345 347 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
346 348 reader.read(1)
347 349
348 350 def test_bad_read_size(self):
349 351 dctx = zstd.ZstdDecompressor()
350 352
351 353 with dctx.stream_reader(b'foo') as reader:
352 354 with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'):
353 355 reader.read(-1)
354 356
355 357 with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'):
356 358 reader.read(0)
357 359
358 360 def test_read_buffer(self):
359 361 cctx = zstd.ZstdCompressor()
360 362
361 363 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
362 364 frame = cctx.compress(source)
363 365
364 366 dctx = zstd.ZstdDecompressor()
365 367
366 368 with dctx.stream_reader(frame) as reader:
367 369 self.assertEqual(reader.tell(), 0)
368 370
369 371 # We should get the entire frame in one read.
370 372 result = reader.read(8192)
371 373 self.assertEqual(result, source)
372 374 self.assertEqual(reader.tell(), len(source))
373 375
374 376 # Read after EOF should return empty bytes.
375 self.assertEqual(reader.read(), b'')
377 self.assertEqual(reader.read(1), b'')
376 378 self.assertEqual(reader.tell(), len(result))
377 379
378 self.assertTrue(reader.closed())
380 self.assertTrue(reader.closed)
379 381
380 382 def test_read_buffer_small_chunks(self):
381 383 cctx = zstd.ZstdCompressor()
382 384 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
383 385 frame = cctx.compress(source)
384 386
385 387 dctx = zstd.ZstdDecompressor()
386 388 chunks = []
387 389
388 390 with dctx.stream_reader(frame, read_size=1) as reader:
389 391 while True:
390 392 chunk = reader.read(1)
391 393 if not chunk:
392 394 break
393 395
394 396 chunks.append(chunk)
395 397 self.assertEqual(reader.tell(), sum(map(len, chunks)))
396 398
397 399 self.assertEqual(b''.join(chunks), source)
398 400
399 401 def test_read_stream(self):
400 402 cctx = zstd.ZstdCompressor()
401 403 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
402 404 frame = cctx.compress(source)
403 405
404 406 dctx = zstd.ZstdDecompressor()
405 407 with dctx.stream_reader(io.BytesIO(frame)) as reader:
406 408 self.assertEqual(reader.tell(), 0)
407 409
408 410 chunk = reader.read(8192)
409 411 self.assertEqual(chunk, source)
410 412 self.assertEqual(reader.tell(), len(source))
411 self.assertEqual(reader.read(), b'')
413 self.assertEqual(reader.read(1), b'')
412 414 self.assertEqual(reader.tell(), len(source))
415 self.assertFalse(reader.closed)
416
417 self.assertTrue(reader.closed)
413 418
414 419 def test_read_stream_small_chunks(self):
415 420 cctx = zstd.ZstdCompressor()
416 421 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
417 422 frame = cctx.compress(source)
418 423
419 424 dctx = zstd.ZstdDecompressor()
420 425 chunks = []
421 426
422 427 with dctx.stream_reader(io.BytesIO(frame), read_size=1) as reader:
423 428 while True:
424 429 chunk = reader.read(1)
425 430 if not chunk:
426 431 break
427 432
428 433 chunks.append(chunk)
429 434 self.assertEqual(reader.tell(), sum(map(len, chunks)))
430 435
431 436 self.assertEqual(b''.join(chunks), source)
432 437
433 438 def test_read_after_exit(self):
434 439 cctx = zstd.ZstdCompressor()
435 440 frame = cctx.compress(b'foo' * 60)
436 441
437 442 dctx = zstd.ZstdDecompressor()
438 443
439 444 with dctx.stream_reader(frame) as reader:
440 445 while reader.read(16):
441 446 pass
442 447
443 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
448 self.assertTrue(reader.closed)
449
450 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
444 451 reader.read(10)
445 452
446 453 def test_illegal_seeks(self):
447 454 cctx = zstd.ZstdCompressor()
448 455 frame = cctx.compress(b'foo' * 60)
449 456
450 457 dctx = zstd.ZstdDecompressor()
451 458
452 459 with dctx.stream_reader(frame) as reader:
453 460 with self.assertRaisesRegexp(ValueError,
454 461 'cannot seek to negative position'):
455 462 reader.seek(-1, os.SEEK_SET)
456 463
457 464 reader.read(1)
458 465
459 466 with self.assertRaisesRegexp(
460 467 ValueError, 'cannot seek zstd decompression stream backwards'):
461 468 reader.seek(0, os.SEEK_SET)
462 469
463 470 with self.assertRaisesRegexp(
464 471 ValueError, 'cannot seek zstd decompression stream backwards'):
465 472 reader.seek(-1, os.SEEK_CUR)
466 473
467 474 with self.assertRaisesRegexp(
468 475 ValueError,
469 476 'zstd decompression streams cannot be seeked with SEEK_END'):
470 477 reader.seek(0, os.SEEK_END)
471 478
472 479 reader.close()
473 480
474 481 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
475 482 reader.seek(4, os.SEEK_SET)
476 483
477 with self.assertRaisesRegexp(
478 zstd.ZstdError, 'seek\(\) must be called from an active context'):
484 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
479 485 reader.seek(0)
480 486
481 487 def test_seek(self):
482 488 source = b'foobar' * 60
483 489 cctx = zstd.ZstdCompressor()
484 490 frame = cctx.compress(source)
485 491
486 492 dctx = zstd.ZstdDecompressor()
487 493
488 494 with dctx.stream_reader(frame) as reader:
489 495 reader.seek(3)
490 496 self.assertEqual(reader.read(3), b'bar')
491 497
492 498 reader.seek(4, os.SEEK_CUR)
493 499 self.assertEqual(reader.read(2), b'ar')
494 500
501 def test_no_context_manager(self):
502 source = b'foobar' * 60
503 cctx = zstd.ZstdCompressor()
504 frame = cctx.compress(source)
505
506 dctx = zstd.ZstdDecompressor()
507 reader = dctx.stream_reader(frame)
508
509 self.assertEqual(reader.read(6), b'foobar')
510 self.assertEqual(reader.read(18), b'foobar' * 3)
511 self.assertFalse(reader.closed)
512
513 # Calling close prevents subsequent use.
514 reader.close()
515 self.assertTrue(reader.closed)
516
517 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
518 reader.read(6)
519
520 def test_read_after_error(self):
521 source = io.BytesIO(b'')
522 dctx = zstd.ZstdDecompressor()
523
524 reader = dctx.stream_reader(source)
525
526 with reader:
527 with self.assertRaises(TypeError):
528 reader.read()
529
530 with reader:
531 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
532 reader.read(100)
533
495 534
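# A minimal sketch of stream_reader() usage matching the tests above: read
# decompressed bytes through the context manager, using only the APIs
# exercised in this class.
import io
import zstandard as zstd

frame = zstd.ZstdCompressor().compress(b'foobar' * 60)
dctx = zstd.ZstdDecompressor()

with dctx.stream_reader(io.BytesIO(frame)) as reader:
    assert reader.read(8192) == b'foobar' * 60
    assert reader.read(1) == b''    # EOF

# Exiting the context manager closes the reader; further reads raise ValueError.
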
496 535 @make_cffi
497 536 class TestDecompressor_decompressobj(unittest.TestCase):
498 537 def test_simple(self):
499 538 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
500 539
501 540 dctx = zstd.ZstdDecompressor()
502 541 dobj = dctx.decompressobj()
503 542 self.assertEqual(dobj.decompress(data), b'foobar')
504 543
505 544 def test_input_types(self):
506 545 compressed = zstd.ZstdCompressor(level=1).compress(b'foo')
507 546
508 547 dctx = zstd.ZstdDecompressor()
509 548
510 549 mutable_array = bytearray(len(compressed))
511 550 mutable_array[:] = compressed
512 551
513 552 sources = [
514 553 memoryview(compressed),
515 554 bytearray(compressed),
516 555 mutable_array,
517 556 ]
518 557
519 558 for source in sources:
520 559 dobj = dctx.decompressobj()
521 560 self.assertEqual(dobj.decompress(source), b'foo')
522 561
523 562 def test_reuse(self):
524 563 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
525 564
526 565 dctx = zstd.ZstdDecompressor()
527 566 dobj = dctx.decompressobj()
528 567 dobj.decompress(data)
529 568
530 569 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'):
531 570 dobj.decompress(data)
532 571
533 572 def test_bad_write_size(self):
534 573 dctx = zstd.ZstdDecompressor()
535 574
536 575 with self.assertRaisesRegexp(ValueError, 'write_size must be positive'):
537 576 dctx.decompressobj(write_size=0)
538 577
539 578 def test_write_size(self):
540 579 source = b'foo' * 64 + b'bar' * 128
541 580 data = zstd.ZstdCompressor(level=1).compress(source)
542 581
543 582 dctx = zstd.ZstdDecompressor()
544 583
545 584 for i in range(128):
546 585 dobj = dctx.decompressobj(write_size=i + 1)
547 586 self.assertEqual(dobj.decompress(data), source)
548 587
549 588 def decompress_via_writer(data):
550 589 buffer = io.BytesIO()
551 590 dctx = zstd.ZstdDecompressor()
552 591 with dctx.stream_writer(buffer) as decompressor:
553 592 decompressor.write(data)
554 593 return buffer.getvalue()
555 594
556 595
557 596 @make_cffi
558 597 class TestDecompressor_stream_writer(unittest.TestCase):
559 598 def test_empty_roundtrip(self):
560 599 cctx = zstd.ZstdCompressor()
561 600 empty = cctx.compress(b'')
562 601 self.assertEqual(decompress_via_writer(empty), b'')
563 602
564 603 def test_input_types(self):
565 604 cctx = zstd.ZstdCompressor(level=1)
566 605 compressed = cctx.compress(b'foo')
567 606
568 607 mutable_array = bytearray(len(compressed))
569 608 mutable_array[:] = compressed
570 609
571 610 sources = [
572 611 memoryview(compressed),
573 612 bytearray(compressed),
574 613 mutable_array,
575 614 ]
576 615
577 616 dctx = zstd.ZstdDecompressor()
578 617 for source in sources:
579 618 buffer = io.BytesIO()
580 619 with dctx.stream_writer(buffer) as decompressor:
581 620 decompressor.write(source)
582 621
583 622 self.assertEqual(buffer.getvalue(), b'foo')
584 623
585 624 def test_large_roundtrip(self):
586 625 chunks = []
587 626 for i in range(255):
588 627 chunks.append(struct.Struct('>B').pack(i) * 16384)
589 628 orig = b''.join(chunks)
590 629 cctx = zstd.ZstdCompressor()
591 630 compressed = cctx.compress(orig)
592 631
593 632 self.assertEqual(decompress_via_writer(compressed), orig)
594 633
595 634 def test_multiple_calls(self):
596 635 chunks = []
597 636 for i in range(255):
598 637 for j in range(255):
599 638 chunks.append(struct.Struct('>B').pack(j) * i)
600 639
601 640 orig = b''.join(chunks)
602 641 cctx = zstd.ZstdCompressor()
603 642 compressed = cctx.compress(orig)
604 643
605 644 buffer = io.BytesIO()
606 645 dctx = zstd.ZstdDecompressor()
607 646 with dctx.stream_writer(buffer) as decompressor:
608 647 pos = 0
609 648 while pos < len(compressed):
610 649 pos2 = pos + 8192
611 650 decompressor.write(compressed[pos:pos2])
612 651 pos += 8192
613 652 self.assertEqual(buffer.getvalue(), orig)
614 653
615 654 def test_dictionary(self):
616 655 samples = []
617 656 for i in range(128):
618 657 samples.append(b'foo' * 64)
619 658 samples.append(b'bar' * 64)
620 659 samples.append(b'foobar' * 64)
621 660
622 661 d = zstd.train_dictionary(8192, samples)
623 662
624 663 orig = b'foobar' * 16384
625 664 buffer = io.BytesIO()
626 665 cctx = zstd.ZstdCompressor(dict_data=d)
627 666 with cctx.stream_writer(buffer) as compressor:
628 667 self.assertEqual(compressor.write(orig), 0)
629 668
630 669 compressed = buffer.getvalue()
631 670 buffer = io.BytesIO()
632 671
633 672 dctx = zstd.ZstdDecompressor(dict_data=d)
634 673 with dctx.stream_writer(buffer) as decompressor:
635 674 self.assertEqual(decompressor.write(compressed), len(orig))
636 675
637 676 self.assertEqual(buffer.getvalue(), orig)
638 677
639 678 def test_memory_size(self):
640 679 dctx = zstd.ZstdDecompressor()
641 680 buffer = io.BytesIO()
642 681 with dctx.stream_writer(buffer) as decompressor:
643 682 size = decompressor.memory_size()
644 683
645 684 self.assertGreater(size, 100000)
646 685
647 686 def test_write_size(self):
648 687 source = zstd.ZstdCompressor().compress(b'foobarfoobar')
649 688 dest = OpCountingBytesIO()
650 689 dctx = zstd.ZstdDecompressor()
651 690 with dctx.stream_writer(dest, write_size=1) as decompressor:
652 691 s = struct.Struct('>B')
653 692 for c in source:
654 693 if not isinstance(c, str):
655 694 c = s.pack(c)
656 695 decompressor.write(c)
657 696
658 697 self.assertEqual(dest.getvalue(), b'foobarfoobar')
659 698 self.assertEqual(dest._write_count, len(dest.getvalue()))
660 699
661 700
662 701 @make_cffi
663 702 class TestDecompressor_read_to_iter(unittest.TestCase):
664 703 def test_type_validation(self):
665 704 dctx = zstd.ZstdDecompressor()
666 705
667 706 # Object with read() works.
668 707 dctx.read_to_iter(io.BytesIO())
669 708
670 709 # Buffer protocol works.
671 710 dctx.read_to_iter(b'foobar')
672 711
673 712 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
674 713 b''.join(dctx.read_to_iter(True))
675 714
676 715 def test_empty_input(self):
677 716 dctx = zstd.ZstdDecompressor()
678 717
679 718 source = io.BytesIO()
680 719 it = dctx.read_to_iter(source)
681 720 # TODO this is arguably wrong. Should get an error about a missing frame.
682 721 with self.assertRaises(StopIteration):
683 722 next(it)
684 723
685 724 it = dctx.read_to_iter(b'')
686 725 with self.assertRaises(StopIteration):
687 726 next(it)
688 727
689 728 def test_invalid_input(self):
690 729 dctx = zstd.ZstdDecompressor()
691 730
692 731 source = io.BytesIO(b'foobar')
693 732 it = dctx.read_to_iter(source)
694 733 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
695 734 next(it)
696 735
697 736 it = dctx.read_to_iter(b'foobar')
698 737 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
699 738 next(it)
700 739
701 740 def test_empty_roundtrip(self):
702 741 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
703 742 empty = cctx.compress(b'')
704 743
705 744 source = io.BytesIO(empty)
706 745 source.seek(0)
707 746
708 747 dctx = zstd.ZstdDecompressor()
709 748 it = dctx.read_to_iter(source)
710 749
711 750 # No chunks should be emitted since there is no data.
712 751 with self.assertRaises(StopIteration):
713 752 next(it)
714 753
715 754 # Again for good measure.
716 755 with self.assertRaises(StopIteration):
717 756 next(it)
718 757
719 758 def test_skip_bytes_too_large(self):
720 759 dctx = zstd.ZstdDecompressor()
721 760
722 761 with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
723 762 b''.join(dctx.read_to_iter(b'', skip_bytes=1, read_size=1))
724 763
725 764 with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
726 765 b''.join(dctx.read_to_iter(b'foobar', skip_bytes=10))
727 766
728 767 def test_skip_bytes(self):
729 768 cctx = zstd.ZstdCompressor(write_content_size=False)
730 769 compressed = cctx.compress(b'foobar')
731 770
732 771 dctx = zstd.ZstdDecompressor()
733 772 output = b''.join(dctx.read_to_iter(b'hdr' + compressed, skip_bytes=3))
734 773 self.assertEqual(output, b'foobar')
735 774
736 775 def test_large_output(self):
737 776 source = io.BytesIO()
738 777 source.write(b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
739 778 source.write(b'o')
740 779 source.seek(0)
741 780
742 781 cctx = zstd.ZstdCompressor(level=1)
743 782 compressed = io.BytesIO(cctx.compress(source.getvalue()))
744 783 compressed.seek(0)
745 784
746 785 dctx = zstd.ZstdDecompressor()
747 786 it = dctx.read_to_iter(compressed)
748 787
749 788 chunks = []
750 789 chunks.append(next(it))
751 790 chunks.append(next(it))
752 791
753 792 with self.assertRaises(StopIteration):
754 793 next(it)
755 794
756 795 decompressed = b''.join(chunks)
757 796 self.assertEqual(decompressed, source.getvalue())
758 797
759 798 # And again with buffer protocol.
760 799 it = dctx.read_to_iter(compressed.getvalue())
761 800 chunks = []
762 801 chunks.append(next(it))
763 802 chunks.append(next(it))
764 803
765 804 with self.assertRaises(StopIteration):
766 805 next(it)
767 806
768 807 decompressed = b''.join(chunks)
769 808 self.assertEqual(decompressed, source.getvalue())
770 809
771 810 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
772 811 def test_large_input(self):
773 812 bytes = list(struct.Struct('>B').pack(i) for i in range(256))
774 813 compressed = io.BytesIO()
775 814 input_size = 0
776 815 cctx = zstd.ZstdCompressor(level=1)
777 816 with cctx.stream_writer(compressed) as compressor:
778 817 while True:
779 818 compressor.write(random.choice(bytes))
780 819 input_size += 1
781 820
782 821 have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
783 822 have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2
784 823 if have_compressed and have_raw:
785 824 break
786 825
787 826 compressed.seek(0)
788 827 self.assertGreater(len(compressed.getvalue()),
789 828 zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE)
790 829
791 830 dctx = zstd.ZstdDecompressor()
792 831 it = dctx.read_to_iter(compressed)
793 832
794 833 chunks = []
795 834 chunks.append(next(it))
796 835 chunks.append(next(it))
797 836 chunks.append(next(it))
798 837
799 838 with self.assertRaises(StopIteration):
800 839 next(it)
801 840
802 841 decompressed = b''.join(chunks)
803 842 self.assertEqual(len(decompressed), input_size)
804 843
805 844 # And again with buffer protocol.
806 845 it = dctx.read_to_iter(compressed.getvalue())
807 846
808 847 chunks = []
809 848 chunks.append(next(it))
810 849 chunks.append(next(it))
811 850 chunks.append(next(it))
812 851
813 852 with self.assertRaises(StopIteration):
814 853 next(it)
815 854
816 855 decompressed = b''.join(chunks)
817 856 self.assertEqual(len(decompressed), input_size)
818 857
819 858 def test_interesting(self):
820 859 # Found this edge case via fuzzing.
821 860 cctx = zstd.ZstdCompressor(level=1)
822 861
823 862 source = io.BytesIO()
824 863
825 864 compressed = io.BytesIO()
826 865 with cctx.stream_writer(compressed) as compressor:
827 866 for i in range(256):
828 867 chunk = b'\0' * 1024
829 868 compressor.write(chunk)
830 869 source.write(chunk)
831 870
832 871 dctx = zstd.ZstdDecompressor()
833 872
834 873 simple = dctx.decompress(compressed.getvalue(),
835 874 max_output_size=len(source.getvalue()))
836 875 self.assertEqual(simple, source.getvalue())
837 876
838 877 compressed.seek(0)
839 878 streamed = b''.join(dctx.read_to_iter(compressed))
840 879 self.assertEqual(streamed, source.getvalue())
841 880
842 881 def test_read_write_size(self):
843 882 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar'))
844 883 dctx = zstd.ZstdDecompressor()
845 884 for chunk in dctx.read_to_iter(source, read_size=1, write_size=1):
846 885 self.assertEqual(len(chunk), 1)
847 886
848 887 self.assertEqual(source._read_count, len(source.getvalue()))
849 888
850 889 def test_magic_less(self):
851 890 params = zstd.CompressionParameters.from_level(
852 891 1, format=zstd.FORMAT_ZSTD1_MAGICLESS)
853 892 cctx = zstd.ZstdCompressor(compression_params=params)
854 893 frame = cctx.compress(b'foobar')
855 894
856 895 self.assertNotEqual(frame[0:4], b'\x28\xb5\x2f\xfd')
857 896
858 897 dctx = zstd.ZstdDecompressor()
859 898 with self.assertRaisesRegexp(
860 899 zstd.ZstdError, 'error determining content size from frame header'):
861 900 dctx.decompress(frame)
862 901
863 902 dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS)
864 903 res = b''.join(dctx.read_to_iter(frame))
865 904 self.assertEqual(res, b'foobar')
866 905
867 906
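# A minimal sketch of read_to_iter(): it lazily yields decompressed chunks
# from a file object or buffer, using only the APIs exercised above.
import io
import zstandard as zstd

frame = zstd.ZstdCompressor(level=1).compress(b'foobar' * 1024)
dctx = zstd.ZstdDecompressor()

output = []
for chunk in dctx.read_to_iter(io.BytesIO(frame)):
    output.append(chunk)

assert b''.join(output) == b'foobar' * 1024
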
868 907 @make_cffi
869 908 class TestDecompressor_content_dict_chain(unittest.TestCase):
870 909 def test_bad_inputs_simple(self):
871 910 dctx = zstd.ZstdDecompressor()
872 911
873 912 with self.assertRaises(TypeError):
874 913 dctx.decompress_content_dict_chain(b'foo')
875 914
876 915 with self.assertRaises(TypeError):
877 916 dctx.decompress_content_dict_chain((b'foo', b'bar'))
878 917
879 918 with self.assertRaisesRegexp(ValueError, 'empty input chain'):
880 919 dctx.decompress_content_dict_chain([])
881 920
882 921 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
883 922 dctx.decompress_content_dict_chain([u'foo'])
884 923
885 924 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
886 925 dctx.decompress_content_dict_chain([True])
887 926
888 927 with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'):
889 928 dctx.decompress_content_dict_chain([zstd.FRAME_HEADER])
890 929
891 930 with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'):
892 931 dctx.decompress_content_dict_chain([b'foo' * 8])
893 932
894 933 no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64)
895 934
896 935 with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'):
897 936 dctx.decompress_content_dict_chain([no_size])
898 937
899 938 # Corrupt first frame.
900 939 frame = zstd.ZstdCompressor().compress(b'foo' * 64)
901 940 frame = frame[0:12] + frame[15:]
902 941 with self.assertRaisesRegexp(zstd.ZstdError,
903 942 'chunk 0 did not decompress full frame'):
904 943 dctx.decompress_content_dict_chain([frame])
905 944
906 945 def test_bad_subsequent_input(self):
907 946 initial = zstd.ZstdCompressor().compress(b'foo' * 64)
908 947
909 948 dctx = zstd.ZstdDecompressor()
910 949
911 950 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
912 951 dctx.decompress_content_dict_chain([initial, u'foo'])
913 952
914 953 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
915 954 dctx.decompress_content_dict_chain([initial, None])
916 955
917 956 with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'):
918 957 dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER])
919 958
920 959 with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'):
921 960 dctx.decompress_content_dict_chain([initial, b'foo' * 8])
922 961
923 962 no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64)
924 963
925 964 with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'):
926 965 dctx.decompress_content_dict_chain([initial, no_size])
927 966
928 967 # Corrupt second frame.
929 968 cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b'foo' * 64))
930 969 frame = cctx.compress(b'bar' * 64)
931 970 frame = frame[0:12] + frame[15:]
932 971
933 972 with self.assertRaisesRegexp(zstd.ZstdError, 'chunk 1 did not decompress full frame'):
934 973 dctx.decompress_content_dict_chain([initial, frame])
935 974
936 975 def test_simple(self):
937 976 original = [
938 977 b'foo' * 64,
939 978 b'foobar' * 64,
940 979 b'baz' * 64,
941 980 b'foobaz' * 64,
942 981 b'foobarbaz' * 64,
943 982 ]
944 983
945 984 chunks = []
946 985 chunks.append(zstd.ZstdCompressor().compress(original[0]))
947 986 for i, chunk in enumerate(original[1:]):
948 987 d = zstd.ZstdCompressionDict(original[i])
949 988 cctx = zstd.ZstdCompressor(dict_data=d)
950 989 chunks.append(cctx.compress(chunk))
951 990
952 991 for i in range(1, len(original)):
953 992 chain = chunks[0:i]
954 993 expected = original[i - 1]
955 994 dctx = zstd.ZstdDecompressor()
956 995 decompressed = dctx.decompress_content_dict_chain(chain)
957 996 self.assertEqual(decompressed, expected)
958 997
959 998
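# A minimal sketch of decompress_content_dict_chain(), following test_simple
# above: each frame is compressed with the previous uncompressed chunk as its
# dictionary, and replaying the chain recovers the last chunk.
import zstandard as zstd

versions = [b'foo' * 64, b'foobar' * 64, b'foobarbaz' * 64]

frames = [zstd.ZstdCompressor().compress(versions[0])]
for prev, cur in zip(versions, versions[1:]):
    d = zstd.ZstdCompressionDict(prev)
    frames.append(zstd.ZstdCompressor(dict_data=d).compress(cur))

dctx = zstd.ZstdDecompressor()
assert dctx.decompress_content_dict_chain(frames) == versions[-1]
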
960 999 # TODO enable for CFFI
961 1000 class TestDecompressor_multi_decompress_to_buffer(unittest.TestCase):
962 1001 def test_invalid_inputs(self):
963 1002 dctx = zstd.ZstdDecompressor()
964 1003
965 1004 with self.assertRaises(TypeError):
966 1005 dctx.multi_decompress_to_buffer(True)
967 1006
968 1007 with self.assertRaises(TypeError):
969 1008 dctx.multi_decompress_to_buffer((1, 2))
970 1009
971 1010 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
972 1011 dctx.multi_decompress_to_buffer([u'foo'])
973 1012
974 1013 with self.assertRaisesRegexp(ValueError, 'could not determine decompressed size of item 0'):
975 1014 dctx.multi_decompress_to_buffer([b'foobarbaz'])
976 1015
977 1016 def test_list_input(self):
978 1017 cctx = zstd.ZstdCompressor()
979 1018
980 1019 original = [b'foo' * 4, b'bar' * 6]
981 1020 frames = [cctx.compress(d) for d in original]
982 1021
983 1022 dctx = zstd.ZstdDecompressor()
984 1023 result = dctx.multi_decompress_to_buffer(frames)
985 1024
986 1025 self.assertEqual(len(result), len(frames))
987 1026 self.assertEqual(result.size(), sum(map(len, original)))
988 1027
989 1028 for i, data in enumerate(original):
990 1029 self.assertEqual(result[i].tobytes(), data)
991 1030
992 1031 self.assertEqual(result[0].offset, 0)
993 1032 self.assertEqual(len(result[0]), 12)
994 1033 self.assertEqual(result[1].offset, 12)
995 1034 self.assertEqual(len(result[1]), 18)
996 1035
997 1036 def test_list_input_frame_sizes(self):
998 1037 cctx = zstd.ZstdCompressor()
999 1038
1000 1039 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
1001 1040 frames = [cctx.compress(d) for d in original]
1002 1041 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
1003 1042
1004 1043 dctx = zstd.ZstdDecompressor()
1005 1044 result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
1006 1045
1007 1046 self.assertEqual(len(result), len(frames))
1008 1047 self.assertEqual(result.size(), sum(map(len, original)))
1009 1048
1010 1049 for i, data in enumerate(original):
1011 1050 self.assertEqual(result[i].tobytes(), data)
1012 1051
1013 1052 def test_buffer_with_segments_input(self):
1014 1053 cctx = zstd.ZstdCompressor()
1015 1054
1016 1055 original = [b'foo' * 4, b'bar' * 6]
1017 1056 frames = [cctx.compress(d) for d in original]
1018 1057
1019 1058 dctx = zstd.ZstdDecompressor()
1020 1059
1021 1060 segments = struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]), len(frames[1]))
1022 1061 b = zstd.BufferWithSegments(b''.join(frames), segments)
1023 1062
1024 1063 result = dctx.multi_decompress_to_buffer(b)
1025 1064
1026 1065 self.assertEqual(len(result), len(frames))
1027 1066 self.assertEqual(result[0].offset, 0)
1028 1067 self.assertEqual(len(result[0]), 12)
1029 1068 self.assertEqual(result[1].offset, 12)
1030 1069 self.assertEqual(len(result[1]), 18)
1031 1070
1032 1071 def test_buffer_with_segments_sizes(self):
1033 1072 cctx = zstd.ZstdCompressor(write_content_size=False)
1034 1073 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
1035 1074 frames = [cctx.compress(d) for d in original]
1036 1075 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
1037 1076
1038 1077 segments = struct.pack('=QQQQQQ', 0, len(frames[0]),
1039 1078 len(frames[0]), len(frames[1]),
1040 1079 len(frames[0]) + len(frames[1]), len(frames[2]))
1041 1080 b = zstd.BufferWithSegments(b''.join(frames), segments)
1042 1081
1043 1082 dctx = zstd.ZstdDecompressor()
1044 1083 result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes)
1045 1084
1046 1085 self.assertEqual(len(result), len(frames))
1047 1086 self.assertEqual(result.size(), sum(map(len, original)))
1048 1087
1049 1088 for i, data in enumerate(original):
1050 1089 self.assertEqual(result[i].tobytes(), data)
1051 1090
1052 1091 def test_buffer_with_segments_collection_input(self):
1053 1092 cctx = zstd.ZstdCompressor()
1054 1093
1055 1094 original = [
1056 1095 b'foo0' * 2,
1057 1096 b'foo1' * 3,
1058 1097 b'foo2' * 4,
1059 1098 b'foo3' * 5,
1060 1099 b'foo4' * 6,
1061 1100 ]
1062 1101
1063 1102 frames = cctx.multi_compress_to_buffer(original)
1064 1103
1065 1104 # Check round trip.
1066 1105 dctx = zstd.ZstdDecompressor()
1067 1106 decompressed = dctx.multi_decompress_to_buffer(frames, threads=3)
1068 1107
1069 1108 self.assertEqual(len(decompressed), len(original))
1070 1109
1071 1110 for i, data in enumerate(original):
1072 1111 self.assertEqual(data, decompressed[i].tobytes())
1073 1112
1074 1113 # And a manual mode.
1075 1114 b = b''.join([frames[0].tobytes(), frames[1].tobytes()])
1076 1115 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
1077 1116 0, len(frames[0]),
1078 1117 len(frames[0]), len(frames[1])))
1079 1118
1080 1119 b = b''.join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()])
1081 1120 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
1082 1121 0, len(frames[2]),
1083 1122 len(frames[2]), len(frames[3]),
1084 1123 len(frames[2]) + len(frames[3]), len(frames[4])))
1085 1124
1086 1125 c = zstd.BufferWithSegmentsCollection(b1, b2)
1087 1126
1088 1127 dctx = zstd.ZstdDecompressor()
1089 1128 decompressed = dctx.multi_decompress_to_buffer(c)
1090 1129
1091 1130 self.assertEqual(len(decompressed), 5)
1092 1131 for i in range(5):
1093 1132 self.assertEqual(decompressed[i].tobytes(), original[i])
1094 1133
1095 1134 def test_dict(self):
1096 1135 d = zstd.train_dictionary(16384, generate_samples(), k=64, d=16)
1097 1136
1098 1137 cctx = zstd.ZstdCompressor(dict_data=d, level=1)
1099 1138 frames = [cctx.compress(s) for s in generate_samples()]
1100 1139
1101 1140 dctx = zstd.ZstdDecompressor(dict_data=d)
1102 1141 result = dctx.multi_decompress_to_buffer(frames)
1103 1142 self.assertEqual([o.tobytes() for o in result], generate_samples())
1104 1143
1105 1144 def test_multiple_threads(self):
1106 1145 cctx = zstd.ZstdCompressor()
1107 1146
1108 1147 frames = []
1109 1148 frames.extend(cctx.compress(b'x' * 64) for i in range(256))
1110 1149 frames.extend(cctx.compress(b'y' * 64) for i in range(256))
1111 1150
1112 1151 dctx = zstd.ZstdDecompressor()
1113 1152 result = dctx.multi_decompress_to_buffer(frames, threads=-1)
1114 1153
1115 1154 self.assertEqual(len(result), len(frames))
1116 1155 self.assertEqual(result.size(), 2 * 64 * 256)
1117 1156 self.assertEqual(result[0].tobytes(), b'x' * 64)
1118 1157 self.assertEqual(result[256].tobytes(), b'y' * 64)
1119 1158
1120 1159 def test_item_failure(self):
1121 1160 cctx = zstd.ZstdCompressor()
1122 1161 frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)]
1123 1162
1124 1163 frames[1] = frames[1][0:15] + b'extra' + frames[1][15:]
1125 1164
1126 1165 dctx = zstd.ZstdDecompressor()
1127 1166
1128 1167 with self.assertRaisesRegexp(zstd.ZstdError,
1129 1168 'error decompressing item 1: ('
1130 1169 'Corrupted block|'
1131 1170 'Destination buffer is too small)'):
1132 1171 dctx.multi_decompress_to_buffer(frames)
1133 1172
1134 1173 with self.assertRaisesRegexp(zstd.ZstdError,
1135 1174 'error decompressing item 1: ('
1136 1175 'Corrupted block|'
1137 1176 'Destination buffer is too small)'):
1138 1177 dctx.multi_decompress_to_buffer(frames, threads=2)
1139 1178
@@ -1,59 +1,64 @@
1 1 from __future__ import unicode_literals
2 2
3 3 import unittest
4 4
5 5 import zstandard as zstd
6 6
7 7 from . common import (
8 8 make_cffi,
9 9 )
10 10
11 11
12 12 @make_cffi
13 13 class TestModuleAttributes(unittest.TestCase):
14 14 def test_version(self):
15 self.assertEqual(zstd.ZSTD_VERSION, (1, 3, 4))
15 self.assertEqual(zstd.ZSTD_VERSION, (1, 3, 6))
16
17 self.assertEqual(zstd.__version__, '0.10.1')
16 18
17 19 def test_constants(self):
18 20 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
19 21 self.assertEqual(zstd.FRAME_HEADER, b'\x28\xb5\x2f\xfd')
20 22
21 23 def test_hasattr(self):
22 24 attrs = (
23 25 'CONTENTSIZE_UNKNOWN',
24 26 'CONTENTSIZE_ERROR',
25 27 'COMPRESSION_RECOMMENDED_INPUT_SIZE',
26 28 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE',
27 29 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
28 30 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
29 31 'MAGIC_NUMBER',
32 'BLOCKSIZELOG_MAX',
33 'BLOCKSIZE_MAX',
30 34 'WINDOWLOG_MIN',
31 35 'WINDOWLOG_MAX',
32 36 'CHAINLOG_MIN',
33 37 'CHAINLOG_MAX',
34 38 'HASHLOG_MIN',
35 39 'HASHLOG_MAX',
36 40 'HASHLOG3_MAX',
37 41 'SEARCHLOG_MIN',
38 42 'SEARCHLOG_MAX',
39 43 'SEARCHLENGTH_MIN',
40 44 'SEARCHLENGTH_MAX',
41 45 'TARGETLENGTH_MIN',
46 'TARGETLENGTH_MAX',
42 47 'LDM_MINMATCH_MIN',
43 48 'LDM_MINMATCH_MAX',
44 49 'LDM_BUCKETSIZELOG_MAX',
45 50 'STRATEGY_FAST',
46 51 'STRATEGY_DFAST',
47 52 'STRATEGY_GREEDY',
48 53 'STRATEGY_LAZY',
49 54 'STRATEGY_LAZY2',
50 55 'STRATEGY_BTLAZY2',
51 56 'STRATEGY_BTOPT',
52 57 'STRATEGY_BTULTRA',
53 58 'DICT_TYPE_AUTO',
54 59 'DICT_TYPE_RAWCONTENT',
55 60 'DICT_TYPE_FULLDICT',
56 61 )
57 62
58 63 for a in attrs:
59 64 self.assertTrue(hasattr(zstd, a), a)
@@ -1,87 +1,88 @@
1 1 import struct
2 2 import sys
3 3 import unittest
4 4
5 5 import zstandard as zstd
6 6
7 7 from . common import (
8 8 generate_samples,
9 9 make_cffi,
10 10 )
11 11
12 12 if sys.version_info[0] >= 3:
13 13 int_type = int
14 14 else:
15 15 int_type = long
16 16
17 17
18 18 @make_cffi
19 19 class TestTrainDictionary(unittest.TestCase):
20 20 def test_no_args(self):
21 21 with self.assertRaises(TypeError):
22 22 zstd.train_dictionary()
23 23
24 24 def test_bad_args(self):
25 25 with self.assertRaises(TypeError):
26 26 zstd.train_dictionary(8192, u'foo')
27 27
28 28 with self.assertRaises(ValueError):
29 29 zstd.train_dictionary(8192, [u'foo'])
30 30
31 31 def test_no_params(self):
32 32 d = zstd.train_dictionary(8192, generate_samples())
33 33 self.assertIsInstance(d.dict_id(), int_type)
34 34
35 35 # The dictionary ID may be different across platforms.
36 36 expected = b'\x37\xa4\x30\xec' + struct.pack('<I', d.dict_id())
37 37
38 38 data = d.as_bytes()
39 39 self.assertEqual(data[0:8], expected)
40 40
41 41 def test_basic(self):
42 42 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
43 43 self.assertIsInstance(d.dict_id(), int_type)
44 44
45 45 data = d.as_bytes()
46 46 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
47 47
48 48 self.assertEqual(d.k, 64)
49 49 self.assertEqual(d.d, 16)
50 50
51 51 def test_set_dict_id(self):
52 52 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16,
53 53 dict_id=42)
54 54 self.assertEqual(d.dict_id(), 42)
55 55
56 56 def test_optimize(self):
57 57 d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1,
58 58 d=16)
59 59
60 self.assertEqual(d.k, 50)
60 # This varies by platform.
61 self.assertIn(d.k, (50, 2000))
61 62 self.assertEqual(d.d, 16)
62 63
63 64 @make_cffi
64 65 class TestCompressionDict(unittest.TestCase):
65 66 def test_bad_mode(self):
66 67 with self.assertRaisesRegexp(ValueError, 'invalid dictionary load mode'):
67 68 zstd.ZstdCompressionDict(b'foo', dict_type=42)
68 69
69 70 def test_bad_precompute_compress(self):
70 71 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
71 72
72 73 with self.assertRaisesRegexp(ValueError, 'must specify one of level or '):
73 74 d.precompute_compress()
74 75
75 76 with self.assertRaisesRegexp(ValueError, 'must only specify one of level or '):
76 77 d.precompute_compress(level=3,
77 78 compression_params=zstd.CompressionParameters())
78 79
79 80 def test_precompute_compress_rawcontent(self):
80 81 d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
81 82 dict_type=zstd.DICT_TYPE_RAWCONTENT)
82 83 d.precompute_compress(level=1)
83 84
84 85 d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
85 86 dict_type=zstd.DICT_TYPE_FULLDICT)
86 87 with self.assertRaisesRegexp(zstd.ZstdError, 'unable to precompute dictionary'):
87 88 d.precompute_compress(level=1)
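
# A minimal sketch of training and using a dictionary, mirroring the samples
# used by the tests above; real training wants many representative samples.
import zstandard as zstd

samples = []
for i in range(128):
    samples.append(b'foo' * 64)
    samples.append(b'bar' * 64)
    samples.append(b'foobar' * 64)

d = zstd.train_dictionary(8192, samples)

cctx = zstd.ZstdCompressor(level=1, dict_data=d)
dctx = zstd.ZstdDecompressor(dict_data=d)

frame = cctx.compress(b'foobar' * 256)
assert dctx.decompress(frame) == b'foobar' * 256
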
@@ -1,62 +1,65 @@
1 1 # Copyright (c) 2017-present, Gregory Szorc
2 2 # All rights reserved.
3 3 #
4 4 # This software may be modified and distributed under the terms
5 5 # of the BSD license. See the LICENSE file for details.
6 6
7 7 """Python interface to the Zstandard (zstd) compression library."""
8 8
9 9 from __future__ import absolute_import, unicode_literals
10 10
11 11 # This module serves 2 roles:
12 12 #
13 13 # 1) Export the C or CFFI "backend" through a central module.
14 14 # 2) Implement additional functionality built on top of C or CFFI backend.
15 15
16 16 import os
17 17 import platform
18 18
19 19 # Some Python implementations don't support C extensions. That's why we have
20 20 # a CFFI implementation in the first place. The code here imports one of our
21 21 # "backends" then re-exports the symbols from this module. For convenience,
22 22 # we support falling back to the CFFI backend if the C extension can't be
23 23 # imported. But for performance reasons, we only do this on unknown Python
24 24 # implementations. Notably, for CPython we require the C extension by default.
25 25 # Because someone will inevitably want special behavior, the behavior is
26 26 # configurable via an environment variable. A potentially better way to handle
27 27 # this is to import a special ``__importpolicy__`` module or something
28 28 # defining a variable and `setup.py` could write the file with whatever
29 29 # policy was specified at build time. Until someone needs it, we go with
30 30 # the hacky but simple environment variable approach.
31 31 _module_policy = os.environ.get('PYTHON_ZSTANDARD_IMPORT_POLICY', 'default')
32 32
33 33 if _module_policy == 'default':
34 34 if platform.python_implementation() in ('CPython',):
35 35 from zstd import *
36 36 backend = 'cext'
37 37 elif platform.python_implementation() in ('PyPy',):
38 38 from zstd_cffi import *
39 39 backend = 'cffi'
40 40 else:
41 41 try:
42 42 from zstd import *
43 43 backend = 'cext'
44 44 except ImportError:
45 45 from zstd_cffi import *
46 46 backend = 'cffi'
47 47 elif _module_policy == 'cffi_fallback':
48 48 try:
49 49 from zstd import *
50 50 backend = 'cext'
51 51 except ImportError:
52 52 from zstd_cffi import *
53 53 backend = 'cffi'
54 54 elif _module_policy == 'cext':
55 55 from zstd import *
56 56 backend = 'cext'
57 57 elif _module_policy == 'cffi':
58 58 from zstd_cffi import *
59 59 backend = 'cffi'
60 60 else:
61 61 raise ImportError('unknown module import policy: %s; use default, cffi_fallback, '
62 62 'cext, or cffi' % _module_policy)
63
64 # Keep this in sync with python-zstandard.h.
65 __version__ = '0.10.1'
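Because the import policy above is read from the environment at import time, backend selection has to happen before the first import. A small sketch, assuming the package is importable as zstandard:

import os

# Must be set before the module is imported for the policy to take effect.
os.environ['PYTHON_ZSTANDARD_IMPORT_POLICY'] = 'cffi_fallback'

import zstandard

print(zstandard.backend)      # 'cext' if the C extension loaded, otherwise 'cffi'
print(zstandard.__version__)  # '0.10.1', kept in sync with python-zstandard.h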
@@ -1,342 +1,344 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 /* A Python C extension for Zstandard. */
10 10
11 11 #if defined(_WIN32)
12 12 #define WIN32_LEAN_AND_MEAN
13 13 #include <Windows.h>
14 14 #elif defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
15 15 #include <sys/types.h>
16 16 #include <sys/sysctl.h>
17 17 #endif
18 18
19 19 #include "python-zstandard.h"
20 20
21 21 PyObject *ZstdError;
22 22
23 23 PyDoc_STRVAR(estimate_decompression_context_size__doc__,
24 24 "estimate_decompression_context_size()\n"
25 25 "\n"
26 26 "Estimate the amount of memory allocated to a decompression context.\n"
27 27 );
28 28
29 29 static PyObject* estimate_decompression_context_size(PyObject* self) {
30 30 return PyLong_FromSize_t(ZSTD_estimateDCtxSize());
31 31 }
32 32
33 33 PyDoc_STRVAR(frame_content_size__doc__,
34 34 "frame_content_size(data)\n"
35 35 "\n"
36 36 "Obtain the decompressed size of a frame."
37 37 );
38 38
39 39 static PyObject* frame_content_size(PyObject* self, PyObject* args, PyObject* kwargs) {
40 40 static char* kwlist[] = {
41 41 "source",
42 42 NULL
43 43 };
44 44
45 45 Py_buffer source;
46 46 PyObject* result = NULL;
47 47 unsigned long long size;
48 48
49 49 #if PY_MAJOR_VERSION >= 3
50 50 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_content_size",
51 51 #else
52 52 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_content_size",
53 53 #endif
54 54 kwlist, &source)) {
55 55 return NULL;
56 56 }
57 57
58 58 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
59 59 PyErr_SetString(PyExc_ValueError,
60 60 "data buffer should be contiguous and have at most one dimension");
61 61 goto finally;
62 62 }
63 63
64 64 size = ZSTD_getFrameContentSize(source.buf, source.len);
65 65
66 66 if (size == ZSTD_CONTENTSIZE_ERROR) {
67 67 PyErr_SetString(ZstdError, "error when determining content size");
68 68 }
69 69 else if (size == ZSTD_CONTENTSIZE_UNKNOWN) {
70 70 result = PyLong_FromLong(-1);
71 71 }
72 72 else {
73 73 result = PyLong_FromUnsignedLongLong(size);
74 74 }
75 75
76 76 finally:
77 77 PyBuffer_Release(&source);
78 78
79 79 return result;
80 80 }
81 81
82 82 PyDoc_STRVAR(frame_header_size__doc__,
83 83 "frame_header_size(data)\n"
84 84 "\n"
85 85 "Obtain the size of a frame header.\n"
86 86 );
87 87
88 88 static PyObject* frame_header_size(PyObject* self, PyObject* args, PyObject* kwargs) {
89 89 static char* kwlist[] = {
90 90 "source",
91 91 NULL
92 92 };
93 93
94 94 Py_buffer source;
95 95 PyObject* result = NULL;
96 96 size_t zresult;
97 97
98 98 #if PY_MAJOR_VERSION >= 3
99 99 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_header_size",
100 100 #else
101 101 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_header_size",
102 102 #endif
103 103 kwlist, &source)) {
104 104 return NULL;
105 105 }
106 106
107 107 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
108 108 PyErr_SetString(PyExc_ValueError,
109 109 "data buffer should be contiguous and have at most one dimension");
110 110 goto finally;
111 111 }
112 112
113 113 zresult = ZSTD_frameHeaderSize(source.buf, source.len);
114 114 if (ZSTD_isError(zresult)) {
115 115 PyErr_Format(ZstdError, "could not determine frame header size: %s",
116 116 ZSTD_getErrorName(zresult));
117 117 }
118 118 else {
119 119 result = PyLong_FromSize_t(zresult);
120 120 }
121 121
122 122 finally:
123 123
124 124 PyBuffer_Release(&source);
125 125
126 126 return result;
127 127 }
128 128
129 129 PyDoc_STRVAR(get_frame_parameters__doc__,
130 130 "get_frame_parameters(data)\n"
131 131 "\n"
132 132 "Obtains a ``FrameParameters`` instance by parsing data.\n");
133 133
134 134 PyDoc_STRVAR(train_dictionary__doc__,
135 135 "train_dictionary(dict_size, samples, k=None, d=None, steps=None,\n"
136 136 " threads=None,notifications=0, dict_id=0, level=0)\n"
137 137 "\n"
138 138 "Train a dictionary from sample data using the COVER algorithm.\n"
139 139 "\n"
140 140 "A compression dictionary of size ``dict_size`` will be created from the\n"
141 141 "iterable of ``samples``. The raw dictionary bytes will be returned.\n"
142 142 "\n"
143 143 "The COVER algorithm has 2 parameters: ``k`` and ``d``. These control the\n"
144 144 "*segment size* and *dmer size*. A reasonable range for ``k`` is\n"
145 145 "``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n"
146 146 "``d`` must be less than or equal to ``k``.\n"
147 147 "\n"
148 148 "``steps`` can be specified to control the number of steps through potential\n"
149 149 "values of ``k`` and ``d`` to try. ``k`` and ``d`` will only be varied if\n"
150 150 "those arguments are not defined. i.e. if ``d`` is ``8``, then only ``k``\n"
151 151 "will be varied in this mode.\n"
152 152 "\n"
153 153 "``threads`` can specify how many threads to use to test various ``k`` and\n"
154 154 "``d`` values. ``-1`` will use as many threads as available CPUs. By default,\n"
155 155 "a single thread is used.\n"
156 156 "\n"
157 157 "When ``k`` and ``d`` are not defined, default values are used and the\n"
158 158 "algorithm will perform multiple iterations - or steps - to try to find\n"
159 159 "ideal parameters. If both ``k`` and ``d`` are specified, then those values\n"
160 160 "will be used. ``steps`` or ``threads`` triggers optimization mode to test\n"
161 161 "multiple ``k`` and ``d`` variations.\n"
162 162 );
163 163
164 164 static char zstd_doc[] = "Interface to zstandard";
165 165
166 166 static PyMethodDef zstd_methods[] = {
167 167 { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
168 168 METH_NOARGS, estimate_decompression_context_size__doc__ },
169 169 { "frame_content_size", (PyCFunction)frame_content_size,
170 170 METH_VARARGS | METH_KEYWORDS, frame_content_size__doc__ },
171 171 { "frame_header_size", (PyCFunction)frame_header_size,
172 172 METH_VARARGS | METH_KEYWORDS, frame_header_size__doc__ },
173 173 { "get_frame_parameters", (PyCFunction)get_frame_parameters,
174 174 METH_VARARGS | METH_KEYWORDS, get_frame_parameters__doc__ },
175 175 { "train_dictionary", (PyCFunction)train_dictionary,
176 176 METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
177 177 { NULL, NULL }
178 178 };
179 179
180 180 void bufferutil_module_init(PyObject* mod);
181 181 void compressobj_module_init(PyObject* mod);
182 182 void compressor_module_init(PyObject* mod);
183 183 void compressionparams_module_init(PyObject* mod);
184 184 void constants_module_init(PyObject* mod);
185 void compressionchunker_module_init(PyObject* mod);
185 186 void compressiondict_module_init(PyObject* mod);
186 187 void compressionreader_module_init(PyObject* mod);
187 188 void compressionwriter_module_init(PyObject* mod);
188 189 void compressoriterator_module_init(PyObject* mod);
189 190 void decompressor_module_init(PyObject* mod);
190 191 void decompressobj_module_init(PyObject* mod);
191 192 void decompressionreader_module_init(PyObject *mod);
192 193 void decompressionwriter_module_init(PyObject* mod);
193 194 void decompressoriterator_module_init(PyObject* mod);
194 195 void frameparams_module_init(PyObject* mod);
195 196
196 197 void zstd_module_init(PyObject* m) {
197 198 /* python-zstandard relies on unstable zstd C API features. This means
198 199 that changes in zstd may break expectations in python-zstandard.
199 200
200 201 python-zstandard is distributed with a copy of the zstd sources.
201 202 python-zstandard is only guaranteed to work with the bundled version
202 203 of zstd.
203 204
204 205 However, downstream redistributors or packagers may unbundle zstd
205 206 from python-zstandard. This can result in a mismatch between zstd
206 207 versions and API semantics. This essentially "voids the warranty"
207 208 of python-zstandard and may cause undefined behavior.
208 209
209 210 We detect this mismatch here and refuse to load the module if this
210 211 scenario is detected.
211 212 */
212 if (ZSTD_VERSION_NUMBER != 10304 || ZSTD_versionNumber() != 10304) {
213 if (ZSTD_VERSION_NUMBER != 10306 || ZSTD_versionNumber() != 10306) {
213 214 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
214 215 return;
215 216 }
216 217
217 218 bufferutil_module_init(m);
218 219 compressionparams_module_init(m);
219 220 compressiondict_module_init(m);
220 221 compressobj_module_init(m);
221 222 compressor_module_init(m);
223 compressionchunker_module_init(m);
222 224 compressionreader_module_init(m);
223 225 compressionwriter_module_init(m);
224 226 compressoriterator_module_init(m);
225 227 constants_module_init(m);
226 228 decompressor_module_init(m);
227 229 decompressobj_module_init(m);
228 230 decompressionreader_module_init(m);
229 231 decompressionwriter_module_init(m);
230 232 decompressoriterator_module_init(m);
231 233 frameparams_module_init(m);
232 234 }
233 235
234 236 #if defined(__GNUC__) && (__GNUC__ >= 4)
235 237 # define PYTHON_ZSTD_VISIBILITY __attribute__ ((visibility ("default")))
236 238 #else
237 239 # define PYTHON_ZSTD_VISIBILITY
238 240 #endif
239 241
240 242 #if PY_MAJOR_VERSION >= 3
241 243 static struct PyModuleDef zstd_module = {
242 244 PyModuleDef_HEAD_INIT,
243 245 "zstd",
244 246 zstd_doc,
245 247 -1,
246 248 zstd_methods
247 249 };
248 250
249 251 PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC PyInit_zstd(void) {
250 252 PyObject *m = PyModule_Create(&zstd_module);
251 253 if (m) {
252 254 zstd_module_init(m);
253 255 if (PyErr_Occurred()) {
254 256 Py_DECREF(m);
255 257 m = NULL;
256 258 }
257 259 }
258 260 return m;
259 261 }
260 262 #else
261 263 PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC initzstd(void) {
262 264 PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc);
263 265 if (m) {
264 266 zstd_module_init(m);
265 267 }
266 268 }
267 269 #endif
268 270
269 271 /* Attempt to resolve the number of CPUs in the system. */
270 272 int cpu_count() {
271 273 int count = 0;
272 274
273 275 #if defined(_WIN32)
274 276 SYSTEM_INFO si;
275 277 si.dwNumberOfProcessors = 0;
276 278 GetSystemInfo(&si);
277 279 count = si.dwNumberOfProcessors;
278 280 #elif defined(__APPLE__)
279 281 int num;
280 282 size_t size = sizeof(int);
281 283
282 284 if (0 == sysctlbyname("hw.logicalcpu", &num, &size, NULL, 0)) {
283 285 count = num;
284 286 }
285 287 #elif defined(__linux__)
286 288 count = sysconf(_SC_NPROCESSORS_ONLN);
287 289 #elif defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
288 290 int mib[2];
289 291 size_t len = sizeof(count);
290 292 mib[0] = CTL_HW;
291 293 mib[1] = HW_NCPU;
292 294 if (0 != sysctl(mib, 2, &count, &len, NULL, 0)) {
293 295 count = 0;
294 296 }
295 297 #elif defined(__hpux)
296 298 count = mpctl(MPC_GETNUMSPUS, NULL, NULL);
297 299 #endif
298 300
299 301 return count;
300 302 }
301 303
302 304 size_t roundpow2(size_t i) {
303 305 i--;
304 306 i |= i >> 1;
305 307 i |= i >> 2;
306 308 i |= i >> 4;
307 309 i |= i >> 8;
308 310 i |= i >> 16;
309 311 i++;
310 312
311 313 return i;
312 314 }
313 315
314 316 /* Safer version of _PyBytes_Resize().
315 317 *
316 318 * _PyBytes_Resize() only works if the refcount is 1. In some scenarios,
317 319 * we can get an object with a refcount > 1, even if it was just created
318 320 * with PyBytes_FromStringAndSize()! That's because (at least) CPython
319 321 * pre-allocates PyBytes instances of size 1 for every possible byte value.
320 322 *
321 323 * If non-0 is returned, obj may or may not be NULL.
322 324 */
323 325 int safe_pybytes_resize(PyObject** obj, Py_ssize_t size) {
324 326 PyObject* tmp;
325 327
326 328 if ((*obj)->ob_refcnt == 1) {
327 329 return _PyBytes_Resize(obj, size);
328 330 }
329 331
330 332 tmp = PyBytes_FromStringAndSize(NULL, size);
331 333 if (!tmp) {
332 334 return -1;
333 335 }
334 336
335 337 memcpy(PyBytes_AS_STRING(tmp), PyBytes_AS_STRING(*obj),
336 338 PyBytes_GET_SIZE(*obj));
337 339
338 340 Py_DECREF(*obj);
339 341 *obj = tmp;
340 342
341 343 return 0;
342 344 } No newline at end of file
@@ -1,471 +1,458 b''
1 1 /* ******************************************************************
2 2 bitstream
3 3 Part of FSE library
4 header file (to include)
5 Copyright (C) 2013-2017, Yann Collet.
4 Copyright (C) 2013-present, Yann Collet.
6 5
7 6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
8 7
9 8 Redistribution and use in source and binary forms, with or without
10 9 modification, are permitted provided that the following conditions are
11 10 met:
12 11
13 12 * Redistributions of source code must retain the above copyright
14 13 notice, this list of conditions and the following disclaimer.
15 14 * Redistributions in binary form must reproduce the above
16 15 copyright notice, this list of conditions and the following disclaimer
17 16 in the documentation and/or other materials provided with the
18 17 distribution.
19 18
20 19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 30
32 31 You can contact the author at :
33 32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
34 33 ****************************************************************** */
35 34 #ifndef BITSTREAM_H_MODULE
36 35 #define BITSTREAM_H_MODULE
37 36
38 37 #if defined (__cplusplus)
39 38 extern "C" {
40 39 #endif
41 40
42 41 /*
43 42 * This API consists of small unitary functions, which must be inlined for best performance.
44 43 * Since link-time-optimization is not available for all compilers,
45 44 * these functions are defined into a .h to be included.
46 45 */
47 46
48 47 /*-****************************************
49 48 * Dependencies
50 49 ******************************************/
51 50 #include "mem.h" /* unaligned access routines */
51 #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
52 52 #include "error_private.h" /* error codes and messages */
53 53
54 54
55 /*-*************************************
56 * Debug
57 ***************************************/
58 #if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
59 # include <assert.h>
60 #else
61 # ifndef assert
62 # define assert(condition) ((void)0)
63 # endif
64 #endif
65
66
67 55 /*=========================================
68 56 * Target specific
69 57 =========================================*/
70 58 #if defined(__BMI__) && defined(__GNUC__)
71 59 # include <immintrin.h> /* support for bextr (experimental) */
72 60 #endif
73 61
74 62 #define STREAM_ACCUMULATOR_MIN_32 25
75 63 #define STREAM_ACCUMULATOR_MIN_64 57
76 64 #define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
77 65
78 66
79 67 /*-******************************************
80 68 * bitStream encoding API (write forward)
81 69 ********************************************/
82 70 /* bitStream can mix input from multiple sources.
83 71 * A critical property of these streams is that they encode and decode in **reverse** direction.
84 72 * So the first bit sequence you add will be the last to be read, like a LIFO stack.
85 73 */
86 typedef struct
87 {
74 typedef struct {
88 75 size_t bitContainer;
89 76 unsigned bitPos;
90 77 char* startPtr;
91 78 char* ptr;
92 79 char* endPtr;
93 80 } BIT_CStream_t;
94 81
95 82 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
96 83 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
97 84 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
98 85 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
99 86
100 87 /* Start with initCStream, providing the size of buffer to write into.
101 88 * bitStream will never write outside of this buffer.
102 89 * `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
103 90 *
104 91 * bits are first added to a local register.
105 92 * Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
106 93 * Writing data into memory is an explicit operation, performed by the flushBits function.
107 94  * Hence keep track of how many bits are potentially stored into the local register to avoid register overflow.
108 95 * After a flushBits, a maximum of 7 bits might still be stored into local register.
109 96 *
110 97 * Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
111 98 *
112 99 * Last operation is to close the bitStream.
113 100 * The function returns the final size of CStream in bytes.
114 101 * If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
115 102 */
116 103
117 104
118 105 /*-********************************************
119 106 * bitStream decoding API (read backward)
120 107 **********************************************/
121 typedef struct
122 {
108 typedef struct {
123 109 size_t bitContainer;
124 110 unsigned bitsConsumed;
125 111 const char* ptr;
126 112 const char* start;
127 113 const char* limitPtr;
128 114 } BIT_DStream_t;
129 115
130 116 typedef enum { BIT_DStream_unfinished = 0,
131 117 BIT_DStream_endOfBuffer = 1,
132 118 BIT_DStream_completed = 2,
133 119 BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
134 120 /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
135 121
136 122 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
137 123 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
138 124 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
139 125 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
140 126
141 127
142 128 /* Start by invoking BIT_initDStream().
143 129 * A chunk of the bitStream is then stored into a local register.
144 130 * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
145 131 * You can then retrieve bitFields stored into the local register, **in reverse order**.
146 132 * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
147 133  * A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
148 134 * Otherwise, it can be less than that, so proceed accordingly.
149 135 * Checking if DStream has reached its end can be performed with BIT_endOfDStream().
150 136 */
151 137
152 138
153 139 /*-****************************************
154 140 * unsafe API
155 141 ******************************************/
156 142 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
157 143 /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
158 144
159 145 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
160 146 /* unsafe version; does not check buffer overflow */
161 147
162 148 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
163 149 /* faster, but works only if nbBits >= 1 */
164 150
165 151
166 152
167 153 /*-**************************************************************
168 154 * Internal functions
169 155 ****************************************************************/
170 156 MEM_STATIC unsigned BIT_highbit32 (U32 val)
171 157 {
172 158 assert(val != 0);
173 159 {
174 160 # if defined(_MSC_VER) /* Visual */
175 161 unsigned long r=0;
176 162 _BitScanReverse ( &r, val );
177 163 return (unsigned) r;
178 164 # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
179 165 return 31 - __builtin_clz (val);
180 166 # else /* Software version */
181 167 static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
182 168 11, 14, 16, 18, 22, 25, 3, 30,
183 169 8, 12, 20, 28, 15, 17, 24, 7,
184 170 19, 27, 23, 6, 26, 5, 4, 31 };
185 171 U32 v = val;
186 172 v |= v >> 1;
187 173 v |= v >> 2;
188 174 v |= v >> 4;
189 175 v |= v >> 8;
190 176 v |= v >> 16;
191 177 return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
192 178 # endif
193 179 }
194 180 }
195 181
196 182 /*===== Local Constants =====*/
197 183 static const unsigned BIT_mask[] = {
198 184 0, 1, 3, 7, 0xF, 0x1F,
199 185 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
200 186 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF,
201 187 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
202 188 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
203 189 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
204 190 #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
205 191
206 192 /*-**************************************************************
207 193 * bitStream encoding
208 194 ****************************************************************/
209 195 /*! BIT_initCStream() :
210 196 * `dstCapacity` must be > sizeof(size_t)
211 197 * @return : 0 if success,
212 198 * otherwise an error code (can be tested using ERR_isError()) */
213 199 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
214 200 void* startPtr, size_t dstCapacity)
215 201 {
216 202 bitC->bitContainer = 0;
217 203 bitC->bitPos = 0;
218 204 bitC->startPtr = (char*)startPtr;
219 205 bitC->ptr = bitC->startPtr;
220 206 bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
221 207 if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
222 208 return 0;
223 209 }
224 210
225 211 /*! BIT_addBits() :
226 212 * can add up to 31 bits into `bitC`.
227 213 * Note : does not check for register overflow ! */
228 214 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
229 215 size_t value, unsigned nbBits)
230 216 {
231 217 MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
232 218 assert(nbBits < BIT_MASK_SIZE);
233 219 assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
234 220 bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
235 221 bitC->bitPos += nbBits;
236 222 }
237 223
238 224 /*! BIT_addBitsFast() :
239 * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
225 * works only if `value` is _clean_,
226 * meaning all high bits above nbBits are 0 */
240 227 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
241 228 size_t value, unsigned nbBits)
242 229 {
243 230 assert((value>>nbBits) == 0);
244 231 assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
245 232 bitC->bitContainer |= value << bitC->bitPos;
246 233 bitC->bitPos += nbBits;
247 234 }
248 235
249 236 /*! BIT_flushBitsFast() :
250 237 * assumption : bitContainer has not overflowed
251 238 * unsafe version; does not check buffer overflow */
252 239 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
253 240 {
254 241 size_t const nbBytes = bitC->bitPos >> 3;
255 242 assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
256 243 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
257 244 bitC->ptr += nbBytes;
258 245 assert(bitC->ptr <= bitC->endPtr);
259 246 bitC->bitPos &= 7;
260 247 bitC->bitContainer >>= nbBytes*8;
261 248 }
262 249
263 250 /*! BIT_flushBits() :
264 251 * assumption : bitContainer has not overflowed
265 252  * safe version; checks for buffer overflow and prevents it.
266 253 * note : does not signal buffer overflow.
267 254 * overflow will be revealed later on using BIT_closeCStream() */
268 255 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
269 256 {
270 257 size_t const nbBytes = bitC->bitPos >> 3;
271 258 assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
272 259 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
273 260 bitC->ptr += nbBytes;
274 261 if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
275 262 bitC->bitPos &= 7;
276 263 bitC->bitContainer >>= nbBytes*8;
277 264 }
278 265
279 266 /*! BIT_closeCStream() :
280 267 * @return : size of CStream, in bytes,
281 268 * or 0 if it could not fit into dstBuffer */
282 269 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
283 270 {
284 271 BIT_addBitsFast(bitC, 1, 1); /* endMark */
285 272 BIT_flushBits(bitC);
286 273 if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
287 274 return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
288 275 }
289 276
290 277
291 278 /*-********************************************************
292 279 * bitStream decoding
293 280 **********************************************************/
294 281 /*! BIT_initDStream() :
295 282 * Initialize a BIT_DStream_t.
296 283 * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
297 284 * `srcSize` must be the *exact* size of the bitStream, in bytes.
298 285 * @return : size of stream (== srcSize), or an errorCode if a problem is detected
299 286 */
300 287 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
301 288 {
302 289 if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
303 290
304 291 bitD->start = (const char*)srcBuffer;
305 292 bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
306 293
307 294 if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
308 295 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
309 296 bitD->bitContainer = MEM_readLEST(bitD->ptr);
310 297 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
311 298 bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
312 299 if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
313 300 } else {
314 301 bitD->ptr = bitD->start;
315 302 bitD->bitContainer = *(const BYTE*)(bitD->start);
316 303 switch(srcSize)
317 304 {
318 305 case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
319 306 /* fall-through */
320 307
321 308 case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
322 309 /* fall-through */
323 310
324 311 case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
325 312 /* fall-through */
326 313
327 314 case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
328 315 /* fall-through */
329 316
330 317 case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
331 318 /* fall-through */
332 319
333 320 case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
334 321 /* fall-through */
335 322
336 323 default: break;
337 324 }
338 325 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
339 326 bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
340 327 if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
341 328 }
342 329 bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
343 330 }
344 331
345 332 return srcSize;
346 333 }
347 334
348 335 MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
349 336 {
350 337 return bitContainer >> start;
351 338 }
352 339
353 340 MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
354 341 {
355 342 #if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */
356 343 # if defined(__x86_64__)
357 344 if (sizeof(bitContainer)==8)
358 345 return _bextr_u64(bitContainer, start, nbBits);
359 346 else
360 347 # endif
361 348 return _bextr_u32(bitContainer, start, nbBits);
362 349 #else
363 350 assert(nbBits < BIT_MASK_SIZE);
364 351 return (bitContainer >> start) & BIT_mask[nbBits];
365 352 #endif
366 353 }
367 354
368 355 MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
369 356 {
370 357 assert(nbBits < BIT_MASK_SIZE);
371 358 return bitContainer & BIT_mask[nbBits];
372 359 }
373 360
374 361 /*! BIT_lookBits() :
375 362 * Provides next n bits from local register.
376 363 * local register is not modified.
377 364 * On 32-bits, maxNbBits==24.
378 365 * On 64-bits, maxNbBits==56.
379 366 * @return : value extracted */
380 367 MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
381 368 {
382 369 #if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
383 370 return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
384 371 #else
385 372 U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
386 373 return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
387 374 #endif
388 375 }
389 376
390 377 /*! BIT_lookBitsFast() :
391 378 * unsafe version; only works if nbBits >= 1 */
392 379 MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
393 380 {
394 381 U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
395 382 assert(nbBits >= 1);
396 383 return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
397 384 }
398 385
399 386 MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
400 387 {
401 388 bitD->bitsConsumed += nbBits;
402 389 }
403 390
404 391 /*! BIT_readBits() :
405 392 * Read (consume) next n bits from local register and update.
406 393 * Pay attention to not read more than nbBits contained into local register.
407 394 * @return : extracted value. */
408 395 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
409 396 {
410 397 size_t const value = BIT_lookBits(bitD, nbBits);
411 398 BIT_skipBits(bitD, nbBits);
412 399 return value;
413 400 }
414 401
415 402 /*! BIT_readBitsFast() :
416 403  * unsafe version; works only if nbBits >= 1 */
417 404 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
418 405 {
419 406 size_t const value = BIT_lookBitsFast(bitD, nbBits);
420 407 assert(nbBits >= 1);
421 408 BIT_skipBits(bitD, nbBits);
422 409 return value;
423 410 }
424 411
425 412 /*! BIT_reloadDStream() :
426 413 * Refill `bitD` from buffer previously set in BIT_initDStream() .
427 414 * This function is safe, it guarantees it will not read beyond src buffer.
428 415 * @return : status of `BIT_DStream_t` internal register.
429 416 * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
430 417 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
431 418 {
432 419 if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
433 420 return BIT_DStream_overflow;
434 421
435 422 if (bitD->ptr >= bitD->limitPtr) {
436 423 bitD->ptr -= bitD->bitsConsumed >> 3;
437 424 bitD->bitsConsumed &= 7;
438 425 bitD->bitContainer = MEM_readLEST(bitD->ptr);
439 426 return BIT_DStream_unfinished;
440 427 }
441 428 if (bitD->ptr == bitD->start) {
442 429 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
443 430 return BIT_DStream_completed;
444 431 }
445 432 /* start < ptr < limitPtr */
446 433 { U32 nbBytes = bitD->bitsConsumed >> 3;
447 434 BIT_DStream_status result = BIT_DStream_unfinished;
448 435 if (bitD->ptr - nbBytes < bitD->start) {
449 436 nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
450 437 result = BIT_DStream_endOfBuffer;
451 438 }
452 439 bitD->ptr -= nbBytes;
453 440 bitD->bitsConsumed -= nbBytes*8;
454 441 bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
455 442 return result;
456 443 }
457 444 }
458 445
459 446 /*! BIT_endOfDStream() :
460 447 * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
461 448 */
462 449 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
463 450 {
464 451 return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
465 452 }
466 453
467 454 #if defined (__cplusplus)
468 455 }
469 456 #endif
470 457
471 458 #endif /* BITSTREAM_H_MODULE */
@@ -1,111 +1,133 b''
1 1 /*
2 2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 3 * All rights reserved.
4 4 *
5 5 * This source code is licensed under both the BSD-style license (found in the
6 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 7 * in the COPYING file in the root directory of this source tree).
8 8 * You may select, at your option, one of the above-listed licenses.
9 9 */
10 10
11 11 #ifndef ZSTD_COMPILER_H
12 12 #define ZSTD_COMPILER_H
13 13
14 14 /*-*******************************************************
15 15 * Compiler specifics
16 16 *********************************************************/
17 17 /* force inlining */
18 18 #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
19 19 # define INLINE_KEYWORD inline
20 20 #else
21 21 # define INLINE_KEYWORD
22 22 #endif
23 23
24 24 #if defined(__GNUC__)
25 25 # define FORCE_INLINE_ATTR __attribute__((always_inline))
26 26 #elif defined(_MSC_VER)
27 27 # define FORCE_INLINE_ATTR __forceinline
28 28 #else
29 29 # define FORCE_INLINE_ATTR
30 30 #endif
31 31
32 32 /**
33 33 * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
34 34  * parameters. They must be inlined for the compiler to eliminate the constant
35 35 * branches.
36 36 */
37 37 #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
38 38 /**
39 39 * HINT_INLINE is used to help the compiler generate better code. It is *not*
40 40 * used for "templates", so it can be tweaked based on the compilers
41 41 * performance.
42 42 *
43 43 * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
44 44 * always_inline attribute.
45 45 *
46 46  * clang up to 5.0.0 (trunk) benefits tremendously from the always_inline
47 47 * attribute.
48 48 */
49 49 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
50 50 # define HINT_INLINE static INLINE_KEYWORD
51 51 #else
52 52 # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
53 53 #endif
54 54
55 55 /* force no inlining */
56 56 #ifdef _MSC_VER
57 57 # define FORCE_NOINLINE static __declspec(noinline)
58 58 #else
59 59 # ifdef __GNUC__
60 60 # define FORCE_NOINLINE static __attribute__((__noinline__))
61 61 # else
62 62 # define FORCE_NOINLINE static
63 63 # endif
64 64 #endif
65 65
66 66 /* target attribute */
67 67 #ifndef __has_attribute
68 68 #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
69 69 #endif
70 70 #if defined(__GNUC__)
71 71 # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
72 72 #else
73 73 # define TARGET_ATTRIBUTE(target)
74 74 #endif
75 75
76 76 /* Enable runtime BMI2 dispatch based on the CPU.
77 77 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
78 78 */
79 79 #ifndef DYNAMIC_BMI2
80 #if (defined(__clang__) && __has_attribute(__target__)) \
80 #if ((defined(__clang__) && __has_attribute(__target__)) \
81 81 || (defined(__GNUC__) \
82 && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) \
82 && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
83 83 && (defined(__x86_64__) || defined(_M_X86)) \
84 84 && !defined(__BMI2__)
85 85 # define DYNAMIC_BMI2 1
86 86 #else
87 87 # define DYNAMIC_BMI2 0
88 88 #endif
89 89 #endif
90 90
91 /* prefetch */
92 #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
93 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
94 # define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
95 #elif defined(__GNUC__)
96 # define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
91 /* prefetch
92  * can be disabled by declaring the NO_PREFETCH macro.
93 * All prefetch invocations use a single default locality 2,
94 * generating instruction prefetcht1,
95 * which, according to Intel, means "load data into L2 cache".
96 * This is a good enough "middle ground" for the time being,
97 * though in theory, it would be better to specialize locality depending on data being prefetched.
98 * Tests could not determine any sensible difference based on locality value. */
99 #if defined(NO_PREFETCH)
100 # define PREFETCH(ptr) (void)(ptr) /* disabled */
97 101 #else
98 # define PREFETCH(ptr) /* disabled */
99 #endif
102 # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
103 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
104 # define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
105 # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
106 # define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
107 # else
108 # define PREFETCH(ptr) (void)(ptr) /* disabled */
109 # endif
110 #endif /* NO_PREFETCH */
111
112 #define CACHELINE_SIZE 64
113
114 #define PREFETCH_AREA(p, s) { \
115 const char* const _ptr = (const char*)(p); \
116 size_t const _size = (size_t)(s); \
117 size_t _pos; \
118 for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
119 PREFETCH(_ptr + _pos); \
120 } \
121 }
100 122
101 123 /* disable warnings */
102 124 #ifdef _MSC_VER /* Visual Studio */
103 125 # include <intrin.h> /* For Visual 2005 */
104 126 # pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */
105 127 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
106 128 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
107 129 # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
108 130 # pragma warning(disable : 4324) /* disable: C4324: padded structure */
109 131 #endif
110 132
111 133 #endif /* ZSTD_COMPILER_H */
@@ -1,216 +1,215 b''
1 1 /*
2 2 * Copyright (c) 2018-present, Facebook, Inc.
3 3 * All rights reserved.
4 4 *
5 5 * This source code is licensed under both the BSD-style license (found in the
6 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 7 * in the COPYING file in the root directory of this source tree).
8 8 * You may select, at your option, one of the above-listed licenses.
9 9 */
10 10
11 11 #ifndef ZSTD_COMMON_CPU_H
12 12 #define ZSTD_COMMON_CPU_H
13 13
14 14 /**
15 15 * Implementation taken from folly/CpuId.h
16 16 * https://github.com/facebook/folly/blob/master/folly/CpuId.h
17 17 */
18 18
19 19 #include <string.h>
20 20
21 21 #include "mem.h"
22 22
23 23 #ifdef _MSC_VER
24 24 #include <intrin.h>
25 25 #endif
26 26
27 27 typedef struct {
28 28 U32 f1c;
29 29 U32 f1d;
30 30 U32 f7b;
31 31 U32 f7c;
32 32 } ZSTD_cpuid_t;
33 33
34 34 MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
35 35 U32 f1c = 0;
36 36 U32 f1d = 0;
37 37 U32 f7b = 0;
38 38 U32 f7c = 0;
39 #ifdef _MSC_VER
39 #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
40 40 int reg[4];
41 41 __cpuid((int*)reg, 0);
42 42 {
43 43 int const n = reg[0];
44 44 if (n >= 1) {
45 45 __cpuid((int*)reg, 1);
46 46 f1c = (U32)reg[2];
47 47 f1d = (U32)reg[3];
48 48 }
49 49 if (n >= 7) {
50 50 __cpuidex((int*)reg, 7, 0);
51 51 f7b = (U32)reg[1];
52 52 f7c = (U32)reg[2];
53 53 }
54 54 }
55 55 #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
56 56 /* The following block like the normal cpuid branch below, but gcc
57 57 * reserves ebx for use of its pic register so we must specially
58 58 * handle the save and restore to avoid clobbering the register
59 59 */
60 60 U32 n;
61 61 __asm__(
62 62 "pushl %%ebx\n\t"
63 63 "cpuid\n\t"
64 64 "popl %%ebx\n\t"
65 65 : "=a"(n)
66 66 : "a"(0)
67 67 : "ecx", "edx");
68 68 if (n >= 1) {
69 69 U32 f1a;
70 70 __asm__(
71 71 "pushl %%ebx\n\t"
72 72 "cpuid\n\t"
73 73 "popl %%ebx\n\t"
74 74 : "=a"(f1a), "=c"(f1c), "=d"(f1d)
75 : "a"(1)
76 :);
75 : "a"(1));
77 76 }
78 77 if (n >= 7) {
79 78 __asm__(
80 79 "pushl %%ebx\n\t"
81 80 "cpuid\n\t"
82 81 "movl %%ebx, %%eax\n\r"
83 82 "popl %%ebx"
84 83 : "=a"(f7b), "=c"(f7c)
85 84 : "a"(7), "c"(0)
86 85 : "edx");
87 86 }
88 87 #elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
89 88 U32 n;
90 89 __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
91 90 if (n >= 1) {
92 91 U32 f1a;
93 92 __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
94 93 }
95 94 if (n >= 7) {
96 95 U32 f7a;
97 96 __asm__("cpuid"
98 97 : "=a"(f7a), "=b"(f7b), "=c"(f7c)
99 98 : "a"(7), "c"(0)
100 99 : "edx");
101 100 }
102 101 #endif
103 102 {
104 103 ZSTD_cpuid_t cpuid;
105 104 cpuid.f1c = f1c;
106 105 cpuid.f1d = f1d;
107 106 cpuid.f7b = f7b;
108 107 cpuid.f7c = f7c;
109 108 return cpuid;
110 109 }
111 110 }
112 111
113 112 #define X(name, r, bit) \
114 113 MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
115 114 return ((cpuid.r) & (1U << bit)) != 0; \
116 115 }
117 116
118 117 /* cpuid(1): Processor Info and Feature Bits. */
119 118 #define C(name, bit) X(name, f1c, bit)
120 119 C(sse3, 0)
121 120 C(pclmuldq, 1)
122 121 C(dtes64, 2)
123 122 C(monitor, 3)
124 123 C(dscpl, 4)
125 124 C(vmx, 5)
126 125 C(smx, 6)
127 126 C(eist, 7)
128 127 C(tm2, 8)
129 128 C(ssse3, 9)
130 129 C(cnxtid, 10)
131 130 C(fma, 12)
132 131 C(cx16, 13)
133 132 C(xtpr, 14)
134 133 C(pdcm, 15)
135 134 C(pcid, 17)
136 135 C(dca, 18)
137 136 C(sse41, 19)
138 137 C(sse42, 20)
139 138 C(x2apic, 21)
140 139 C(movbe, 22)
141 140 C(popcnt, 23)
142 141 C(tscdeadline, 24)
143 142 C(aes, 25)
144 143 C(xsave, 26)
145 144 C(osxsave, 27)
146 145 C(avx, 28)
147 146 C(f16c, 29)
148 147 C(rdrand, 30)
149 148 #undef C
150 149 #define D(name, bit) X(name, f1d, bit)
151 150 D(fpu, 0)
152 151 D(vme, 1)
153 152 D(de, 2)
154 153 D(pse, 3)
155 154 D(tsc, 4)
156 155 D(msr, 5)
157 156 D(pae, 6)
158 157 D(mce, 7)
159 158 D(cx8, 8)
160 159 D(apic, 9)
161 160 D(sep, 11)
162 161 D(mtrr, 12)
163 162 D(pge, 13)
164 163 D(mca, 14)
165 164 D(cmov, 15)
166 165 D(pat, 16)
167 166 D(pse36, 17)
168 167 D(psn, 18)
169 168 D(clfsh, 19)
170 169 D(ds, 21)
171 170 D(acpi, 22)
172 171 D(mmx, 23)
173 172 D(fxsr, 24)
174 173 D(sse, 25)
175 174 D(sse2, 26)
176 175 D(ss, 27)
177 176 D(htt, 28)
178 177 D(tm, 29)
179 178 D(pbe, 31)
180 179 #undef D
181 180
182 181 /* cpuid(7): Extended Features. */
183 182 #define B(name, bit) X(name, f7b, bit)
184 183 B(bmi1, 3)
185 184 B(hle, 4)
186 185 B(avx2, 5)
187 186 B(smep, 7)
188 187 B(bmi2, 8)
189 188 B(erms, 9)
190 189 B(invpcid, 10)
191 190 B(rtm, 11)
192 191 B(mpx, 14)
193 192 B(avx512f, 16)
194 193 B(avx512dq, 17)
195 194 B(rdseed, 18)
196 195 B(adx, 19)
197 196 B(smap, 20)
198 197 B(avx512ifma, 21)
199 198 B(pcommit, 22)
200 199 B(clflushopt, 23)
201 200 B(clwb, 24)
202 201 B(avx512pf, 26)
203 202 B(avx512er, 27)
204 203 B(avx512cd, 28)
205 204 B(sha, 29)
206 205 B(avx512bw, 30)
207 206 B(avx512vl, 31)
208 207 #undef B
209 208 #define C(name, bit) X(name, f7c, bit)
210 209 C(prefetchwt1, 0)
211 210 C(avx512vbmi, 1)
212 211 #undef C
213 212
214 213 #undef X
215 214
216 215 #endif /* ZSTD_COMMON_CPU_H */
@@ -1,221 +1,236 b''
1 1 /*
2 2 Common functions of New Generation Entropy library
3 3 Copyright (C) 2016, Yann Collet.
4 4
5 5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 6
7 7 Redistribution and use in source and binary forms, with or without
8 8 modification, are permitted provided that the following conditions are
9 9 met:
10 10
11 11 * Redistributions of source code must retain the above copyright
12 12 notice, this list of conditions and the following disclaimer.
13 13 * Redistributions in binary form must reproduce the above
14 14 copyright notice, this list of conditions and the following disclaimer
15 15 in the documentation and/or other materials provided with the
16 16 distribution.
17 17
18 18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 29
30 30 You can contact the author at :
31 31 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 33 *************************************************************************** */
34 34
35 35 /* *************************************
36 36 * Dependencies
37 37 ***************************************/
38 38 #include "mem.h"
39 39 #include "error_private.h" /* ERR_*, ERROR */
40 40 #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
41 41 #include "fse.h"
42 42 #define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
43 43 #include "huf.h"
44 44
45 45
46 46 /*=== Version ===*/
47 47 unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
48 48
49 49
50 50 /*=== Error Management ===*/
51 51 unsigned FSE_isError(size_t code) { return ERR_isError(code); }
52 52 const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
53 53
54 54 unsigned HUF_isError(size_t code) { return ERR_isError(code); }
55 55 const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
56 56
57 57
58 58 /*-**************************************************************
59 59 * FSE NCount encoding-decoding
60 60 ****************************************************************/
61 61 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
62 62 const void* headerBuffer, size_t hbSize)
63 63 {
64 64 const BYTE* const istart = (const BYTE*) headerBuffer;
65 65 const BYTE* const iend = istart + hbSize;
66 66 const BYTE* ip = istart;
67 67 int nbBits;
68 68 int remaining;
69 69 int threshold;
70 70 U32 bitStream;
71 71 int bitCount;
72 72 unsigned charnum = 0;
73 73 int previous0 = 0;
74 74
75 if (hbSize < 4) return ERROR(srcSize_wrong);
75 if (hbSize < 4) {
76 /* This function only works when hbSize >= 4 */
77 char buffer[4];
78 memset(buffer, 0, sizeof(buffer));
79 memcpy(buffer, headerBuffer, hbSize);
80 { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
81 buffer, sizeof(buffer));
82 if (FSE_isError(countSize)) return countSize;
83 if (countSize > hbSize) return ERROR(corruption_detected);
84 return countSize;
85 } }
86 assert(hbSize >= 4);
87
88 /* init */
89 memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
76 90 bitStream = MEM_readLE32(ip);
77 91 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
78 92 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
79 93 bitStream >>= 4;
80 94 bitCount = 4;
81 95 *tableLogPtr = nbBits;
82 96 remaining = (1<<nbBits)+1;
83 97 threshold = 1<<nbBits;
84 98 nbBits++;
85 99
86 100 while ((remaining>1) & (charnum<=*maxSVPtr)) {
87 101 if (previous0) {
88 102 unsigned n0 = charnum;
89 103 while ((bitStream & 0xFFFF) == 0xFFFF) {
90 104 n0 += 24;
91 105 if (ip < iend-5) {
92 106 ip += 2;
93 107 bitStream = MEM_readLE32(ip) >> bitCount;
94 108 } else {
95 109 bitStream >>= 16;
96 110 bitCount += 16;
97 111 } }
98 112 while ((bitStream & 3) == 3) {
99 113 n0 += 3;
100 114 bitStream >>= 2;
101 115 bitCount += 2;
102 116 }
103 117 n0 += bitStream & 3;
104 118 bitCount += 2;
105 119 if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
106 120 while (charnum < n0) normalizedCounter[charnum++] = 0;
107 121 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
122 assert((bitCount >> 3) <= 3); /* For first condition to work */
108 123 ip += bitCount>>3;
109 124 bitCount &= 7;
110 125 bitStream = MEM_readLE32(ip) >> bitCount;
111 126 } else {
112 127 bitStream >>= 2;
113 128 } }
114 129 { int const max = (2*threshold-1) - remaining;
115 130 int count;
116 131
117 132 if ((bitStream & (threshold-1)) < (U32)max) {
118 133 count = bitStream & (threshold-1);
119 134 bitCount += nbBits-1;
120 135 } else {
121 136 count = bitStream & (2*threshold-1);
122 137 if (count >= threshold) count -= max;
123 138 bitCount += nbBits;
124 139 }
125 140
126 141 count--; /* extra accuracy */
127 142 remaining -= count < 0 ? -count : count; /* -1 means +1 */
128 143 normalizedCounter[charnum++] = (short)count;
129 144 previous0 = !count;
130 145 while (remaining < threshold) {
131 146 nbBits--;
132 147 threshold >>= 1;
133 148 }
134 149
135 150 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
136 151 ip += bitCount>>3;
137 152 bitCount &= 7;
138 153 } else {
139 154 bitCount -= (int)(8 * (iend - 4 - ip));
140 155 ip = iend - 4;
141 156 }
142 157 bitStream = MEM_readLE32(ip) >> (bitCount & 31);
143 158 } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
144 159 if (remaining != 1) return ERROR(corruption_detected);
145 160 if (bitCount > 32) return ERROR(corruption_detected);
146 161 *maxSVPtr = charnum-1;
147 162
148 163 ip += (bitCount+7)>>3;
149 164 return ip-istart;
150 165 }
151 166
152 167
153 168 /*! HUF_readStats() :
154 169 Read compact Huffman tree, saved by HUF_writeCTable().
155 170 `huffWeight` is destination buffer.
156 171 `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
157 172 @return : size read from `src` , or an error Code .
158 173 Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
159 174 */
160 175 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
161 176 U32* nbSymbolsPtr, U32* tableLogPtr,
162 177 const void* src, size_t srcSize)
163 178 {
164 179 U32 weightTotal;
165 180 const BYTE* ip = (const BYTE*) src;
166 181 size_t iSize;
167 182 size_t oSize;
168 183
169 184 if (!srcSize) return ERROR(srcSize_wrong);
170 185 iSize = ip[0];
171 186 /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
172 187
173 188 if (iSize >= 128) { /* special header */
174 189 oSize = iSize - 127;
175 190 iSize = ((oSize+1)/2);
176 191 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
177 192 if (oSize >= hwSize) return ERROR(corruption_detected);
178 193 ip += 1;
179 194 { U32 n;
180 195 for (n=0; n<oSize; n+=2) {
181 196 huffWeight[n] = ip[n/2] >> 4;
182 197 huffWeight[n+1] = ip[n/2] & 15;
183 198 } } }
184 199 else { /* header compressed with FSE (normal case) */
185 200 FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
186 201 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
187 202 oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
188 203 if (FSE_isError(oSize)) return oSize;
189 204 }
190 205
191 206 /* collect weight stats */
192 207 memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
193 208 weightTotal = 0;
194 209 { U32 n; for (n=0; n<oSize; n++) {
195 210 if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
196 211 rankStats[huffWeight[n]]++;
197 212 weightTotal += (1 << huffWeight[n]) >> 1;
198 213 } }
199 214 if (weightTotal == 0) return ERROR(corruption_detected);
200 215
201 216 /* get last non-null symbol weight (implied, total must be 2^n) */
202 217 { U32 const tableLog = BIT_highbit32(weightTotal) + 1;
203 218 if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
204 219 *tableLogPtr = tableLog;
205 220 /* determine last weight */
206 221 { U32 const total = 1 << tableLog;
207 222 U32 const rest = total - weightTotal;
208 223 U32 const verif = 1 << BIT_highbit32(rest);
209 224 U32 const lastWeight = BIT_highbit32(rest) + 1;
210 225 if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
211 226 huffWeight[oSize] = (BYTE)lastWeight;
212 227 rankStats[lastWeight]++;
213 228 } }
214 229
215 230 /* check tree construction validity */
216 231 if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
217 232
218 233 /* results */
219 234 *nbSymbolsPtr = (U32)(oSize+1);
220 235 return iSize+1;
221 236 }