@@ -0,0 +1,405 b''
/**
 * Copyright (c) 2017-present, Gregory Szorc
 * All rights reserved.
 *
 * This software may be modified and distributed under the terms
 * of the BSD license. See the LICENSE file for details.
 */

#include "python-zstandard.h"

extern PyObject* ZstdError;

static void set_unsupported_operation(void) {
    PyObject* iomod;
    PyObject* exc;

    iomod = PyImport_ImportModule("io");
    if (NULL == iomod) {
        return;
    }

    exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
    if (NULL == exc) {
        Py_DECREF(iomod);
        return;
    }

    PyErr_SetNone(exc);
    Py_DECREF(exc);
    Py_DECREF(iomod);
}

static void reader_dealloc(ZstdCompressionReader* self) {
    Py_XDECREF(self->compressor);
    Py_XDECREF(self->reader);

    if (self->buffer.buf) {
        PyBuffer_Release(&self->buffer);
        memset(&self->buffer, 0, sizeof(self->buffer));
    }

    PyObject_Del(self);
}

static ZstdCompressionReader* reader_enter(ZstdCompressionReader* self) {
    size_t zresult;

    if (self->entered) {
        PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
        return NULL;
    }

    zresult = ZSTD_CCtx_setPledgedSrcSize(self->compressor->cctx, self->sourceSize);
    if (ZSTD_isError(zresult)) {
        PyErr_Format(ZstdError, "error setting source size: %s",
            ZSTD_getErrorName(zresult));
        return NULL;
    }

    self->entered = 1;

    Py_INCREF(self);
    return self;
}

static PyObject* reader_exit(ZstdCompressionReader* self, PyObject* args) {
    PyObject* exc_type;
    PyObject* exc_value;
    PyObject* exc_tb;

    if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
        return NULL;
    }

    self->entered = 0;
    self->closed = 1;

    /* Release resources associated with source. */
    Py_CLEAR(self->reader);
    if (self->buffer.buf) {
        PyBuffer_Release(&self->buffer);
        memset(&self->buffer, 0, sizeof(self->buffer));
    }

    Py_CLEAR(self->compressor);

    Py_RETURN_FALSE;
}

static PyObject* reader_readable(ZstdCompressionReader* self) {
    Py_RETURN_TRUE;
}

static PyObject* reader_writable(ZstdCompressionReader* self) {
    Py_RETURN_FALSE;
}

static PyObject* reader_seekable(ZstdCompressionReader* self) {
    Py_RETURN_FALSE;
}

static PyObject* reader_readline(PyObject* self, PyObject* args) {
    set_unsupported_operation();
    return NULL;
}

static PyObject* reader_readlines(PyObject* self, PyObject* args) {
    set_unsupported_operation();
    return NULL;
}

static PyObject* reader_write(PyObject* self, PyObject* args) {
    PyErr_SetString(PyExc_OSError, "stream is not writable");
    return NULL;
}

static PyObject* reader_writelines(PyObject* self, PyObject* args) {
    PyErr_SetString(PyExc_OSError, "stream is not writable");
    return NULL;
}

static PyObject* reader_isatty(PyObject* self) {
    Py_RETURN_FALSE;
}

static PyObject* reader_flush(PyObject* self) {
    Py_RETURN_NONE;
}

static PyObject* reader_close(ZstdCompressionReader* self) {
    self->closed = 1;
    Py_RETURN_NONE;
}

static PyObject* reader_closed(ZstdCompressionReader* self) {
    if (self->closed) {
        Py_RETURN_TRUE;
    }
    else {
        Py_RETURN_FALSE;
    }
}

static PyObject* reader_tell(ZstdCompressionReader* self) {
    /* TODO should this raise OSError since stream isn't seekable? */
    return PyLong_FromUnsignedLongLong(self->bytesCompressed);
}

static PyObject* reader_read(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
    static char* kwlist[] = {
        "size",
        NULL
    };

    Py_ssize_t size = -1;
    PyObject* result = NULL;
    char* resultBuffer;
    Py_ssize_t resultSize;
    size_t zresult;
    size_t oldPos;

    if (!self->entered) {
        PyErr_SetString(ZstdError, "read() must be called from an active context manager");
        return NULL;
    }

    if (self->closed) {
        PyErr_SetString(PyExc_ValueError, "stream is closed");
        return NULL;
    }

    if (self->finishedOutput) {
        return PyBytes_FromStringAndSize("", 0);
    }

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "n", kwlist, &size)) {
        return NULL;
    }

    if (size < 1) {
        PyErr_SetString(PyExc_ValueError, "cannot read negative or size 0 amounts");
        return NULL;
    }

    result = PyBytes_FromStringAndSize(NULL, size);
    if (NULL == result) {
        return NULL;
    }

    PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);

    self->output.dst = resultBuffer;
    self->output.size = resultSize;
    self->output.pos = 0;

readinput:

    /* If we have data left over, consume it. */
    if (self->input.pos < self->input.size) {
        oldPos = self->output.pos;

        Py_BEGIN_ALLOW_THREADS
        zresult = ZSTD_compress_generic(self->compressor->cctx,
            &self->output, &self->input, ZSTD_e_continue);

        Py_END_ALLOW_THREADS

        self->bytesCompressed += self->output.pos - oldPos;

        /* Input exhausted. Clear out state tracking. */
        if (self->input.pos == self->input.size) {
            memset(&self->input, 0, sizeof(self->input));
            Py_CLEAR(self->readResult);

            if (self->buffer.buf) {
                self->finishedInput = 1;
            }
        }

        if (ZSTD_isError(zresult)) {
            PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
            return NULL;
        }

        if (self->output.pos) {
            /* If no more room in output, emit it. */
            if (self->output.pos == self->output.size) {
                memset(&self->output, 0, sizeof(self->output));
                return result;
            }

            /*
             * There is room in the output. We fall through to below, which will either
             * get more input for us or will attempt to end the stream.
             */
        }

        /* Fall through to gather more input. */
    }

    if (!self->finishedInput) {
        if (self->reader) {
            Py_buffer buffer;

            assert(self->readResult == NULL);
            self->readResult = PyObject_CallMethod(self->reader, "read",
                "k", self->readSize);
            if (self->readResult == NULL) {
                return NULL;
            }

            memset(&buffer, 0, sizeof(buffer));

            if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
                return NULL;
            }

            /* EOF */
            if (0 == buffer.len) {
                self->finishedInput = 1;
                Py_CLEAR(self->readResult);
            }
            else {
                self->input.src = buffer.buf;
                self->input.size = buffer.len;
                self->input.pos = 0;
            }

            PyBuffer_Release(&buffer);
        }
        else {
            assert(self->buffer.buf);

            self->input.src = self->buffer.buf;
            self->input.size = self->buffer.len;
            self->input.pos = 0;
        }
    }

    if (self->input.size) {
        goto readinput;
    }

    /* Else EOF */
    oldPos = self->output.pos;

    zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
        &self->input, ZSTD_e_end);

    self->bytesCompressed += self->output.pos - oldPos;

    if (ZSTD_isError(zresult)) {
        PyErr_Format(ZstdError, "error ending compression stream: %s",
            ZSTD_getErrorName(zresult));
        return NULL;
    }

    assert(self->output.pos);

    if (0 == zresult) {
        self->finishedOutput = 1;
    }

    if (safe_pybytes_resize(&result, self->output.pos)) {
        Py_XDECREF(result);
        return NULL;
    }

    memset(&self->output, 0, sizeof(self->output));

    return result;
}

static PyObject* reader_readall(PyObject* self) {
    PyErr_SetNone(PyExc_NotImplementedError);
    return NULL;
}

static PyObject* reader_iter(PyObject* self) {
    set_unsupported_operation();
    return NULL;
}

static PyObject* reader_iternext(PyObject* self) {
    set_unsupported_operation();
    return NULL;
}

static PyMethodDef reader_methods[] = {
    { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
        PyDoc_STR("Enter a compression context") },
    { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
        PyDoc_STR("Exit a compression context") },
    { "close", (PyCFunction)reader_close, METH_NOARGS,
        PyDoc_STR("Close the stream so it cannot perform any more operations") },
    { "closed", (PyCFunction)reader_closed, METH_NOARGS,
        PyDoc_STR("Whether stream is closed") },
    { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
    { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
    { "readable", (PyCFunction)reader_readable, METH_NOARGS,
        PyDoc_STR("Returns True") },
    { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data") },
    { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
    { "readline", (PyCFunction)reader_readline, METH_VARARGS, PyDoc_STR("Not implemented") },
    { "readlines", (PyCFunction)reader_readlines, METH_VARARGS, PyDoc_STR("Not implemented") },
    { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
        PyDoc_STR("Returns False") },
    { "tell", (PyCFunction)reader_tell, METH_NOARGS,
        PyDoc_STR("Returns current number of bytes compressed") },
    { "writable", (PyCFunction)reader_writable, METH_NOARGS,
        PyDoc_STR("Returns False") },
    { "write", reader_write, METH_VARARGS, PyDoc_STR("Raises OSError") },
    { "writelines", reader_writelines, METH_VARARGS, PyDoc_STR("Not implemented") },
    { NULL, NULL }
};

PyTypeObject ZstdCompressionReaderType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "zstd.ZstdCompressionReader", /* tp_name */
    sizeof(ZstdCompressionReader), /* tp_basicsize */
    0, /* tp_itemsize */
    (destructor)reader_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT, /* tp_flags */
    0, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    reader_iter, /* tp_iter */
    reader_iternext, /* tp_iternext */
    reader_methods, /* tp_methods */
    0, /* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    0, /* tp_init */
    0, /* tp_alloc */
    PyType_GenericNew, /* tp_new */
};

void compressionreader_module_init(PyObject* mod) {
    /* TODO make reader a sub-class of io.RawIOBase */

    Py_TYPE(&ZstdCompressionReaderType) = &PyType_Type;
    if (PyType_Ready(&ZstdCompressionReaderType) < 0) {
        return;
    }
}
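The type above is a file-object-style wrapper: read() drives the readinput loop (drain leftover input, pull more data from the source object or buffer, compress with ZSTD_e_continue, and finish the frame with ZSTD_e_end), and it must be called from inside the context manager. A minimal usage sketch, assuming the package imports as `zstandard` and that the compressor hands out this reader through a `stream_reader()` method; the constructor side is not part of this hunk, so those names are assumptions:

```python
# Sketch only: `zstandard` package name and stream_reader() are assumed here.
import io
import zstandard as zstd

source = io.BytesIO(b'data to compress' * 1000)
cctx = zstd.ZstdCompressor()

compressed = bytearray()
# read() must be called from an active context manager (see reader_read()).
with cctx.stream_reader(source) as reader:
    while True:
        chunk = reader.read(16384)  # requested size must be >= 1
        if not chunk:               # b'' once the frame is finished
            break
        compressed.extend(chunk)
    # tell() reports how many compressed bytes have been emitted so far.
    print('compressed bytes emitted:', reader.tell())
```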
@@ -0,0 +1,459 b''
/**
 * Copyright (c) 2017-present, Gregory Szorc
 * All rights reserved.
 *
 * This software may be modified and distributed under the terms
 * of the BSD license. See the LICENSE file for details.
 */

#include "python-zstandard.h"

extern PyObject* ZstdError;

static void set_unsupported_operation(void) {
    PyObject* iomod;
    PyObject* exc;

    iomod = PyImport_ImportModule("io");
    if (NULL == iomod) {
        return;
    }

    exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
    if (NULL == exc) {
        Py_DECREF(iomod);
        return;
    }

    PyErr_SetNone(exc);
    Py_DECREF(exc);
    Py_DECREF(iomod);
}

static void reader_dealloc(ZstdDecompressionReader* self) {
    Py_XDECREF(self->decompressor);
    Py_XDECREF(self->reader);

    if (self->buffer.buf) {
        PyBuffer_Release(&self->buffer);
    }

    PyObject_Del(self);
}

static ZstdDecompressionReader* reader_enter(ZstdDecompressionReader* self) {
    if (self->entered) {
        PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
        return NULL;
    }

    if (ensure_dctx(self->decompressor, 1)) {
        return NULL;
    }

    self->entered = 1;

    Py_INCREF(self);
    return self;
}

static PyObject* reader_exit(ZstdDecompressionReader* self, PyObject* args) {
    PyObject* exc_type;
    PyObject* exc_value;
    PyObject* exc_tb;

    if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
        return NULL;
    }

    self->entered = 0;
    self->closed = 1;

    /* Release resources. */
    Py_CLEAR(self->reader);
    if (self->buffer.buf) {
        PyBuffer_Release(&self->buffer);
        memset(&self->buffer, 0, sizeof(self->buffer));
    }

    Py_CLEAR(self->decompressor);

    Py_RETURN_FALSE;
}

static PyObject* reader_readable(PyObject* self) {
    Py_RETURN_TRUE;
}

static PyObject* reader_writable(PyObject* self) {
    Py_RETURN_FALSE;
}

static PyObject* reader_seekable(PyObject* self) {
    Py_RETURN_TRUE;
}

static PyObject* reader_close(ZstdDecompressionReader* self) {
    self->closed = 1;
    Py_RETURN_NONE;
}

static PyObject* reader_closed(ZstdDecompressionReader* self) {
    if (self->closed) {
        Py_RETURN_TRUE;
    }
    else {
        Py_RETURN_FALSE;
    }
}

static PyObject* reader_flush(PyObject* self) {
    Py_RETURN_NONE;
}

static PyObject* reader_isatty(PyObject* self) {
    Py_RETURN_FALSE;
}

static PyObject* reader_read(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
    static char* kwlist[] = {
        "size",
        NULL
    };

    Py_ssize_t size = -1;
    PyObject* result = NULL;
    char* resultBuffer;
    Py_ssize_t resultSize;
    ZSTD_outBuffer output;
    size_t zresult;

    if (!self->entered) {
        PyErr_SetString(ZstdError, "read() must be called from an active context manager");
        return NULL;
    }

    if (self->closed) {
        PyErr_SetString(PyExc_ValueError, "stream is closed");
        return NULL;
    }

    if (self->finishedOutput) {
        return PyBytes_FromStringAndSize("", 0);
    }

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "n", kwlist, &size)) {
        return NULL;
    }

    if (size < 1) {
        PyErr_SetString(PyExc_ValueError, "cannot read negative or size 0 amounts");
        return NULL;
    }

    result = PyBytes_FromStringAndSize(NULL, size);
    if (NULL == result) {
        return NULL;
    }

    PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);

    output.dst = resultBuffer;
    output.size = resultSize;
    output.pos = 0;

readinput:

    /* Consume input data left over from last time. */
    if (self->input.pos < self->input.size) {
        Py_BEGIN_ALLOW_THREADS
        zresult = ZSTD_decompress_generic(self->decompressor->dctx,
            &output, &self->input);
        Py_END_ALLOW_THREADS

        /* Input exhausted. Clear our state tracking. */
        if (self->input.pos == self->input.size) {
            memset(&self->input, 0, sizeof(self->input));
            Py_CLEAR(self->readResult);

            if (self->buffer.buf) {
                self->finishedInput = 1;
            }
        }

        if (ZSTD_isError(zresult)) {
            PyErr_Format(ZstdError, "zstd decompress error: %s", ZSTD_getErrorName(zresult));
            return NULL;
        }
        else if (0 == zresult) {
            self->finishedOutput = 1;
        }

        /* We fulfilled the full read request. Emit it. */
        if (output.pos && output.pos == output.size) {
            self->bytesDecompressed += output.size;
            return result;
        }

        /*
         * There is more room in the output. Fall through to try to collect
         * more data so we can try to fill the output.
         */
    }

    if (!self->finishedInput) {
        if (self->reader) {
            Py_buffer buffer;

            assert(self->readResult == NULL);
            self->readResult = PyObject_CallMethod(self->reader, "read",
                "k", self->readSize);
            if (NULL == self->readResult) {
                return NULL;
            }

            memset(&buffer, 0, sizeof(buffer));

            if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
                return NULL;
            }

            /* EOF */
            if (0 == buffer.len) {
                self->finishedInput = 1;
                Py_CLEAR(self->readResult);
            }
            else {
                self->input.src = buffer.buf;
                self->input.size = buffer.len;
                self->input.pos = 0;
            }

            PyBuffer_Release(&buffer);
        }
        else {
            assert(self->buffer.buf);
            /*
             * We should only get here once since above block will exhaust
             * source buffer until finishedInput is set.
             */
            assert(self->input.src == NULL);

            self->input.src = self->buffer.buf;
            self->input.size = self->buffer.len;
            self->input.pos = 0;
        }
    }

    if (self->input.size) {
        goto readinput;
    }

    /* EOF */
    self->bytesDecompressed += output.pos;

    if (safe_pybytes_resize(&result, output.pos)) {
        Py_XDECREF(result);
        return NULL;
    }

    return result;
}

static PyObject* reader_readall(PyObject* self) {
    PyErr_SetNone(PyExc_NotImplementedError);
    return NULL;
}

static PyObject* reader_readline(PyObject* self) {
    PyErr_SetNone(PyExc_NotImplementedError);
    return NULL;
}

static PyObject* reader_readlines(PyObject* self) {
    PyErr_SetNone(PyExc_NotImplementedError);
    return NULL;
}

static PyObject* reader_seek(ZstdDecompressionReader* self, PyObject* args) {
    Py_ssize_t pos;
    int whence = 0;
    unsigned long long readAmount = 0;
    size_t defaultOutSize = ZSTD_DStreamOutSize();

    if (!self->entered) {
        PyErr_SetString(ZstdError, "seek() must be called from an active context manager");
        return NULL;
    }

    if (self->closed) {
        PyErr_SetString(PyExc_ValueError, "stream is closed");
        return NULL;
    }

    if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &whence)) {
        return NULL;
    }

    if (whence == SEEK_SET) {
        if (pos < 0) {
            PyErr_SetString(PyExc_ValueError,
                "cannot seek to negative position with SEEK_SET");
            return NULL;
        }

        if ((unsigned long long)pos < self->bytesDecompressed) {
            PyErr_SetString(PyExc_ValueError,
                "cannot seek zstd decompression stream backwards");
            return NULL;
        }

        readAmount = pos - self->bytesDecompressed;
    }
    else if (whence == SEEK_CUR) {
        if (pos < 0) {
            PyErr_SetString(PyExc_ValueError,
                "cannot seek zstd decompression stream backwards");
            return NULL;
        }

        readAmount = pos;
    }
    else if (whence == SEEK_END) {
        /* We /could/ support this with pos==0. But let's not do that until someone
           needs it. */
        PyErr_SetString(PyExc_ValueError,
            "zstd decompression streams cannot be seeked with SEEK_END");
        return NULL;
    }

    /* It is a bit inefficient to do this via the Python API. But since there
       is a bit of state tracking involved to read from this type, it is the
       easiest to implement. */
    while (readAmount) {
        Py_ssize_t readSize;
        PyObject* readResult = PyObject_CallMethod((PyObject*)self, "read", "K",
            readAmount < defaultOutSize ? readAmount : defaultOutSize);

        if (!readResult) {
            return NULL;
        }

        readSize = PyBytes_GET_SIZE(readResult);

        /* Empty read means EOF. */
        if (!readSize) {
            break;
        }

        readAmount -= readSize;
    }

    return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
}

static PyObject* reader_tell(ZstdDecompressionReader* self) {
    /* TODO should this raise OSError since stream isn't seekable? */
    return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
}

static PyObject* reader_write(PyObject* self, PyObject* args) {
    set_unsupported_operation();
    return NULL;
}

static PyObject* reader_writelines(PyObject* self, PyObject* args) {
    set_unsupported_operation();
    return NULL;
}

static PyObject* reader_iter(PyObject* self) {
    PyErr_SetNone(PyExc_NotImplementedError);
    return NULL;
}

static PyObject* reader_iternext(PyObject* self) {
    PyErr_SetNone(PyExc_NotImplementedError);
    return NULL;
}

static PyMethodDef reader_methods[] = {
    { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
        PyDoc_STR("Enter a compression context") },
    { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
        PyDoc_STR("Exit a compression context") },
    { "close", (PyCFunction)reader_close, METH_NOARGS,
        PyDoc_STR("Close the stream so it cannot perform any more operations") },
    { "closed", (PyCFunction)reader_closed, METH_NOARGS,
        PyDoc_STR("Whether stream is closed") },
    { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
    { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
    { "readable", (PyCFunction)reader_readable, METH_NOARGS,
        PyDoc_STR("Returns True") },
    { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS,
        PyDoc_STR("read compressed data") },
    { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
    { "readline", (PyCFunction)reader_readline, METH_NOARGS, PyDoc_STR("Not implemented") },
    { "readlines", (PyCFunction)reader_readlines, METH_NOARGS, PyDoc_STR("Not implemented") },
    { "seek", (PyCFunction)reader_seek, METH_VARARGS, PyDoc_STR("Seek the stream") },
    { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
        PyDoc_STR("Returns True") },
    { "tell", (PyCFunction)reader_tell, METH_NOARGS,
        PyDoc_STR("Returns current number of bytes compressed") },
    { "writable", (PyCFunction)reader_writable, METH_NOARGS,
        PyDoc_STR("Returns False") },
    { "write", (PyCFunction)reader_write, METH_VARARGS, PyDoc_STR("unsupported operation") },
    { "writelines", (PyCFunction)reader_writelines, METH_VARARGS, PyDoc_STR("unsupported operation") },
    { NULL, NULL }
};

PyTypeObject ZstdDecompressionReaderType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "zstd.ZstdDecompressionReader", /* tp_name */
    sizeof(ZstdDecompressionReader), /* tp_basicsize */
    0, /* tp_itemsize */
    (destructor)reader_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT, /* tp_flags */
    0, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    reader_iter, /* tp_iter */
    reader_iternext, /* tp_iternext */
    reader_methods, /* tp_methods */
    0, /* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    0, /* tp_init */
    0, /* tp_alloc */
    PyType_GenericNew, /* tp_new */
};


void decompressionreader_module_init(PyObject* mod) {
    /* TODO make reader a sub-class of io.RawIOBase */

    Py_TYPE(&ZstdDecompressionReaderType) = &PyType_Type;
    if (PyType_Ready(&ZstdDecompressionReaderType) < 0) {
        return;
    }
}
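The decompressing reader mirrors the compressing one but reports itself as seekable in the forward direction only: reader_seek() emulates a seek by repeatedly calling read() and discarding the decompressed output until the target offset is reached, so backwards seeks and SEEK_END raise ValueError. A sketch of that behavior, again assuming the `zstandard` package name and a `stream_reader()` constructor on the decompressor, neither of which appears in this hunk:

```python
# Sketch only: zstandard / stream_reader() / compress() names are assumed.
import io
import zstandard as zstd

cctx = zstd.ZstdCompressor()
frame = cctx.compress(b'0123456789' * 1000)

dctx = zstd.ZstdDecompressor()
with dctx.stream_reader(io.BytesIO(frame)) as reader:
    head = reader.read(10)  # first 10 decompressed bytes
    # seek() only moves forward; internally it read()s and throws away
    # decompressed data until the requested offset (see reader_seek()).
    reader.seek(100)
    print(head, reader.tell())  # tell() counts decompressed bytes: 100
```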
@@ -0,0 +1,62 b''
# Copyright (c) 2017-present, Gregory Szorc
# All rights reserved.
#
# This software may be modified and distributed under the terms
# of the BSD license. See the LICENSE file for details.

"""Python interface to the Zstandard (zstd) compression library."""

from __future__ import absolute_import, unicode_literals

# This module serves 2 roles:
#
# 1) Export the C or CFFI "backend" through a central module.
# 2) Implement additional functionality built on top of C or CFFI backend.

import os
import platform

# Some Python implementations don't support C extensions. That's why we have
# a CFFI implementation in the first place. The code here imports one of our
# "backends" then re-exports the symbols through this module. For convenience,
# we support falling back to the CFFI backend if the C extension can't be
# imported. But for performance reasons, we only do this on unknown Python
# implementations. Notably, for CPython we require the C extension by default.
# Because someone will inevitably want special behavior, the behavior is
# configurable via an environment variable. A potentially better way to handle
# this is to import a special ``__importpolicy__`` module or something
# defining a variable and `setup.py` could write the file with whatever
# policy was specified at build time. Until someone needs it, we go with
# the hacky but simple environment variable approach.
_module_policy = os.environ.get('PYTHON_ZSTANDARD_IMPORT_POLICY', 'default')

if _module_policy == 'default':
    if platform.python_implementation() in ('CPython',):
        from zstd import *
        backend = 'cext'
    elif platform.python_implementation() in ('PyPy',):
        from zstd_cffi import *
        backend = 'cffi'
    else:
        try:
            from zstd import *
            backend = 'cext'
        except ImportError:
            from zstd_cffi import *
            backend = 'cffi'
elif _module_policy == 'cffi_fallback':
    try:
        from zstd import *
        backend = 'cext'
    except ImportError:
        from zstd_cffi import *
        backend = 'cffi'
elif _module_policy == 'cext':
    from zstd import *
    backend = 'cext'
elif _module_policy == 'cffi':
    from zstd_cffi import *
    backend = 'cffi'
else:
    raise ImportError('unknown module import policy: %s; use default, cffi_fallback, '
                      'cext, or cffi' % _module_policy)
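Because the backend is chosen at import time, PYTHON_ZSTANDARD_IMPORT_POLICY has to be set before the package is first imported; the chosen backend is then visible as the module-level `backend` attribute. A small sketch of that, with the `zstandard` package name assumed (it is not named in this hunk):

```python
# Sketch only: the import policy must be decided before the first import of
# the package, e.g. via the environment or at the top of the entry point.
import os
os.environ['PYTHON_ZSTANDARD_IMPORT_POLICY'] = 'cffi_fallback'

import zstandard  # package name assumed

# `backend` records which implementation actually got imported.
print(zstandard.backend)  # 'cext' if the C extension loaded, else 'cffi'
```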
@@ -0,0 +1,339 b''
                    GNU GENERAL PUBLIC LICENSE
                       Version 2, June 1991

 Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

  The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.

  When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.

  To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.

  For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.

  We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.

  Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.

  Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.

  The precise terms and conditions for copying, distribution and
modification follow.

                    GNU GENERAL PUBLIC LICENSE
   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION

  0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".

  Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.

  1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.

  You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.

  2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:

    a) You must cause the modified files to carry prominent notices
    stating that you changed the files and the date of any change.

    b) You must cause any work that you distribute or publish, that in
    whole or in part contains or is derived from the Program or any
    part thereof, to be licensed as a whole at no charge to all third
    parties under the terms of this License.

    c) If the modified program normally reads commands interactively
    when run, you must cause it, when started running for such
    interactive use in the most ordinary way, to print or display an
    announcement including an appropriate copyright notice and a
    notice that there is no warranty (or else, saying that you provide
    a warranty) and that users may redistribute the program under
    these conditions, and telling the user how to view a copy of this
    License. (Exception: if the Program itself is interactive but
    does not normally print such an announcement, your work based on
    the Program is not required to print an announcement.)

  These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.

  Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.

  In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.

  3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:

    a) Accompany it with the complete corresponding machine-readable
    source code, which must be distributed under the terms of Sections
    1 and 2 above on a medium customarily used for software interchange; or,

    b) Accompany it with a written offer, valid for at least three
    years, to give any third party, for a charge no more than your
    cost of physically performing source distribution, a complete
    machine-readable copy of the corresponding source code, to be
    distributed under the terms of Sections 1 and 2 above on a medium
    customarily used for software interchange; or,

    c) Accompany it with the information you received as to the offer
    to distribute corresponding source code. (This alternative is
    allowed only for noncommercial distribution and only if you
    received the program in object code or executable form with such
    an offer, in accord with Subsection b above.)

  The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.

  If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.

  4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.

  5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.

  6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.

  7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.

  If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.

  It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.

  This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.

  8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.

  9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

  Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.

  10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.

                            NO WARRANTY

  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.

  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year> <name of author>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

Also add information on how to contact you by electronic and paper mail.

If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:

    Gnomovision version 69, Copyright (C) year name of author
    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.

You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:

    Yoyodyne, Inc., hereby disclaims all copyright interest in the program
    `Gnomovision' (which makes passes at compilers) written by James Hacker.

    <signature of Ty Coon>, 1 April 1989
    Ty Coon, President of Vice

This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.
\ No newline at end of file
@@ -0,0 +1,111 b'' | |||||
|
1 | /* | |||
|
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
|
3 | * All rights reserved. | |||
|
4 | * | |||
|
5 | * This source code is licensed under both the BSD-style license (found in the | |||
|
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
|
7 | * in the COPYING file in the root directory of this source tree). | |||
|
8 | * You may select, at your option, one of the above-listed licenses. | |||
|
9 | */ | |||
|
10 | ||||
|
11 | #ifndef ZSTD_COMPILER_H | |||
|
12 | #define ZSTD_COMPILER_H | |||
|
13 | ||||
|
14 | /*-******************************************************* | |||
|
15 | * Compiler specifics | |||
|
16 | *********************************************************/ | |||
|
17 | /* force inlining */ | |||
|
18 | #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ | |||
|
19 | # define INLINE_KEYWORD inline | |||
|
20 | #else | |||
|
21 | # define INLINE_KEYWORD | |||
|
22 | #endif | |||
|
23 | ||||
|
24 | #if defined(__GNUC__) | |||
|
25 | # define FORCE_INLINE_ATTR __attribute__((always_inline)) | |||
|
26 | #elif defined(_MSC_VER) | |||
|
27 | # define FORCE_INLINE_ATTR __forceinline | |||
|
28 | #else | |||
|
29 | # define FORCE_INLINE_ATTR | |||
|
30 | #endif | |||
|
31 | ||||
|
32 | /** | |||
|
33 | * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant | |||
|
34 |  * parameters. They must be inlined for the compiler to eliminate the constant | |||
|
35 | * branches. | |||
|
36 | */ | |||
|
37 | #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR | |||
|
38 | /** | |||
|
39 | * HINT_INLINE is used to help the compiler generate better code. It is *not* | |||
|
40 | * used for "templates", so it can be tweaked based on the compilers | |||
|
41 | * performance. | |||
|
42 | * | |||
|
43 | * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the | |||
|
44 | * always_inline attribute. | |||
|
45 | * | |||
|
46 | * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline | |||
|
47 | * attribute. | |||
|
48 | */ | |||
|
49 | #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 | |||
|
50 | # define HINT_INLINE static INLINE_KEYWORD | |||
|
51 | #else | |||
|
52 | # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR | |||
|
53 | #endif | |||
|
54 | ||||
|
55 | /* force no inlining */ | |||
|
56 | #ifdef _MSC_VER | |||
|
57 | # define FORCE_NOINLINE static __declspec(noinline) | |||
|
58 | #else | |||
|
59 | # ifdef __GNUC__ | |||
|
60 | # define FORCE_NOINLINE static __attribute__((__noinline__)) | |||
|
61 | # else | |||
|
62 | # define FORCE_NOINLINE static | |||
|
63 | # endif | |||
|
64 | #endif | |||
|
65 | ||||
|
66 | /* target attribute */ | |||
|
67 | #ifndef __has_attribute | |||
|
68 | #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ | |||
|
69 | #endif | |||
|
70 | #if defined(__GNUC__) | |||
|
71 | # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) | |||
|
72 | #else | |||
|
73 | # define TARGET_ATTRIBUTE(target) | |||
|
74 | #endif | |||
|
75 | ||||
|
76 | /* Enable runtime BMI2 dispatch based on the CPU. | |||
|
77 | * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. | |||
|
78 | */ | |||
|
79 | #ifndef DYNAMIC_BMI2 | |||
|
80 | #if (defined(__clang__) && __has_attribute(__target__)) \ | |||
|
81 | || (defined(__GNUC__) \ | |||
|
82 | && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) \ | |||
|
83 | && (defined(__x86_64__) || defined(_M_X86)) \ | |||
|
84 | && !defined(__BMI2__) | |||
|
85 | # define DYNAMIC_BMI2 1 | |||
|
86 | #else | |||
|
87 | # define DYNAMIC_BMI2 0 | |||
|
88 | #endif | |||
|
89 | #endif | |||
|
90 | ||||
|
91 | /* prefetch */ | |||
|
92 | #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ | |||
|
93 | # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ | |||
|
94 | # define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) | |||
|
95 | #elif defined(__GNUC__) | |||
|
96 | # define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) | |||
|
97 | #else | |||
|
98 | # define PREFETCH(ptr) /* disabled */ | |||
|
99 | #endif | |||
|
100 | ||||
|
101 | /* disable warnings */ | |||
|
102 | #ifdef _MSC_VER /* Visual Studio */ | |||
|
103 | # include <intrin.h> /* For Visual 2005 */ | |||
|
104 | # pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ | |||
|
105 | # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ | |||
|
106 | # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ | |||
|
107 | # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ | |||
|
108 | # pragma warning(disable : 4324) /* disable: C4324: padded structure */ | |||
|
109 | #endif | |||
|
110 | ||||
|
111 | #endif /* ZSTD_COMPILER_H */ |
@@ -0,0 +1,216 b'' | |||||
|
1 | /* | |||
|
2 | * Copyright (c) 2018-present, Facebook, Inc. | |||
|
3 | * All rights reserved. | |||
|
4 | * | |||
|
5 | * This source code is licensed under both the BSD-style license (found in the | |||
|
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
|
7 | * in the COPYING file in the root directory of this source tree). | |||
|
8 | * You may select, at your option, one of the above-listed licenses. | |||
|
9 | */ | |||
|
10 | ||||
|
11 | #ifndef ZSTD_COMMON_CPU_H | |||
|
12 | #define ZSTD_COMMON_CPU_H | |||
|
13 | ||||
|
14 | /** | |||
|
15 | * Implementation taken from folly/CpuId.h | |||
|
16 | * https://github.com/facebook/folly/blob/master/folly/CpuId.h | |||
|
17 | */ | |||
|
18 | ||||
|
19 | #include <string.h> | |||
|
20 | ||||
|
21 | #include "mem.h" | |||
|
22 | ||||
|
23 | #ifdef _MSC_VER | |||
|
24 | #include <intrin.h> | |||
|
25 | #endif | |||
|
26 | ||||
|
27 | typedef struct { | |||
|
28 | U32 f1c; | |||
|
29 | U32 f1d; | |||
|
30 | U32 f7b; | |||
|
31 | U32 f7c; | |||
|
32 | } ZSTD_cpuid_t; | |||
|
33 | ||||
|
34 | MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { | |||
|
35 | U32 f1c = 0; | |||
|
36 | U32 f1d = 0; | |||
|
37 | U32 f7b = 0; | |||
|
38 | U32 f7c = 0; | |||
|
39 | #ifdef _MSC_VER | |||
|
40 | int reg[4]; | |||
|
41 | __cpuid((int*)reg, 0); | |||
|
42 | { | |||
|
43 | int const n = reg[0]; | |||
|
44 | if (n >= 1) { | |||
|
45 | __cpuid((int*)reg, 1); | |||
|
46 | f1c = (U32)reg[2]; | |||
|
47 | f1d = (U32)reg[3]; | |||
|
48 | } | |||
|
49 | if (n >= 7) { | |||
|
50 | __cpuidex((int*)reg, 7, 0); | |||
|
51 | f7b = (U32)reg[1]; | |||
|
52 | f7c = (U32)reg[2]; | |||
|
53 | } | |||
|
54 | } | |||
|
55 | #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) | |||
|
56 |   /* The following block is like the normal cpuid branch below, but gcc | |||
|
57 | * reserves ebx for use of its pic register so we must specially | |||
|
58 | * handle the save and restore to avoid clobbering the register | |||
|
59 | */ | |||
|
60 | U32 n; | |||
|
61 | __asm__( | |||
|
62 | "pushl %%ebx\n\t" | |||
|
63 | "cpuid\n\t" | |||
|
64 | "popl %%ebx\n\t" | |||
|
65 | : "=a"(n) | |||
|
66 | : "a"(0) | |||
|
67 | : "ecx", "edx"); | |||
|
68 | if (n >= 1) { | |||
|
69 | U32 f1a; | |||
|
70 | __asm__( | |||
|
71 | "pushl %%ebx\n\t" | |||
|
72 | "cpuid\n\t" | |||
|
73 | "popl %%ebx\n\t" | |||
|
74 | : "=a"(f1a), "=c"(f1c), "=d"(f1d) | |||
|
75 | : "a"(1) | |||
|
76 | :); | |||
|
77 | } | |||
|
78 | if (n >= 7) { | |||
|
79 | __asm__( | |||
|
80 | "pushl %%ebx\n\t" | |||
|
81 | "cpuid\n\t" | |||
|
82 | "movl %%ebx, %%eax\n\r" | |||
|
83 | "popl %%ebx" | |||
|
84 | : "=a"(f7b), "=c"(f7c) | |||
|
85 | : "a"(7), "c"(0) | |||
|
86 | : "edx"); | |||
|
87 | } | |||
|
88 | #elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) | |||
|
89 | U32 n; | |||
|
90 | __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); | |||
|
91 | if (n >= 1) { | |||
|
92 | U32 f1a; | |||
|
93 | __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); | |||
|
94 | } | |||
|
95 | if (n >= 7) { | |||
|
96 | U32 f7a; | |||
|
97 | __asm__("cpuid" | |||
|
98 | : "=a"(f7a), "=b"(f7b), "=c"(f7c) | |||
|
99 | : "a"(7), "c"(0) | |||
|
100 | : "edx"); | |||
|
101 | } | |||
|
102 | #endif | |||
|
103 | { | |||
|
104 | ZSTD_cpuid_t cpuid; | |||
|
105 | cpuid.f1c = f1c; | |||
|
106 | cpuid.f1d = f1d; | |||
|
107 | cpuid.f7b = f7b; | |||
|
108 | cpuid.f7c = f7c; | |||
|
109 | return cpuid; | |||
|
110 | } | |||
|
111 | } | |||
|
112 | ||||
|
113 | #define X(name, r, bit) \ | |||
|
114 | MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ | |||
|
115 | return ((cpuid.r) & (1U << bit)) != 0; \ | |||
|
116 | } | |||
|
117 | ||||
|
118 | /* cpuid(1): Processor Info and Feature Bits. */ | |||
|
119 | #define C(name, bit) X(name, f1c, bit) | |||
|
120 | C(sse3, 0) | |||
|
121 | C(pclmuldq, 1) | |||
|
122 | C(dtes64, 2) | |||
|
123 | C(monitor, 3) | |||
|
124 | C(dscpl, 4) | |||
|
125 | C(vmx, 5) | |||
|
126 | C(smx, 6) | |||
|
127 | C(eist, 7) | |||
|
128 | C(tm2, 8) | |||
|
129 | C(ssse3, 9) | |||
|
130 | C(cnxtid, 10) | |||
|
131 | C(fma, 12) | |||
|
132 | C(cx16, 13) | |||
|
133 | C(xtpr, 14) | |||
|
134 | C(pdcm, 15) | |||
|
135 | C(pcid, 17) | |||
|
136 | C(dca, 18) | |||
|
137 | C(sse41, 19) | |||
|
138 | C(sse42, 20) | |||
|
139 | C(x2apic, 21) | |||
|
140 | C(movbe, 22) | |||
|
141 | C(popcnt, 23) | |||
|
142 | C(tscdeadline, 24) | |||
|
143 | C(aes, 25) | |||
|
144 | C(xsave, 26) | |||
|
145 | C(osxsave, 27) | |||
|
146 | C(avx, 28) | |||
|
147 | C(f16c, 29) | |||
|
148 | C(rdrand, 30) | |||
|
149 | #undef C | |||
|
150 | #define D(name, bit) X(name, f1d, bit) | |||
|
151 | D(fpu, 0) | |||
|
152 | D(vme, 1) | |||
|
153 | D(de, 2) | |||
|
154 | D(pse, 3) | |||
|
155 | D(tsc, 4) | |||
|
156 | D(msr, 5) | |||
|
157 | D(pae, 6) | |||
|
158 | D(mce, 7) | |||
|
159 | D(cx8, 8) | |||
|
160 | D(apic, 9) | |||
|
161 | D(sep, 11) | |||
|
162 | D(mtrr, 12) | |||
|
163 | D(pge, 13) | |||
|
164 | D(mca, 14) | |||
|
165 | D(cmov, 15) | |||
|
166 | D(pat, 16) | |||
|
167 | D(pse36, 17) | |||
|
168 | D(psn, 18) | |||
|
169 | D(clfsh, 19) | |||
|
170 | D(ds, 21) | |||
|
171 | D(acpi, 22) | |||
|
172 | D(mmx, 23) | |||
|
173 | D(fxsr, 24) | |||
|
174 | D(sse, 25) | |||
|
175 | D(sse2, 26) | |||
|
176 | D(ss, 27) | |||
|
177 | D(htt, 28) | |||
|
178 | D(tm, 29) | |||
|
179 | D(pbe, 31) | |||
|
180 | #undef D | |||
|
181 | ||||
|
182 | /* cpuid(7): Extended Features. */ | |||
|
183 | #define B(name, bit) X(name, f7b, bit) | |||
|
184 | B(bmi1, 3) | |||
|
185 | B(hle, 4) | |||
|
186 | B(avx2, 5) | |||
|
187 | B(smep, 7) | |||
|
188 | B(bmi2, 8) | |||
|
189 | B(erms, 9) | |||
|
190 | B(invpcid, 10) | |||
|
191 | B(rtm, 11) | |||
|
192 | B(mpx, 14) | |||
|
193 | B(avx512f, 16) | |||
|
194 | B(avx512dq, 17) | |||
|
195 | B(rdseed, 18) | |||
|
196 | B(adx, 19) | |||
|
197 | B(smap, 20) | |||
|
198 | B(avx512ifma, 21) | |||
|
199 | B(pcommit, 22) | |||
|
200 | B(clflushopt, 23) | |||
|
201 | B(clwb, 24) | |||
|
202 | B(avx512pf, 26) | |||
|
203 | B(avx512er, 27) | |||
|
204 | B(avx512cd, 28) | |||
|
205 | B(sha, 29) | |||
|
206 | B(avx512bw, 30) | |||
|
207 | B(avx512vl, 31) | |||
|
208 | #undef B | |||
|
209 | #define C(name, bit) X(name, f7c, bit) | |||
|
210 | C(prefetchwt1, 0) | |||
|
211 | C(avx512vbmi, 1) | |||
|
212 | #undef C | |||
|
213 | ||||
|
214 | #undef X | |||
|
215 | ||||
|
216 | #endif /* ZSTD_COMMON_CPU_H */ |
(Several additional new files in this changeset were truncated by the diff viewer; their contents are not shown.)
@@ -8,11 +8,13 b' mercurial/cext/revlog.c' | |||||
8 | contrib/python-zstandard/c-ext/bufferutil.c |
|
8 | contrib/python-zstandard/c-ext/bufferutil.c | |
9 | contrib/python-zstandard/c-ext/compressiondict.c |
|
9 | contrib/python-zstandard/c-ext/compressiondict.c | |
10 | contrib/python-zstandard/c-ext/compressionparams.c |
|
10 | contrib/python-zstandard/c-ext/compressionparams.c | |
|
11 | contrib/python-zstandard/c-ext/compressionreader.c | |||
11 | contrib/python-zstandard/c-ext/compressionwriter.c |
|
12 | contrib/python-zstandard/c-ext/compressionwriter.c | |
12 | contrib/python-zstandard/c-ext/compressobj.c |
|
13 | contrib/python-zstandard/c-ext/compressobj.c | |
13 | contrib/python-zstandard/c-ext/compressor.c |
|
14 | contrib/python-zstandard/c-ext/compressor.c | |
14 | contrib/python-zstandard/c-ext/compressoriterator.c |
|
15 | contrib/python-zstandard/c-ext/compressoriterator.c | |
15 | contrib/python-zstandard/c-ext/constants.c |
|
16 | contrib/python-zstandard/c-ext/constants.c | |
|
17 | contrib/python-zstandard/c-ext/decompressionreader.c | |||
16 | contrib/python-zstandard/c-ext/decompressionwriter.c |
|
18 | contrib/python-zstandard/c-ext/decompressionwriter.c | |
17 | contrib/python-zstandard/c-ext/decompressobj.c |
|
19 | contrib/python-zstandard/c-ext/decompressobj.c | |
18 | contrib/python-zstandard/c-ext/decompressor.c |
|
20 | contrib/python-zstandard/c-ext/decompressor.c | |
@@ -21,11 +23,13 b' contrib/python-zstandard/c-ext/framepara' | |||||
21 | contrib/python-zstandard/c-ext/python-zstandard.h |
|
23 | contrib/python-zstandard/c-ext/python-zstandard.h | |
22 | contrib/python-zstandard/zstd.c |
|
24 | contrib/python-zstandard/zstd.c | |
23 | contrib/python-zstandard/zstd/common/bitstream.h |
|
25 | contrib/python-zstandard/zstd/common/bitstream.h | |
|
26 | contrib/python-zstandard/zstd/common/compiler.h | |||
|
27 | contrib/python-zstandard/zstd/common/cpu.h | |||
24 | contrib/python-zstandard/zstd/common/entropy_common.c |
|
28 | contrib/python-zstandard/zstd/common/entropy_common.c | |
25 | contrib/python-zstandard/zstd/common/error_private.c |
|
29 | contrib/python-zstandard/zstd/common/error_private.c | |
26 | contrib/python-zstandard/zstd/common/error_private.h |
|
30 | contrib/python-zstandard/zstd/common/error_private.h | |
|
31 | contrib/python-zstandard/zstd/common/fse_decompress.c | |||
27 | contrib/python-zstandard/zstd/common/fse.h |
|
32 | contrib/python-zstandard/zstd/common/fse.h | |
28 | contrib/python-zstandard/zstd/common/fse_decompress.c |
|
|||
29 | contrib/python-zstandard/zstd/common/huf.h |
|
33 | contrib/python-zstandard/zstd/common/huf.h | |
30 | contrib/python-zstandard/zstd/common/mem.h |
|
34 | contrib/python-zstandard/zstd/common/mem.h | |
31 | contrib/python-zstandard/zstd/common/pool.c |
|
35 | contrib/python-zstandard/zstd/common/pool.c | |
@@ -40,11 +44,25 b' contrib/python-zstandard/zstd/common/zst' | |||||
40 | contrib/python-zstandard/zstd/compress/fse_compress.c |
|
44 | contrib/python-zstandard/zstd/compress/fse_compress.c | |
41 | contrib/python-zstandard/zstd/compress/huf_compress.c |
|
45 | contrib/python-zstandard/zstd/compress/huf_compress.c | |
42 | contrib/python-zstandard/zstd/compress/zstd_compress.c |
|
46 | contrib/python-zstandard/zstd/compress/zstd_compress.c | |
43 | contrib/python-zstandard/zstd/compress/zstd_opt.h |
|
47 | contrib/python-zstandard/zstd/compress/zstd_compress_internal.h | |
|
48 | contrib/python-zstandard/zstd/compress/zstd_double_fast.c | |||
|
49 | contrib/python-zstandard/zstd/compress/zstd_double_fast.h | |||
|
50 | contrib/python-zstandard/zstd/compress/zstd_fast.c | |||
|
51 | contrib/python-zstandard/zstd/compress/zstd_fast.h | |||
|
52 | contrib/python-zstandard/zstd/compress/zstd_lazy.c | |||
|
53 | contrib/python-zstandard/zstd/compress/zstd_lazy.h | |||
|
54 | contrib/python-zstandard/zstd/compress/zstd_ldm.c | |||
|
55 | contrib/python-zstandard/zstd/compress/zstd_ldm.h | |||
44 | contrib/python-zstandard/zstd/compress/zstdmt_compress.c |
|
56 | contrib/python-zstandard/zstd/compress/zstdmt_compress.c | |
45 | contrib/python-zstandard/zstd/compress/zstdmt_compress.h |
|
57 | contrib/python-zstandard/zstd/compress/zstdmt_compress.h | |
|
58 | contrib/python-zstandard/zstd/compress/zstd_opt.c | |||
|
59 | contrib/python-zstandard/zstd/compress/zstd_opt.h | |||
46 | contrib/python-zstandard/zstd/decompress/huf_decompress.c |
|
60 | contrib/python-zstandard/zstd/decompress/huf_decompress.c | |
47 | contrib/python-zstandard/zstd/decompress/zstd_decompress.c |
|
61 | contrib/python-zstandard/zstd/decompress/zstd_decompress.c | |
|
62 | contrib/python-zstandard/zstd/deprecated/zbuff_common.c | |||
|
63 | contrib/python-zstandard/zstd/deprecated/zbuff_compress.c | |||
|
64 | contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c | |||
|
65 | contrib/python-zstandard/zstd/deprecated/zbuff.h | |||
48 | contrib/python-zstandard/zstd/dictBuilder/cover.c |
|
66 | contrib/python-zstandard/zstd/dictBuilder/cover.c | |
49 | contrib/python-zstandard/zstd/dictBuilder/divsufsort.c |
|
67 | contrib/python-zstandard/zstd/dictBuilder/divsufsort.c | |
50 | contrib/python-zstandard/zstd/dictBuilder/divsufsort.h |
|
68 | contrib/python-zstandard/zstd/dictBuilder/divsufsort.h |
@@ -1,5 +1,7 b'' | |||||
1 | graft c-ext |
|
1 | graft c-ext | |
2 | graft zstd |
|
2 | graft zstd | |
|
3 | graft tests | |||
3 | include make_cffi.py |
|
4 | include make_cffi.py | |
4 | include setup_zstd.py |
|
5 | include setup_zstd.py | |
5 | include zstd.c |
|
6 | include zstd.c | |
|
7 | include LICENSE |
@@ -1,13 +1,201 b'' | |||||
|
1 | =============== | |||
1 | Version History |
|
2 | Version History | |
2 | =============== |
|
3 | =============== | |
3 |
|
4 | |||
|
5 | 1.0.0 (not yet released) | |||
|
6 | ======================== | |||
|
7 | ||||
|
8 | Actions Blocking Release | |||
|
9 | ------------------------ | |||
|
10 | ||||
|
11 | * compression and decompression APIs that support ``io.RawIOBase`` interface | |||
|
12 | (#13). | |||
|
13 | * Refactor module names so C and CFFI extensions live under ``zstandard`` | |||
|
14 | package. | |||
|
15 | * Overall API design review. | |||
|
16 | * Use Python allocator where possible. | |||
|
17 | * Figure out what to do about experimental APIs not implemented by CFFI. | |||
|
18 | * APIs for auto adjusting compression parameters based on input size. e.g. | |||
|
19 | clamping the window log so it isn't too large for input. | |||
|
20 | * Consider allowing compressor and decompressor instances to be thread safe, | |||
|
21 | support concurrent operations. Or track when an operation is in progress and | |||
|
22 | refuse to let concurrent operations use the same instance. | |||
|
23 | * Support for magic-less frames for all decompression operations (``decompress()`` | |||
|
24 | doesn't work due to sniffing the content size and the lack of a ZSTD API to | |||
|
25 | sniff magic-less frames - this should be fixed in 1.3.5.). | |||
|
26 | * Audit for complete flushing when ending compression streams. | |||
|
27 | * Deprecate legacy APIs. | |||
|
28 | * Audit for ability to control read/write sizes on all APIs. | |||
|
29 | * Detect memory leaks via bench.py. | |||
|
30 | * Remove low-level compression parameters from ``ZstdCompressor.__init__`` and | |||
|
31 | require use of ``CompressionParameters``. | |||
|
32 | * Expose ``ZSTD_getFrameProgression()`` from more compressor types. | |||
|
33 | ||||
|
34 | Other Actions Not Blocking Release | |||
|
35 | --------------------------------------- | |||
|
36 | ||||
|
37 | * Support for block compression APIs. | |||
|
38 | * API for ensuring max memory ceiling isn't exceeded. | |||
|
39 | * Move off nose for testing. | |||
|
40 | ||||
|
41 | 0.9.0 (released 2018-04-08) | |||
|
42 | =========================== | |||
|
43 | ||||
|
44 | Backwards Compatibility Notes | |||
|
45 | ----------------------------- | |||
|
46 | ||||
|
47 | * CFFI 1.11 or newer is now required (previous requirement was 1.8). | |||
|
48 | * The primary module is now ``zstandard``. Please change imports of ``zstd`` | |||
|
49 | and ``zstd_cffi`` to ``import zstandard``. See the README for more. Support | |||
|
50 | for importing the old names will be dropped in the next release. | |||
|
51 | * ``ZstdCompressor.read_from()`` and ``ZstdDecompressor.read_from()`` have | |||
|
52 | been renamed to ``read_to_iter()``. ``read_from()`` is aliased to the new | |||
|
53 | name and will be deleted in a future release. | |||
|
54 | * Support for Python 2.6 has been removed. | |||
|
55 | * Support for Python 3.3 has been removed. | |||
|
56 | * The ``selectivity`` argument to ``train_dictionary()`` has been removed, as | |||
|
57 | the feature disappeared from zstd 1.3. | |||
|
58 | * Support for legacy dictionaries has been removed. Cover dictionaries are now | |||
|
59 | the default. ``train_cover_dictionary()`` has effectively been renamed to | |||
|
60 | ``train_dictionary()``. | |||
|
61 | * The ``allow_empty`` argument from ``ZstdCompressor.compress()`` has been | |||
|
62 | deleted and the method now allows empty inputs to be compressed by default. | |||
|
63 | * ``estimate_compression_context_size()`` has been removed. Use | |||
|
64 | ``CompressionParameters.estimated_compression_context_size()`` instead. | |||
|
65 | * ``get_compression_parameters()`` has been removed. Use | |||
|
66 | ``CompressionParameters.from_level()`` instead. | |||
|
67 | * The arguments to ``CompressionParameters.__init__()`` have changed. If you | |||
|
68 | were using positional arguments before, the positions now map to different | |||
|
69 | arguments. It is recommended to use keyword arguments to construct | |||
|
70 | ``CompressionParameters`` instances. | |||
|
71 | * ``TARGETLENGTH_MAX`` constant has been removed (it disappeared from zstandard | |||
|
72 | 1.3.4). | |||
|
73 | * ``ZstdCompressor.write_to()`` and ``ZstdDecompressor.write_to()`` have been | |||
|
74 | renamed to ``ZstdCompressor.stream_writer()`` and | |||
|
75 | ``ZstdDecompressor.stream_writer()``, respectively. The old names are still | |||
|
76 | aliased, but will be removed in the next major release. | |||
|
77 | * Content sizes are written into frame headers by default | |||
|
78 | (``ZstdCompressor(write_content_size=True)`` is now the default). | |||
|
79 | * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters`` | |||
|
80 | for consistency with other types. The old name is an alias and will be removed | |||
|
81 | in the next major release. | |||
|
82 | ||||
|
83 | Bug Fixes | |||
|
84 | --------- | |||
|
85 | ||||
|
86 | * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40) (from 0.8.2). | |||
|
87 | * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35) (from 0.8.2). | |||
|
88 | * Fixed memory leak of ``ZSTD_DDict`` instances in CFFI's ``ZstdDecompressor``. | |||
|
89 | ||||
|
90 | New Features | |||
|
91 | ------------ | |||
|
92 | ||||
|
93 | * Bundled zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various | |||
|
94 | bug fixes and performance improvements. It also gives us access to newer | |||
|
95 | features. | |||
|
96 | * Support for negative compression levels. | |||
|
97 | * Support for *long distance matching* (facilitates compression ratios that approach | |||
|
98 | LZMA). | |||
|
99 | * Support for reading empty zstandard frames (with an embedded content size | |||
|
100 | of 0). | |||
|
101 | * Support for writing and partial support for reading zstandard frames without a | |||
|
102 | magic header. | |||
|
103 | * New ``stream_reader()`` API that exposes the ``io.RawIOBase`` interface (allows | |||
|
104 | you to ``.read()`` from a file-like object). | |||
|
105 | * Several minor features, bug fixes, and performance enhancements. | |||
|
106 | * Wheels for Linux and macOS are now provided with releases. | |||
|
107 | ||||
|
108 | Changes | |||
|
109 | ------- | |||
|
110 | ||||
|
111 | * Functions accepting bytes data now use the buffer protocol and can accept | |||
|
112 | more types (like ``memoryview`` and ``bytearray``) (#26). | |||
|
113 | * Add #includes so compilation on OS X and BSDs works (#20). | |||
|
114 | * New ``ZstdDecompressor.stream_reader()`` API to obtain a read-only i/o stream | |||
|
115 | of decompressed data for a source. | |||
|
116 | * New ``ZstdCompressor.stream_reader()`` API to obtain a read-only i/o stream of | |||
|
117 | compressed data for a source. | |||
|
118 | * Renamed ``ZstdDecompressor.read_from()`` to ``ZstdDecompressor.read_to_iter()``. | |||
|
119 | The old name is still available. | |||
|
120 | * Renamed ``ZstdCompressor.read_from()`` to ``ZstdCompressor.read_to_iter()``. | |||
|
121 | ``read_from()`` is still available at its old location. | |||
|
122 | * Introduce the ``zstandard`` module to import and re-export the C or CFFI | |||
|
123 | *backend* as appropriate. Behavior can be controlled via the | |||
|
124 | ``PYTHON_ZSTANDARD_IMPORT_POLICY`` environment variable. See README for | |||
|
125 | usage info. | |||
|
126 | * Vendored version of zstd upgraded to 1.3.4. | |||
|
127 | * Added module constants ``CONTENTSIZE_UNKNOWN`` and ``CONTENTSIZE_ERROR``. | |||
|
128 | * Add ``STRATEGY_BTULTRA`` compression strategy constant. | |||
|
129 | * Switch from deprecated ``ZSTD_getDecompressedSize()`` to | |||
|
130 | ``ZSTD_getFrameContentSize()`` replacement. | |||
|
131 | * ``ZstdCompressor.compress()`` can now compress empty inputs without requiring | |||
|
132 | special handling. | |||
|
133 | * ``ZstdCompressor`` and ``ZstdDecompressor`` now have a ``memory_size()`` | |||
|
134 | method for determining the current memory utilization of the underlying zstd | |||
|
135 | primitive. | |||
|
136 | * ``train_dictionary()`` has new arguments and functionality for trying multiple | |||
|
137 | variations of COVER parameters and selecting the best one. | |||
|
138 | * Added module constants ``LDM_MINMATCH_MIN``, ``LDM_MINMATCH_MAX``, and | |||
|
139 | ``LDM_BUCKETSIZELOG_MAX``. | |||
|
140 | * Converted all consumers to the zstandard *new advanced API*, which uses | |||
|
141 | ``ZSTD_compress_generic()`` | |||
|
142 | * ``CompressionParameters.__init__`` now accepts several more arguments, | |||
|
143 | including support for *long distance matching*. | |||
|
144 | * ``ZstdCompressionDict.__init__`` now accepts a ``dict_type`` argument that | |||
|
145 | controls how the dictionary should be interpreted. This can be used to | |||
|
146 | force the use of *content-only* dictionaries or to require the presence | |||
|
147 | of the dictionary magic header. | |||
|
148 | * ``ZstdCompressionDict.precompute_compress()`` can be used to precompute the | |||
|
149 | compression dictionary so it can efficiently be used with multiple | |||
|
150 | ``ZstdCompressor`` instances. | |||
|
151 | * Digested dictionaries are now stored in ``ZstdCompressionDict`` instances, | |||
|
152 | created automatically on first use, and automatically reused by all | |||
|
153 | ``ZstdDecompressor`` instances bound to that dictionary. | |||
|
154 | * All meaningful functions now accept keyword arguments. | |||
|
155 | * ``ZstdDecompressor.decompressobj()`` now accepts a ``write_size`` argument | |||
|
156 | to control how much work to perform on every decompressor invocation. | |||
|
157 | * ``ZstdCompressor.write_to()`` now exposes a ``tell()``, which exposes the | |||
|
158 | total number of bytes written so far. | |||
|
159 | * ``ZstdDecompressor.stream_reader()`` now supports ``seek()`` when moving | |||
|
160 | forward in the stream. | |||
|
161 | * Removed ``TARGETLENGTH_MAX`` constant. | |||
|
162 | * Added ``frame_header_size(data)`` function. | |||
|
163 | * Added ``frame_content_size(data)`` function. | |||
|
164 | * Consumers of ``ZSTD_decompress*`` have been switched to the new *advanced | |||
|
165 | decompression* API. | |||
|
166 | * ``ZstdCompressor`` and ``ZstdCompressionParams`` can now be constructed with | |||
|
167 | negative compression levels. | |||
|
168 | * ``ZstdDecompressor`` now accepts a ``max_window_size`` argument to limit the | |||
|
169 | amount of memory required for decompression operations. | |||
|
170 | * ``FORMAT_ZSTD1`` and ``FORMAT_ZSTD1_MAGICLESS`` constants to be used with | |||
|
171 | the ``format`` compression parameter to control whether the frame magic | |||
|
172 | header is written. | |||
|
173 | * ``ZstdDecompressor`` now accepts a ``format`` argument to control the | |||
|
174 | expected frame format. | |||
|
175 | * ``ZstdCompressor`` now has a ``frame_progression()`` method to return | |||
|
176 | information about the current compression operation. | |||
|
177 | * Error messages in CFFI no longer have ``b''`` literals. | |||
|
178 | * Compiler warnings and underlying overflow issues on 32-bit platforms have been | |||
|
179 | fixed. | |||
|
180 | * Builds in CI now build with compiler warnings as errors. This should hopefully | |||
|
181 | fix new compiler warnings from being introduced. | |||
|
182 | * Make ``ZstdCompressor(write_content_size=True)`` and | |||
|
183 | ``CompressionParameters(write_content_size=True)`` the default. | |||
|
184 | * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``. | |||
|
185 | ||||
|
186 | 0.8.2 (released 2018-02-22) | |||
|
187 | --------------------------- | |||
|
188 | ||||
|
189 | * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40). | |||
|
190 | * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35). | |||
|
191 | ||||
4 | 0.8.1 (released 2017-04-08) |
|
192 | 0.8.1 (released 2017-04-08) | |
5 | --------------------------- |
|
193 | --------------------------- | |
6 |
|
194 | |||
7 | * Add #includes so compilation on OS X and BSDs works (#20). |
|
195 | * Add #includes so compilation on OS X and BSDs works (#20). | |
8 |
|
196 | |||
9 | 0.8.0 (released 2017-03-08) |
|
197 | 0.8.0 (released 2017-03-08) | |
10 | --------------------------- |
|
198 | =========================== | |
11 |
|
199 | |||
12 | * CompressionParameters now has a estimated_compression_context_size() method. |
|
200 | * CompressionParameters now has a estimated_compression_context_size() method. | |
13 | zstd.estimate_compression_context_size() is now deprecated and slated for |
|
201 | zstd.estimate_compression_context_size() is now deprecated and slated for | |
@@ -35,7 +223,7 b' 0.8.0 (released 2017-03-08)' | |||||
35 | DictParameters instance to control dictionary generation. |
|
223 | DictParameters instance to control dictionary generation. | |
36 |
|
224 | |||
37 | 0.7.0 (released 2017-02-07) |
|
225 | 0.7.0 (released 2017-02-07) | |
38 | --------------------------- |
|
226 | =========================== | |
39 |
|
227 | |||
40 | * Added zstd.get_frame_parameters() to obtain info about a zstd frame. |
|
228 | * Added zstd.get_frame_parameters() to obtain info about a zstd frame. | |
41 | * Added ZstdDecompressor.decompress_content_dict_chain() for efficient |
|
229 | * Added ZstdDecompressor.decompress_content_dict_chain() for efficient | |
@@ -62,7 +250,7 b' 0.7.0 (released 2017-02-07)' | |||||
62 | * DictParameters instances now expose their values as attributes. |
|
250 | * DictParameters instances now expose their values as attributes. | |
63 |
|
251 | |||
64 | 0.6.0 (released 2017-01-14) |
|
252 | 0.6.0 (released 2017-01-14) | |
65 | --------------------------- |
|
253 | =========================== | |
66 |
|
254 | |||
67 | * Support for legacy zstd protocols (build time opt in feature). |
|
255 | * Support for legacy zstd protocols (build time opt in feature). | |
68 | * Automation improvements to test against Python 3.6, latest versions |
|
256 | * Automation improvements to test against Python 3.6, latest versions | |
@@ -79,17 +267,17 b' 0.6.0 (released 2017-01-14)' | |||||
79 | * Disallow compress(b'') when writing content sizes by default (issue #11). |
|
267 | * Disallow compress(b'') when writing content sizes by default (issue #11). | |
80 |
|
268 | |||
81 | 0.5.2 (released 2016-11-12) |
|
269 | 0.5.2 (released 2016-11-12) | |
82 | --------------------------- |
|
270 | =========================== | |
83 |
|
271 | |||
84 | * more packaging fixes for source distribution |
|
272 | * more packaging fixes for source distribution | |
85 |
|
273 | |||
86 | 0.5.1 (released 2016-11-12) |
|
274 | 0.5.1 (released 2016-11-12) | |
87 | --------------------------- |
|
275 | =========================== | |
88 |
|
276 | |||
89 | * setup_zstd.py is included in the source distribution |
|
277 | * setup_zstd.py is included in the source distribution | |
90 |
|
278 | |||
91 | 0.5.0 (released 2016-11-10) |
|
279 | 0.5.0 (released 2016-11-10) | |
92 | --------------------------- |
|
280 | =========================== | |
93 |
|
281 | |||
94 | * Vendored version of zstd updated to 1.1.1. |
|
282 | * Vendored version of zstd updated to 1.1.1. | |
95 | * Continuous integration for Python 3.6 and 3.7 |
|
283 | * Continuous integration for Python 3.6 and 3.7 | |
@@ -114,8 +302,8 b' 0.5.0 (released 2016-11-10)' | |||||
114 | * The monolithic ``zstd.c`` file has been split into a header file defining |
|
302 | * The monolithic ``zstd.c`` file has been split into a header file defining | |
115 | types and separate ``.c`` source files for the implementation. |
|
303 | types and separate ``.c`` source files for the implementation. | |
116 |
|
304 | |||
117 | History of the Project |
|
305 | Older History | |
118 |
============= |
|
306 | ============= | |
119 |
|
307 | |||
120 | 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a |
|
308 | 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a | |
121 | Python extension for use by the Mercurial project. A very hacky prototype |
|
309 | Python extension for use by the Mercurial project. A very hacky prototype |
This diff has been collapsed as it changes many lines (971 lines changed).
@@ -11,69 +11,18 b' underlying C API through a Pythonic inte' | |||||
11 | performance. This means exposing most of the features and flexibility |
|
11 | performance. This means exposing most of the features and flexibility | |
12 | of the C API while not sacrificing usability or safety that Python provides. |
|
12 | of the C API while not sacrificing usability or safety that Python provides. | |
13 |
|
13 | |||
14 | The canonical home for this project is |
|
14 | The canonical home for this project lives in a Mercurial repository run by | |
|
15 | the author. For convenience, that repository is frequently synchronized to | |||
15 | https://github.com/indygreg/python-zstandard. |
|
16 | https://github.com/indygreg/python-zstandard. | |
16 |
|
17 | |||
17 | | |ci-status| |win-ci-status| |
|
18 | | |ci-status| |win-ci-status| | |
18 |
|
19 | |||
19 | State of Project |
|
|||
20 | ================ |
|
|||
21 |
|
||||
22 | The project is officially in beta state. The author is reasonably satisfied |
|
|||
23 | that functionality works as advertised. **There will be some backwards |
|
|||
24 | incompatible changes before 1.0, probably in the 0.9 release.** This may |
|
|||
25 | involve renaming the main module from *zstd* to *zstandard* and renaming |
|
|||
26 | various types and methods. Pin the package version to prevent unwanted |
|
|||
27 | breakage when this change occurs! |
|
|||
28 |
|
||||
29 | This project is vendored and distributed with Mercurial 4.1, where it is |
|
|||
30 | used in a production capacity. |
|
|||
31 |
|
||||
32 | There is continuous integration for Python versions 2.6, 2.7, and 3.3+ |
|
|||
33 | on Linux x86_x64 and Windows x86 and x86_64. The author is reasonably |
|
|||
34 | confident the extension is stable and works as advertised on these |
|
|||
35 | platforms. |
|
|||
36 |
|
||||
37 | The CFFI bindings are mostly feature complete. Where a feature is implemented |
|
|||
38 | in CFFI, unit tests run against both C extension and CFFI implementation to |
|
|||
39 | ensure behavior parity. |
|
|||
40 |
|
||||
41 | Expected Changes |
|
|||
42 | ---------------- |
|
|||
43 |
|
||||
44 | The author is reasonably confident in the current state of what's |
|
|||
45 | implemented on the ``ZstdCompressor`` and ``ZstdDecompressor`` types. |
|
|||
46 | Those APIs likely won't change significantly. Some low-level behavior |
|
|||
47 | (such as naming and types expected by arguments) may change. |
|
|||
48 |
|
||||
49 | There will likely be arguments added to control the input and output |
|
|||
50 | buffer sizes (currently, certain operations read and write in chunk |
|
|||
51 | sizes using zstd's preferred defaults). |
|
|||
52 |
|
||||
53 | There should be an API that accepts an object that conforms to the buffer |
|
|||
54 | interface and returns an iterator over compressed or decompressed output. |
|
|||
55 |
|
||||
56 | There should be an API that exposes an ``io.RawIOBase`` interface to |
|
|||
57 | compressor and decompressor streams, like how ``gzip.GzipFile`` from |
|
|||
58 | the standard library works (issue 13). |
|
|||
59 |
|
||||
60 | The author is on the fence as to whether to support the extremely |
|
|||
61 | low level compression and decompression APIs. It could be useful to |
|
|||
62 | support compression without the framing headers. But the author doesn't |
|
|||
63 | believe it a high priority at this time. |
|
|||
64 |
|
||||
65 | There will likely be a refactoring of the module names. Currently, |
|
|||
66 | ``zstd`` is a C extension and ``zstd_cffi`` is the CFFI interface. |
|
|||
67 | This means that all code for the C extension must be implemented in |
|
|||
68 | C. ``zstd`` may be converted to a Python module so code can be reused |
|
|||
69 | between CFFI and C and so not all code in the C extension has to be C. |
|
|||
70 |
|
||||
71 | Requirements |
|
20 | Requirements | |
72 | ============ |
|
21 | ============ | |
73 |
|
22 | |||
74 |
This extension is designed to run with Python 2. |
|
23 | This extension is designed to run with Python 2.7, 3.4, 3.5, and 3.6 | |
75 |
|
|
24 | on common platforms (Linux, Windows, and OS X). x86 and x86_64 are well-tested | |
76 | currently well-tested as an architecture. |
|
25 | on Windows. Only x86_64 is well-tested on Linux and macOS. | |
77 |
|
26 | |||
78 | Installing |
|
27 | Installing | |
79 | ========== |
|
28 | ========== | |
@@ -96,114 +45,82 b' this package with ``conda``.' | |||||
96 | Performance |
|
45 | Performance | |
97 | =========== |
|
46 | =========== | |
98 |
|
47 | |||
99 | Very crude and non-scientific benchmarking (most benchmarks fall in this |
|
48 | zstandard is a highly tunable compression algorithm. In its default settings | |
100 | category because proper benchmarking is hard) show that the Python bindings |
|
49 | (compression level 3), it will be faster at compression and decompression and | |
101 | perform within 10% of the native C implementation. |
|
50 | will have better compression ratios than zlib on most data sets. When tuned | |
102 |
|
51 | for speed, it approaches lz4's speed and ratios. When tuned for compression | ||
103 | The following table compares the performance of compressing and decompressing |
|
52 | ratio, it approaches lzma ratios and compression speed, but decompression | |
104 | a 1.1 GB tar file comprised of the files in a Firefox source checkout. Values |
|
53 | speed is much faster. See the official zstandard documentation for more. | |
105 | obtained with the ``zstd`` program are on the left. The remaining columns detail |
|
|||
106 | performance of various compression APIs in the Python bindings. |
|
|||
107 |
|
54 | |||
108 | +-------+-----------------+-----------------+-----------------+---------------+ |
|
55 | zstandard and this library support multi-threaded compression. There is a | |
109 | | Level | Native | Simple | Stream In | Stream Out | |
|
56 | mechanism to compress large inputs using multiple threads. | |
110 | | | Comp / Decomp | Comp / Decomp | Comp / Decomp | Comp | |
|
|||
111 | +=======+=================+=================+=================+===============+ |
|
|||
112 | | 1 | 490 / 1338 MB/s | 458 / 1266 MB/s | 407 / 1156 MB/s | 405 MB/s | |
|
|||
113 | +-------+-----------------+-----------------+-----------------+---------------+ |
|
|||
114 | | 2 | 412 / 1288 MB/s | 381 / 1203 MB/s | 345 / 1128 MB/s | 349 MB/s | |
|
|||
115 | +-------+-----------------+-----------------+-----------------+---------------+ |
|
|||
116 | | 3 | 342 / 1312 MB/s | 319 / 1182 MB/s | 285 / 1165 MB/s | 287 MB/s | |
|
|||
117 | +-------+-----------------+-----------------+-----------------+---------------+ |
|
|||
118 | | 11 | 64 / 1506 MB/s | 66 / 1436 MB/s | 56 / 1342 MB/s | 57 MB/s | |
|
|||
119 | +-------+-----------------+-----------------+-----------------+---------------+ |
|
|||
120 |
|
||||
121 | Again, these are very unscientific. But it shows that Python is capable of |
|
|||
122 | compressing at several hundred MB/s and decompressing at over 1 GB/s. |
|
|||
123 |
|
||||
124 | Comparison to Other Python Bindings |
|
|||
125 | =================================== |
|
|||
126 |
|
||||
127 | https://pypi.python.org/pypi/zstd is an alternate Python binding to |
|
|||
128 | Zstandard. At the time this was written, the latest release of that |
|
|||
129 | package (1.1.2) only exposed the simple APIs for compression and decompression. |
|
|||
130 | This package exposes much more of the zstd API, including streaming and |
|
|||
131 | dictionary compression. This package also has CFFI support. |
|
|||
132 |
|
||||
133 | Bundling of Zstandard Source Code |
|
|||
134 | ================================= |
|
|||
135 |
|
||||
136 | The source repository for this project contains a vendored copy of the |
|
|||
137 | Zstandard source code. This is done for a few reasons. |
|
|||
138 |
|
57 | |||
139 | First, Zstandard is relatively new and not yet widely available as a system |
|
58 | The performance of this library is usually very similar to what the zstandard | |
140 | package. Providing a copy of the source code enables the Python C extension |
|
59 | C API can deliver. Overhead in this library is due to general Python overhead | |
141 | to be compiled without requiring the user to obtain the Zstandard source code |
|
60 | and can't easily be avoided by *any* zstandard Python binding. This library | |
142 | separately. |
|
61 | exposes multiple APIs for performing compression and decompression so callers | |
143 |
|
62 | can pick an API suitable for their need. Contrast with the compression | ||
144 | Second, Zstandard has both a stable *public* API and an *experimental* API. |
|
63 | modules in Python's standard library (like ``zlib``), which only offer limited | |
145 | The *experimental* API is actually quite useful (contains functionality for |
|
64 | mechanisms for performing operations. The API flexibility means consumers can | |
146 | training dictionaries for example), so it is something we wish to expose to |
|
65 | choose to use APIs that facilitate zero copying or minimize Python object | |
147 | Python. However, the *experimental* API is only available via static linking. |
|
66 | creation and garbage collection overhead. | |
148 | Furthermore, the *experimental* API can change at any time. So, control over |
|
|||
149 | the exact version of the Zstandard library linked against is important to |
|
|||
150 | ensure known behavior. |
|
|||
151 |
|
||||
152 | Instructions for Building and Testing |
|
|||
153 | ===================================== |
|
|||
154 |
|
||||
155 | Once you have the source code, the extension can be built via setup.py:: |
|
|||
156 |
|
||||
157 | $ python setup.py build_ext |
|
|||
158 |
|
||||
159 | We recommend testing with ``nose``:: |
|
|||
160 |
|
||||
161 | $ nosetests |
|
|||
162 |
|
67 | |||
163 | A Tox configuration is present to test against multiple Python versions:: |
|
68 | This library is capable of single-threaded throughputs well over 1 GB/s. For | |
164 |
|
69 | exact numbers, measure yourself. The source code repository has a ``bench.py`` | ||
165 | $ tox |
|
70 | script that can be used to measure things. | |
166 |
|
||||
167 | Tests use the ``hypothesis`` Python package to perform fuzzing. If you |
|
|||
168 | don't have it, those tests won't run. Since the fuzzing tests take longer |
|
|||
169 | to execute than normal tests, you'll need to opt in to running them by |
|
|||
170 | setting the ``ZSTD_SLOW_TESTS`` environment variable. This is set |
|
|||
171 | automatically when using ``tox``. |
|
|||
172 |
|
||||
173 | The ``cffi`` Python package needs to be installed in order to build the CFFI |
|
|||
174 | bindings. If it isn't present, the CFFI bindings won't be built. |
|
|||
175 |
|
||||
176 | To create a virtualenv with all development dependencies, do something |
|
|||
177 | like the following:: |
|
|||
178 |
|
||||
179 | # Python 2 |
|
|||
180 | $ virtualenv venv |
|
|||
181 |
|
||||
182 | # Python 3 |
|
|||
183 | $ python3 -m venv venv |
|
|||
184 |
|
||||
185 | $ source venv/bin/activate |
|
|||
186 | $ pip install cffi hypothesis nose tox |
|
|||
187 |
|
71 | |||
188 | API |
|
72 | API | |
189 | === |
|
73 | === | |
190 |
|
74 | |||
191 | The compiled C extension provides a ``zstd`` Python module. The CFFI |
|
75 | To interface with Zstandard, simply import the ``zstandard`` module:: | |
192 | bindings provide a ``zstd_cffi`` module. Both provide an identical API |
|
76 | ||
193 | interface. The types, functions, and attributes exposed by these modules |
|
77 | import zstandard | |
|
78 | ||||
|
79 | It is a popular convention to alias the module as a different name for | |||
|
80 | brevity:: | |||
|
81 | ||||
|
82 | import zstandard as zstd | |||
|
83 | ||||
|
84 | This module attempts to import and use either the C extension or CFFI | |||
|
85 | implementation. On Python platforms known to support C extensions (like | |||
|
86 | CPython), it raises an ImportError if the C extension cannot be imported. | |||
|
87 | On Python platforms known to not support C extensions (like PyPy), it only | |||
|
88 | attempts to import the CFFI implementation and raises ImportError if that | |||
|
89 | can't be done. On other platforms, it first tries to import the C extension | |||
|
90 | then falls back to CFFI if that fails and raises ImportError if CFFI fails. | |||
|
91 | ||||
|
92 | To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY`` | |||
|
93 | environment variable can be set. The following values are accepted: | |||
|
94 | ||||
|
95 | default | |||
|
96 | The behavior described above. | |||
|
97 | cffi_fallback | |||
|
98 | Always try to import the C extension then fall back to CFFI if that | |||
|
99 | fails. | |||
|
100 | cext | |||
|
101 | Only attempt to import the C extension. | |||
|
102 | cffi | |||
|
103 | Only attempt to import the CFFI implementation. | |||
|
104 | ||||
|
105 | In addition, the ``zstandard`` module exports a ``backend`` attribute | |||
|
106 | containing the string name of the backend being used. It will be one | |||
|
107 | of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively). | |||
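
For illustration, a rough sketch combining the ``PYTHON_ZSTANDARD_IMPORT_POLICY``
environment variable and the ``backend`` attribute described above (forcing the
CFFI backend is only an example choice and assumes CFFI is installed)::

    import os

    # The policy must be set before ``zstandard`` is first imported.
    os.environ['PYTHON_ZSTANDARD_IMPORT_POLICY'] = 'cffi'

    import zstandard

    print(zstandard.backend)  # 'cffi' here; 'cext' is typical under the default policy
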
|
108 | ||||
|
109 | The types, functions, and attributes exposed by the ``zstandard`` module | |||
194 | are documented in the sections below. |
|
110 | are documented in the sections below. | |
195 |
|
111 | |||
196 | .. note:: |
|
112 | .. note:: | |
197 |
|
113 | |||
198 | The documentation in this section makes references to various zstd |
|
114 | The documentation in this section makes references to various zstd | |
199 |
concepts and functionality. The |
|
115 | concepts and functionality. The source repository contains a | |
200 | these concepts in more detail. |
|
116 | ``docs/concepts.rst`` file explaining these in more detail. | |
201 |
|
117 | |||
202 | ZstdCompressor |
|
118 | ZstdCompressor | |
203 | -------------- |
|
119 | -------------- | |
204 |
|
120 | |||
205 | The ``ZstdCompressor`` class provides an interface for performing |
|
121 | The ``ZstdCompressor`` class provides an interface for performing | |
206 | compression operations. |
|
122 | compression operations. Each instance is essentially a wrapper around a | |
|
123 | ``ZSTD_CCtx`` from the C API. | |||
207 |
|
124 | |||
208 | Each instance is associated with parameters that control compression |
|
125 | Each instance is associated with parameters that control compression | |
209 | behavior. These come from the following named arguments (all optional): |
|
126 | behavior. These come from the following named arguments (all optional): | |
@@ -214,21 +131,21 b' dict_data' | |||||
214 | Compression dictionary to use. |
|
131 | Compression dictionary to use. | |
215 |
|
132 | |||
216 | Note: When using dictionary data and ``compress()`` is called multiple |
|
133 | Note: When using dictionary data and ``compress()`` is called multiple | |
217 |
times, the ``CompressionParameters`` derived from an integer |
|
134 | times, the ``ZstdCompressionParameters`` derived from an integer | |
218 |
``level`` and the first compressed data's size will be reused |
|
135 | compression ``level`` and the first compressed data's size will be reused | |
219 |
subsequent operations. This may not be desirable if source data |
|
136 | for all subsequent operations. This may not be desirable if source data | |
220 | varies significantly. |
|
137 | size varies significantly. | |
221 | compression_params |
|
138 | compression_params | |
222 |
A ``CompressionParameters`` instance |
|
139 | A ``ZstdCompressionParameters`` instance defining compression settings. | |
223 | write_checksum |
|
140 | write_checksum | |
224 | Whether a 4 byte checksum should be written with the compressed data. |
|
141 | Whether a 4 byte checksum should be written with the compressed data. | |
225 | Defaults to False. If True, the decompressor can verify that decompressed |
|
142 | Defaults to False. If True, the decompressor can verify that decompressed | |
226 | data matches the original input data. |
|
143 | data matches the original input data. | |
227 | write_content_size |
|
144 | write_content_size | |
228 | Whether the size of the uncompressed data will be written into the |
|
145 | Whether the size of the uncompressed data will be written into the | |
229 |
header of compressed data. Defaults to |
|
146 | header of compressed data. Defaults to True. The data will only be | |
230 | written if the compressor knows the size of the input data. This is |
|
147 | written if the compressor knows the size of the input data. This is | |
231 |
|
|
148 | often not true for streaming compression. | |
232 | write_dict_id |
|
149 | write_dict_id | |
233 | Whether to write the dictionary ID into the compressed data. |
|
150 | Whether to write the dictionary ID into the compressed data. | |
234 | Defaults to True. The dictionary ID is only written if a dictionary |
|
151 | Defaults to True. The dictionary ID is only written if a dictionary | |
@@ -242,10 +159,25 b' threads' | |||||
242 | data. APIs that spawn multiple threads for working on multiple pieces of |
|
159 | data. APIs that spawn multiple threads for working on multiple pieces of | |
243 | data have their own ``threads`` argument. |
|
160 | data have their own ``threads`` argument. | |
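
As an illustrative sketch of these constructor arguments (the specific level,
thread count, and input data are arbitrary choices, not recommendations)::

    cctx = zstd.ZstdCompressor(level=10, threads=4, write_checksum=True)
    compressed = cctx.compress(b'data to compress' * 1000)
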
244 |
|
161 | |||
|
162 | ``compression_params`` is mutually exclusive with ``level``, ``write_checksum``, | |||
|
163 | ``write_content_size``, ``write_dict_id``, and ``threads``. | |||
|
164 | ||||
245 | Unless specified otherwise, assume that no two methods of ``ZstdCompressor`` |
|
165 | Unless specified otherwise, assume that no two methods of ``ZstdCompressor`` | |
246 | instances can be called from multiple Python threads simultaneously. In other |
|
166 | instances can be called from multiple Python threads simultaneously. In other | |
247 | words, assume instances are not thread safe unless stated otherwise. |
|
167 | words, assume instances are not thread safe unless stated otherwise. | |
248 |
|
168 | |||
|
169 | Utility Methods | |||
|
170 | ^^^^^^^^^^^^^^^ | |||
|
171 | ||||
|
172 | ``frame_progression()`` returns a 3-tuple containing the number of bytes | |||
|
173 | ingested, consumed, and produced by the current compression operation. | |||
|
174 | ||||
|
175 | ``memory_size()`` obtains the memory utilization of the underlying zstd | |||
|
176 | compression context, in bytes.:: | |||
|
177 | ||||
|
178 | cctx = zstd.ZstdCompressor() | |||
|
179 | memory = cctx.memory_size() | |||
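
Similarly, a rough sketch of ``frame_progression()`` during an in-progress
operation (``io.BytesIO`` is just a stand-in sink; the exact numbers depend on
how much data has been written so far)::

    import io

    cctx = zstd.ZstdCompressor()
    with cctx.stream_writer(io.BytesIO()) as compressor:
        compressor.write(b'data' * 16384)
        ingested, consumed, produced = cctx.frame_progression()
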
|
180 | ||||
249 | Simple API |
|
181 | Simple API | |
250 | ^^^^^^^^^^ |
|
182 | ^^^^^^^^^^ | |
251 |
|
183 | |||
@@ -256,40 +188,75 b' Simple API' | |||||
256 |
|
188 | |||
257 | The ``data`` argument can be any object that implements the *buffer protocol*. |
|
189 | The ``data`` argument can be any object that implements the *buffer protocol*. | |
258 |
|
190 | |||
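
To illustrate the *buffer protocol* point with a rough sketch (``bytearray`` and
``memoryview`` are simply examples of buffer-providing types)::

    cctx = zstd.ZstdCompressor()
    raw = bytearray(b'data to compress' * 100)
    compressed = cctx.compress(memoryview(raw))

    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress(compressed) == bytes(raw)
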
259 | Unless ``compression_params`` or ``dict_data`` are passed to the |
|
191 | Stream Reader API | |
260 | ``ZstdCompressor``, each invocation of ``compress()`` will calculate the |
|
192 | ^^^^^^^^^^^^^^^^^ | |
261 | optimal compression parameters for the configured compression ``level`` and |
|
193 | ||
262 | input data size (some parameters are fine-tuned for small input sizes). |
|
194 | ``stream_reader(source)`` can be used to obtain an object conforming to the | |
|
195 | ``io.RawIOBase`` interface for reading compressed output as a stream:: | |||
|
196 | ||||
|
197 | with open(path, 'rb') as fh: | |||
|
198 | cctx = zstd.ZstdCompressor() | |||
|
199 | with cctx.stream_reader(fh) as reader: | |||
|
200 | while True: | |||
|
201 | chunk = reader.read(16384) | |||
|
202 | if not chunk: | |||
|
203 | break | |||
|
204 | ||||
|
205 | # Do something with compressed chunk. | |||
|
206 | ||||
|
207 | The stream can only be read within a context manager. When the context | |||
|
208 | manager exits, the stream is closed and the underlying resource is | |||
|
209 | released and future operations against the compression stream will fail. | |||
|
210 | ||||
|
211 | The ``source`` argument to ``stream_reader()`` can be any object with a | |||
|
212 | ``read(size)`` method or any object implementing the *buffer protocol*. | |||
263 |
|
213 | |||
264 | If a compression dictionary is being used, the compression parameters |
|
214 | ``stream_reader()`` accepts a ``size`` argument specifying how large the input | |
265 | determined from the first input's size will be reused for subsequent |
|
215 | stream is. This is used to adjust compression parameters so they are | |
266 | operations. |
|
216 | tailored to the source size.:: | |
|
217 | ||||
|
218 | with open(path, 'rb') as fh: | |||
|
219 | cctx = zstd.ZstdCompressor() | |||
|
220 | with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader: | |||
|
221 | ... | |||
|
222 | ||||
|
223 | If the ``source`` is a stream, you can specify how large ``read()`` requests | |||
|
224 | to that stream should be via the ``read_size`` argument. It defaults to | |||
|
225 | ``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.:: | |||
267 |
|
|
226 | ||
268 | There is currently a deficiency in zstd's C APIs that makes it difficult |
|
227 | with open(path, 'rb') as fh: | |
269 | to round trip empty inputs when ``write_content_size=True``. Attempting |
|
228 | cctx = zstd.ZstdCompressor() | |
270 | this will raise a ``ValueError`` unless ``allow_empty=True`` is passed |
|
229 | # Will perform fh.read(8192) when obtaining data to feed into the | |
271 | to ``compress()``. |
|
230 | # compressor. | |
|
231 | with cctx.stream_reader(fh, read_size=8192) as reader: | |||
|
232 | ... | |||
|
233 | ||||
|
234 | The stream returned by ``stream_reader()`` is neither writable nor seekable | |||
|
235 | (even if the underlying source is seekable). ``readline()`` and | |||
|
236 | ``readlines()`` are not implemented because they don't make sense for | |||
|
237 | compressed data. ``tell()`` returns the number of compressed bytes | |||
|
238 | emitted so far. | |||
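A small sketch of using ``tell()`` to observe progress (``path`` is assumed to name an existing file)::

    with open(path, 'rb') as fh:
        cctx = zstd.ZstdCompressor()
        with cctx.stream_reader(fh) as reader:
            while reader.read(16384):
                pass
            print('compressed bytes emitted: %d' % reader.tell())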
272 |
|
239 | |||
273 | Streaming Input API |
|
240 | Streaming Input API | |
274 | ^^^^^^^^^^^^^^^^^^^ |
|
241 | ^^^^^^^^^^^^^^^^^^^ | |
275 |
|
242 | |||
276 |
`` |
|
243 | ``stream_writer(fh)`` (which behaves as a context manager) allows you to *stream* | |
277 | data into a compressor.:: |
|
244 | data into a compressor.:: | |
278 |
|
245 | |||
279 | cctx = zstd.ZstdCompressor(level=10) |
|
246 | cctx = zstd.ZstdCompressor(level=10) | |
280 |
with cctx. |
|
247 | with cctx.stream_writer(fh) as compressor: | |
281 | compressor.write(b'chunk 0') |
|
248 | compressor.write(b'chunk 0') | |
282 | compressor.write(b'chunk 1') |
|
249 | compressor.write(b'chunk 1') | |
283 | ... |
|
250 | ... | |
284 |
|
251 | |||
285 |
The argument to `` |
|
252 | The argument to ``stream_writer()`` must have a ``write(data)`` method. As | |
286 | compressed data is available, ``write()`` will be called with the compressed |
|
253 | compressed data is available, ``write()`` will be called with the compressed | |
287 | data as its argument. Many common Python types implement ``write()``, including |
|
254 | data as its argument. Many common Python types implement ``write()``, including | |
288 | open file handles and ``io.BytesIO``. |
|
255 | open file handles and ``io.BytesIO``. | |
289 |
|
256 | |||
290 |
`` |
|
257 | ``stream_writer()`` returns an object representing a streaming compressor | |
291 |
It **must** be used as a context manager. That object's |
|
258 | instance. It **must** be used as a context manager. That object's | |
292 | is used to feed data into the compressor. |
|
259 | ``write(data)`` method is used to feed data into the compressor. | |
293 |
|
260 | |||
294 | A ``flush()`` method can be called to evict whatever data remains within the |
|
261 | A ``flush()`` method can be called to evict whatever data remains within the | |
295 | compressor's internal state into the output object. This may result in 0 or |
|
262 | compressor's internal state into the output object. This may result in 0 or | |
@@ -303,7 +270,7 b' If the size of the data being fed to thi' | |||||
303 | you can declare it before compression begins:: |
|
270 | you can declare it before compression begins:: | |
304 |
|
271 | |||
305 | cctx = zstd.ZstdCompressor() |
|
272 | cctx = zstd.ZstdCompressor() | |
306 |
with cctx. |
|
273 | with cctx.stream_writer(fh, size=data_len) as compressor: | |
307 | compressor.write(chunk0) |
|
274 | compressor.write(chunk0) | |
308 | compressor.write(chunk1) |
|
275 | compressor.write(chunk1) | |
309 | ... |
|
276 | ... | |
@@ -315,29 +282,35 b' content size being written into the fram' | |||||
315 | The size of chunks being ``write()`` to the destination can be specified:: |
|
282 | The size of chunks being ``write()`` to the destination can be specified:: | |
316 |
|
283 | |||
317 | cctx = zstd.ZstdCompressor() |
|
284 | cctx = zstd.ZstdCompressor() | |
318 |
with cctx. |
|
285 | with cctx.stream_writer(fh, write_size=32768) as compressor: | |
319 | ... |
|
286 | ... | |
320 |
|
287 | |||
321 | To see how much memory is being used by the streaming compressor:: |
|
288 | To see how much memory is being used by the streaming compressor:: | |
322 |
|
289 | |||
323 | cctx = zstd.ZstdCompressor() |
|
290 | cctx = zstd.ZstdCompressor() | |
324 |
with cctx. |
|
291 | with cctx.stream_writer(fh) as compressor: | |
325 | ... |
|
292 | ... | |
326 | byte_size = compressor.memory_size() |
|
293 | byte_size = compressor.memory_size() | |
327 |
|
294 | |||
|
295 | The total number of bytes written so far is exposed via ``tell()``:: | |||
|
296 | ||||
|
297 | cctx = zstd.ZstdCompressor() | |||
|
298 | with cctx.stream_writer(fh) as compressor: | |||
|
299 | ... | |||
|
300 | total_written = compressor.tell() | |||
|
301 | ||||
328 | Streaming Output API |
|
302 | Streaming Output API | |
329 | ^^^^^^^^^^^^^^^^^^^^ |
|
303 | ^^^^^^^^^^^^^^^^^^^^ | |
330 |
|
304 | |||
331 |
``read_ |
|
305 | ``read_to_iter(reader)`` provides a mechanism to stream data out of a | |
332 | as an iterator of data chunks.:: |
|
306 | compressor as an iterator of data chunks.:: | |
333 |
|
307 | |||
334 | cctx = zstd.ZstdCompressor() |
|
308 | cctx = zstd.ZstdCompressor() | |
335 |
for chunk in cctx.read_ |
|
309 | for chunk in cctx.read_to_iter(fh): | |
336 | # Do something with emitted data. |
|
310 | # Do something with emitted data. | |
337 |
|
311 | |||
338 |
``read_ |
|
312 | ``read_to_iter()`` accepts an object that has a ``read(size)`` method or | |
339 | to the buffer protocol. (``bytes`` and ``memoryview`` are 2 common types that |
|
313 | conforms to the buffer protocol. | |
340 | provide the buffer protocol.) |
|
|||
341 |
|
314 | |||
342 | Uncompressed data is fetched from the source either by calling ``read(size)`` |
|
315 | Uncompressed data is fetched from the source either by calling ``read(size)`` | |
343 | or by fetching a slice of data from the object directly (in the case where |
|
316 | or by fetching a slice of data from the object directly (in the case where | |
@@ -348,23 +321,24 b' If reading from the source via ``read()`' | |||||
348 | it raises or returns an empty bytes (``b''``). It is perfectly valid for |
|
321 | it raises or returns an empty bytes (``b''``). It is perfectly valid for | |
349 | the source to deliver fewer bytes than were requested by ``read(size)``. |
|
322 | the source to deliver fewer bytes than were requested by ``read(size)``. | |
350 |
|
323 | |||
351 |
Like `` |
|
324 | Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument | |
352 | declaring the size of the input stream:: |
|
325 | declaring the size of the input stream:: | |
353 |
|
326 | |||
354 | cctx = zstd.ZstdCompressor() |
|
327 | cctx = zstd.ZstdCompressor() | |
355 |
for chunk in cctx.read_ |
|
328 | for chunk in cctx.read_to_iter(fh, size=some_int): | |
356 | pass |
|
329 | pass | |
357 |
|
330 | |||
358 | You can also control the size that data is ``read()`` from the source and |
|
331 | You can also control the size that data is ``read()`` from the source and | |
359 | the ideal size of output chunks:: |
|
332 | the ideal size of output chunks:: | |
360 |
|
333 | |||
361 | cctx = zstd.ZstdCompressor() |
|
334 | cctx = zstd.ZstdCompressor() | |
362 |
for chunk in cctx.read_ |
|
335 | for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192): | |
363 | pass |
|
336 | pass | |
364 |
|
337 | |||
365 |
Unlike `` |
|
338 | Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control | |
366 |
sizes of chunks fed into the compressor. Instead, chunk sizes will |
|
339 | over the sizes of chunks fed into the compressor. Instead, chunk sizes will | |
367 |
the object being read from delivers. These will often be of a |
|
340 | be whatever the object being read from delivers. These will often be of a | |
|
341 | uniform size. | |||
368 |
|
342 | |||
369 | Stream Copying API |
|
343 | Stream Copying API | |
370 | ^^^^^^^^^^^^^^^^^^ |
|
344 | ^^^^^^^^^^^^^^^^^^ | |
@@ -404,7 +378,7 b' Compressor API' | |||||
404 | ``flush()`` methods. Each returns compressed data or an empty bytes. |
|
378 | ``flush()`` methods. Each returns compressed data or an empty bytes. | |
405 |
|
379 | |||
406 | The purpose of ``compressobj()`` is to provide an API-compatible interface |
|
380 | The purpose of ``compressobj()`` is to provide an API-compatible interface | |
407 |
with ``zlib.compressobj`` |
|
381 | with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to | |
408 | swap in different compressor objects while using the same API. |
|
382 | swap in different compressor objects while using the same API. | |
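An illustrative sketch of the ``compressobj()`` workflow, mirroring how ``zlib.compressobj`` is typically used::

    cctx = zstd.ZstdCompressor()
    cobj = cctx.compressobj()
    data = cobj.compress(b'chunk 0')
    data += cobj.compress(b'chunk 1')
    data += cobj.flush()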
409 |
|
383 | |||
410 | ``flush()`` accepts an optional argument indicating how to end the stream. |
|
384 | ``flush()`` accepts an optional argument indicating how to end the stream. | |
@@ -485,13 +459,23 b' ZstdDecompressor' | |||||
485 | ---------------- |
|
459 | ---------------- | |
486 |
|
460 | |||
487 | The ``ZstdDecompressor`` class provides an interface for performing |
|
461 | The ``ZstdDecompressor`` class provides an interface for performing | |
488 | decompression. |
|
462 | decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from | |
|
463 | the C API. | |||
489 |
|
464 | |||
490 | Each instance is associated with parameters that control decompression. These |
|
465 | Each instance is associated with parameters that control decompression. These | |
491 | come from the following named arguments (all optional): |
|
466 | come from the following named arguments (all optional): | |
492 |
|
467 | |||
493 | dict_data |
|
468 | dict_data | |
494 | Compression dictionary to use. |
|
469 | Compression dictionary to use. | |
|
470 | max_window_size | |||
|
471 | Sets an upper limit on the window size for decompression operations in | |||
|
472 | kibibytes. This setting can be used to prevent large memory allocations | |||
|
473 | for inputs using large compression windows. | |||
|
474 | format | |||
|
475 | Set the format of data for the decoder. By default, this is | |||
|
476 | ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to | |||
|
477 | allow decoding frames without the 4 byte magic header. Not all decompression | |||
|
478 | APIs support this mode. | |||
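A hypothetical sketch exercising these arguments (the values are illustrative only)::

    # Cap the decompression window to guard against very large allocations.
    dctx = zstd.ZstdDecompressor(max_window_size=2097152)

    # Decode frames written without the 4 byte magic header.
    magicless_dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS)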
495 |
|
479 | |||
496 | The interface of this class is very similar to ``ZstdCompressor`` (by design). |
|
480 | The interface of this class is very similar to ``ZstdCompressor`` (by design). | |
497 |
|
481 | |||
@@ -499,6 +483,15 b' Unless specified otherwise, assume that ' | |||||
499 | instances can be called from multiple Python threads simultaneously. In other |
|
483 | instances can be called from multiple Python threads simultaneously. In other | |
500 | words, assume instances are not thread safe unless stated otherwise. |
|
484 | words, assume instances are not thread safe unless stated otherwise. | |
501 |
|
485 | |||
|
486 | Utility Methods | |||
|
487 | ^^^^^^^^^^^^^^^ | |||
|
488 | ||||
|
489 | ``memory_size()`` obtains the size of the underlying zstd decompression context, | |||
|
490 | in bytes.:: | |||
|
491 | ||||
|
492 | dctx = zstd.ZstdDecompressor() | |||
|
493 | size = dctx.memory_size() | |||
|
494 | ||||
502 | Simple API |
|
495 | Simple API | |
503 | ^^^^^^^^^^ |
|
496 | ^^^^^^^^^^ | |
504 |
|
497 | |||
@@ -509,9 +502,10 b' frame in a single operation.::' | |||||
509 | decompressed = dctx.decompress(data) |
|
502 | decompressed = dctx.decompress(data) | |
510 |
|
503 | |||
511 | By default, ``decompress(data)`` will only work on data written with the content |
|
504 | By default, ``decompress(data)`` will only work on data written with the content | |
512 | size encoded in its header. This can be achieved by creating a |
|
505 | size encoded in its header (this is the default behavior of | |
513 | ``ZstdCompressor`` with ``write_content_size=True``. If compressed data without |
|
506 | ``ZstdCompressor().compress()`` but may not be true for streaming compression). If | |
514 |
an embedded content size is seen, ``zstd.ZstdError`` will |
|
507 | compressed data without an embedded content size is seen, ``zstd.ZstdError`` will | |
|
508 | be raised. | |||
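If the frame doesn't carry its content size, a cap on the output can be supplied instead. A sketch using the ``max_output_size`` argument described below::

    dctx = zstd.ZstdDecompressor()
    decompressed = dctx.decompress(data, max_output_size=1048576)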
515 |
|
509 | |||
516 | If the compressed data doesn't have its content size embedded within it, |
|
510 | If the compressed data doesn't have its content size embedded within it, | |
517 | decompression can be attempted by specifying the ``max_output_size`` |
|
511 | decompression can be attempted by specifying the ``max_output_size`` | |
@@ -534,17 +528,67 b' performed every time the method is calle' | |||||
534 | result in a lot of work for the memory allocator and may result in |
|
528 | result in a lot of work for the memory allocator and may result in | |
535 | ``MemoryError`` being raised if the allocation fails. |
|
529 | ``MemoryError`` being raised if the allocation fails. | |
536 |
|
530 | |||
537 | If the exact size of decompressed data is unknown, it is **strongly** |
|
531 | .. important:: | |
538 | recommended to use a streaming API. |
|
532 | ||
|
533 | If the exact size of decompressed data is unknown (not passed in explicitly | |||
|
534 | and not stored in the zstandard frame), for performance reasons it is | |||
|
535 | encouraged to use a streaming API. | |||
|
536 | ||||
|
537 | Stream Reader API | |||
|
538 | ^^^^^^^^^^^^^^^^^ | |||
|
539 | ||||
|
540 | ``stream_reader(source)`` can be used to obtain an object conforming to the | |||
|
541 | ``io.RawIOBase`` interface for reading decompressed output as a stream:: | |||
|
542 | ||||
|
543 | with open(path, 'rb') as fh: | |||
|
544 | dctx = zstd.ZstdDecompressor() | |||
|
545 | with dctx.stream_reader(fh) as reader: | |||
|
546 | while True: | |||
|
547 | chunk = reader.read(16384) | |||
|
548 | if not chunk: | |||
|
549 | break | |||
|
550 | ||||
|
551 | # Do something with decompressed chunk. | |||
|
552 | ||||
|
553 | The stream can only be read within a context manager. When the context | |||
|
554 | manager exits, the stream is closed and the underlying resource is | |||
|
555 | released and future operations against the stream will fail. | |||
|
556 | ||||
|
557 | The ``source`` argument to ``stream_reader()`` can be any object with a | |||
|
558 | ``read(size)`` method or any object implementing the *buffer protocol*. | |||
|
559 | ||||
|
560 | If the ``source`` is a stream, you can specify how large ``read()`` requests | |||
|
561 | to that stream should be via the ``read_size`` argument. It defaults to | |||
|
562 | ``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.:: | |||
|
563 | ||||
|
564 | with open(path, 'rb') as fh: | |||
|
565 | dctx = zstd.ZstdDecompressor() | |||
|
566 | # Will perform fh.read(8192) when obtaining data for the decompressor. | |||
|
567 | with dctx.stream_reader(fh, read_size=8192) as reader: | |||
|
568 | ... | |||
|
569 | ||||
|
570 | The stream returned by ``stream_reader()`` is not writable. | |||
|
571 | ||||
|
572 | The stream returned by ``stream_reader()`` is *partially* seekable. | |||
|
573 | Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward | |||
|
574 | of the current position are allowed. Offsets behind the current read | |||
|
575 | position and offsets relative to the end of stream are not allowed and | |||
|
576 | will raise ``ValueError`` if attempted. | |||
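A short sketch of seeking forward in the decompressed stream (the offsets are arbitrary)::

    with open(path, 'rb') as fh:
        dctx = zstd.ZstdDecompressor()
        with dctx.stream_reader(fh) as reader:
            # Skip the first 4096 decompressed bytes, then read.
            reader.seek(4096)
            chunk = reader.read(16384)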
|
577 | ||||
|
578 | ``tell()`` returns the number of decompressed bytes read so far. | |||
|
579 | ||||
|
580 | Not all I/O methods are implemented. Notably missing is support for | |||
|
581 | ``readline()``, ``readlines()``, and linewise iteration. Support for | |||
|
582 | these is planned for a future release. | |||
539 |
|
583 | |||
540 | Streaming Input API |
|
584 | Streaming Input API | |
541 | ^^^^^^^^^^^^^^^^^^^ |
|
585 | ^^^^^^^^^^^^^^^^^^^ | |
542 |
|
586 | |||
543 |
`` |
|
587 | ``stream_writer(fh)`` can be used to incrementally send compressed data to a | |
544 | decompressor.:: |
|
588 | decompressor.:: | |
545 |
|
589 | |||
546 | dctx = zstd.ZstdDecompressor() |
|
590 | dctx = zstd.ZstdDecompressor() | |
547 |
with dctx. |
|
591 | with dctx.stream_writer(fh) as decompressor: | |
548 | decompressor.write(compressed_data) |
|
592 | decompressor.write(compressed_data) | |
549 |
|
593 | |||
550 | This behaves similarly to ``zstd.ZstdCompressor``: compressed data is written to |
|
594 | This behaves similarly to ``zstd.ZstdCompressor``: compressed data is written to | |
@@ -558,54 +602,56 b' of ``0`` are possible.' | |||||
558 | The size of chunks being ``write()`` to the destination can be specified:: |
|
602 | The size of chunks being ``write()`` to the destination can be specified:: | |
559 |
|
603 | |||
560 | dctx = zstd.ZstdDecompressor() |
|
604 | dctx = zstd.ZstdDecompressor() | |
561 |
with dctx. |
|
605 | with dctx.stream_writer(fh, write_size=16384) as decompressor: | |
562 | pass |
|
606 | pass | |
563 |
|
607 | |||
564 | You can see how much memory is being used by the decompressor:: |
|
608 | You can see how much memory is being used by the decompressor:: | |
565 |
|
609 | |||
566 | dctx = zstd.ZstdDecompressor() |
|
610 | dctx = zstd.ZstdDecompressor() | |
567 |
with dctx. |
|
611 | with dctx.stream_writer(fh) as decompressor: | |
568 | byte_size = decompressor.memory_size() |
|
612 | byte_size = decompressor.memory_size() | |
569 |
|
613 | |||
570 | Streaming Output API |
|
614 | Streaming Output API | |
571 | ^^^^^^^^^^^^^^^^^^^^ |
|
615 | ^^^^^^^^^^^^^^^^^^^^ | |
572 |
|
616 | |||
573 |
``read_ |
|
617 | ``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a | |
574 | compressed source as an iterator of data chunks.:: |
|
618 | compressed source as an iterator of data chunks.:: | |
575 |
|
619 | |||
576 | dctx = zstd.ZstdDecompressor() |
|
620 | dctx = zstd.ZstdDecompressor() | |
577 |
for chunk in dctx.read_ |
|
621 | for chunk in dctx.read_to_iter(fh): | |
578 | # Do something with original data. |
|
622 | # Do something with original data. | |
579 |
|
623 | |||
580 |
``read_ |
|
624 | ``read_to_iter()`` accepts an object with a ``read(size)`` method that will | |
581 |
return compressed bytes |
|
625 | return compressed bytes or an object conforming to the buffer protocol that | |
582 |
can expose its data as a contiguous range of bytes. |
|
626 | can expose its data as a contiguous range of bytes. | |
583 | ``memoryview`` types expose this buffer protocol. |
|
|||
584 |
|
627 | |||
585 |
``read_ |
|
628 | ``read_to_iter()`` returns an iterator whose elements are chunks of the | |
586 | decompressed data. |
|
629 | decompressed data. | |
587 |
|
630 | |||
588 | The size of requested ``read()`` from the source can be specified:: |
|
631 | The size of requested ``read()`` from the source can be specified:: | |
589 |
|
632 | |||
590 | dctx = zstd.ZstdDecompressor() |
|
633 | dctx = zstd.ZstdDecompressor() | |
591 |
for chunk in dctx.read_ |
|
634 | for chunk in dctx.read_to_iter(fh, read_size=16384): | |
592 | pass |
|
635 | pass | |
593 |
|
636 | |||
594 | It is also possible to skip leading bytes in the input data:: |
|
637 | It is also possible to skip leading bytes in the input data:: | |
595 |
|
638 | |||
596 | dctx = zstd.ZstdDecompressor() |
|
639 | dctx = zstd.ZstdDecompressor() | |
597 |
for chunk in dctx.read_ |
|
640 | for chunk in dctx.read_to_iter(fh, skip_bytes=1): | |
598 | pass |
|
641 | pass | |
599 |
|
642 | |||
600 | Skipping leading bytes is useful if the source data contains extra |
|
643 | .. tip:: | |
601 | *header* data but you want to avoid the overhead of making a buffer copy |
|
|||
602 | or allocating a new ``memoryview`` object in order to decompress the data. |
|
|||
603 |
|
644 | |||
604 | Similarly to ``ZstdCompressor.read_from()``, the consumer of the iterator |
|
645 | Skipping leading bytes is useful if the source data contains extra | |
|
646 | *header* data. Traditionally, you would need to create a slice or | |||
|
647 | ``memoryview`` of the data you want to decompress. This would create | |||
|
648 | overhead. It is more efficient to pass the offset into this API. | |||
|
649 | ||||
|
650 | Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator | |||
605 | controls when data is decompressed. If the iterator isn't consumed, |
|
651 | controls when data is decompressed. If the iterator isn't consumed, | |
606 | decompression is put on hold. |
|
652 | decompression is put on hold. | |
607 |
|
653 | |||
608 |
When ``read_ |
|
654 | When ``read_to_iter()`` is passed an object conforming to the buffer protocol, | |
609 | the behavior may seem similar to what occurs when the simple decompression |
|
655 | the behavior may seem similar to what occurs when the simple decompression | |
610 | API is used. However, this API works when the decompressed size is unknown. |
|
656 | API is used. However, this API works when the decompressed size is unknown. | |
611 | Furthermore, if feeding large inputs, the decompressor will work in chunks |
|
657 | Furthermore, if feeding large inputs, the decompressor will work in chunks | |
@@ -636,7 +682,7 b' Decompressor API' | |||||
636 | ^^^^^^^^^^^^^^^^ |
|
682 | ^^^^^^^^^^^^^^^^ | |
637 |
|
683 | |||
638 | ``decompressobj()`` returns an object that exposes a ``decompress(data)`` |
|
684 | ``decompressobj()`` returns an object that exposes a ``decompress(data)`` | |
639 |
method |
|
685 | method. Compressed data chunks are fed into ``decompress(data)`` and | |
640 | uncompressed output (or an empty bytes) is returned. Output from subsequent |
|
686 | uncompressed output (or an empty bytes) is returned. Output from subsequent | |
641 | calls needs to be concatenated to reassemble the full decompressed byte |
|
687 | calls needs to be concatenated to reassemble the full decompressed byte | |
642 | sequence. |
|
688 | sequence. | |
@@ -650,11 +696,25 b' can no longer be called.' | |||||
650 |
|
696 | |||
651 | Here is how this API should be used:: |
|
697 | Here is how this API should be used:: | |
652 |
|
698 | |||
653 | dctx = zstd.ZstdDeompressor() |
|
699 | dctx = zstd.ZstdDecompressor() | |
654 |
dobj = |
|
700 | dobj = dctx.decompressobj() | |
655 | data = dobj.decompress(compressed_chunk_0) |
|
701 | data = dobj.decompress(compressed_chunk_0) | |
656 | data = dobj.decompress(compressed_chunk_1) |
|
702 | data = dobj.decompress(compressed_chunk_1) | |
657 |
|
703 | |||
|
704 | By default, calls to ``decompress()`` write output data in chunks of size | |||
|
705 | ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated | |||
|
706 | before being returned to the caller. It is possible to define the size of | |||
|
707 | these temporary chunks by passing ``write_size`` to ``decompressobj()``:: | |||
|
708 | ||||
|
709 | dctx = zstd.ZstdDecompressor() | |||
|
710 | dobj = dctx.decompressobj(write_size=1048576) | |||
|
711 | ||||
|
712 | .. note:: | |||
|
713 | ||||
|
714 | Because calls to ``decompress()`` may need to perform multiple | |||
|
715 | memory (re)allocations, this streaming decompression API isn't as | |||
|
716 | efficient as other APIs. | |||
|
717 | ||||
658 | Batch Decompression API |
|
718 | Batch Decompression API | |
659 | ^^^^^^^^^^^^^^^^^^^^^^^ |
|
719 | ^^^^^^^^^^^^^^^^^^^^^^^ | |
660 |
|
720 | |||
@@ -671,9 +731,12 b' conform to the buffer protocol. For best' | |||||
671 | minimal input validation will be done for that type. If calling from |
|
731 | minimal input validation will be done for that type. If calling from | |
672 | Python (as opposed to C), constructing one of these instances may add |
|
732 | Python (as opposed to C), constructing one of these instances may add | |
673 | overhead cancelling out the performance overhead of validation for list |
|
733 | overhead cancelling out the performance overhead of validation for list | |
674 | inputs. |
|
734 | inputs.:: | |
675 |
|
|
735 | ||
676 | The decompressed size of each frame must be discoverable. It can either be |
|
736 | dctx = zstd.ZstdDecompressor() | |
|
737 | results = dctx.multi_decompress_to_buffer([b'...', b'...']) | |||
|
738 | ||||
|
739 | The decompressed size of each frame MUST be discoverable. It can either be | |||
677 | embedded within the zstd frame (``write_content_size=True`` argument to |
|
740 | embedded within the zstd frame (``write_content_size=True`` argument to | |
678 | ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument. |
|
741 | ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument. | |
679 |
|
742 | |||
@@ -681,7 +744,13 b' The ``decompressed_sizes`` argument is a' | |||||
681 | protocol which holds an array of 64-bit unsigned integers in the machine's |
|
744 | protocol which holds an array of 64-bit unsigned integers in the machine's | |
682 | native format defining the decompressed sizes of each frame. If this argument |
|
745 | native format defining the decompressed sizes of each frame. If this argument | |
683 | is passed, it avoids having to scan each frame for its decompressed size. |
|
746 | is passed, it avoids having to scan each frame for its decompressed size. | |
684 | This frame scanning can add noticeable overhead in some scenarios. |
|
747 | This frame scanning can add noticeable overhead in some scenarios.:: | |
|
748 | ||||
|
749 | frames = [...] | |||
|
750 | sizes = struct.pack('=QQQQ', len0, len1, len2, len3) | |||
|
751 | ||||
|
752 | dctx = zstd.ZstdDecompressor() | |||
|
753 | results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes) | |||
685 |
|
|
754 | ||
686 | The ``threads`` argument controls the number of threads to use to perform |
|
755 | The ``threads`` argument controls the number of threads to use to perform | |
687 | decompression operations. The default (``0``) or the value ``1`` means to |
|
756 | decompression operations. The default (``0``) or the value ``1`` means to | |
@@ -701,22 +770,23 b' This function exists to perform decompre' | |||||
701 | as possible by having as little overhead as possible. Since decompression is |
|
770 | as possible by having as little overhead as possible. Since decompression is | |
702 | performed as a single operation and since the decompressed output is stored in |
|
771 | performed as a single operation and since the decompressed output is stored in | |
703 | a single buffer, extra memory allocations, Python objects, and Python function |
|
772 | a single buffer, extra memory allocations, Python objects, and Python function | |
704 |
calls are avoided. This is ideal for scenarios where callers |
|
773 | calls are avoided. This is ideal for scenarios where callers know up front that | |
705 | decompressed data for multiple frames. |
|
774 | they need to access data for multiple frames, such as when *delta chains* are | |
|
775 | being used. | |||
706 |
|
776 | |||
707 | Currently, the implementation always spawns multiple threads when requested, |
|
777 | Currently, the implementation always spawns multiple threads when requested, | |
708 | even if the amount of work to do is small. In the future, it will be smarter |
|
778 | even if the amount of work to do is small. In the future, it will be smarter | |
709 | about avoiding threads and their associated overhead when the amount of |
|
779 | about avoiding threads and their associated overhead when the amount of | |
710 | work to do is small. |
|
780 | work to do is small. | |
711 |
|
781 | |||
712 |
|
|
782 | Prefix Dictionary Chain Decompression | |
713 |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
783 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
714 |
|
784 | |||
715 | ``decompress_content_dict_chain(frames)`` performs decompression of a list of |
|
785 | ``decompress_content_dict_chain(frames)`` performs decompression of a list of | |
716 |
zstd frames produced using chained * |
|
786 | zstd frames produced using chained *prefix* dictionary compression. Such | |
717 | a list of frames is produced by compressing discrete inputs where each |
|
787 | a list of frames is produced by compressing discrete inputs where each | |
718 |
non-initial input is compressed with a * |
|
788 | non-initial input is compressed with a *prefix* dictionary consisting of the | |
719 |
|
|
789 | content of the previous input. | |
720 |
|
790 | |||
721 | For example, say you have the following inputs:: |
|
791 | For example, say you have the following inputs:: | |
722 |
|
792 | |||
@@ -725,25 +795,25 b' For example, say you have the following ' | |||||
725 | The zstd frame chain consists of: |
|
795 | The zstd frame chain consists of: | |
726 |
|
796 | |||
727 | 1. ``b'input 1'`` compressed in standalone/discrete mode |
|
797 | 1. ``b'input 1'`` compressed in standalone/discrete mode | |
728 |
2. ``b'input 2'`` compressed using ``b'input 1'`` as a * |
|
798 | 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary | |
729 |
3. ``b'input 3'`` compressed using ``b'input 2'`` as a * |
|
799 | 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary | |
730 |
|
800 | |||
731 | Each zstd frame **must** have the content size written. |
|
801 | Each zstd frame **must** have the content size written. | |
732 |
|
802 | |||
733 |
The following Python code can be used to produce a * |
|
803 | The following Python code can be used to produce a *prefix dictionary chain*:: | |
734 | chain*:: |
|
|||
735 |
|
804 | |||
736 | def make_chain(inputs): |
|
805 | def make_chain(inputs): | |
737 | frames = [] |
|
806 | frames = [] | |
738 |
|
807 | |||
739 | # First frame is compressed in standalone/discrete mode. |
|
808 | # First frame is compressed in standalone/discrete mode. | |
740 |
zctx = zstd.ZstdCompressor( |
|
809 | zctx = zstd.ZstdCompressor() | |
741 | frames.append(zctx.compress(inputs[0])) |
|
810 | frames.append(zctx.compress(inputs[0])) | |
742 |
|
811 | |||
743 |
# Subsequent frames use the previous fulltext as a |
|
812 | # Subsequent frames use the previous fulltext as a prefix dictionary | |
744 | for i, raw in enumerate(inputs[1:]): |
|
813 | for i, raw in enumerate(inputs[1:]): | |
745 |
dict_data = zstd.ZstdCompressionDict( |
|
814 | dict_data = zstd.ZstdCompressionDict( | |
746 | zctx = zstd.ZstdCompressor(write_content_size=True, dict_data=dict_data) |
|
815 | inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT) | |
|
816 | zctx = zstd.ZstdCompressor(dict_data=dict_data) | |||
747 | frames.append(zctx.compress(raw)) |
|
817 | frames.append(zctx.compress(raw)) | |
748 |
|
818 | |||
749 | return frames |
|
819 | return frames | |
@@ -751,10 +821,13 b' chain*::' | |||||
751 | ``decompress_content_dict_chain()`` returns the uncompressed data of the last |
|
821 | ``decompress_content_dict_chain()`` returns the uncompressed data of the last | |
752 | element in the input chain. |
|
822 | element in the input chain. | |
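For instance, a sketch building on the ``make_chain()`` helper above::

    frames = make_chain([b'input 1', b'input 2', b'input 3'])

    dctx = zstd.ZstdDecompressor()
    # Returns the fulltext of the final input (b'input 3' here).
    data = dctx.decompress_content_dict_chain(frames)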
753 |
|
823 | |||
754 | It is possible to implement *content-only dictionary chain* decompression |
|
824 | ||
755 | on top of other Python APIs. However, this function will likely be significantly |
|
825 | .. note:: | |
756 | faster, especially for long input chains, as it avoids the overhead of |
|
826 | ||
757 | instantiating and passing around intermediate objects between C and Python. |
|
827 | It is possible to implement *prefix dictionary chain* decompression | |
|
828 | on top of other APIs. However, this function will likely be faster - | |||
|
829 | especially for long input chains - as it avoids the overhead of instantiating | |||
|
830 | and passing around intermediate objects between C and Python. | |||
758 |
|
831 | |||
759 | Multi-Threaded Compression |
|
832 | Multi-Threaded Compression | |
760 | -------------------------- |
|
833 | -------------------------- | |
@@ -764,9 +837,15 b' of threads to use for compression. The w' | |||||
764 | into segments and each segment is fed into a worker pool for compression. Once |
|
837 | into segments and each segment is fed into a worker pool for compression. Once | |
765 | a segment is compressed, it is flushed/appended to the output. |
|
838 | a segment is compressed, it is flushed/appended to the output. | |
766 |
|
839 | |||
|
840 | .. note:: | |||
|
841 | ||||
|
842 | These threads are created at the C layer and are not Python threads. So they | |||
|
843 | work outside the GIL. It is therefore possible to CPU saturate multiple cores | |||
|
844 | from Python. | |||
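A hypothetical sketch of requesting multi-threaded compression via the ``threads`` argument to ``ZstdCompressor`` (a negative value is assumed to mean one thread per detected logical CPU; ``large_input`` is an assumed large ``bytes`` object)::

    cctx = zstd.ZstdCompressor(level=3, threads=-1)
    compressed = cctx.compress(large_input)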
|
845 | ||||
767 | The segment size for multi-threaded compression is chosen from the window size |
|
846 | The segment size for multi-threaded compression is chosen from the window size | |
768 | of the compressor. This is derived from the ``window_log`` attribute of a |
|
847 | of the compressor. This is derived from the ``window_log`` attribute of a | |
769 | ``CompressionParameters`` instance. By default, segment sizes are in the 1+MB |
|
848 | ``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB | |
770 | range. |
|
849 | range. | |
771 |
|
850 | |||
772 | If multi-threaded compression is requested and the input is smaller than the |
|
851 | If multi-threaded compression is requested and the input is smaller than the | |
@@ -785,31 +864,33 b' than non-multi-threaded compression. The' | |||||
785 | there is a CPU/wall time versus size trade off that may warrant investigation. |
|
864 | there is a CPU/wall time versus size trade off that may warrant investigation. | |
786 |
|
865 | |||
787 | Output from multi-threaded compression does not require any special handling |
|
866 | Output from multi-threaded compression does not require any special handling | |
788 |
on the decompression side. |
|
867 | on the decompression side. To the decompressor, data generated with single | |
789 | to consume data produced with multi-threaded compression. |
|
868 | threaded compressor looks the same as data generated by a multi-threaded | |
|
869 | compressor and does not require any special handling or additional resource | |||
|
870 | requirements. | |||
790 |
|
871 | |||
791 | Dictionary Creation and Management |
|
872 | Dictionary Creation and Management | |
792 | ---------------------------------- |
|
873 | ---------------------------------- | |
793 |
|
874 | |||
794 |
Compression dictionaries are represented |
|
875 | Compression dictionaries are represented with the ``ZstdCompressionDict`` type. | |
795 |
|
876 | |||
796 | Instances can be constructed from bytes:: |
|
877 | Instances can be constructed from bytes:: | |
797 |
|
878 | |||
798 | dict_data = zstd.ZstdCompressionDict(data) |
|
879 | dict_data = zstd.ZstdCompressionDict(data) | |
799 |
|
880 | |||
800 |
It is possible to construct a dictionary from *any* data. |
|
881 | It is possible to construct a dictionary from *any* data. If the data doesn't | |
801 |
|
|
882 | begin with a magic header, it will be treated as a *prefix* dictionary. | |
802 |
* |
|
883 | *Prefix* dictionaries allow compression operations to reference raw data | |
803 | that follow to reference raw data within the content. For one use of |
|
884 | within the dictionary. | |
804 | *content-only* dictionaries, see |
|
|||
805 | ``ZstdDecompressor.decompress_content_dict_chain()``. |
|
|||
806 |
|
885 | |||
807 | More interestingly, instances can be created by *training* on sample data:: |
|
886 | It is possible to force the use of *prefix* dictionaries or to require a | |
|
887 | dictionary header: | |||
808 |
|
888 | |||
809 |
dict_data = zstd. |
|
889 | dict_data = zstd.ZstdCompressionDict(data, | |
|
890 | dict_type=zstd.DICT_TYPE_RAWCONTENT) | |||
810 |
|
891 | |||
811 | This takes a list of bytes instances and creates and returns a |
|
892 | dict_data = zstd.ZstdCompressionDict(data, | |
812 | ``ZstdCompressionDict``. |
|
893 | dict_type=zstd.DICT_TYPE_FULLDICT) | |
813 |
|
894 | |||
814 | You can see how many bytes are in the dictionary by calling ``len()``:: |
|
895 | You can see how many bytes are in the dictionary by calling ``len()``:: | |
815 |
|
896 | |||
@@ -819,7 +900,7 b' You can see how many bytes are in the di' | |||||
819 | Once you have a dictionary, you can pass it to the objects performing |
|
900 | Once you have a dictionary, you can pass it to the objects performing | |
820 | compression and decompression:: |
|
901 | compression and decompression:: | |
821 |
|
902 | |||
822 |
dict_data = zstd.train_dictionary(1 |
|
903 | dict_data = zstd.train_dictionary(131072, samples) | |
823 |
|
904 | |||
824 | cctx = zstd.ZstdCompressor(dict_data=dict_data) |
|
905 | cctx = zstd.ZstdCompressor(dict_data=dict_data) | |
825 | for source_data in input_data: |
|
906 | for source_data in input_data: | |
@@ -829,7 +910,7 b' compression and decompression::' | |||||
829 | dctx = zstd.ZstdDecompressor(dict_data=dict_data) |
|
910 | dctx = zstd.ZstdDecompressor(dict_data=dict_data) | |
830 | for compressed_data in input_data: |
|
911 | for compressed_data in input_data: | |
831 | buffer = io.BytesIO() |
|
912 | buffer = io.BytesIO() | |
832 |
with dctx. |
|
913 | with dctx.stream_writer(buffer) as decompressor: | |
833 | decompressor.write(compressed_data) |
|
914 | decompressor.write(compressed_data) | |
834 | # Do something with raw data in ``buffer``. |
|
915 | # Do something with raw data in ``buffer``. | |
835 |
|
916 | |||
@@ -843,56 +924,69 b' a ``ZstdCompressionDict`` later) via ``a' | |||||
843 | dict_data = zstd.train_dictionary(size, samples) |
|
924 | dict_data = zstd.train_dictionary(size, samples) | |
844 | raw_data = dict_data.as_bytes() |
|
925 | raw_data = dict_data.as_bytes() | |
845 |
|
926 | |||
846 | The following named arguments to ``train_dictionary`` can also be used |
|
927 | By default, when a ``ZstdCompressionDict`` is *attached* to a | |
847 | to further control dictionary generation. |
|
928 | ``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the | |
|
929 | dictionary for use. This is fine if only 1 compression operation is being | |||
|
930 | performed or if the ``ZstdCompressor`` is being reused for multiple operations. | |||
|
931 | But if multiple ``ZstdCompressor`` instances are being used with the dictionary, | |||
|
932 | this can add overhead. | |||
848 |
|
933 | |||
849 | selectivity |
|
934 | It is possible to *precompute* the dictionary so it can readily be consumed | |
850 | Integer selectivity level. Default is 9. Larger values yield more data in |
|
935 | by multiple ``ZstdCompressor`` instances:: | |
851 | dictionary. |
|
936 | ||
852 | level |
|
937 | d = zstd.ZstdCompressionDict(data) | |
853 | Integer compression level. Default is 6. |
|
|||
854 | dict_id |
|
|||
855 | Integer dictionary ID for the produced dictionary. Default is 0, which |
|
|||
856 | means to use a random value. |
|
|||
857 | notifications |
|
|||
858 | Controls writing of informational messages to ``stderr``. ``0`` (the |
|
|||
859 | default) means to write nothing. ``1`` writes errors. ``2`` writes |
|
|||
860 | progression info. ``3`` writes more details. And ``4`` writes all info. |
|
|||
861 |
|
938 | |||
862 | Cover Dictionaries |
|
939 | # Precompute for compression level 3. | |
863 | ^^^^^^^^^^^^^^^^^^ |
|
940 | d.precompute_compress(level=3) | |
864 |
|
|
941 | ||
865 | An alternate dictionary training mechanism named *cover* is also available. |
|
942 | # Precompute with specific compression parameters. | |
866 | More details about this training mechanism are available in the paper |
|
943 | params = zstd.ZstdCompressionParameters(...) | |
867 | *Effective Construction of Relative Lempel-Ziv Dictionaries* (authors: |
|
944 | d.precompute_compress(compression_params=params) | |
868 | Liao, Petri, Moffat, Wirth). |
|
|||
869 |
|
||||
870 | To use this mechanism, use ``zstd.train_cover_dictionary()`` instead of |
|
|||
871 | ``zstd.train_dictionary()``. The function behaves nearly the same except |
|
|||
872 | its arguments are different and the returned dictionary will contain ``k`` |
|
|||
873 | and ``d`` attributes reflecting the parameters to the cover algorithm. |
|
|||
874 |
|
|
945 | ||
875 | .. note:: |
|
946 | .. note:: | |
876 |
|
947 | |||
877 | The ``k`` and ``d`` attributes are only populated on dictionary |
|
948 | When a dictionary is precomputed, the compression parameters used to | |
878 | instances created by this function. If a ``ZstdCompressionDict`` is |
|
949 | precompute the dictionary overwrite some of the compression parameters | |
879 | constructed from raw bytes data, the ``k`` and ``d`` attributes will |
|
950 | specified to ``ZstdCompressor.__init__``. | |
880 | be ``0``. |
|
951 | ||
|
952 | Training Dictionaries | |||
|
953 | ^^^^^^^^^^^^^^^^^^^^^ | |||
|
954 | ||||
|
955 | Unless using *prefix* dictionaries, dictionary data is produced by *training* | |||
|
956 | on existing data:: | |||
|
957 | ||||
|
958 | dict_data = zstd.train_dictionary(size, samples) | |||
|
959 | ||||
|
960 | This takes a target dictionary size and list of bytes instances and creates and | |||
|
961 | returns a ``ZstdCompressionDict``. | |||
|
962 | ||||
|
963 | The dictionary training mechanism is known as *cover*. More details about it are | |||
|
964 | available in the paper *Effective Construction of Relative Lempel-Ziv | |||
|
965 | Dictionaries* (authors: Liao, Petri, Moffat, Wirth). | |||
|
966 | ||||
|
967 | The cover algorithm takes parameters ``k`` and ``d``. These are the | |||
|
968 | *segment size* and *dmer size*, respectively. The returned dictionary | |||
|
969 | instance created by this function has ``k`` and ``d`` attributes | |||
|
970 | containing the values for these parameters. If a ``ZstdCompressionDict`` | |||
|
971 | is constructed from raw bytes data (a content-only dictionary), the | |||
|
972 | ``k`` and ``d`` attributes will be ``0``. | |||
881 |
|
973 | |||
882 | The segment and dmer size parameters to the cover algorithm can either be |
|
974 | The segment and dmer size parameters to the cover algorithm can either be | |
883 |
specified manually or |
|
975 | specified manually or ``train_dictionary()`` can try multiple values | |
884 |
|
|
976 | and pick the best one, where *best* means the smallest compressed data size. | |
885 | compressed data size. |
|
977 | This later mode is called *optimization* mode. | |
886 |
|
||||
887 | In manual mode, the ``k`` and ``d`` arguments must be specified or a |
|
|||
888 | ``ZstdError`` will be raised. |
|
|||
889 |
|
978 | |||
890 | In automatic mode (triggered by specifying ``optimize=True``), ``k`` |
|
979 | If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``, | |
891 | and ``d`` are optional. If a value isn't specified, then default values for |
|
980 | or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t`` | |
892 | both are tested. The ``steps`` argument can control the number of steps |
|
981 | struct) are defined, *optimization* mode is used with default parameter | |
893 | through ``k`` values. The ``level`` argument defines the compression level |
|
982 | values. | |
894 | that will be used when testing the compressed size. And ``threads`` can |
|
983 | ||
895 | specify the number of threads to use for concurrent operation. |
|
984 | If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged | |
|
985 | with explicit control over those parameters. Specifying ``threads=0`` or | |||
|
986 | ``threads=1`` can be used to engage *optimization* mode if other parameters | |||
|
987 | are not defined. | |||
|
988 | ||||
|
989 | Otherwise, non-*optimization* mode is used with the parameters specified. | |||
896 |
|
990 | |||
897 | This function takes the following arguments: |
|
991 | This function takes the following arguments: | |
898 |
|
992 | |||
@@ -909,64 +1003,92 b' d' | |||||
909 | dict_id |
|
1003 | dict_id | |
910 | Integer dictionary ID for the produced dictionary. Default is 0, which uses |
|
1004 | Integer dictionary ID for the produced dictionary. Default is 0, which uses | |
911 | a random value. |
|
1005 | a random value. | |
912 | optimize |
|
1006 | steps | |
913 | When true, test dictionary generation with multiple parameters. |
|
1007 | Number of steps through ``k`` values to perform when trying parameter | |
|
1008 | variations. | |||
|
1009 | threads | |||
|
1010 | Number of threads to use when trying parameter variations. Default is 0, | |||
|
1011 | which means to use a single thread. A negative value can be specified to | |||
|
1012 | use as many threads as there are detected logical CPUs. | |||
914 | level |
|
1013 | level | |
915 |
Integer target compression level when t |
|
1014 | Integer target compression level when trying parameter variations. | |
916 | ``optimize=True``. Default is 1. |
|
|||
917 | steps |
|
|||
918 | Number of steps through ``k`` values to perform when ``optimize=True``. |
|
|||
919 | Default is 32. |
|
|||
920 | threads |
|
|||
921 | Number of threads to use when ``optimize=True``. Default is 0, which means |
|
|||
922 | to use a single thread. A negative value can be specified to use as many |
|
|||
923 | threads as there are detected logical CPUs. |
|
|||
924 | notifications |
|
1015 | notifications | |
925 |
Controls writing of informational messages to ``stderr``. |
|
1016 | Controls writing of informational messages to ``stderr``. ``0`` (the | |
926 | documentation for ``train_dictionary()`` for more. |
|
1017 | default) means to write nothing. ``1`` writes errors. ``2`` writes | |
|
1018 | progression info. ``3`` writes more details. And ``4`` writes all info. | |||
927 |
|
1019 | |||
928 | Explicit Compression Parameters |
|
1020 | Explicit Compression Parameters | |
929 | ------------------------------- |
|
1021 | ------------------------------- | |
930 |
|
1022 | |||
931 | Zstandard's integer compression levels along with the input size and dictionary |
|
1023 | Zstandard offers a high-level *compression level* that maps to lower-level | |
932 | size are converted into a data structure defining multiple parameters to tune |
|
1024 | compression parameters. For many consumers, this numeric level is the only | |
933 | behavior of the compression algorithm. It is possible to use define this |
|
1025 | compression setting you'll need to touch. | |
934 | data structure explicitly to have lower-level control over compression behavior. |
|
1026 | ||
|
1027 | But for advanced use cases, it might be desirable to tweak these lower-level | |||
|
1028 | settings. | |||
935 |
|
1029 | |||
936 |
The `` |
|
1030 | The ``ZstdCompressionParameters`` type represents these low-level compression | |
937 | You can see how Zstandard converts compression levels to this data structure |
|
1031 | settings. | |
938 | by calling ``zstd.get_compression_parameters()``. e.g.:: |
|
|||
939 |
|
1032 | |||
940 | params = zstd.get_compression_parameters(5) |
|
1033 | Instances of this type can be constructed from a myriad of keyword arguments | |
|
1034 | (defined below) for complete low-level control over each adjustable | |||
|
1035 | compression setting. | |||
|
1036 | ||||
|
1037 | From a higher level, one can construct a ``ZstdCompressionParameters`` instance | |||
|
1038 | given a desired compression level and target input and dictionary size | |||
|
1039 | using ``ZstdCompressionParameters.from_level()``. e.g.:: | |||
941 |
|
|
1040 | ||
942 | This function also accepts the uncompressed data size and dictionary size |
|
1041 | # Derive compression settings for compression level 7. | |
943 | to adjust parameters:: |
|
1042 | params = zstd.ZstdCompressionParameters.from_level(7) | |
944 |
|
1043 | |||
945 | params = zstd.get_compression_parameters(3, source_size=len(data), dict_size=len(dict_data)) |
|
1044 | # With an input size of 1MB | |
|
1045 | params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576) | |||
|
1046 | ||||
|
1047 | Using ``from_level()``, it is also possible to override individual compression | |||
|
1048 | parameters or to define additional settings that aren't automatically derived. | |||
|
1049 | e.g.:: | |||
946 |
|
|
1050 | ||
947 | You can also construct compression parameters from their low-level components:: |
|
1051 | params = zstd.ZstdCompressionParameters.from_level(4, window_log=10) | |
|
1052 | params = zstd.ZstdCompressionParameters.from_level(5, threads=4) | |||
|
1053 | ||||
|
1054 | Or you can define low-level compression settings directly:: | |||
948 |
|
1055 | |||
949 |
params = zstd.CompressionParameters( |
|
1056 | params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True) | |
950 |
|
1057 | |||
951 | You can then configure a compressor to use the custom parameters:: |
|
1058 | Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to | |
|
1059 | configure a compressor:: | |||
952 |
|
1060 | |||
953 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
1061 | cctx = zstd.ZstdCompressor(compression_params=params) | |
954 |
|
1062 | |||
955 |
The |
|
1063 | The named arguments and attributes of ``ZstdCompressionParameters`` are as | |
|
1064 | follows: | |||
956 |
|
1065 | |||
|
1066 | * format | |||
|
1067 | * compression_level | |||
957 | * window_log |
|
1068 | * window_log | |
|
1069 | * hash_log | |||
958 | * chain_log |
|
1070 | * chain_log | |
959 | * hash_log |
|
|||
960 | * search_log |
|
1071 | * search_log | |
961 | * search_length |
|
1072 | * min_match | |
962 | * target_length |
|
1073 | * target_length | |
963 | * strategy |
|
1074 | * compression_strategy | |
|
1075 | * write_content_size | |||
|
1076 | * write_checksum | |||
|
1077 | * write_dict_id | |||
|
1078 | * job_size | |||
|
1079 | * overlap_size_log | |||
|
1080 | * compress_literals | |||
|
1081 | * force_max_window | |||
|
1082 | * enable_ldm | |||
|
1083 | * ldm_hash_log | |||
|
1084 | * ldm_min_match | |||
|
1085 | * ldm_bucket_size_log | |||
|
1086 | * ldm_hash_every_log | |||
|
1087 | * threads | |||
964 |
|
1088 | |||
965 | This is the order the arguments are passed to the constructor if not using |
|
1089 | Some of these are very low-level settings. It may help to consult the official | |
966 | named arguments. |
|
1090 | zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants | |
967 |
|
1091 | in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h). | ||
968 | You'll need to read the Zstandard documentation for what these parameters |
|
|||
969 | do. |
|
|||
970 |
|
1092 | |||
971 | Frame Inspection |
|
1093 | Frame Inspection | |
972 | ---------------- |
|
1094 | ---------------- | |
@@ -1003,15 +1125,17 b' has_checksum' | |||||
1003 | Bool indicating whether a 4 byte content checksum is stored at the end |
|
1125 | Bool indicating whether a 4 byte content checksum is stored at the end | |
1004 | of the frame. |
|
1126 | of the frame. | |
1005 |
|
1127 | |||
|
1128 | ``zstd.frame_header_size(data)`` returns the size of the zstandard frame | |||
|
1129 | header. | |||
|
1130 | ||||
|
1131 | ``zstd.frame_content_size(data)`` returns the content size as parsed from | |||
|
1132 | the frame header. ``-1`` means the content size is unknown. ``0`` means | |||
|
1133 | an empty frame. The content size is usually correct. However, it may not | |||
|
1134 | be accurate. | |||
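A brief sketch of both helpers::

    frame = zstd.ZstdCompressor().compress(b'data to compress')

    header_size = zstd.frame_header_size(frame)
    content_size = zstd.frame_content_size(frame)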
|
1135 | ||||
1006 | Misc Functionality |
|
1136 | Misc Functionality | |
1007 | ------------------ |
|
1137 | ------------------ | |
1008 |
|
1138 | |||
1009 | estimate_compression_context_size(CompressionParameters) |
|
|||
1010 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
|||
1011 |
|
||||
1012 | Given a ``CompressionParameters`` struct, estimate the memory size required |
|
|||
1013 | to perform compression. |
|
|||
1014 |
|
||||
1015 | estimate_decompression_context_size() |
|
1139 | estimate_decompression_context_size() | |
1016 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
1140 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
1017 |
|
1141 | |||
@@ -1041,6 +1165,11 b' FRAME_HEADER' | |||||
1041 | MAGIC_NUMBER |
|
1165 | MAGIC_NUMBER | |
1042 | Frame header as an integer |
|
1166 | Frame header as an integer | |
1043 |
|
1167 | |||
|
1168 | CONTENTSIZE_UNKNOWN | |||
|
1169 | Value for content size when the content size is unknown. | |||
|
1170 | CONTENTSIZE_ERROR | |||
|
1171 | Value for content size when content size couldn't be determined. | |||
|
1172 | ||||
1044 | WINDOWLOG_MIN |
|
1173 | WINDOWLOG_MIN | |
1045 | Minimum value for compression parameter |
|
1174 | Minimum value for compression parameter | |
1046 | WINDOWLOG_MAX |
|
1175 | WINDOWLOG_MAX | |
@@ -1063,8 +1192,6 b' SEARCHLENGTH_MAX' | |||||
1063 | Maximum value for compression parameter |
|
1192 | Maximum value for compression parameter | |
1064 | TARGETLENGTH_MIN |
|
1193 | TARGETLENGTH_MIN | |
1065 | Minimum value for compression parameter |
|
1194 | Minimum value for compression parameter | |
1066 | TARGETLENGTH_MAX |
|
|||
1067 | Maximum value for compression parameter |
|
|||
1068 | STRATEGY_FAST |
|
1195 | STRATEGY_FAST | |
1069 | Compression strategy |
|
1196 | Compression strategy | |
1070 | STRATEGY_DFAST |
|
1197 | STRATEGY_DFAST | |
@@ -1079,6 +1206,13 b' STRATEGY_BTLAZY2' | |||||
1079 | Compression strategy |
|
1206 | Compression strategy | |
1080 | STRATEGY_BTOPT |
|
1207 | STRATEGY_BTOPT | |
1081 | Compression strategy |
|
1208 | Compression strategy | |
|
1209 | STRATEGY_BTULTRA | |||
|
1210 | Compression strategy | |||
|
1211 | ||||
|
1212 | FORMAT_ZSTD1 | |||
|
1213 | Zstandard frame format | |||
|
1214 | FORMAT_ZSTD1_MAGICLESS | |||
|
1215 | Zstandard frame format without magic header | |||
1082 |
|
1216 | |||
1083 | Performance Considerations |
|
1217 | Performance Considerations | |
1084 | -------------------------- |
|
1218 | -------------------------- | |
@@ -1090,7 +1224,7 b' instantiating a new ``ZstdCompressor`` o' | |||||
1090 | operation. The differences are magnified as the size of data decreases. For |
|
1224 | operation. The differences are magnified as the size of data decreases. For | |
1091 | example, the difference between *context* reuse and non-reuse for 100,000 |
|
1225 | example, the difference between *context* reuse and non-reuse for 100,000 | |
1092 | 100 byte inputs will be significant (possibly over 10x faster to reuse contexts) |
|
1226 | 100 byte inputs will be significant (possibly over 10x faster to reuse contexts) | |
1093 | whereas 10 1,000,000 byte inputs will be more similar in speed (because the |
|
1227 | whereas 10 100,000,000 byte inputs will be more similar in speed (because the | |
1094 | time spent doing compression dwarfs time spent creating new *contexts*). |
|
1228 | time spent doing compression dwarfs time spent creating new *contexts*). | |
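For example, a sketch of reusing one *context* for many small inputs instead of constructing a new ``ZstdCompressor`` per input (``inputs`` is an assumed iterable of small ``bytes`` objects)::

    cctx = zstd.ZstdCompressor(level=3)
    compressed = [cctx.compress(chunk) for chunk in inputs]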
1095 |
|
1229 | |||
1096 | Buffer Types |
|
1230 | Buffer Types | |
@@ -1187,9 +1321,8 b' There are multiple APIs for performing c' | |||||
1187 | because different applications have different needs and the library wants to |
|
1321 | because different applications have different needs and the library wants to | |
1188 | facilitate optimal use in as many use cases as possible. |
|
1322 | facilitate optimal use in as many use cases as possible. | |
1189 |
|
1323 | |||
1190 |
From a high-level, APIs are divided into *one-shot* and *streaming* |
|
1324 | At a high level, APIs are divided into *one-shot* and *streaming*: either you | |
1191 | the ``Concepts`` section for a description of how these are different at |
|
1325 | are operating on all data at once or you operate on it piecemeal. | |
1192 | the C layer. |
|
|||
1193 |
|
1326 | |||
1194 | The *one-shot* APIs are useful for small data, where the input or output |
|
1327 | The *one-shot* APIs are useful for small data, where the input or output | |
1195 | size is known. (The size can come from a buffer length, file size, or |
|
1328 | size is known. (The size can come from a buffer length, file size, or | |
@@ -1222,145 +1355,39 b' There is potential for long pauses as da' | |||||
1222 | underlying stream (say from interacting with a filesystem or network). This |
|
1355 | underlying stream (say from interacting with a filesystem or network). This | |
1223 | could add considerable overhead. |
|
1356 | could add considerable overhead. | |
1224 |
|
1357 | |||
1225 | Concepts |
|
1358 | Thread Safety | |
1226 | ======== |
|
1359 | ============= | |
1227 |
|
||||
1228 | It is important to have a basic understanding of how Zstandard works in order |
|
|||
1229 | to optimally use this library. In addition, there are some low-level Python |
|
|||
1230 | concepts that are worth explaining to aid understanding. This section aims to |
|
|||
1231 | provide that knowledge. |
|
|||
1232 |
|
||||
1233 | Zstandard Frames and Compression Format |
|
|||
1234 | --------------------------------------- |
|
|||
1235 |
|
||||
1236 | Compressed zstandard data almost always exists within a container called a |
|
|||
1237 | *frame*. (For the technically curious, see the |
|
|||
1238 | `specification <https://github.com/facebook/zstd/blob/3bee41a70eaf343fbcae3637b3f6edbe52f35ed8/doc/zstd_compression_format.md>_.) |
|
|||
1239 |
|
||||
1240 | The frame contains a header and optional trailer. The header contains a |
|
|||
1241 | magic number to self-identify as a zstd frame and a description of the |
|
|||
1242 | compressed data that follows. |
|
|||
1243 |
|
||||
1244 | Among other things, the frame *optionally* contains the size of the |
|
|||
1245 | decompressed data the frame represents, a 32-bit checksum of the |
|
|||
1246 | decompressed data (to facilitate verification during decompression), |
|
|||
1247 | and the ID of the dictionary used to compress the data. |
|
|||
1248 |
|
||||
1249 | Storing the original content size in the frame (``write_content_size=True`` |
|
|||
1250 | to ``ZstdCompressor``) is important for performance in some scenarios. Having |
|
|||
1251 | the decompressed size stored there (or storing it elsewhere) allows |
|
|||
1252 | decompression to perform a single memory allocation that is exactly sized to |
|
|||
1253 | the output. This is faster than continuously growing a memory buffer to hold |
|
|||
1254 | output. |
|
|||
1255 |
|
1360 | |||
1256 | Compression and Decompression Contexts |
|
1361 | ``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees | |
1257 | -------------------------------------- |
|
1362 | about thread safety. Do not operate on the same ``ZstdCompressor`` and | |
1258 |
|
1363 | ``ZstdDecompressor`` instance simultaneously from different threads. It is | ||
1259 | In order to perform a compression or decompression operation with the zstd |
|
1364 | fine to have different threads call into a single instance, just not at the | |
1260 | C API, you need what's called a *context*. A context essentially holds |
|
1365 | same time. | |
1261 | configuration and state for a compression or decompression operation. For |
|
|||
1262 | example, a compression context holds the configured compression level. |
|
|||
1263 |
|
||||
1264 | Contexts can be reused for multiple operations. Since creating and |
|
|||
1265 | destroying contexts is not free, there are performance advantages to |
|
|||
1266 | reusing contexts. |
|
|||
1267 |
|
||||
1268 | The ``ZstdCompressor`` and ``ZstdDecompressor`` types are essentially |
|
|||
1269 | wrappers around these contexts in the zstd C API. |
|
|||
1270 |
|
1366 | |||
1271 | One-shot And Streaming Operations |
|
1367 | Some operations require multiple function calls to complete, e.g. streaming | |
1272 | --------------------------------- |
|
1368 | operations. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used | |
1273 |
|
1369 | for simultaneously active operations, e.g. you must not start a streaming | ||
1274 | A compression or decompression operation can either be performed as a |
|
1370 | operation when another streaming operation is already active. | |
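
A sketch of the safe pattern implied above: give each thread its own instance and only run one operation on an instance at a time::

    import threading
    import zstandard as zstd

    def worker(payload, results, index):
        # Each thread constructs its own compressor; a single instance
        # must not be used from two threads at the same time.
        cctx = zstd.ZstdCompressor()
        results[index] = cctx.compress(payload)

    payloads = [b"a" * 4096, b"b" * 4096]
    results = [None] * len(payloads)
    threads = [threading.Thread(target=worker, args=(p, results, i))
               for i, p in enumerate(payloads)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
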
1275 | single *one-shot* operation or as a continuous *streaming* operation. |
|
|||
1276 |
|
||||
1277 | In one-shot mode (the *simple* APIs provided by the Python interface), |
|
|||
1278 | **all** input is handed to the compressor or decompressor as a single buffer |
|
|||
1279 | and **all** output is returned as a single buffer. |
|
|||
1280 |
|
||||
1281 | In streaming mode, input is delivered to the compressor or decompressor as |
|
|||
1282 | a series of chunks via multiple function calls. Likewise, output is |
|
|||
1283 | obtained in chunks as well. |
|
|||
1284 |
|
||||
1285 | Streaming operations require an additional *stream* object to be created |
|
|||
1286 | to track the operation. These are logical extensions of *context* |
|
|||
1287 | instances. |
|
|||
1288 |
|
1371 | |||
1289 | There are advantages and disadvantages to each mode of operation. There |
|
1372 | The C extension releases the GIL during non-trivial calls into the zstd C | |
1290 | are scenarios where certain modes can't be used. See the |
|
1373 | API. Non-trivial calls are notably compression and decompression. Trivial | |
1291 | ``Choosing an API`` section for more. |
|
1374 | calls are things like parsing frame parameters. Where the GIL is released | |
1292 |
|
1375 | is considered an implementation detail and can change in any release. | ||
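
Since the GIL is released around compression and decompression, thread pools can get real parallelism as long as each task uses its own instance; a hedged sketch::

    from concurrent.futures import ThreadPoolExecutor

    import zstandard as zstd

    def compress_one(data):
        # A per-task compressor avoids sharing an instance across threads.
        return zstd.ZstdCompressor(level=6).compress(data)

    inputs = [bytes([i]) * (1 << 20) for i in range(8)]
    with ThreadPoolExecutor(max_workers=4) as pool:
        frames = list(pool.map(compress_one, inputs))
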
1293 | Dictionaries |
|
|||
1294 | ------------ |
|
|||
1295 |
|
||||
1296 | A compression *dictionary* is essentially data used to seed the compressor |
|
|||
1297 | state so it can achieve better compression. The idea is that if you are |
|
|||
1298 | compressing a lot of similar pieces of data (e.g. JSON documents or anything |
|
|||
1299 | sharing similar structure), then you can find common patterns across multiple |
|
|||
1300 | objects then leverage those common patterns during compression and |
|
|||
1301 | decompression operations to achieve better compression ratios. |
|
|||
1302 |
|
||||
1303 | Dictionary compression is generally only useful for small inputs - data no |
|
|||
1304 | larger than a few kilobytes. The upper bound on this range is highly dependent |
|
|||
1305 | on the input data and the dictionary. |
|
|||
1306 |
|
||||
1307 | Python Buffer Protocol |
|
|||
1308 | ---------------------- |
|
|||
1309 |
|
||||
1310 | Many functions in the library operate on objects that implement Python's |
|
|||
1311 | `buffer protocol <https://docs.python.org/3.6/c-api/buffer.html>`_. |
|
|||
1312 |
|
||||
1313 | The *buffer protocol* is an internal implementation detail of a Python |
|
|||
1314 | type that allows instances of that type (objects) to be exposed as a raw |
|
|||
1315 | pointer (or buffer) in the C API. In other words, it allows objects to be |
|
|||
1316 | exposed as an array of bytes. |
|
|||
1317 |
|
1376 | |||
1318 | From the perspective of the C API, objects implementing the *buffer protocol* |
|
1377 | APIs that accept bytes-like objects don't enforce that the underlying object | |
1319 | all look the same: they are just a pointer to a memory address of a defined |
|
1378 | is read-only. However, it is assumed that the passed object is read-only for | |
1320 | length. This allows the C API to be largely type agnostic when accessing their |
|
1379 | the duration of the function call. It is possible to pass a mutable object | |
1321 | data. This allows custom types to be passed in without first converting them |
|
1380 | (like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL | |
1322 | to a specific type. |
|
1381 | released, and mutate the object from another thread. Such a race condition | |
1323 |
|
1382 | is a bug in the consumer of python-zstandard. Most Python data types are | ||
1324 | Many Python types implement the buffer protocol. These include ``bytes`` |
|
1383 | immutable, so unless you are doing something fancy, you don't need to | |
1325 | (``str`` on Python 2), ``bytearray``, ``array.array``, ``io.BytesIO``, |
|
1384 | worry about this. | |
1326 | ``mmap.mmap``, and ``memoryview``. |
|
|||
1327 |
|
||||
1328 | ``python-zstandard`` APIs that accept objects conforming to the buffer |
|
|||
1329 | protocol require that the buffer is *C contiguous* and has a single |
|
|||
1330 | dimension (``ndim==1``). This is usually the case. An example of where it |
|
|||
1331 | is not is a Numpy matrix type. |
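
For illustration, any C contiguous, one-dimensional bytes-like object can be handed to the APIs directly (a sketch; the read-only caveat above still applies to mutable objects like ``bytearray``)::

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()

    # bytes, bytearray and memoryview all expose a C contiguous,
    # single-dimension buffer.
    cctx.compress(b"raw bytes")
    cctx.compress(bytearray(b"mutable buffer"))
    cctx.compress(memoryview(b"a larger buffer")[2:10])

    # Do not mutate a passed object (e.g. from another thread) while the
    # call is in progress.
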
|
|||
1332 |
|
||||
1333 | Requiring Output Sizes for Non-Streaming Decompression APIs |
|
|||
1334 | ----------------------------------------------------------- |
|
|||
1335 |
|
||||
1336 | Non-streaming decompression APIs require that either the output size is |
|
|||
1337 | explicitly defined (either in the zstd frame header or passed into the |
|
|||
1338 | function) or that a max output size is specified. This restriction is for |
|
|||
1339 | your safety. |
|
|||
1340 |
|
||||
1341 | The *one-shot* decompression APIs store the decompressed result in a |
|
|||
1342 | single buffer. This means that a buffer needs to be pre-allocated to hold |
|
|||
1343 | the result. If the decompressed size is not known, then there is no universal |
|
|||
1344 | good default size to use. Any default will fail or will be highly sub-optimal |
|
|||
1345 | in some scenarios (it will either be too small or will put stress on the |
|
|||
1346 | memory allocator to allocate a too large block). |
|
|||
1347 |
|
||||
1348 | A *helpful* API may retry decompression with buffers of increasing size. |
|
|||
1349 | While useful, there are obvious performance disadvantages, namely redoing |
|
|||
1350 | decompression N times until it works. In addition, there is a security |
|
|||
1351 | concern. Say the input came from highly compressible data, like 1 GB of the |
|
|||
1352 | same byte value. The output size could be several magnitudes larger than the |
|
|||
1353 | input size. An input of <100KB could decompress to >1GB. Without a bounds |
|
|||
1354 | restriction on the decompressed size, certain inputs could exhaust all system |
|
|||
1355 | memory. That's not good and is why the maximum output size is limited. |
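
As an illustration of that restriction, the one-shot decompression API needs either a frame with an embedded content size or an explicit bound (a sketch assuming ``ZstdDecompressor.decompress()`` and its ``max_output_size`` argument)::

    import zstandard as zstd

    cctx = zstd.ZstdCompressor(write_content_size=False)
    frame = cctx.compress(b"\x00" * 65536)

    dctx = zstd.ZstdDecompressor()

    # The frame does not record its decompressed size, so a maximum output
    # size must be supplied; it bounds how much memory the call may allocate.
    data = dctx.decompress(frame, max_output_size=1 << 20)
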
|
|||
1356 |
|
1385 | |||
1357 | Note on Zstandard's *Experimental* API |
|
1386 | Note on Zstandard's *Experimental* API | |
1358 | ====================================== |
|
1387 | ====================================== | |
1359 |
|
1388 | |||
1360 | Many of the Zstandard APIs used by this module are marked as *experimental* |
|
1389 | Many of the Zstandard APIs used by this module are marked as *experimental* | |
1361 | within the Zstandard project. This includes a large number of useful |
|
1390 | within the Zstandard project. | |
1362 | features, such as compression and frame parameters and parts of dictionary |
|
|||
1363 | compression. |
|
|||
1364 |
|
1391 | |||
1365 | It is unclear how Zstandard's C API will evolve over time, especially with |
|
1392 | It is unclear how Zstandard's C API will evolve over time, especially with | |
1366 | regards to this *experimental* functionality. We will try to maintain |
|
1393 | regards to this *experimental* functionality. We will try to maintain | |
@@ -1371,7 +1398,7 b' Since a copy of the Zstandard source cod' | |||||
1371 | module and since we compile against it, the behavior of a specific |
|
1398 | module and since we compile against it, the behavior of a specific | |
1372 | version of this module should be constant for all of time. So if you |
|
1399 | version of this module should be constant for all of time. So if you | |
1373 | pin the version of this module used in your projects (which is a Python |
|
1400 | pin the version of this module used in your projects (which is a Python | |
1374 | best practice), you should be |
|
1401 | best practice), you should be shielded from unwanted future changes. | |
1375 |
|
1402 | |||
1376 | Donate |
|
1403 | Donate | |
1377 | ====== |
|
1404 | ====== |
@@ -83,7 +83,7 b' static int BufferWithSegments_init(ZstdB' | |||||
83 | } |
|
83 | } | |
84 |
|
84 | |||
85 | if (segments.len % sizeof(BufferSegment)) { |
|
85 | if (segments.len % sizeof(BufferSegment)) { | |
86 | PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of % |
|
86 | PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of %zu", | |
87 | sizeof(BufferSegment)); |
|
87 | sizeof(BufferSegment)); | |
88 | goto except; |
|
88 | goto except; | |
89 | } |
|
89 | } | |
@@ -123,7 +123,7 b' except:' | |||||
123 | PyBuffer_Release(&self->parent); |
|
123 | PyBuffer_Release(&self->parent); | |
124 | PyBuffer_Release(&segments); |
|
124 | PyBuffer_Release(&segments); | |
125 | return -1; |
|
125 | return -1; | |
126 | } |
|
126 | } | |
127 |
|
127 | |||
128 | /** |
|
128 | /** | |
129 | * Construct a BufferWithSegments from existing memory and offsets. |
|
129 | * Construct a BufferWithSegments from existing memory and offsets. | |
@@ -188,6 +188,12 b' static ZstdBufferSegment* BufferWithSegm' | |||||
188 | return NULL; |
|
188 | return NULL; | |
189 | } |
|
189 | } | |
190 |
|
190 | |||
|
191 | if (self->segments[i].length > PY_SSIZE_T_MAX) { | |||
|
192 | PyErr_Format(PyExc_ValueError, | |||
|
193 | "item at offset %zd is too large for this platform", i); | |||
|
194 | return NULL; | |||
|
195 | } | |||
|
196 | ||||
191 | result = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL); |
|
197 | result = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL); | |
192 | if (NULL == result) { |
|
198 | if (NULL == result) { | |
193 | return NULL; |
|
199 | return NULL; | |
@@ -197,7 +203,7 b' static ZstdBufferSegment* BufferWithSegm' | |||||
197 | Py_INCREF(self); |
|
203 | Py_INCREF(self); | |
198 |
|
204 | |||
199 | result->data = (char*)self->data + self->segments[i].offset; |
|
205 | result->data = (char*)self->data + self->segments[i].offset; | |
200 | result->dataSize = self->segments[i].length; |
|
206 | result->dataSize = (Py_ssize_t)self->segments[i].length; | |
201 | result->offset = self->segments[i].offset; |
|
207 | result->offset = self->segments[i].offset; | |
202 |
|
208 | |||
203 | return result; |
|
209 | return result; | |
@@ -205,7 +211,13 b' static ZstdBufferSegment* BufferWithSegm' | |||||
205 |
|
211 | |||
206 | #if PY_MAJOR_VERSION >= 3 |
|
212 | #if PY_MAJOR_VERSION >= 3 | |
207 | static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) { |
|
213 | static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) { | |
208 | return PyBuffer_FillInfo(view, (PyObject*)self, self->data, self->dataSize, 1, flags); |
|
214 | if (self->dataSize > PY_SSIZE_T_MAX) { | |
|
215 | view->obj = NULL; | |||
|
216 | PyErr_SetString(PyExc_BufferError, "buffer is too large for this platform"); | |||
|
217 | return -1; | |||
|
218 | } | |||
|
219 | ||||
|
220 | return PyBuffer_FillInfo(view, (PyObject*)self, self->data, (Py_ssize_t)self->dataSize, 1, flags); | |||
209 | } |
|
221 | } | |
210 | #else |
|
222 | #else | |
211 | static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) { |
|
223 | static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) { | |
@@ -214,8 +226,13 b' static Py_ssize_t BufferWithSegments_get' | |||||
214 | return -1; |
|
226 | return -1; | |
215 | } |
|
227 | } | |
216 |
|
228 | |||
|
229 | if (self->dataSize > PY_SSIZE_T_MAX) { | |||
|
230 | PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform"); | |||
|
231 | return -1; | |||
|
232 | } | |||
|
233 | ||||
217 | *ptrptr = self->data; |
|
234 | *ptrptr = self->data; | |
218 | return self->dataSize; |
|
235 | return (Py_ssize_t)self->dataSize; | |
219 | } |
|
236 | } | |
220 |
|
237 | |||
221 | static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) { |
|
238 | static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) { | |
@@ -232,7 +249,12 b' PyDoc_STRVAR(BufferWithSegments_tobytes_' | |||||
232 | ); |
|
249 | ); | |
233 |
|
250 | |||
234 | static PyObject* BufferWithSegments_tobytes(ZstdBufferWithSegments* self) { |
|
251 | static PyObject* BufferWithSegments_tobytes(ZstdBufferWithSegments* self) { | |
235 | return PyBytes_FromStringAndSize(self->data, self->dataSize); |
|
252 | if (self->dataSize > PY_SSIZE_T_MAX) { | |
|
253 | PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform"); | |||
|
254 | return NULL; | |||
|
255 | } | |||
|
256 | ||||
|
257 | return PyBytes_FromStringAndSize(self->data, (Py_ssize_t)self->dataSize); | |||
236 | } |
|
258 | } | |
237 |
|
259 | |||
238 | PyDoc_STRVAR(BufferWithSegments_segments__doc__, |
|
260 | PyDoc_STRVAR(BufferWithSegments_segments__doc__, |
@@ -14,125 +14,11 b' ZstdCompressionDict* train_dictionary(Py' | |||||
14 | static char* kwlist[] = { |
|
14 | static char* kwlist[] = { | |
15 | "dict_size", |
|
15 | "dict_size", | |
16 | "samples", |
|
16 | "samples", | |
17 | "selectivity", |
|
|||
18 | "level", |
|
|||
19 | "notifications", |
|
|||
20 | "dict_id", |
|
|||
21 | NULL |
|
|||
22 | }; |
|
|||
23 | size_t capacity; |
|
|||
24 | PyObject* samples; |
|
|||
25 | Py_ssize_t samplesLen; |
|
|||
26 | unsigned selectivity = 0; |
|
|||
27 | int level = 0; |
|
|||
28 | unsigned notifications = 0; |
|
|||
29 | unsigned dictID = 0; |
|
|||
30 | ZDICT_params_t zparams; |
|
|||
31 | Py_ssize_t sampleIndex; |
|
|||
32 | Py_ssize_t sampleSize; |
|
|||
33 | PyObject* sampleItem; |
|
|||
34 | size_t zresult; |
|
|||
35 | void* sampleBuffer = NULL; |
|
|||
36 | void* sampleOffset; |
|
|||
37 | size_t samplesSize = 0; |
|
|||
38 | size_t* sampleSizes = NULL; |
|
|||
39 | void* dict = NULL; |
|
|||
40 | ZstdCompressionDict* result = NULL; |
|
|||
41 |
|
||||
42 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IiII:train_dictionary", |
|
|||
43 | kwlist, |
|
|||
44 | &capacity, |
|
|||
45 | &PyList_Type, &samples, |
|
|||
46 | &selectivity, &level, ¬ifications, &dictID)) { |
|
|||
47 | return NULL; |
|
|||
48 | } |
|
|||
49 |
|
||||
50 | memset(&zparams, 0, sizeof(zparams)); |
|
|||
51 |
|
||||
52 | zparams.selectivityLevel = selectivity; |
|
|||
53 | zparams.compressionLevel = level; |
|
|||
54 | zparams.notificationLevel = notifications; |
|
|||
55 | zparams.dictID = dictID; |
|
|||
56 |
|
||||
57 | /* Figure out the size of the raw samples */ |
|
|||
58 | samplesLen = PyList_Size(samples); |
|
|||
59 | for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) { |
|
|||
60 | sampleItem = PyList_GetItem(samples, sampleIndex); |
|
|||
61 | if (!PyBytes_Check(sampleItem)) { |
|
|||
62 | PyErr_SetString(PyExc_ValueError, "samples must be bytes"); |
|
|||
63 | return NULL; |
|
|||
64 | } |
|
|||
65 | samplesSize += PyBytes_GET_SIZE(sampleItem); |
|
|||
66 | } |
|
|||
67 |
|
||||
68 | /* Now that we know the total size of the raw simples, we can allocate |
|
|||
69 | a buffer for the raw data */ |
|
|||
70 | sampleBuffer = PyMem_Malloc(samplesSize); |
|
|||
71 | if (!sampleBuffer) { |
|
|||
72 | PyErr_NoMemory(); |
|
|||
73 | goto finally; |
|
|||
74 | } |
|
|||
75 | sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t)); |
|
|||
76 | if (!sampleSizes) { |
|
|||
77 | PyErr_NoMemory(); |
|
|||
78 | goto finally; |
|
|||
79 | } |
|
|||
80 |
|
||||
81 | sampleOffset = sampleBuffer; |
|
|||
82 | /* Now iterate again and assemble the samples in the buffer */ |
|
|||
83 | for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) { |
|
|||
84 | sampleItem = PyList_GetItem(samples, sampleIndex); |
|
|||
85 | sampleSize = PyBytes_GET_SIZE(sampleItem); |
|
|||
86 | sampleSizes[sampleIndex] = sampleSize; |
|
|||
87 | memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize); |
|
|||
88 | sampleOffset = (char*)sampleOffset + sampleSize; |
|
|||
89 | } |
|
|||
90 |
|
||||
91 | dict = PyMem_Malloc(capacity); |
|
|||
92 | if (!dict) { |
|
|||
93 | PyErr_NoMemory(); |
|
|||
94 | goto finally; |
|
|||
95 | } |
|
|||
96 |
|
||||
97 | /* TODO consider using dup2() to redirect zstd's stderr writing to a buffer */ |
|
|||
98 | Py_BEGIN_ALLOW_THREADS |
|
|||
99 | zresult = ZDICT_trainFromBuffer_advanced(dict, capacity, |
|
|||
100 | sampleBuffer, sampleSizes, (unsigned int)samplesLen, |
|
|||
101 | zparams); |
|
|||
102 | Py_END_ALLOW_THREADS |
|
|||
103 | if (ZDICT_isError(zresult)) { |
|
|||
104 | PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult)); |
|
|||
105 | PyMem_Free(dict); |
|
|||
106 | goto finally; |
|
|||
107 | } |
|
|||
108 |
|
||||
109 | result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType); |
|
|||
110 | if (!result) { |
|
|||
111 | goto finally; |
|
|||
112 | } |
|
|||
113 |
|
||||
114 | result->dictData = dict; |
|
|||
115 | result->dictSize = zresult; |
|
|||
116 | result->d = 0; |
|
|||
117 | result->k = 0; |
|
|||
118 |
|
||||
119 | finally: |
|
|||
120 | PyMem_Free(sampleBuffer); |
|
|||
121 | PyMem_Free(sampleSizes); |
|
|||
122 |
|
||||
123 | return result; |
|
|||
124 | } |
|
|||
125 |
|
||||
126 | ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) { |
|
|||
127 | static char* kwlist[] = { |
|
|||
128 | "dict_size", |
|
|||
129 | "samples", |
|
|||
130 | "k", |
|
17 | "k", | |
131 | "d", |
|
18 | "d", | |
132 | "notifications", |
|
19 | "notifications", | |
133 | "dict_id", |
|
20 | "dict_id", | |
134 | "level", |
|
21 | "level", | |
135 | "optimize", |
|
|||
136 | "steps", |
|
22 | "steps", | |
137 | "threads", |
|
23 | "threads", | |
138 | NULL |
|
24 | NULL | |
@@ -145,10 +31,9 b' ZstdCompressionDict* train_cover_diction' | |||||
145 | unsigned notifications = 0; |
|
31 | unsigned notifications = 0; | |
146 | unsigned dictID = 0; |
|
32 | unsigned dictID = 0; | |
147 | int level = 0; |
|
33 | int level = 0; | |
148 | PyObject* optimize = NULL; |
|
|||
149 | unsigned steps = 0; |
|
34 | unsigned steps = 0; | |
150 | int threads = 0; |
|
35 | int threads = 0; | |
151 |
|
|
36 | ZDICT_cover_params_t params; | |
152 | Py_ssize_t samplesLen; |
|
37 | Py_ssize_t samplesLen; | |
153 | Py_ssize_t i; |
|
38 | Py_ssize_t i; | |
154 | size_t samplesSize = 0; |
|
39 | size_t samplesSize = 0; | |
@@ -160,9 +45,9 b' ZstdCompressionDict* train_cover_diction' | |||||
160 | size_t zresult; |
|
45 | size_t zresult; | |
161 | ZstdCompressionDict* result = NULL; |
|
46 | ZstdCompressionDict* result = NULL; | |
162 |
|
47 | |||
163 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIi |
|
48 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIiIi:train_dictionary", | |
164 | kwlist, &capacity, &PyList_Type, &samples, |
|
49 | kwlist, &capacity, &PyList_Type, &samples, | |
165 |
&k, &d, ¬ifications, &dictID, &level, & |
|
50 | &k, &d, ¬ifications, &dictID, &level, &steps, &threads)) { | |
166 | return NULL; |
|
51 | return NULL; | |
167 | } |
|
52 | } | |
168 |
|
53 | |||
@@ -175,9 +60,9 b' ZstdCompressionDict* train_cover_diction' | |||||
175 | params.d = d; |
|
60 | params.d = d; | |
176 | params.steps = steps; |
|
61 | params.steps = steps; | |
177 | params.nbThreads = threads; |
|
62 | params.nbThreads = threads; | |
178 | params.notificationLevel = notifications; |
|
63 | params.zParams.notificationLevel = notifications; | |
179 | params.dictID = dictID; |
|
64 | params.zParams.dictID = dictID; | |
180 | params.compressionLevel = level; |
|
65 | params.zParams.compressionLevel = level; | |
181 |
|
66 | |||
182 | /* Figure out total size of input samples. */ |
|
67 | /* Figure out total size of input samples. */ | |
183 | samplesLen = PyList_Size(samples); |
|
68 | samplesLen = PyList_Size(samples); | |
@@ -219,12 +104,21 b' ZstdCompressionDict* train_cover_diction' | |||||
219 | } |
|
104 | } | |
220 |
|
105 | |||
221 | Py_BEGIN_ALLOW_THREADS |
|
106 | Py_BEGIN_ALLOW_THREADS | |
222 | if (optimize && PyObject_IsTrue(optimize)) { |
|
107 | /* No parameters uses the default function, which will use default params | |
223 | zresult = COVER_optimizeTrainFromBuffer(dict, capacity, |
|
108 | and call ZDICT_optimizeTrainFromBuffer_cover under the hood. */ | |
|
109 | if (!params.k && !params.d && !params.zParams.compressionLevel | |||
|
110 | && !params.zParams.notificationLevel && !params.zParams.dictID) { | |||
|
111 | zresult = ZDICT_trainFromBuffer(dict, capacity, sampleBuffer, | |||
|
112 | sampleSizes, (unsigned)samplesLen); | |||
|
113 | } | |||
|
114 | /* Use optimize mode if user controlled steps or threads explicitly. */ | |||
|
115 | else if (params.steps || params.nbThreads) { | |||
|
116 | zresult = ZDICT_optimizeTrainFromBuffer_cover(dict, capacity, | |||
224 | sampleBuffer, sampleSizes, (unsigned)samplesLen, ¶ms); |
|
117 | sampleBuffer, sampleSizes, (unsigned)samplesLen, ¶ms); | |
225 | } |
|
118 | } | |
|
119 | /* Non-optimize mode with explicit control. */ | |||
226 | else { |
|
120 | else { | |
227 |
zresult = |
|
121 | zresult = ZDICT_trainFromBuffer_cover(dict, capacity, | |
228 | sampleBuffer, sampleSizes, (unsigned)samplesLen, params); |
|
122 | sampleBuffer, sampleSizes, (unsigned)samplesLen, params); | |
229 | } |
|
123 | } | |
230 | Py_END_ALLOW_THREADS |
|
124 | Py_END_ALLOW_THREADS | |
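
In Python terms, the dispatch above makes ``train_dictionary()`` behave roughly as follows (a hedged sketch using the keyword names from the argument list parsed earlier; ``samples`` must be a list of ``bytes``)::

    import zstandard as zstd

    samples = [b"record %d: some repetitive sample content" % i
               for i in range(1000)]

    # No cover parameters: zstd picks defaults (plain trainFromBuffer).
    d1 = zstd.train_dictionary(8192, samples)

    # Explicit k/d: cover training with exactly those parameters.
    d2 = zstd.train_dictionary(8192, samples, k=64, d=8)

    # steps (or threads): the optimizing cover trainer searches for good
    # k/d values itself.
    d3 = zstd.train_dictionary(8192, samples, steps=40)
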
@@ -243,8 +137,11 b' ZstdCompressionDict* train_cover_diction' | |||||
243 |
|
137 | |||
244 | result->dictData = dict; |
|
138 | result->dictData = dict; | |
245 | result->dictSize = zresult; |
|
139 | result->dictSize = zresult; | |
|
140 | result->dictType = ZSTD_dct_fullDict; | |||
246 | result->d = params.d; |
|
141 | result->d = params.d; | |
247 | result->k = params.k; |
|
142 | result->k = params.k; | |
|
143 | result->cdict = NULL; | |||
|
144 | result->ddict = NULL; | |||
248 |
|
145 | |||
249 | finally: |
|
146 | finally: | |
250 | PyMem_Free(sampleBuffer); |
|
147 | PyMem_Free(sampleBuffer); | |
@@ -253,43 +150,99 b' finally:' | |||||
253 | return result; |
|
150 | return result; | |
254 | } |
|
151 | } | |
255 |
|
152 | |||
|
153 | int ensure_ddict(ZstdCompressionDict* dict) { | |||
|
154 | if (dict->ddict) { | |||
|
155 | return 0; | |||
|
156 | } | |||
|
157 | ||||
|
158 | Py_BEGIN_ALLOW_THREADS | |||
|
159 | dict->ddict = ZSTD_createDDict_advanced(dict->dictData, dict->dictSize, | |||
|
160 | ZSTD_dlm_byRef, dict->dictType, ZSTD_defaultCMem); | |||
|
161 | Py_END_ALLOW_THREADS | |||
|
162 | if (!dict->ddict) { | |||
|
163 | PyErr_SetString(ZstdError, "could not create decompression dict"); | |||
|
164 | return 1; | |||
|
165 | } | |||
|
166 | ||||
|
167 | return 0; | |||
|
168 | } | |||
|
169 | ||||
256 | PyDoc_STRVAR(ZstdCompressionDict__doc__, |
|
170 | PyDoc_STRVAR(ZstdCompressionDict__doc__, | |
257 | "ZstdCompressionDict(data) - Represents a computed compression dictionary\n" |
|
171 | "ZstdCompressionDict(data) - Represents a computed compression dictionary\n" | |
258 | "\n" |
|
172 | "\n" | |
259 | "This type holds the results of a computed Zstandard compression dictionary.\n" |
|
173 | "This type holds the results of a computed Zstandard compression dictionary.\n" | |
260 | "Instances are obtained by calling ``train_dictionary()`` or by passing |
|
174 | "Instances are obtained by calling ``train_dictionary()`` or by passing\n" | |
261 | "obtained from another source into the constructor.\n" |
|
175 | "bytes obtained from another source into the constructor.\n" | |
262 | ); |
|
176 | ); | |
263 |
|
177 | |||
264 | static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args) { |
|
178 | static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) { | |
265 | const char* source; |
|
179 | static char* kwlist[] = { | |
266 | Py_ssize_t sourceSize; |
|
180 | "data", | |
|
181 | "dict_type", | |||
|
182 | NULL | |||
|
183 | }; | |||
|
184 | ||||
|
185 | int result = -1; | |||
|
186 | Py_buffer source; | |||
|
187 | unsigned dictType = ZSTD_dct_auto; | |||
267 |
|
188 | |||
268 | self->dictData = NULL; |
|
189 | self->dictData = NULL; | |
269 | self->dictSize = 0; |
|
190 | self->dictSize = 0; | |
|
191 | self->cdict = NULL; | |||
|
192 | self->ddict = NULL; | |||
270 |
|
193 | |||
271 | #if PY_MAJOR_VERSION >= 3 |
|
194 | #if PY_MAJOR_VERSION >= 3 | |
272 |
if (!PyArg_ParseTuple(args, "y |
|
195 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|I:ZstdCompressionDict", | |
273 | #else |
|
196 | #else | |
274 |
if (!PyArg_ParseTuple(args, "s |
|
197 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|I:ZstdCompressionDict", | |
275 | #endif |
|
198 | #endif | |
276 |
&source, & |
|
199 | kwlist, &source, &dictType)) { | |
277 | return -1; |
|
200 | return -1; | |
278 | } |
|
201 | } | |
279 |
|
202 | |||
280 | self->dictData = PyMem_Malloc(sourceSize); |
|
203 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |
|
204 | PyErr_SetString(PyExc_ValueError, | |||
|
205 | "data buffer should be contiguous and have at most one dimension"); | |||
|
206 | goto finally; | |||
|
207 | } | |||
|
208 | ||||
|
209 | if (dictType != ZSTD_dct_auto && dictType != ZSTD_dct_rawContent | |||
|
210 | && dictType != ZSTD_dct_fullDict) { | |||
|
211 | PyErr_Format(PyExc_ValueError, | |||
|
212 | "invalid dictionary load mode: %d; must use DICT_TYPE_* constants", | |||
|
213 | dictType); | |||
|
214 | goto finally; | |||
|
215 | } | |||
|
216 | ||||
|
217 | self->dictType = dictType; | |||
|
218 | ||||
|
219 | self->dictData = PyMem_Malloc(source.len); | |||
281 | if (!self->dictData) { |
|
220 | if (!self->dictData) { | |
282 | PyErr_NoMemory(); |
|
221 | PyErr_NoMemory(); | |
283 | return -1; |
|
222 | goto finally; | |
284 | } |
|
223 | } | |
285 |
|
224 | |||
286 |
memcpy(self->dictData, source, source |
|
225 | memcpy(self->dictData, source.buf, source.len); | |
287 |
self->dictSize = source |
|
226 | self->dictSize = source.len; | |
|
227 | ||||
|
228 | result = 0; | |||
288 |
|
229 | |||
289 | return 0; |
|
230 | finally: | |
|
231 | PyBuffer_Release(&source); | |||
|
232 | return result; | |||
|
233 | } | |||
|
234 | ||||
|
235 | static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) { | |||
|
236 | if (self->cdict) { | |||
|
237 | ZSTD_freeCDict(self->cdict); | |||
|
238 | self->cdict = NULL; | |||
290 | } |
|
239 | } | |
291 |
|
240 | |||
292 | static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) { |
|
241 | if (self->ddict) { | |
|
242 | ZSTD_freeDDict(self->ddict); | |||
|
243 | self->ddict = NULL; | |||
|
244 | } | |||
|
245 | ||||
293 | if (self->dictData) { |
|
246 | if (self->dictData) { | |
294 | PyMem_Free(self->dictData); |
|
247 | PyMem_Free(self->dictData); | |
295 | self->dictData = NULL; |
|
248 | self->dictData = NULL; | |
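
At the Python level the constructor above takes the raw dictionary bytes plus an optional load mode; a sketch assuming the module exposes the ``DICT_TYPE_*`` constants referenced in the error message (``dict_data`` here stands in for real dictionary content)::

    import zstandard as zstd

    dict_data = b"..."  # placeholder: raw dictionary bytes from training or elsewhere

    # Default mode lets zstd detect whether this is a full dictionary or
    # raw content.
    d_auto = zstd.ZstdCompressionDict(dict_data)

    # Force raw content mode, e.g. for prefix-style dictionaries.
    d_raw = zstd.ZstdCompressionDict(dict_data,
                                     dict_type=zstd.DICT_TYPE_RAWCONTENT)
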
@@ -298,6 +251,74 b' static void ZstdCompressionDict_dealloc(' | |||||
298 | PyObject_Del(self); |
|
251 | PyObject_Del(self); | |
299 | } |
|
252 | } | |
300 |
|
253 | |||
|
254 | PyDoc_STRVAR(ZstdCompressionDict_precompute_compress__doc__, | |||
|
255 | "Precompute a dictionary so it can be used by multiple compressors.\n" | |||
|
256 | ); | |||
|
257 | ||||
|
258 | static PyObject* ZstdCompressionDict_precompute_compress(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) { | |||
|
259 | static char* kwlist[] = { | |||
|
260 | "level", | |||
|
261 | "compression_params", | |||
|
262 | NULL | |||
|
263 | }; | |||
|
264 | ||||
|
265 | int level = 0; | |||
|
266 | ZstdCompressionParametersObject* compressionParams = NULL; | |||
|
267 | ZSTD_compressionParameters cParams; | |||
|
268 | size_t zresult; | |||
|
269 | ||||
|
270 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!:precompute_compress", kwlist, | |||
|
271 | &level, &ZstdCompressionParametersType, &compressionParams)) { | |||
|
272 | return NULL; | |||
|
273 | } | |||
|
274 | ||||
|
275 | if (level && compressionParams) { | |||
|
276 | PyErr_SetString(PyExc_ValueError, | |||
|
277 | "must only specify one of level or compression_params"); | |||
|
278 | return NULL; | |||
|
279 | } | |||
|
280 | ||||
|
281 | if (!level && !compressionParams) { | |||
|
282 | PyErr_SetString(PyExc_ValueError, | |||
|
283 | "must specify one of level or compression_params"); | |||
|
284 | return NULL; | |||
|
285 | } | |||
|
286 | ||||
|
287 | if (self->cdict) { | |||
|
288 | zresult = ZSTD_freeCDict(self->cdict); | |||
|
289 | self->cdict = NULL; | |||
|
290 | if (ZSTD_isError(zresult)) { | |||
|
291 | PyErr_Format(ZstdError, "unable to free CDict: %s", | |||
|
292 | ZSTD_getErrorName(zresult)); | |||
|
293 | return NULL; | |||
|
294 | } | |||
|
295 | } | |||
|
296 | ||||
|
297 | if (level) { | |||
|
298 | cParams = ZSTD_getCParams(level, 0, self->dictSize); | |||
|
299 | } | |||
|
300 | else { | |||
|
301 | cParams.chainLog = compressionParams->chainLog; | |||
|
302 | cParams.hashLog = compressionParams->hashLog; | |||
|
303 | cParams.searchLength = compressionParams->minMatch; | |||
|
304 | cParams.searchLog = compressionParams->searchLog; | |||
|
305 | cParams.strategy = compressionParams->compressionStrategy; | |||
|
306 | cParams.targetLength = compressionParams->targetLength; | |||
|
307 | cParams.windowLog = compressionParams->windowLog; | |||
|
308 | } | |||
|
309 | ||||
|
310 | assert(!self->cdict); | |||
|
311 | self->cdict = ZSTD_createCDict_advanced(self->dictData, self->dictSize, | |||
|
312 | ZSTD_dlm_byRef, self->dictType, cParams, ZSTD_defaultCMem); | |||
|
313 | ||||
|
314 | if (!self->cdict) { | |||
|
315 | PyErr_SetString(ZstdError, "unable to precompute dictionary"); | |||
|
316 | return NULL; | |||
|
317 | } | |||
|
318 | ||||
|
319 | Py_RETURN_NONE; | |||
|
320 | } | |||
|
321 | ||||
301 | static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) { |
|
322 | static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) { | |
302 | unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize); |
|
323 | unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize); | |
303 |
|
324 | |||
@@ -313,6 +334,8 b' static PyMethodDef ZstdCompressionDict_m' | |||||
313 | PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") }, |
|
334 | PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") }, | |
314 | { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS, |
|
335 | { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS, | |
315 | PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") }, |
|
336 | PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") }, | |
|
337 | { "precompute_compress", (PyCFunction)ZstdCompressionDict_precompute_compress, | |||
|
338 | METH_VARARGS | METH_KEYWORDS, ZstdCompressionDict_precompute_compress__doc__ }, | |||
316 | { NULL, NULL } |
|
339 | { NULL, NULL } | |
317 | }; |
|
340 | }; | |
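
The newly registered ``precompute_compress()`` method converts the dictionary into a precomputed digest that many compressors can then share; a minimal sketch (``samples`` is again a list of ``bytes``)::

    import zstandard as zstd

    samples = [b"sample record %d" % i for i in range(1000)]
    d = zstd.train_dictionary(8192, samples)

    # Do the dictionary processing once, up front, for this level.
    d.precompute_compress(level=3)

    # Subsequent compressors reuse the precomputed dictionary state.
    cctx = zstd.ZstdCompressor(level=3, dict_data=d)
    frame = cctx.compress(b"sample record 42")
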
318 |
|
341 |
This diff has been collapsed as it changes many lines (525 lines changed).
@@ -8,204 +8,448 b'' | |||||
8 |
|
8 | |||
9 | #include "python-zstandard.h" |
|
9 | #include "python-zstandard.h" | |
10 |
|
10 | |||
11 | void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams) { |
|
11 | extern PyObject* ZstdError; | |
12 | zparams->windowLog = params->windowLog; |
|
|||
13 | zparams->chainLog = params->chainLog; |
|
|||
14 | zparams->hashLog = params->hashLog; |
|
|||
15 | zparams->searchLog = params->searchLog; |
|
|||
16 | zparams->searchLength = params->searchLength; |
|
|||
17 | zparams->targetLength = params->targetLength; |
|
|||
18 | zparams->strategy = params->strategy; |
|
|||
19 | } |
|
|||
20 |
|
12 | |||
21 | CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args) { |
|
13 | int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value) { | |
22 | int compressionLevel; |
|
14 | size_t zresult = ZSTD_CCtxParam_setParameter(params, param, value); | |
23 | unsigned PY_LONG_LONG sourceSize = 0; |
|
15 | if (ZSTD_isError(zresult)) { | |
24 | Py_ssize_t dictSize = 0; |
|
16 | PyErr_Format(ZstdError, "unable to set compression context parameter: %s", | |
25 | ZSTD_compressionParameters params; |
|
17 | ZSTD_getErrorName(zresult)); | |
26 | CompressionParametersObject* result; |
|
18 | return 1; | |
27 |
|
||||
28 | if (!PyArg_ParseTuple(args, "i|Kn:get_compression_parameters", |
|
|||
29 | &compressionLevel, &sourceSize, &dictSize)) { |
|
|||
30 | return NULL; |
|
|||
31 | } |
|
19 | } | |
32 |
|
20 | |||
33 | params = ZSTD_getCParams(compressionLevel, sourceSize, dictSize); |
|
21 | return 0; | |
|
22 | } | |||
|
23 | ||||
|
24 | #define TRY_SET_PARAMETER(params, param, value) if (set_parameter(params, param, value)) return -1; | |||
34 |
|
25 | |||
35 | result = PyObject_New(CompressionParametersObject, &CompressionParametersType); |
|
26 | int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) { | |
36 | if (!result) { |
|
27 | TRY_SET_PARAMETER(params, ZSTD_p_format, obj->format); | |
37 | return NULL; |
|
28 | TRY_SET_PARAMETER(params, ZSTD_p_compressionLevel, (unsigned)obj->compressionLevel); | |
|
29 | TRY_SET_PARAMETER(params, ZSTD_p_windowLog, obj->windowLog); | |||
|
30 | TRY_SET_PARAMETER(params, ZSTD_p_hashLog, obj->hashLog); | |||
|
31 | TRY_SET_PARAMETER(params, ZSTD_p_chainLog, obj->chainLog); | |||
|
32 | TRY_SET_PARAMETER(params, ZSTD_p_searchLog, obj->searchLog); | |||
|
33 | TRY_SET_PARAMETER(params, ZSTD_p_minMatch, obj->minMatch); | |||
|
34 | TRY_SET_PARAMETER(params, ZSTD_p_targetLength, obj->targetLength); | |||
|
35 | TRY_SET_PARAMETER(params, ZSTD_p_compressionStrategy, obj->compressionStrategy); | |||
|
36 | TRY_SET_PARAMETER(params, ZSTD_p_contentSizeFlag, obj->contentSizeFlag); | |||
|
37 | TRY_SET_PARAMETER(params, ZSTD_p_checksumFlag, obj->checksumFlag); | |||
|
38 | TRY_SET_PARAMETER(params, ZSTD_p_dictIDFlag, obj->dictIDFlag); | |||
|
39 | TRY_SET_PARAMETER(params, ZSTD_p_nbWorkers, obj->threads); | |||
|
40 | TRY_SET_PARAMETER(params, ZSTD_p_jobSize, obj->jobSize); | |||
|
41 | TRY_SET_PARAMETER(params, ZSTD_p_overlapSizeLog, obj->overlapSizeLog); | |||
|
42 | TRY_SET_PARAMETER(params, ZSTD_p_compressLiterals, obj->compressLiterals); | |||
|
43 | TRY_SET_PARAMETER(params, ZSTD_p_forceMaxWindow, obj->forceMaxWindow); | |||
|
44 | TRY_SET_PARAMETER(params, ZSTD_p_enableLongDistanceMatching, obj->enableLongDistanceMatching); | |||
|
45 | TRY_SET_PARAMETER(params, ZSTD_p_ldmHashLog, obj->ldmHashLog); | |||
|
46 | TRY_SET_PARAMETER(params, ZSTD_p_ldmMinMatch, obj->ldmMinMatch); | |||
|
47 | TRY_SET_PARAMETER(params, ZSTD_p_ldmBucketSizeLog, obj->ldmBucketSizeLog); | |||
|
48 | TRY_SET_PARAMETER(params, ZSTD_p_ldmHashEveryLog, obj->ldmHashEveryLog); | |||
|
49 | ||||
|
50 | return 0; | |||
|
51 | } | |||
|
52 | ||||
|
53 | int reset_params(ZstdCompressionParametersObject* params) { | |||
|
54 | if (params->params) { | |||
|
55 | ZSTD_CCtxParams_reset(params->params); | |||
|
56 | } | |||
|
57 | else { | |||
|
58 | params->params = ZSTD_createCCtxParams(); | |||
|
59 | if (!params->params) { | |||
|
60 | PyErr_NoMemory(); | |||
|
61 | return 1; | |||
|
62 | } | |||
38 | } |
|
63 | } | |
39 |
|
64 | |||
40 | result->windowLog = params.windowLog; |
|
65 | return set_parameters(params->params, params); | |
41 | result->chainLog = params.chainLog; |
|
|||
42 | result->hashLog = params.hashLog; |
|
|||
43 | result->searchLog = params.searchLog; |
|
|||
44 | result->searchLength = params.searchLength; |
|
|||
45 | result->targetLength = params.targetLength; |
|
|||
46 | result->strategy = params.strategy; |
|
|||
47 |
|
||||
48 | return result; |
|
|||
49 | } |
|
66 | } | |
50 |
|
67 | |||
51 | static int CompressionParameters_init(CompressionParametersObject* self, PyObject* args, PyObject* kwargs) { |
|
68 | static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) { | |
52 | static char* kwlist[] = { |
|
69 | static char* kwlist[] = { | |
|
70 | "format", | |||
|
71 | "compression_level", | |||
53 | "window_log", |
|
72 | "window_log", | |
54 | "chain_log", |
|
|||
55 | "hash_log", |
|
73 | "hash_log", | |
|
74 | "chain_log", | |||
56 | "search_log", |
|
75 | "search_log", | |
57 | "search_length", |
|
76 | "min_match", | |
58 | "target_length", |
|
77 | "target_length", | |
59 | "strategy", |
|
78 | "compression_strategy", | |
|
79 | "write_content_size", | |||
|
80 | "write_checksum", | |||
|
81 | "write_dict_id", | |||
|
82 | "job_size", | |||
|
83 | "overlap_size_log", | |||
|
84 | "force_max_window", | |||
|
85 | "enable_ldm", | |||
|
86 | "ldm_hash_log", | |||
|
87 | "ldm_min_match", | |||
|
88 | "ldm_bucket_size_log", | |||
|
89 | "ldm_hash_every_log", | |||
|
90 | "threads", | |||
|
91 | "compress_literals", | |||
60 | NULL |
|
92 | NULL | |
61 | }; |
|
93 | }; | |
62 |
|
94 | |||
63 | unsigned windowLog; |
|
95 | unsigned format = 0; | |
64 | unsigned chainLog; |
|
96 | int compressionLevel = 0; | |
65 |
unsigned |
|
97 | unsigned windowLog = 0; | |
66 |
unsigned |
|
98 | unsigned hashLog = 0; | |
67 |
unsigned |
|
99 | unsigned chainLog = 0; | |
68 | unsigned targetLength; |
|
100 | unsigned searchLog = 0; | |
69 |
unsigned |
|
101 | unsigned minMatch = 0; | |
70 | ZSTD_compressionParameters params; |
|
102 | unsigned targetLength = 0; | |
71 | size_t zresult; |
|
103 | unsigned compressionStrategy = 0; | |
|
104 | unsigned contentSizeFlag = 1; | |||
|
105 | unsigned checksumFlag = 0; | |||
|
106 | unsigned dictIDFlag = 0; | |||
|
107 | unsigned jobSize = 0; | |||
|
108 | unsigned overlapSizeLog = 0; | |||
|
109 | unsigned forceMaxWindow = 0; | |||
|
110 | unsigned enableLDM = 0; | |||
|
111 | unsigned ldmHashLog = 0; | |||
|
112 | unsigned ldmMinMatch = 0; | |||
|
113 | unsigned ldmBucketSizeLog = 0; | |||
|
114 | unsigned ldmHashEveryLog = 0; | |||
|
115 | int threads = 0; | |||
72 |
|
116 | |||
73 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "IIIIIII:CompressionParameters", |
|
117 | /* Setting value 0 has the effect of disabling. So we use -1 as a default | |
74 | kwlist, &windowLog, &chainLog, &hashLog, &searchLog, &searchLength, |
|
118 | * to detect whether to set. Then we automatically derive the expected value | |
75 | &targetLength, &strategy)) { |
|
119 | * based on the level, just like zstandard does itself. */ | |
76 | return -1; |
|
120 | int compressLiterals = -1; | |
77 | } |
|
|||
78 |
|
121 | |||
79 | if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) { |
|
122 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, | |
80 | PyErr_SetString(PyExc_ValueError, "invalid window log value"); |
|
123 | "|IiIIIIIIIIIIIIIIIIIIii:CompressionParameters", | |
81 | return -1; |
|
124 | kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog, | |
82 | } |
|
125 | &searchLog, &minMatch, &targetLength, &compressionStrategy, | |
83 |
|
126 | &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapSizeLog, | ||
84 | if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) { |
|
127 | &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch, &ldmBucketSizeLog, | |
85 | PyErr_SetString(PyExc_ValueError, "invalid chain log value"); |
|
128 | &ldmHashEveryLog, &threads, &compressLiterals)) { | |
86 | return -1; |
|
|||
87 | } |
|
|||
88 |
|
||||
89 | if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) { |
|
|||
90 | PyErr_SetString(PyExc_ValueError, "invalid hash log value"); |
|
|||
91 | return -1; |
|
129 | return -1; | |
92 | } |
|
130 | } | |
93 |
|
131 | |||
94 | if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) { |
|
132 | if (threads < 0) { | |
95 | PyErr_SetString(PyExc_ValueError, "invalid search log value"); |
|
133 | threads = cpu_count(); | |
96 | return -1; |
|
|||
97 | } |
|
134 | } | |
98 |
|
135 | |||
99 | if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) { |
|
136 | if (compressLiterals < 0) { | |
100 | PyErr_SetString(PyExc_ValueError, "invalid search length value"); |
|
137 | compressLiterals = compressionLevel >= 0; | |
101 | return -1; |
|
|||
102 | } |
|
|||
103 |
|
||||
104 | if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) { |
|
|||
105 | PyErr_SetString(PyExc_ValueError, "invalid target length value"); |
|
|||
106 | return -1; |
|
|||
107 | } |
|
138 | } | |
108 |
|
139 | |||
109 | if (strategy < ZSTD_fast || strategy > ZSTD_btopt) { |
|
140 | self->format = format; | |
110 | PyErr_SetString(PyExc_ValueError, "invalid strategy value"); |
|
141 | self->compressionLevel = compressionLevel; | |
111 | return -1; |
|
|||
112 | } |
|
|||
113 |
|
||||
114 | self->windowLog = windowLog; |
|
142 | self->windowLog = windowLog; | |
|
143 | self->hashLog = hashLog; | |||
115 | self->chainLog = chainLog; |
|
144 | self->chainLog = chainLog; | |
116 | self->hashLog = hashLog; |
|
|||
117 | self->searchLog = searchLog; |
|
145 | self->searchLog = searchLog; | |
118 | self->searchLength = searchLength; |
|
146 | self->minMatch = minMatch; | |
119 | self->targetLength = targetLength; |
|
147 | self->targetLength = targetLength; | |
120 | self->strategy = strategy; |
|
148 | self->compressionStrategy = compressionStrategy; | |
|
149 | self->contentSizeFlag = contentSizeFlag; | |||
|
150 | self->checksumFlag = checksumFlag; | |||
|
151 | self->dictIDFlag = dictIDFlag; | |||
|
152 | self->threads = threads; | |||
|
153 | self->jobSize = jobSize; | |||
|
154 | self->overlapSizeLog = overlapSizeLog; | |||
|
155 | self->compressLiterals = compressLiterals; | |||
|
156 | self->forceMaxWindow = forceMaxWindow; | |||
|
157 | self->enableLongDistanceMatching = enableLDM; | |||
|
158 | self->ldmHashLog = ldmHashLog; | |||
|
159 | self->ldmMinMatch = ldmMinMatch; | |||
|
160 | self->ldmBucketSizeLog = ldmBucketSizeLog; | |||
|
161 | self->ldmHashEveryLog = ldmHashEveryLog; | |||
121 |
|
162 | |||
122 | ztopy_compression_parameters(self, ¶ms); |
|
163 | if (reset_params(self)) { | |
123 | zresult = ZSTD_checkCParams(params); |
|
|||
124 |
|
||||
125 | if (ZSTD_isError(zresult)) { |
|
|||
126 | PyErr_Format(PyExc_ValueError, "invalid compression parameters: %s", |
|
|||
127 | ZSTD_getErrorName(zresult)); |
|
|||
128 | return -1; |
|
164 | return -1; | |
129 | } |
|
165 | } | |
130 |
|
166 | |||
131 | return 0; |
|
167 | return 0; | |
132 | } |
|
168 | } | |
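
From Python, the keyword arguments parsed above map directly onto these fields; a hedged sketch of explicit construction (constant and argument names as exposed by the module are assumed)::

    import zstandard as zstd

    params = zstd.ZstdCompressionParameters(
        window_log=22,
        chain_log=21,
        hash_log=21,
        search_log=5,
        min_match=4,
        target_length=0,
        compression_strategy=zstd.STRATEGY_LAZY2,
        write_checksum=1,
        threads=2,
    )

    cctx = zstd.ZstdCompressor(compression_params=params)
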
133 |
|
169 | |||
134 | PyDoc_STRVAR(CompressionParameters_ |
|
170 | PyDoc_STRVAR(ZstdCompressionParameters_from_level__doc__, | |
|
171 | "Create a CompressionParameters from a compression level and target sizes\n" | |||
|
172 | ); | |||
|
173 | ||||
|
174 | ZstdCompressionParametersObject* CompressionParameters_from_level(PyObject* undef, PyObject* args, PyObject* kwargs) { | |||
|
175 | int managedKwargs = 0; | |||
|
176 | int level; | |||
|
177 | PyObject* sourceSize = NULL; | |||
|
178 | PyObject* dictSize = NULL; | |||
|
179 | unsigned PY_LONG_LONG iSourceSize = 0; | |||
|
180 | Py_ssize_t iDictSize = 0; | |||
|
181 | PyObject* val; | |||
|
182 | ZSTD_compressionParameters params; | |||
|
183 | ZstdCompressionParametersObject* result = NULL; | |||
|
184 | int res; | |||
|
185 | ||||
|
186 | if (!PyArg_ParseTuple(args, "i:from_level", | |||
|
187 | &level)) { | |||
|
188 | return NULL; | |||
|
189 | } | |||
|
190 | ||||
|
191 | if (!kwargs) { | |||
|
192 | kwargs = PyDict_New(); | |||
|
193 | if (!kwargs) { | |||
|
194 | return NULL; | |||
|
195 | } | |||
|
196 | managedKwargs = 1; | |||
|
197 | } | |||
|
198 | ||||
|
199 | sourceSize = PyDict_GetItemString(kwargs, "source_size"); | |||
|
200 | if (sourceSize) { | |||
|
201 | #if PY_MAJOR_VERSION >= 3 | |||
|
202 | iSourceSize = PyLong_AsUnsignedLongLong(sourceSize); | |||
|
203 | if (iSourceSize == (unsigned PY_LONG_LONG)(-1)) { | |||
|
204 | goto cleanup; | |||
|
205 | } | |||
|
206 | #else | |||
|
207 | iSourceSize = PyInt_AsUnsignedLongLongMask(sourceSize); | |||
|
208 | #endif | |||
|
209 | ||||
|
210 | PyDict_DelItemString(kwargs, "source_size"); | |||
|
211 | } | |||
|
212 | ||||
|
213 | dictSize = PyDict_GetItemString(kwargs, "dict_size"); | |||
|
214 | if (dictSize) { | |||
|
215 | #if PY_MAJOR_VERSION >= 3 | |||
|
216 | iDictSize = PyLong_AsSsize_t(dictSize); | |||
|
217 | #else | |||
|
218 | iDictSize = PyInt_AsSsize_t(dictSize); | |||
|
219 | #endif | |||
|
220 | if (iDictSize == -1) { | |||
|
221 | goto cleanup; | |||
|
222 | } | |||
|
223 | ||||
|
224 | PyDict_DelItemString(kwargs, "dict_size"); | |||
|
225 | } | |||
|
226 | ||||
|
227 | ||||
|
228 | params = ZSTD_getCParams(level, iSourceSize, iDictSize); | |||
|
229 | ||||
|
230 | /* Values derived from the input level and sizes are passed along to the | |||
|
231 | constructor. But only if a value doesn't already exist. */ | |||
|
232 | val = PyDict_GetItemString(kwargs, "window_log"); | |||
|
233 | if (!val) { | |||
|
234 | val = PyLong_FromUnsignedLong(params.windowLog); | |||
|
235 | if (!val) { | |||
|
236 | goto cleanup; | |||
|
237 | } | |||
|
238 | PyDict_SetItemString(kwargs, "window_log", val); | |||
|
239 | Py_DECREF(val); | |||
|
240 | } | |||
|
241 | ||||
|
242 | val = PyDict_GetItemString(kwargs, "chain_log"); | |||
|
243 | if (!val) { | |||
|
244 | val = PyLong_FromUnsignedLong(params.chainLog); | |||
|
245 | if (!val) { | |||
|
246 | goto cleanup; | |||
|
247 | } | |||
|
248 | PyDict_SetItemString(kwargs, "chain_log", val); | |||
|
249 | Py_DECREF(val); | |||
|
250 | } | |||
|
251 | ||||
|
252 | val = PyDict_GetItemString(kwargs, "hash_log"); | |||
|
253 | if (!val) { | |||
|
254 | val = PyLong_FromUnsignedLong(params.hashLog); | |||
|
255 | if (!val) { | |||
|
256 | goto cleanup; | |||
|
257 | } | |||
|
258 | PyDict_SetItemString(kwargs, "hash_log", val); | |||
|
259 | Py_DECREF(val); | |||
|
260 | } | |||
|
261 | ||||
|
262 | val = PyDict_GetItemString(kwargs, "search_log"); | |||
|
263 | if (!val) { | |||
|
264 | val = PyLong_FromUnsignedLong(params.searchLog); | |||
|
265 | if (!val) { | |||
|
266 | goto cleanup; | |||
|
267 | } | |||
|
268 | PyDict_SetItemString(kwargs, "search_log", val); | |||
|
269 | Py_DECREF(val); | |||
|
270 | } | |||
|
271 | ||||
|
272 | val = PyDict_GetItemString(kwargs, "min_match"); | |||
|
273 | if (!val) { | |||
|
274 | val = PyLong_FromUnsignedLong(params.searchLength); | |||
|
275 | if (!val) { | |||
|
276 | goto cleanup; | |||
|
277 | } | |||
|
278 | PyDict_SetItemString(kwargs, "min_match", val); | |||
|
279 | Py_DECREF(val); | |||
|
280 | } | |||
|
281 | ||||
|
282 | val = PyDict_GetItemString(kwargs, "target_length"); | |||
|
283 | if (!val) { | |||
|
284 | val = PyLong_FromUnsignedLong(params.targetLength); | |||
|
285 | if (!val) { | |||
|
286 | goto cleanup; | |||
|
287 | } | |||
|
288 | PyDict_SetItemString(kwargs, "target_length", val); | |||
|
289 | Py_DECREF(val); | |||
|
290 | } | |||
|
291 | ||||
|
292 | val = PyDict_GetItemString(kwargs, "compression_strategy"); | |||
|
293 | if (!val) { | |||
|
294 | val = PyLong_FromUnsignedLong(params.strategy); | |||
|
295 | if (!val) { | |||
|
296 | goto cleanup; | |||
|
297 | } | |||
|
298 | PyDict_SetItemString(kwargs, "compression_strategy", val); | |||
|
299 | Py_DECREF(val); | |||
|
300 | } | |||
|
301 | ||||
|
302 | val = PyDict_GetItemString(kwargs, "compress_literals"); | |||
|
303 | if (!val) { | |||
|
304 | val = PyLong_FromLong(level >= 0 ? 1 : 0); | |||
|
305 | if (!val) { | |||
|
306 | goto cleanup; | |||
|
307 | } | |||
|
308 | PyDict_SetItemString(kwargs, "compress_literals", val); | |||
|
309 | Py_DECREF(val); | |||
|
310 | } | |||
|
311 | ||||
|
312 | result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType); | |||
|
313 | if (!result) { | |||
|
314 | goto cleanup; | |||
|
315 | } | |||
|
316 | ||||
|
317 | result->params = NULL; | |||
|
318 | ||||
|
319 | val = PyTuple_New(0); | |||
|
320 | if (!val) { | |||
|
321 | Py_CLEAR(result); | |||
|
322 | goto cleanup; | |||
|
323 | } | |||
|
324 | ||||
|
325 | res = ZstdCompressionParameters_init(result, val, kwargs); | |||
|
326 | Py_DECREF(val); | |||
|
327 | ||||
|
328 | if (res) { | |||
|
329 | Py_CLEAR(result); | |||
|
330 | goto cleanup; | |||
|
331 | } | |||
|
332 | ||||
|
333 | cleanup: | |||
|
334 | if (managedKwargs) { | |||
|
335 | Py_DECREF(kwargs); | |||
|
336 | } | |||
|
337 | ||||
|
338 | return result; | |||
|
339 | } | |||
|
340 | ||||
|
341 | PyDoc_STRVAR(ZstdCompressionParameters_estimated_compression_context_size__doc__, | |||
135 | "Estimate the size in bytes of a compression context for compression parameters\n" |
|
342 | "Estimate the size in bytes of a compression context for compression parameters\n" | |
136 | ); |
|
343 | ); | |
137 |
|
344 | |||
138 | PyObject* CompressionParameters_estimated_compression_context_size(CompressionParametersObject* self) { |
|
345 | PyObject* ZstdCompressionParameters_estimated_compression_context_size(ZstdCompressionParametersObject* self) { | |
139 | ZSTD_compressionParameters params; |
|
346 | return PyLong_FromSize_t(ZSTD_estimateCCtxSize_usingCCtxParams(self->params)); | |
140 |
|
||||
141 | ztopy_compression_parameters(self, ¶ms); |
|
|||
142 |
|
||||
143 | return PyLong_FromSize_t(ZSTD_estimateCCtxSize(params)); |
|
|||
144 | } |
|
347 | } | |
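
The ``from_level()`` helper above fills in whatever was not given explicitly from the level-derived defaults; a sketch::

    import zstandard as zstd

    params = zstd.ZstdCompressionParameters.from_level(
        3, source_size=1000000, dict_size=0)

    # Derived values are readable via the read-only members.
    print(params.window_log, params.compression_strategy)

    # Approximate memory needed by a compression context using these
    # parameters.
    print(params.estimated_compression_context_size())
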
145 |
|
348 | |||
146 | PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) { |
|
349 | PyDoc_STRVAR(ZstdCompressionParameters__doc__, | |
147 | CompressionParametersObject* params; |
|
350 | "ZstdCompressionParameters: low-level control over zstd compression"); | |
148 | ZSTD_compressionParameters zparams; |
|
|||
149 | PyObject* result; |
|
|||
150 |
|
351 | |||
151 | if (!PyArg_ParseTuple(args, "O!:estimate_compression_context_size", |
|
352 | static void ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject* self) { | |
152 | &CompressionParametersType, ¶ms)) { |
|
353 | if (self->params) { | |
153 | return NULL; |
|
354 | ZSTD_freeCCtxParams(self->params); | |
|
355 | self->params = NULL; | |||
154 | } |
|
356 | } | |
155 |
|
357 | |||
156 | ztopy_compression_parameters(params, &zparams); |
|
|||
157 | result = PyLong_FromSize_t(ZSTD_estimateCCtxSize(zparams)); |
|
|||
158 | return result; |
|
|||
159 | } |
|
|||
160 |
|
||||
161 | PyDoc_STRVAR(CompressionParameters__doc__, |
|
|||
162 | "CompressionParameters: low-level control over zstd compression"); |
|
|||
163 |
|
||||
164 | static void CompressionParameters_dealloc(PyObject* self) { |
|
|||
165 | PyObject_Del(self); |
|
358 | PyObject_Del(self); | |
166 | } |
|
359 | } | |
167 |
|
360 | |||
168 | static PyMethodDef CompressionParameters_methods[] = { |
|
+static PyMethodDef ZstdCompressionParameters_methods[] = {
+    {
+        "from_level",
+        (PyCFunction)CompressionParameters_from_level,
+        METH_VARARGS | METH_KEYWORDS | METH_STATIC,
+        ZstdCompressionParameters_from_level__doc__
+    },
     {
         "estimated_compression_context_size",
-        (PyCFunction)CompressionParameters_estimated_compression_context_size,
+        (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size,
         METH_NOARGS,
-        CompressionParameters_estimated_compression_context_size__doc__
+        ZstdCompressionParameters_estimated_compression_context_size__doc__
     },
     { NULL, NULL }
 };
 
-static PyMemberDef CompressionParameters_members[] = {
+static PyMemberDef ZstdCompressionParameters_members[] = {
+    { "format", T_UINT,
+      offsetof(ZstdCompressionParametersObject, format), READONLY,
+      "compression format" },
+    { "compression_level", T_INT,
+      offsetof(ZstdCompressionParametersObject, compressionLevel), READONLY,
+      "compression level" },
     { "window_log", T_UINT,
-      offsetof(CompressionParametersObject, windowLog), READONLY,
+      offsetof(ZstdCompressionParametersObject, windowLog), READONLY,
       "window log" },
-    { "chain_log", T_UINT,
-      offsetof(CompressionParametersObject, chainLog), READONLY,
-      "chain log" },
     { "hash_log", T_UINT,
-      offsetof(CompressionParametersObject, hashLog), READONLY,
+      offsetof(ZstdCompressionParametersObject, hashLog), READONLY,
       "hash log" },
+    { "chain_log", T_UINT,
+      offsetof(ZstdCompressionParametersObject, chainLog), READONLY,
+      "chain log" },
     { "search_log", T_UINT,
-      offsetof(CompressionParametersObject, searchLog), READONLY,
+      offsetof(ZstdCompressionParametersObject, searchLog), READONLY,
       "search log" },
-    { "search_length", T_UINT,
-      offsetof(CompressionParametersObject, searchLength), READONLY,
+    { "min_match", T_UINT,
+      offsetof(ZstdCompressionParametersObject, minMatch), READONLY,
       "search length" },
     { "target_length", T_UINT,
-      offsetof(CompressionParametersObject, targetLength), READONLY,
+      offsetof(ZstdCompressionParametersObject, targetLength), READONLY,
       "target length" },
-    { "strategy", T_INT,
-      offsetof(CompressionParametersObject, strategy), READONLY,
-      "strategy" },
+    { "compression_strategy", T_UINT,
+      offsetof(ZstdCompressionParametersObject, compressionStrategy), READONLY,
+      "compression strategy" },
+    { "write_content_size", T_UINT,
+      offsetof(ZstdCompressionParametersObject, contentSizeFlag), READONLY,
+      "whether to write content size in frames" },
+    { "write_checksum", T_UINT,
+      offsetof(ZstdCompressionParametersObject, checksumFlag), READONLY,
+      "whether to write checksum in frames" },
+    { "write_dict_id", T_UINT,
+      offsetof(ZstdCompressionParametersObject, dictIDFlag), READONLY,
+      "whether to write dictionary ID in frames" },
+    { "threads", T_UINT,
+      offsetof(ZstdCompressionParametersObject, threads), READONLY,
+      "number of threads to use" },
+    { "job_size", T_UINT,
+      offsetof(ZstdCompressionParametersObject, jobSize), READONLY,
+      "size of compression job when using multiple threads" },
+    { "overlap_size_log", T_UINT,
+      offsetof(ZstdCompressionParametersObject, overlapSizeLog), READONLY,
+      "Size of previous input reloaded at the beginning of each job" },
+    { "compress_literals", T_UINT,
+      offsetof(ZstdCompressionParametersObject, compressLiterals), READONLY,
+      "whether Huffman compression of literals is in use" },
+    { "force_max_window", T_UINT,
+      offsetof(ZstdCompressionParametersObject, forceMaxWindow), READONLY,
+      "force back references to remain smaller than window size" },
+    { "enable_ldm", T_UINT,
+      offsetof(ZstdCompressionParametersObject, enableLongDistanceMatching), READONLY,
+      "whether to enable long distance matching" },
+    { "ldm_hash_log", T_UINT,
+      offsetof(ZstdCompressionParametersObject, ldmHashLog), READONLY,
+      "Size of the table for long distance matching, as a power of 2" },
+    { "ldm_min_match", T_UINT,
+      offsetof(ZstdCompressionParametersObject, ldmMinMatch), READONLY,
+      "minimum size of searched matches for long distance matcher" },
+    { "ldm_bucket_size_log", T_UINT,
+      offsetof(ZstdCompressionParametersObject, ldmBucketSizeLog), READONLY,
+      "log size of each bucket in the LDM hash table for collision resolution" },
+    { "ldm_hash_every_log", T_UINT,
+      offsetof(ZstdCompressionParametersObject, ldmHashEveryLog), READONLY,
+      "frequency of inserting/looking up entries in the LDM hash table" },
     { NULL }
 };
 
-PyTypeObject CompressionParametersType = {
+PyTypeObject ZstdCompressionParametersType = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    "CompressionParameters", /* tp_name */
-    sizeof(CompressionParametersObject), /* tp_basicsize */
+    "ZstdCompressionParameters", /* tp_name */
+    sizeof(ZstdCompressionParametersObject), /* tp_basicsize */
     0, /* tp_itemsize */
-    (destructor)CompressionParameters_dealloc, /* tp_dealloc */
+    (destructor)ZstdCompressionParameters_dealloc, /* tp_dealloc */
     0, /* tp_print */
     0, /* tp_getattr */
     0, /* tp_setattr */
@@ -221,33 +465,38 @@ PyTypeObject CompressionParametersType =
     0, /* tp_setattro */
     0, /* tp_as_buffer */
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
-    CompressionParameters__doc__, /* tp_doc */
+    ZstdCompressionParameters__doc__, /* tp_doc */
     0, /* tp_traverse */
     0, /* tp_clear */
     0, /* tp_richcompare */
     0, /* tp_weaklistoffset */
     0, /* tp_iter */
     0, /* tp_iternext */
-    CompressionParameters_methods, /* tp_methods */
-    CompressionParameters_members, /* tp_members */
+    ZstdCompressionParameters_methods, /* tp_methods */
+    ZstdCompressionParameters_members, /* tp_members */
     0, /* tp_getset */
     0, /* tp_base */
     0, /* tp_dict */
     0, /* tp_descr_get */
     0, /* tp_descr_set */
     0, /* tp_dictoffset */
-    (initproc)CompressionParameters_init, /* tp_init */
+    (initproc)ZstdCompressionParameters_init, /* tp_init */
     0, /* tp_alloc */
     PyType_GenericNew, /* tp_new */
 };
 
 void compressionparams_module_init(PyObject* mod) {
-    Py_TYPE(&CompressionParametersType) = &PyType_Type;
-    if (PyType_Ready(&CompressionParametersType) < 0) {
+    Py_TYPE(&ZstdCompressionParametersType) = &PyType_Type;
+    if (PyType_Ready(&ZstdCompressionParametersType) < 0) {
         return;
     }
 
-    Py_INCREF(&CompressionParametersType);
+    Py_INCREF(&ZstdCompressionParametersType);
+    PyModule_AddObject(mod, "ZstdCompressionParameters",
+        (PyObject*)&ZstdCompressionParametersType);
+
+    /* TODO remove deprecated alias. */
+    Py_INCREF(&ZstdCompressionParametersType);
     PyModule_AddObject(mod, "CompressionParameters",
-        (PyObject*)&CompressionParametersType);
+        (PyObject*)&ZstdCompressionParametersType);
 }
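
A note on how these parameter objects get consumed: later hunks in this series have ZstdCompressor stage its configuration on a ZSTD_CCtx_params object (through the project's set_parameter()/set_parameters() helpers) and apply it with ZSTD_CCtx_setParametersUsingCCtxParams() inside ensure_cctx(). The following is only a rough sketch of that flow with the CPython glue stripped away; it assumes the zstd 1.3.x advanced-API spellings ZSTD_createCCtxParams() and ZSTD_CCtxParam_setParameter(), which do not themselves appear in this diff.

/* Hedged sketch, not part of this patch: stage parameters on a
 * ZSTD_CCtx_params object and apply them to a context in one call,
 * mirroring what ensure_cctx() does after this series. */
#define ZSTD_STATIC_LINKING_ONLY /* advanced API was still "experimental" in this zstd era */
#include <zstd.h>

static int apply_params_sketch(ZSTD_CCtx* cctx, unsigned level, unsigned threads) {
    ZSTD_CCtx_params* params = ZSTD_createCCtxParams();
    size_t zresult;

    if (!params) {
        return 1;
    }

    /* The enum names below are the ones ZstdCompressor_init() uses later in this diff. */
    ZSTD_CCtxParam_setParameter(params, ZSTD_p_compressionLevel, level);
    ZSTD_CCtxParam_setParameter(params, ZSTD_p_checksumFlag, 1);
    if (threads) {
        ZSTD_CCtxParam_setParameter(params, ZSTD_p_nbWorkers, threads);
    }

    /* Reset the context, then apply the staged configuration in one call. */
    ZSTD_CCtx_reset(cctx);
    zresult = ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);

    ZSTD_freeCCtxParams(params);
    return ZSTD_isError(zresult) ? 1 : 0;
}

Keeping the durable copy on the params object is what lets ensure_cctx() reset the ZSTD_CCtx before every operation and then restore the same configuration.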
@@ -22,20 +22,18 @@ static void ZstdCompressionWriter_deallo
 }
 
 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
+    size_t zresult;
+
     if (self->entered) {
         PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
         return NULL;
     }
 
-    if (self->compressor->mtcctx) {
-        if (init_mtcstream(self->compressor, self->sourceSize)) {
-            return NULL;
-        }
-    }
-    else {
-        if (0 != init_cstream(self->compressor, self->sourceSize)) {
-            return NULL;
-        }
+    zresult = ZSTD_CCtx_setPledgedSrcSize(self->compressor->cctx, self->sourceSize);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error setting source size: %s",
+            ZSTD_getErrorName(zresult));
+        return NULL;
     }
 
     self->entered = 1;
@@ -59,8 +57,12 @@ static PyObject* ZstdCompressionWriter_e
 
     self->entered = 0;
 
-    if ((self->compressor->cstream || self->compressor->mtcctx) && exc_type == Py_None
-        && exc_value == Py_None && exc_tb == Py_None) {
+    if (exc_type == Py_None && exc_value == Py_None && exc_tb == Py_None) {
+        ZSTD_inBuffer inBuffer;
+
+        inBuffer.src = NULL;
+        inBuffer.size = 0;
+        inBuffer.pos = 0;
 
         output.dst = PyMem_Malloc(self->outSize);
         if (!output.dst) {
@@ -70,12 +72,7 @@ static PyObject* ZstdCompressionWriter_e
         output.pos = 0;
 
         while (1) {
-            if (self->compressor->mtcctx) {
-                zresult = ZSTDMT_endStream(self->compressor->mtcctx, &output);
-            }
-            else {
-                zresult = ZSTD_endStream(self->compressor->cstream, &output);
-            }
+            zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &inBuffer, ZSTD_e_end);
             if (ZSTD_isError(zresult)) {
                 PyErr_Format(ZstdError, "error ending compression stream: %s",
                     ZSTD_getErrorName(zresult));
@@ -107,18 +104,17 @@ static PyObject* ZstdCompressionWriter_e
 }
 
 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
-    if (!self->compressor->cstream) {
-        PyErr_SetString(ZstdError, "cannot determine size of an inactive compressor; "
-            "call when a context manager is active");
-        return NULL;
-    }
-
-    return PyLong_FromSize_t(ZSTD_sizeof_CStream(self->compressor->cstream));
+    return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->compressor->cctx));
 }
 
-static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args) {
-    const char* source;
-    Py_ssize_t sourceSize;
+static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "data",
+        NULL
+    };
+
+    PyObject* result = NULL;
+    Py_buffer source;
     size_t zresult;
     ZSTD_inBuffer input;
     ZSTD_outBuffer output;
@@ -126,44 +122,46 @@ static PyObject* ZstdCompressionWriter_w
     Py_ssize_t totalWrite = 0;
 
 #if PY_MAJOR_VERSION >= 3
-    if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write",
 #else
-    if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write",
 #endif
+        kwlist, &source)) {
         return NULL;
     }
 
     if (!self->entered) {
         PyErr_SetString(ZstdError, "compress must be called from an active context manager");
-        return NULL;
+        goto finally;
+    }
+
+    if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "data buffer should be contiguous and have at most one dimension");
+        goto finally;
     }
 
     output.dst = PyMem_Malloc(self->outSize);
     if (!output.dst) {
-        return PyErr_NoMemory();
+        PyErr_NoMemory();
+        goto finally;
     }
     output.size = self->outSize;
     output.pos = 0;
 
-    input.src = source;
-    input.size = sourceSize;
+    input.src = source.buf;
+    input.size = source.len;
     input.pos = 0;
 
-    while ((ssize_t)input.pos < sourceSize) {
+    while ((ssize_t)input.pos < source.len) {
         Py_BEGIN_ALLOW_THREADS
-        if (self->compressor->mtcctx) {
-            zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
-                &output, &input);
-        }
-        else {
-            zresult = ZSTD_compressStream(self->compressor->cstream, &output, &input);
-        }
+        zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &input, ZSTD_e_continue);
         Py_END_ALLOW_THREADS
 
         if (ZSTD_isError(zresult)) {
             PyMem_Free(output.dst);
             PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
-            return NULL;
+            goto finally;
         }
 
         /* Copy data from output buffer to writer. */
@@ -176,18 +174,24 @@ static PyObject* ZstdCompressionWriter_w
                 output.dst, output.pos);
             Py_XDECREF(res);
             totalWrite += output.pos;
+            self->bytesCompressed += output.pos;
         }
         output.pos = 0;
     }
 
     PyMem_Free(output.dst);
 
-    return PyLong_FromSsize_t(totalWrite);
+    result = PyLong_FromSsize_t(totalWrite);
+
+finally:
+    PyBuffer_Release(&source);
+    return result;
 }
 
 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
     size_t zresult;
     ZSTD_outBuffer output;
+    ZSTD_inBuffer input;
     PyObject* res;
     Py_ssize_t totalWrite = 0;
 
@@ -196,6 +200,10 @@ static PyObject* ZstdCompressionWriter_f
         return NULL;
     }
 
+    input.src = NULL;
+    input.size = 0;
+    input.pos = 0;
+
     output.dst = PyMem_Malloc(self->outSize);
     if (!output.dst) {
         return PyErr_NoMemory();
@@ -205,12 +213,7 @@ static PyObject* ZstdCompressionWriter_f
 
     while (1) {
         Py_BEGIN_ALLOW_THREADS
-        if (self->compressor->mtcctx) {
-            zresult = ZSTDMT_flushStream(self->compressor->mtcctx, &output);
-        }
-        else {
-            zresult = ZSTD_flushStream(self->compressor->cstream, &output);
-        }
+        zresult = ZSTD_compress_generic(self->compressor->cctx, &output, &input, ZSTD_e_flush);
         Py_END_ALLOW_THREADS
 
         if (ZSTD_isError(zresult)) {
@@ -233,6 +236,7 @@ static PyObject* ZstdCompressionWriter_f
                 output.dst, output.pos);
             Py_XDECREF(res);
             totalWrite += output.pos;
+            self->bytesCompressed += output.pos;
         }
         output.pos = 0;
     }
@@ -242,6 +246,10 @@ static PyObject* ZstdCompressionWriter_f
     return PyLong_FromSsize_t(totalWrite);
 }
 
+static PyObject* ZstdCompressionWriter_tell(ZstdCompressionWriter* self) {
+    return PyLong_FromUnsignedLongLong(self->bytesCompressed);
+}
+
 static PyMethodDef ZstdCompressionWriter_methods[] = {
     { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
     PyDoc_STR("Enter a compression context.") },
@@ -249,10 +257,12 @@ static PyMethodDef ZstdCompressionWriter
     PyDoc_STR("Exit a compression context.") },
     { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
     PyDoc_STR("Obtain the memory size of the underlying compressor") },
-    { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS,
+    { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
     PyDoc_STR("Compress data") },
    { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_NOARGS,
     PyDoc_STR("Flush data and finish a zstd frame") },
+    { "tell", (PyCFunction)ZstdCompressionWriter_tell, METH_NOARGS,
+    PyDoc_STR("Returns current number of bytes compressed") },
     { NULL, NULL }
 };
 
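
The write()/flush()/__exit__ changes above all funnel into ZSTD_compress_generic() with a different end directive (ZSTD_e_continue, ZSTD_e_flush, ZSTD_e_end), replacing the separate ZSTD_*Stream and ZSTDMT_* code paths. A minimal standalone sketch of the drive-to-completion loop, with the Python object plumbing removed (assumes the zstd 1.3.x advanced API; not code from this patch):

/* Hedged sketch: compress one buffer and finish the frame on a single
 * context, looping on ZSTD_e_end until zstd reports nothing left to flush.
 * Assumes dstCapacity >= ZSTD_compressBound(srcSize) so the loop terminates
 * without having to drain dst in between. */
#define ZSTD_STATIC_LINKING_ONLY
#include <stdio.h>
#include <zstd.h>

static size_t stream_one_frame(ZSTD_CCtx* cctx,
    const void* src, size_t srcSize, void* dst, size_t dstCapacity) {
    ZSTD_inBuffer input = { src, srcSize, 0 };
    ZSTD_outBuffer output = { dst, dstCapacity, 0 };
    size_t zresult;

    do {
        /* ZSTD_e_end compresses the remaining input and writes the frame
         * epilogue; a non-zero return means more flushing is needed. */
        zresult = ZSTD_compress_generic(cctx, &output, &input, ZSTD_e_end);
        if (ZSTD_isError(zresult)) {
            fprintf(stderr, "compress error: %s\n", ZSTD_getErrorName(zresult));
            return 0;
        }
    } while (zresult != 0);

    return output.pos; /* number of compressed bytes written to dst */
}

write() above passes ZSTD_e_continue because it only wants to push more input into the frame; the epilogue is emitted once, with ZSTD_e_end, when the context manager exits.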
@@ -23,9 +23,13 @@ static void ZstdCompressionObj_dealloc(Z
     PyObject_Del(self);
 }
 
-static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args) {
-    const char* source;
-    Py_ssize_t sourceSize;
+static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "data",
+        NULL
+    };
+
+    Py_buffer source;
     ZSTD_inBuffer input;
     size_t zresult;
     PyObject* result = NULL;
@@ -37,38 +41,43 @@ static PyObject* ZstdCompressionObj_comp
     }
 
 #if PY_MAJOR_VERSION >= 3
-    if (!PyArg_ParseTuple(args, "y#:compress", &source, &sourceSize)) {
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
 #else
-    if (!PyArg_ParseTuple(args, "s#:compress", &source, &sourceSize)) {
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
 #endif
+        kwlist, &source)) {
         return NULL;
     }
 
-    input.src = source;
-    input.size = sourceSize;
+    if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "data buffer should be contiguous and have at most one dimension");
+        goto finally;
+    }
+
+    input.src = source.buf;
+    input.size = source.len;
     input.pos = 0;
 
-    while ((ssize_t)input.pos < sourceSize) {
+    while ((ssize_t)input.pos < source.len) {
         Py_BEGIN_ALLOW_THREADS
-        if (self->compressor->mtcctx) {
-            zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
-                &self->output, &input);
-        }
-        else {
-            zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, &input);
-        }
+        zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
+            &input, ZSTD_e_continue);
         Py_END_ALLOW_THREADS
 
         if (ZSTD_isError(zresult)) {
             PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
-            return NULL;
+            Py_CLEAR(result);
+            goto finally;
         }
 
         if (self->output.pos) {
             if (result) {
                 resultSize = PyBytes_GET_SIZE(result);
-                if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
-                    return NULL;
+
+                if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
+                    Py_CLEAR(result);
+                    goto finally;
                 }
 
                 memcpy(PyBytes_AS_STRING(result) + resultSize,
@@ -77,7 +86,7 @@ static PyObject* ZstdCompressionObj_comp
             else {
                 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
                 if (!result) {
-                    return NULL;
+                    goto finally;
                 }
             }
 
@@ -85,21 +94,29 @@ static PyObject* ZstdCompressionObj_comp
         }
     }
 
-    if (result) {
-        return result;
-    }
-    else {
-        return PyBytes_FromString("");
-    }
+    if (NULL == result) {
+        result = PyBytes_FromString("");
+    }
+
+finally:
+    PyBuffer_Release(&source);
+
+    return result;
 }
 
-static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args) {
+static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "flush_mode",
+        NULL
+    };
+
     int flushMode = compressorobj_flush_finish;
     size_t zresult;
     PyObject* result = NULL;
     Py_ssize_t resultSize = 0;
+    ZSTD_inBuffer input;
 
-    if (!PyArg_ParseTuple(args, "|i:flush", &flushMode)) {
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist, &flushMode)) {
         return NULL;
     }
 
@@ -115,16 +132,16 @@ static PyObject* ZstdCompressionObj_flus
 
     assert(self->output.pos == 0);
 
+    input.src = NULL;
+    input.size = 0;
+    input.pos = 0;
+
     if (flushMode == compressorobj_flush_block) {
         /* The output buffer is of size ZSTD_CStreamOutSize(), which is
            guaranteed to hold a full block. */
         Py_BEGIN_ALLOW_THREADS
-        if (self->compressor->mtcctx) {
-            zresult = ZSTDMT_flushStream(self->compressor->mtcctx, &self->output);
-        }
-        else {
-            zresult = ZSTD_flushStream(self->compressor->cstream, &self->output);
-        }
+        zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
+            &input, ZSTD_e_flush);
         Py_END_ALLOW_THREADS
 
         if (ZSTD_isError(zresult)) {
@@ -156,12 +173,8 @@ static PyObject* ZstdCompressionObj_flus
     self->finished = 1;
 
     while (1) {
-        if (self->compressor->mtcctx) {
-            zresult = ZSTDMT_endStream(self->compressor->mtcctx, &self->output);
-        }
-        else {
-            zresult = ZSTD_endStream(self->compressor->cstream, &self->output);
-        }
+        zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
+            &input, ZSTD_e_end);
         if (ZSTD_isError(zresult)) {
             PyErr_Format(ZstdError, "error ending compression stream: %s",
                 ZSTD_getErrorName(zresult));
@@ -171,7 +184,9 @@ static PyObject* ZstdCompressionObj_flus
         if (self->output.pos) {
             if (result) {
                 resultSize = PyBytes_GET_SIZE(result);
-                if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) {
+
+                if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
+                    Py_XDECREF(result);
                     return NULL;
                 }
 
@@ -202,9 +217,9 @@ static PyObject* ZstdCompressionObj_flus
 }
 
 static PyMethodDef ZstdCompressionObj_methods[] = {
-    { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS,
+    { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS | METH_KEYWORDS,
     PyDoc_STR("compress data") },
-    { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS,
+    { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS | METH_KEYWORDS,
     PyDoc_STR("finish compression operation") },
     { NULL, NULL }
 };
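
The compress()/flush() conversions above follow the same recipe as the writer: accept the argument as a Py_buffer ("y*"/"s*" instead of "y#"/"s#"), verify it is C-contiguous with at most one dimension, and release it on every exit path through a shared finally label. Stripped of the zstd work, the skeleton looks like this (hypothetical function name, Python 3 format string only; not code from the patch):

/* Hedged sketch of the Py_buffer handling pattern used by compress()/write()
 * in this patch. The buffer must be released exactly once, hence the single
 * exit path through the finally label. */
#include <Python.h>

static PyObject* example_accepting_data(PyObject* self, PyObject* args, PyObject* kwargs) {
    static char* kwlist[] = { "data", NULL };
    Py_buffer source;
    PyObject* result = NULL;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:example",
        kwlist, &source)) {
        return NULL; /* buffer never acquired; nothing to release */
    }

    if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
        PyErr_SetString(PyExc_ValueError,
            "data buffer should be contiguous and have at most one dimension");
        goto finally;
    }

    /* ... operate on source.buf / source.len here ... */
    result = PyLong_FromSsize_t(source.len);

finally:
    PyBuffer_Release(&source);
    return result;
}

The buffer form is what lets callers pass any buffer-protocol object (bytes, bytearray, memoryview, mmap) without an intermediate copy.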
@@ -11,118 +11,78 @@
 
 extern PyObject* ZstdError;
 
-int populate_cdict(ZstdCompressor* compressor, ZSTD_parameters* zparams) {
-    ZSTD_customMem zmem;
+int ensure_cctx(ZstdCompressor* compressor) {
+    size_t zresult;
+
+    assert(compressor);
+    assert(compressor->cctx);
+    assert(compressor->params);
 
-    if (compressor->cdict || !compressor->dict || !compressor->dict->dictData) {
-        return 0;
+    ZSTD_CCtx_reset(compressor->cctx);
+
+    zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, compressor->params);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "could not set compression parameters: %s",
+            ZSTD_getErrorName(zresult));
+        return 1;
     }
 
-    Py_BEGIN_ALLOW_THREADS
-    memset(&zmem, 0, sizeof(zmem));
-    compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData,
-        compressor->dict->dictSize, 1, *zparams, zmem);
-    Py_END_ALLOW_THREADS
-
-    if (!compressor->cdict) {
-        PyErr_SetString(ZstdError, "could not create compression dictionary");
-        return 1;
+    if (compressor->dict) {
+        if (compressor->dict->cdict) {
+            zresult = ZSTD_CCtx_refCDict(compressor->cctx, compressor->dict->cdict);
+        }
+        else {
+            zresult = ZSTD_CCtx_loadDictionary_advanced(compressor->cctx,
+                compressor->dict->dictData, compressor->dict->dictSize,
+                ZSTD_dlm_byRef, compressor->dict->dictType);
+        }
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "could not load compression dictionary: %s",
+                ZSTD_getErrorName(zresult));
+            return 1;
+        }
     }
 
     return 0;
 }
 
-/**
- * Ensure the ZSTD_CStream on a ZstdCompressor instance is initialized.
- *
- * Returns 0 on success. Other value on failure. Will set a Python exception
- * on failure.
- */
-int init_cstream(ZstdCompressor* compressor, unsigned long long sourceSize) {
-    ZSTD_parameters zparams;
-    void* dictData = NULL;
-    size_t dictSize = 0;
-    size_t zresult;
+static PyObject* frame_progression(ZSTD_CCtx* cctx) {
+    PyObject* result = NULL;
+    PyObject* value;
+    ZSTD_frameProgression progression;
 
-    if (compressor->cstream) {
-        zresult = ZSTD_resetCStream(compressor->cstream, sourceSize);
-        if (ZSTD_isError(zresult)) {
-            PyErr_Format(ZstdError, "could not reset CStream: %s",
-                ZSTD_getErrorName(zresult));
-            return -1;
-        }
-
-        return 0;
+    result = PyTuple_New(3);
+    if (!result) {
+        return NULL;
     }
 
-    compressor->cstream = ZSTD_createCStream();
-    if (!compressor->cstream) {
-        PyErr_SetString(ZstdError, "could not create CStream");
-        return -1;
-    }
+    progression = ZSTD_getFrameProgression(cctx);
 
-    if (compressor->dict) {
-        dictData = compressor->dict->dictData;
-        dictSize = compressor->dict->dictSize;
-    }
-
-    memset(&zparams, 0, sizeof(zparams));
-    if (compressor->cparams) {
-        ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
-        /* Do NOT call ZSTD_adjustCParams() here because the compression params
-        come from the user. */
-    }
-    else {
-        zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize);
-    }
+    value = PyLong_FromUnsignedLongLong(progression.ingested);
+    if (!value) {
+        Py_DECREF(result);
+        return NULL;
+    }
 
-    zparams.fParams = compressor->fparams;
-
-    zresult = ZSTD_initCStream_advanced(compressor->cstream, dictData, dictSize,
-        zparams, sourceSize);
-
-    if (ZSTD_isError(zresult)) {
-        ZSTD_freeCStream(compressor->cstream);
-        compressor->cstream = NULL;
-        PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
-        return -1;
-    }
+    PyTuple_SET_ITEM(result, 0, value);
 
-    return 0;;
-}
-
-int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize) {
-    size_t zresult;
-    void* dictData = NULL;
-    size_t dictSize = 0;
-    ZSTD_parameters zparams;
-
-    assert(compressor->mtcctx);
-
-    if (compressor->dict) {
-        dictData = compressor->dict->dictData;
-        dictSize = compressor->dict->dictSize;
-    }
+    value = PyLong_FromUnsignedLongLong(progression.consumed);
+    if (!value) {
+        Py_DECREF(result);
+        return NULL;
+    }
 
-    memset(&zparams, 0, sizeof(zparams));
-    if (compressor->cparams) {
-        ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
-    }
-    else {
-        zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize);
-    }
-
-    zparams.fParams = compressor->fparams;
-
-    zresult = ZSTDMT_initCStream_advanced(compressor->mtcctx, dictData, dictSize,
-        zparams, sourceSize);
-
-    if (ZSTD_isError(zresult)) {
-        PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
-        return -1;
-    }
-
-    return 0;
-}
+    PyTuple_SET_ITEM(result, 1, value);
+
+    value = PyLong_FromUnsignedLongLong(progression.produced);
+    if (!value) {
+        Py_DECREF(result);
+        return NULL;
+    }
+
+    PyTuple_SET_ITEM(result, 2, value);
+
+    return result;
+}
 
 PyDoc_STRVAR(ZstdCompressor__doc__,
@@ -147,9 +107,9 @@ PyDoc_STRVAR(ZstdCompressor__doc__,
 "   If True, a 4 byte content checksum will be written with the compressed\n"
 "   data, allowing the decompressor to perform content verification.\n"
 "write_content_size\n"
-"   If True, the decompressed content size will be included in\n"
-"   the compressed data. This data will only be written if the\n"
-"   knows the size of the input data.\n"
+"   If True (the default), the decompressed content size will be included in\n"
+"   the header of the compressed data. This data will only be written if the\n"
+"   compressor knows the size of the input data.\n"
 "write_dict_id\n"
 "   Determines whether the dictionary ID will be written into the compressed\n"
 "   data. Defaults to True. Only adds content to the compressed data if\n"
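
The reworded docstring above hinges on the compressor knowing the input size. In the rewritten code that knowledge is supplied with ZSTD_CCtx_setPledgedSrcSize() before any data is compressed; only then can the frame header carry a content size. A hedged sketch of the relationship, assuming the zstd 1.3.x advanced API and an output buffer of at least ZSTD_compressBound(srcSize) bytes (not code from this patch):

/* Hedged sketch: pledge the source size so the frame header records the
 * content size (with the content size flag left at its default of enabled). */
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

static size_t compress_with_content_size(ZSTD_CCtx* cctx,
    const void* src, size_t srcSize, void* dst, size_t dstCapacity) {
    ZSTD_inBuffer in = { src, srcSize, 0 };
    ZSTD_outBuffer out = { dst, dstCapacity, 0 };
    size_t zresult;

    /* Without this call the size is ZSTD_CONTENTSIZE_UNKNOWN and the header
     * omits the field, which is what the docstring is warning about. */
    zresult = ZSTD_CCtx_setPledgedSrcSize(cctx, (unsigned long long)srcSize);
    if (ZSTD_isError(zresult)) {
        return 0;
    }

    zresult = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
    if (ZSTD_isError(zresult) || zresult != 0) {
        return 0; /* error, or frame unexpectedly not fully flushed */
    }

    /* A decompressor can now read srcSize back from the frame header,
     * e.g. via ZSTD_getFrameContentSize(dst, out.pos). */
    return out.pos;
}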
@@ -175,7 +135,7 @@ static int ZstdCompressor_init(ZstdCompr
 
     int level = 3;
     ZstdCompressionDict* dict = NULL;
-    CompressionParametersObject* params = NULL;
+    ZstdCompressionParametersObject* params = NULL;
     PyObject* writeChecksum = NULL;
     PyObject* writeContentSize = NULL;
     PyObject* writeDictID = NULL;
@@ -183,16 +143,11 @@ static int ZstdCompressor_init(ZstdCompr
 
     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor",
         kwlist, &level, &ZstdCompressionDictType, &dict,
-        &CompressionParametersType, &params,
+        &ZstdCompressionParametersType, &params,
         &writeChecksum, &writeContentSize, &writeDictID, &threads)) {
         return -1;
     }
 
-    if (level < 1) {
-        PyErr_SetString(PyExc_ValueError, "level must be greater than 0");
-        return -1;
-    }
-
     if (level > ZSTD_maxCLevel()) {
         PyErr_Format(PyExc_ValueError, "level must be less than %d",
             ZSTD_maxCLevel() + 1);
@@ -203,79 +158,135 @@ static int ZstdCompressor_init(ZstdCompr
         threads = cpu_count();
     }
 
-    self->threads = threads;
-
     /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
        overhead of each compression operation. */
-    if (threads) {
-        self->mtcctx = ZSTDMT_createCCtx(threads);
-        if (!self->mtcctx) {
-            PyErr_NoMemory();
+    self->cctx = ZSTD_createCCtx();
+    if (!self->cctx) {
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    /* TODO stuff the original parameters away somewhere so we can reset later. This
+       will allow us to do things like automatically adjust cparams based on input
+       size (assuming zstd isn't doing that internally). */
+
+    self->params = ZSTD_createCCtxParams();
+    if (!self->params) {
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    if (params && writeChecksum) {
+        PyErr_SetString(PyExc_ValueError,
+            "cannot define compression_params and write_checksum");
+        return -1;
+    }
+
+    if (params && writeContentSize) {
+        PyErr_SetString(PyExc_ValueError,
+            "cannot define compression_params and write_content_size");
+        return -1;
+    }
+
+    if (params && writeDictID) {
+        PyErr_SetString(PyExc_ValueError,
+            "cannot define compression_params and write_dict_id");
+        return -1;
+    }
+
+    if (params && threads) {
+        PyErr_SetString(PyExc_ValueError,
+            "cannot define compression_params and threads");
+        return -1;
+    }
+
+    if (params) {
+        if (set_parameters(self->params, params)) {
             return -1;
         }
     }
     else {
-        self->cctx = ZSTD_createCCtx();
-        if (!self->cctx) {
-            PyErr_NoMemory();
+        if (set_parameter(self->params, ZSTD_p_compressionLevel, level)) {
+            return -1;
+        }
+
+        if (set_parameter(self->params, ZSTD_p_contentSizeFlag,
+            writeContentSize ? PyObject_IsTrue(writeContentSize) : 1)) {
+            return -1;
+        }
+
+        if (set_parameter(self->params, ZSTD_p_checksumFlag,
+            writeChecksum ? PyObject_IsTrue(writeChecksum) : 0)) {
             return -1;
         }
-    }
 
-    self->compressionLevel = level;
+        if (set_parameter(self->params, ZSTD_p_dictIDFlag,
+            writeDictID ? PyObject_IsTrue(writeDictID) : 1)) {
+            return -1;
+        }
+
+        if (threads) {
+            if (set_parameter(self->params, ZSTD_p_nbWorkers, threads)) {
+                return -1;
+            }
+        }
+    }
 
     if (dict) {
         self->dict = dict;
         Py_INCREF(dict);
     }
 
-    if (params) {
-        self->cparams = params;
-        Py_INCREF(params);
-    }
-
-    memset(&self->fparams, 0, sizeof(self->fparams));
-
-    if (writeChecksum && PyObject_IsTrue(writeChecksum)) {
-        self->fparams.checksumFlag = 1;
-    }
-    if (writeContentSize && PyObject_IsTrue(writeContentSize)) {
-        self->fparams.contentSizeFlag = 1;
-    }
-    if (writeDictID && PyObject_Not(writeDictID)) {
-        self->fparams.noDictIDFlag = 1;
+    if (ensure_cctx(self)) {
+        return -1;
     }
 
     return 0;
 }
 
 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
-    if (self->cstream) {
-        ZSTD_freeCStream(self->cstream);
-        self->cstream = NULL;
-    }
-
-    Py_XDECREF(self->cparams);
-    Py_XDECREF(self->dict);
-
-    if (self->cdict) {
-        ZSTD_freeCDict(self->cdict);
-        self->cdict = NULL;
-    }
-
     if (self->cctx) {
         ZSTD_freeCCtx(self->cctx);
         self->cctx = NULL;
     }
 
-    if (self->mtcctx) {
-        ZSTDMT_freeCCtx(self->mtcctx);
-        self->mtcctx = NULL;
+    if (self->params) {
+        ZSTD_freeCCtxParams(self->params);
+        self->params = NULL;
     }
 
+    Py_XDECREF(self->dict);
     PyObject_Del(self);
 }
 
+PyDoc_STRVAR(ZstdCompressor_memory_size__doc__,
+"memory_size()\n"
+"\n"
+"Obtain the memory usage of this compressor, in bytes.\n"
+);
+
+static PyObject* ZstdCompressor_memory_size(ZstdCompressor* self) {
+    if (self->cctx) {
+        return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->cctx));
+    }
+    else {
+        PyErr_SetString(ZstdError, "no compressor context found; this should never happen");
+        return NULL;
+    }
+}
+
+PyDoc_STRVAR(ZstdCompressor_frame_progression__doc__,
+"frame_progression()\n"
+"\n"
+"Return information on how much work the compressor has done.\n"
+"\n"
+"Returns a 3-tuple of (ingested, consumed, produced).\n"
+);
+
+static PyObject* ZstdCompressor_frame_progression(ZstdCompressor* self) {
+    return frame_progression(self->cctx);
+}
+
 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__,
 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n"
 "compress data between streams\n"
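
frame_progression(), added above, is a thin wrapper around ZSTD_getFrameProgression(). For reference, a small C-side sketch of what the three counters mean (assumes the same zstd 1.3.x API; not part of the patch):

/* Hedged sketch: report how far a compression context has progressed. The
 * three fields are the ones frame_progression() packs into its Python tuple. */
#define ZSTD_STATIC_LINKING_ONLY
#include <stdio.h>
#include <zstd.h>

static void print_progress(ZSTD_CCtx* cctx) {
    ZSTD_frameProgression p = ZSTD_getFrameProgression(cctx);

    printf("ingested: %llu bytes accepted from the caller\n",
        (unsigned long long)p.ingested);
    printf("consumed: %llu of those bytes actually compressed so far\n",
        (unsigned long long)p.consumed);
    printf("produced: %llu compressed bytes generated\n",
        (unsigned long long)p.produced);
}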
@@ -304,7 +315,7 @@ static PyObject* ZstdCompressor_copy_str
 
     PyObject* source;
     PyObject* dest;
-    Py_ssize_t sourceSize = 0;
+    unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
     size_t inSize = ZSTD_CStreamInSize();
     size_t outSize = ZSTD_CStreamOutSize();
     ZSTD_inBuffer input;
@@ -313,14 +324,14 @@ static PyObject* ZstdCompressor_copy_str
     Py_ssize_t totalWrite = 0;
     char* readBuffer;
     Py_ssize_t readSize;
-    PyObject* readResult;
+    PyObject* readResult = NULL;
     PyObject* res = NULL;
     size_t zresult;
     PyObject* writeResult;
     PyObject* totalReadPy;
     PyObject* totalWritePy;
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk:copy_stream", kwlist,
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|Kkk:copy_stream", kwlist,
         &source, &dest, &sourceSize, &inSize, &outSize)) {
         return NULL;
     }
@@ -335,22 +346,18 @@ static PyObject* ZstdCompressor_copy_str
         return NULL;
     }
 
-    /* Prevent free on uninitialized memory in finally. */
-    output.dst = NULL;
-
-    if (self->mtcctx) {
-        if (init_mtcstream(self, sourceSize)) {
-            res = NULL;
-            goto finally;
-        }
-    }
-    else {
-        if (0 != init_cstream(self, sourceSize)) {
-            res = NULL;
-            goto finally;
-        }
+    if (ensure_cctx(self)) {
+        return NULL;
     }
 
+    zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error setting source size: %s",
+            ZSTD_getErrorName(zresult));
+        return NULL;
+    }
+
+    /* Prevent free on uninitialized memory in finally. */
     output.dst = PyMem_Malloc(outSize);
     if (!output.dst) {
         PyErr_NoMemory();
@@ -360,6 +367,10 @@ static PyObject* ZstdCompressor_copy_str
     output.size = outSize;
     output.pos = 0;
 
+    input.src = NULL;
+    input.size = 0;
+    input.pos = 0;
+
     while (1) {
         /* Try to read from source stream. */
         readResult = PyObject_CallMethod(source, "read", "n", inSize);
@@ -384,12 +395,7 @@ static PyObject* ZstdCompressor_copy_str
 
         while (input.pos < input.size) {
             Py_BEGIN_ALLOW_THREADS
-            if (self->mtcctx) {
-                zresult = ZSTDMT_compressStream(self->mtcctx, &output, &input);
-            }
-            else {
-                zresult = ZSTD_compressStream(self->cstream, &output, &input);
-            }
+            zresult = ZSTD_compress_generic(self->cctx, &output, &input, ZSTD_e_continue);
             Py_END_ALLOW_THREADS
 
             if (ZSTD_isError(zresult)) {
@@ -410,16 +416,18 @@ static PyObject* ZstdCompressor_copy_str
                 output.pos = 0;
             }
         }
+
+        Py_CLEAR(readResult);
     }
 
     /* We've finished reading. Now flush the compressor stream. */
+    assert(input.pos == input.size);
+
     while (1) {
-        if (self->mtcctx) {
-            zresult = ZSTDMT_endStream(self->mtcctx, &output);
-        }
-        else {
-            zresult = ZSTD_endStream(self->cstream, &output);
-        }
+        Py_BEGIN_ALLOW_THREADS
+        zresult = ZSTD_compress_generic(self->cctx, &output, &input, ZSTD_e_end);
+        Py_END_ALLOW_THREADS
+
         if (ZSTD_isError(zresult)) {
             PyErr_Format(ZstdError, "error ending compression stream: %s",
                 ZSTD_getErrorName(zresult));
@@ -455,11 +463,81 @@ finally:
         PyMem_Free(output.dst);
     }
 
+    Py_XDECREF(readResult);
+
     return res;
 }
 
+PyDoc_STRVAR(ZstdCompressor_stream_reader__doc__,
+"stream_reader(source, [size=0])\n"
+"\n"
+"Obtain an object that behaves like an I/O stream.\n"
+"\n"
+"The source object can be any object with a ``read(size)`` method\n"
+"or an object that conforms to the buffer protocol.\n"
+);
+
+static ZstdCompressionReader* ZstdCompressor_stream_reader(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "source",
+        "size",
+        "read_size",
+        NULL
+    };
+
+    PyObject* source;
+    unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    size_t readSize = ZSTD_CStreamInSize();
+    ZstdCompressionReader* result = NULL;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist,
+        &source, &sourceSize, &readSize)) {
+        return NULL;
+    }
+
+    result = (ZstdCompressionReader*)PyObject_CallObject((PyObject*)&ZstdCompressionReaderType, NULL);
+    if (!result) {
+        return NULL;
+    }
+
+    if (PyObject_HasAttrString(source, "read")) {
+        result->reader = source;
+        Py_INCREF(source);
+        result->readSize = readSize;
+    }
+    else if (1 == PyObject_CheckBuffer(source)) {
+        if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
+            goto except;
+        }
+
+        assert(result->buffer.len >= 0);
+
+        sourceSize = result->buffer.len;
+    }
+    else {
+        PyErr_SetString(PyExc_TypeError,
+            "must pass an object with a read() method or that conforms to the buffer protocol");
+        goto except;
+    }
+
+    if (ensure_cctx(self)) {
+        goto except;
+    }
+
+    result->compressor = self;
+    Py_INCREF(self);
+    result->sourceSize = sourceSize;
+
+    return result;
+
+except:
+    Py_CLEAR(result);
+
+    return NULL;
+}
+
 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
-"compress(data, allow_empty=False)\n"
+"compress(data)\n"
 "\n"
 "Compress data in a single operation.\n"
 "\n"
@@ -473,122 +551,79 b' PyDoc_STRVAR(ZstdCompressor_compress__do' | |||||
473 | static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
551 | static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
474 | static char* kwlist[] = { |
|
552 | static char* kwlist[] = { | |
475 | "data", |
|
553 | "data", | |
476 | "allow_empty", |
|
|||
477 | NULL |
|
554 | NULL | |
478 | }; |
|
555 | }; | |
479 |
|
556 | |||
480 | const char* source; |
|
557 | Py_buffer source; | |
481 | Py_ssize_t sourceSize; |
|
|||
482 | PyObject* allowEmpty = NULL; |
|
|||
483 | size_t destSize; |
|
558 | size_t destSize; | |
484 | PyObject* output; |
|
559 | PyObject* output = NULL; | |
485 | char* dest; |
|
|||
486 | void* dictData = NULL; |
|
|||
487 | size_t dictSize = 0; |
|
|||
488 | size_t zresult; |
|
560 | size_t zresult; | |
489 | ZSTD_parameters zparams; |
|
561 | ZSTD_outBuffer outBuffer; | |
|
562 | ZSTD_inBuffer inBuffer; | |||
490 |
|
563 | |||
491 | #if PY_MAJOR_VERSION >= 3 |
|
564 | #if PY_MAJOR_VERSION >= 3 | |
492 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y |
|
565 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|O:compress", | |
493 | #else |
|
566 | #else | |
494 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s |
|
567 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|O:compress", | |
495 | #endif |
|
568 | #endif | |
496 |
kwlist, &source |
|
569 | kwlist, &source)) { | |
497 | return NULL; |
|
|||
498 | } |
|
|||
499 |
|
||||
500 | if (self->threads && self->dict) { |
|
|||
501 | PyErr_SetString(ZstdError, |
|
|||
502 | "compress() cannot be used with both dictionaries and multi-threaded compression"); |
|
|||
503 | return NULL; |
|
|||
504 | } |
|
|||
505 |
|
||||
506 | if (self->threads && self->cparams) { |
|
|||
507 | PyErr_SetString(ZstdError, |
|
|||
508 | "compress() cannot be used with both compression parameters and multi-threaded compression"); |
|
|||
509 | return NULL; |
|
|||
510 | } |
|
|||
511 |
|
||||
512 | /* Limitation in zstd C API doesn't let decompression side distinguish |
|
|||
513 | between content size of 0 and unknown content size. This can make round |
|
|||
514 | tripping via Python difficult. Until this is fixed, require a flag |
|
|||
515 | to fire the footgun. |
|
|||
516 | https://github.com/indygreg/python-zstandard/issues/11 */ |
|
|||
517 | if (0 == sourceSize && self->fparams.contentSizeFlag |
|
|||
518 | && (!allowEmpty || PyObject_Not(allowEmpty))) { |
|
|||
519 | PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes"); |
|
|||
520 | return NULL; |
|
|||
521 | } |
|
|||
522 |
|
||||
523 | destSize = ZSTD_compressBound(sourceSize); |
|
|||
524 | output = PyBytes_FromStringAndSize(NULL, destSize); |
|
|||
525 | if (!output) { |
|
|||
526 | return NULL; |
|
570 | return NULL; | |
527 | } |
|
571 | } | |
528 |
|
572 | |||
529 | dest = PyBytes_AsString(output); |
|
573 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |
530 |
|
574 | PyErr_SetString(PyExc_ValueError, | ||
531 | if (self->dict) { |
|
575 | "data buffer should be contiguous and have at most one dimension"); | |
532 | dictData = self->dict->dictData; |
|
576 | goto finally; | |
533 | dictSize = self->dict->dictSize; |
|
|||
534 | } |
|
577 | } | |
535 |
|
578 | |||
536 | memset(&zparams, 0, sizeof(zparams)); |
|
579 | if (ensure_cctx(self)) { | |
537 | if (!self->cparams) { |
|
580 | goto finally; | |
538 | zparams.cParams = ZSTD_getCParams(self->compressionLevel, sourceSize, dictSize); |
|
|||
539 | } |
|
581 | } | |
540 | else { |
|
582 | ||
541 | ztopy_compression_parameters(self->cparams, &zparams.cParams); |
|
583 | destSize = ZSTD_compressBound(source.len); | |
542 | /* Do NOT call ZSTD_adjustCParams() here because the compression params |
|
584 | output = PyBytes_FromStringAndSize(NULL, destSize); | |
543 | come from the user. */ |
|
585 | if (!output) { | |
|
586 | goto finally; | |||
544 | } |
|
587 | } | |
545 |
|
588 | |||
546 | zparams.fParams = self->fparams; |
|
589 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, source.len); | |
547 |
|
590 | if (ZSTD_isError(zresult)) { | ||
548 | /* The raw dict data has to be processed before it can be used. Since this |
|
591 | PyErr_Format(ZstdError, "error setting source size: %s", | |
549 | adds overhead - especially if multiple dictionary compression operations |
|
592 | ZSTD_getErrorName(zresult)); | |
550 | are performed on the same ZstdCompressor instance - we create a |
|
593 | Py_CLEAR(output); | |
551 | ZSTD_CDict once and reuse it for all operations. |
|
594 | goto finally; | |
|
595 | } | |||
552 |
|
596 | |||
553 | Note: the compression parameters used for the first invocation (possibly |
|
597 | inBuffer.src = source.buf; | |
554 | derived from the source size) will be reused on all subsequent invocations. |
|
598 | inBuffer.size = source.len; | |
555 | https://github.com/facebook/zstd/issues/358 contains more info. We could |
|
599 | inBuffer.pos = 0; | |
556 | potentially add an argument somewhere to control this behavior. |
|
600 | ||
557 | */ |
|
601 | outBuffer.dst = PyBytes_AsString(output); | |
558 | if (0 != populate_cdict(self, &zparams)) { |
|
602 | outBuffer.size = destSize; | |
559 | Py_DECREF(output); |
|
603 | outBuffer.pos = 0; | |
560 | return NULL; |
|
|||
561 | } |
|
|||
562 |
|
604 | |||
563 | Py_BEGIN_ALLOW_THREADS |
|
605 | Py_BEGIN_ALLOW_THREADS | |
564 | if (self->mtcctx) { |
|
606 | /* By avoiding ZSTD_compress(), we don't necessarily write out content | |
565 | zresult = ZSTDMT_compressCCtx(self->mtcctx, dest, destSize, |
|
607 | size. This means the argument to ZstdCompressor to control frame | |
566 | source, sourceSize, self->compressionLevel); |
|
608 | parameters is honored. */ | |
567 | } |
|
609 | zresult = ZSTD_compress_generic(self->cctx, &outBuffer, &inBuffer, ZSTD_e_end); | |
568 | else { |
|
|||
569 | /* By avoiding ZSTD_compress(), we don't necessarily write out content |
|
|||
570 | size. This means the argument to ZstdCompressor to control frame |
|
|||
571 | parameters is honored. */ |
|
|||
572 | if (self->cdict) { |
|
|||
573 | zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize, |
|
|||
574 | source, sourceSize, self->cdict); |
|
|||
575 | } |
|
|||
576 | else { |
|
|||
577 | zresult = ZSTD_compress_advanced(self->cctx, dest, destSize, |
|
|||
578 | source, sourceSize, dictData, dictSize, zparams); |
|
|||
579 | } |
|
|||
580 | } |
|
|||
581 | Py_END_ALLOW_THREADS |
|
610 | Py_END_ALLOW_THREADS | |
582 |
|
611 | |||
583 | if (ZSTD_isError(zresult)) { |
|
612 | if (ZSTD_isError(zresult)) { | |
584 | PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult)); |
|
613 | PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult)); | |
585 | Py_CLEAR(output); |
|
614 | Py_CLEAR(output); | |
586 | return NULL; |
|
615 | goto finally; | |
587 | } |
|
616 | } | |
588 | else { |
|
617 | else if (zresult) { | |
589 | Py_SIZE(output) = zresult; |
|
618 | PyErr_SetString(ZstdError, "unexpected partial frame flush"); | |
|
619 | Py_CLEAR(output); | |||
|
620 | goto finally; | |||
590 | } |
|
621 | } | |
591 |
|
622 | |||
|
623 | Py_SIZE(output) = outBuffer.pos; | |||
|
624 | ||||
|
625 | finally: | |||
|
626 | PyBuffer_Release(&source); | |||
592 | return output; |
|
627 | return output; | |
593 | } |
|
628 | } | |
594 |
|
629 | |||
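The rewritten compress() above stops calling ZSTD_compress_advanced()/ZSTDMT_compressCCtx() and instead drives the zstd "advanced" streaming entry point in a single pass. A minimal sketch of that pattern, separated from the Python wrapper (the helper name is illustrative, error reporting trimmed):

    /* Sketch only: compress one buffer with a single ZSTD_compress_generic() call. */
    static int compress_one_shot(ZSTD_CCtx* cctx,
        const void* src, size_t srcSize,
        void* dst, size_t dstCapacity, size_t* written) {
        ZSTD_inBuffer in = { src, srcSize, 0 };
        ZSTD_outBuffer out = { dst, dstCapacity, 0 };
        size_t zresult;

        /* Pledging the source size lets zstd record it in the frame header. */
        zresult = ZSTD_CCtx_setPledgedSrcSize(cctx, srcSize);
        if (ZSTD_isError(zresult)) {
            return -1;
        }

        /* ZSTD_e_end requests a complete frame; a return of 0 means the frame
           was fully flushed into `out`. Anything non-zero here means the
           destination was too small (dstCapacity should come from
           ZSTD_compressBound(srcSize)). */
        zresult = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
        if (ZSTD_isError(zresult) || zresult != 0) {
            return -1;
        }

        *written = out.pos; /* compressed bytes */
        return 0;
    }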
@@ -608,11 +643,23 @@ static ZstdCompressionObj* ZstdCompresso
         NULL
     };
 
-    Py_ssize_t inSize = 0;
+    unsigned long long inSize = ZSTD_CONTENTSIZE_UNKNOWN;
     size_t outSize = ZSTD_CStreamOutSize();
     ZstdCompressionObj* result = NULL;
+    size_t zresult;
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:compressobj", kwlist, &inSize)) {
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|K:compressobj", kwlist, &inSize)) {
+        return NULL;
+    }
+
+    if (ensure_cctx(self)) {
+        return NULL;
+    }
+
+    zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error setting source size: %s",
+            ZSTD_getErrorName(zresult));
         return NULL;
     }
 
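compressobj() now defaults the declared input size to ZSTD_CONTENTSIZE_UNKNOWN instead of 0, which is the semantic difference the old API could not express. A tiny illustrative helper (not part of the module) showing the distinction:

    /* Illustrative only: declare the upcoming input size for a compression
       context. Pledging 0 means "the input is empty"; pledging
       ZSTD_CONTENTSIZE_UNKNOWN leaves the frame's content size field unset. */
    static size_t pledge_size(ZSTD_CCtx* cctx, int have_size, unsigned long long size) {
        return ZSTD_CCtx_setPledgedSrcSize(cctx,
            have_size ? size : ZSTD_CONTENTSIZE_UNKNOWN);
    }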
@@ -621,19 +668,6 b' static ZstdCompressionObj* ZstdCompresso' | |||||
621 | return NULL; |
|
668 | return NULL; | |
622 | } |
|
669 | } | |
623 |
|
670 | |||
624 | if (self->mtcctx) { |
|
|||
625 | if (init_mtcstream(self, inSize)) { |
|
|||
626 | Py_DECREF(result); |
|
|||
627 | return NULL; |
|
|||
628 | } |
|
|||
629 | } |
|
|||
630 | else { |
|
|||
631 | if (0 != init_cstream(self, inSize)) { |
|
|||
632 | Py_DECREF(result); |
|
|||
633 | return NULL; |
|
|||
634 | } |
|
|||
635 | } |
|
|||
636 |
|
||||
637 | result->output.dst = PyMem_Malloc(outSize); |
|
671 | result->output.dst = PyMem_Malloc(outSize); | |
638 | if (!result->output.dst) { |
|
672 | if (!result->output.dst) { | |
639 | PyErr_NoMemory(); |
|
673 | PyErr_NoMemory(); | |
@@ -647,9 +681,9 b' static ZstdCompressionObj* ZstdCompresso' | |||||
647 | return result; |
|
681 | return result; | |
648 | } |
|
682 | } | |
649 |
|
683 | |||
650 |
PyDoc_STRVAR(ZstdCompressor_read_ |
|
684 | PyDoc_STRVAR(ZstdCompressor_read_to_iter__doc__, | |
651 |
"read_ |
|
685 | "read_to_iter(reader, [size=0, read_size=default, write_size=default])\n" | |
652 | "Read uncompress data from a reader and return an iterator\n" |
|
686 | "Read uncompressed data from a reader and return an iterator\n" | |
653 | "\n" |
|
687 | "\n" | |
654 | "Returns an iterator of compressed data produced from reading from ``reader``.\n" |
|
688 | "Returns an iterator of compressed data produced from reading from ``reader``.\n" | |
655 | "\n" |
|
689 | "\n" | |
@@ -667,7 +701,7 @@ PyDoc_STRVAR(ZstdCompressor_read_from__d
 "not consume from the reader unless the caller consumes from the iterator.\n"
 );
 
-static ZstdCompressorIterator* ZstdCompressor_read_from(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
+static ZstdCompressorIterator* ZstdCompressor_read_to_iter(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
     static char* kwlist[] = {
         "reader",
         "size",
@@ -677,12 +711,13 b' static ZstdCompressorIterator* ZstdCompr' | |||||
677 | }; |
|
711 | }; | |
678 |
|
712 | |||
679 | PyObject* reader; |
|
713 | PyObject* reader; | |
680 | Py_ssize_t sourceSize = 0; |
|
714 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
681 | size_t inSize = ZSTD_CStreamInSize(); |
|
715 | size_t inSize = ZSTD_CStreamInSize(); | |
682 | size_t outSize = ZSTD_CStreamOutSize(); |
|
716 | size_t outSize = ZSTD_CStreamOutSize(); | |
683 | ZstdCompressorIterator* result; |
|
717 | ZstdCompressorIterator* result; | |
|
718 | size_t zresult; | |||
684 |
|
719 | |||
685 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O| |
|
720 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kkk:read_to_iter", kwlist, | |
686 | &reader, &sourceSize, &inSize, &outSize)) { |
|
721 | &reader, &sourceSize, &inSize, &outSize)) { | |
687 | return NULL; |
|
722 | return NULL; | |
688 | } |
|
723 | } | |
@@ -696,18 +731,11 @@ static ZstdCompressorIterator* ZstdCompr
         Py_INCREF(result->reader);
     }
     else if (1 == PyObject_CheckBuffer(reader)) {
-        result->buffer = PyMem_Malloc(sizeof(Py_buffer));
-        if (!result->buffer) {
+        if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
             goto except;
         }
 
-        memset(result->buffer, 0, sizeof(Py_buffer));
-
-        if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
-            goto except;
-        }
-
-        sourceSize = result->buffer->len;
+        sourceSize = result->buffer.len;
     }
     else {
         PyErr_SetString(PyExc_ValueError,
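The iterator now holds its Py_buffer by value instead of heap-allocating one, so acquisition and release pair up directly on the struct. A self-contained sketch of that lifetime (hypothetical holder type; assumes Python.h and string.h are included):

    /* Sketch: acquiring and releasing a buffer embedded in a struct by value. */
    typedef struct {
        Py_buffer buffer;
    } holder_t;

    static int hold_buffer(holder_t* h, PyObject* obj) {
        /* PyBUF_CONTIG_RO yields a C-contiguous, read-only view or fails. */
        if (0 != PyObject_GetBuffer(obj, &h->buffer, PyBUF_CONTIG_RO)) {
            return -1;
        }
        return 0;
    }

    static void drop_buffer(holder_t* h) {
        if (h->buffer.buf) {
            PyBuffer_Release(&h->buffer);
            memset(&h->buffer, 0, sizeof(h->buffer));
        }
    }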
@@ -715,22 +743,20 @@ static ZstdCompressorIterator* ZstdCompr
         goto except;
     }
 
+    if (ensure_cctx(self)) {
+        return NULL;
+    }
+
+    zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error setting source size: %s",
+            ZSTD_getErrorName(zresult));
+        return NULL;
+    }
+
     result->compressor = self;
     Py_INCREF(result->compressor);
 
-    result->sourceSize = sourceSize;
-
-    if (self->mtcctx) {
-        if (init_mtcstream(self, sourceSize)) {
-            goto except;
-        }
-    }
-    else {
-        if (0 != init_cstream(self, sourceSize)) {
-            goto except;
-        }
-    }
-
     result->inSize = inSize;
     result->outSize = outSize;
 
@@ -744,16 +770,13 b' static ZstdCompressorIterator* ZstdCompr' | |||||
744 | goto finally; |
|
770 | goto finally; | |
745 |
|
771 | |||
746 | except: |
|
772 | except: | |
747 |
Py_ |
|
773 | Py_CLEAR(result); | |
748 | Py_XDECREF(result->reader); |
|
|||
749 | Py_DECREF(result); |
|
|||
750 | result = NULL; |
|
|||
751 |
|
774 | |||
752 | finally: |
|
775 | finally: | |
753 | return result; |
|
776 | return result; | |
754 | } |
|
777 | } | |
755 |
|
778 | |||
756 |
PyDoc_STRVAR(ZstdCompressor_ |
|
779 | PyDoc_STRVAR(ZstdCompressor_stream_writer___doc__, | |
757 | "Create a context manager to write compressed data to an object.\n" |
|
780 | "Create a context manager to write compressed data to an object.\n" | |
758 | "\n" |
|
781 | "\n" | |
759 | "The passed object must have a ``write()`` method.\n" |
|
782 | "The passed object must have a ``write()`` method.\n" | |
@@ -771,7 +794,7 @@ PyDoc_STRVAR(ZstdCompressor_write_to___d
 "for a compressor output stream.\n"
 );
 
-static ZstdCompressionWriter* ZstdCompressor_write_to(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
+static ZstdCompressionWriter* ZstdCompressor_stream_writer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
     static char* kwlist[] = {
         "writer",
         "size",
@@ -781,10 +804,10 b' static ZstdCompressionWriter* ZstdCompre' | |||||
781 |
|
804 | |||
782 | PyObject* writer; |
|
805 | PyObject* writer; | |
783 | ZstdCompressionWriter* result; |
|
806 | ZstdCompressionWriter* result; | |
784 | Py_ssize_t sourceSize = 0; |
|
807 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
785 | size_t outSize = ZSTD_CStreamOutSize(); |
|
808 | size_t outSize = ZSTD_CStreamOutSize(); | |
786 |
|
809 | |||
787 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O| |
|
810 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_writer", kwlist, | |
788 | &writer, &sourceSize, &outSize)) { |
|
811 | &writer, &sourceSize, &outSize)) { | |
789 | return NULL; |
|
812 | return NULL; | |
790 | } |
|
813 | } | |
@@ -794,6 +817,10 b' static ZstdCompressionWriter* ZstdCompre' | |||||
794 | return NULL; |
|
817 | return NULL; | |
795 | } |
|
818 | } | |
796 |
|
819 | |||
|
820 | if (ensure_cctx(self)) { | |||
|
821 | return NULL; | |||
|
822 | } | |||
|
823 | ||||
797 | result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL); |
|
824 | result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL); | |
798 | if (!result) { |
|
825 | if (!result) { | |
799 | return NULL; |
|
826 | return NULL; | |
@@ -807,6 +834,7 b' static ZstdCompressionWriter* ZstdCompre' | |||||
807 |
|
834 | |||
808 | result->sourceSize = sourceSize; |
|
835 | result->sourceSize = sourceSize; | |
809 | result->outSize = outSize; |
|
836 | result->outSize = outSize; | |
|
837 | result->bytesCompressed = 0; | |||
810 |
|
838 | |||
811 | return result; |
|
839 | return result; | |
812 | } |
|
840 | } | |
@@ -833,6 +861,7 b' typedef enum {' | |||||
833 | WorkerError_none = 0, |
|
861 | WorkerError_none = 0, | |
834 | WorkerError_zstd = 1, |
|
862 | WorkerError_zstd = 1, | |
835 | WorkerError_no_memory = 2, |
|
863 | WorkerError_no_memory = 2, | |
|
864 | WorkerError_nospace = 3, | |||
836 | } WorkerError; |
|
865 | } WorkerError; | |
837 |
|
866 | |||
838 | /** |
|
867 | /** | |
@@ -841,10 +870,6 b' typedef enum {' | |||||
841 | typedef struct { |
|
870 | typedef struct { | |
842 | /* Used for compression. */ |
|
871 | /* Used for compression. */ | |
843 | ZSTD_CCtx* cctx; |
|
872 | ZSTD_CCtx* cctx; | |
844 | ZSTD_CDict* cdict; |
|
|||
845 | int cLevel; |
|
|||
846 | CompressionParametersObject* cParams; |
|
|||
847 | ZSTD_frameParameters fParams; |
|
|||
848 |
|
873 | |||
849 | /* What to compress. */ |
|
874 | /* What to compress. */ | |
850 | DataSource* sources; |
|
875 | DataSource* sources; | |
@@ -868,7 +893,6 b' static void compress_worker(WorkerState*' | |||||
868 | Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1; |
|
893 | Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1; | |
869 | Py_ssize_t currentBufferStartOffset = state->startOffset; |
|
894 | Py_ssize_t currentBufferStartOffset = state->startOffset; | |
870 | size_t zresult; |
|
895 | size_t zresult; | |
871 | ZSTD_parameters zparams; |
|
|||
872 | void* newDest; |
|
896 | void* newDest; | |
873 | size_t allocationSize; |
|
897 | size_t allocationSize; | |
874 | size_t boundSize; |
|
898 | size_t boundSize; | |
@@ -879,16 +903,10 b' static void compress_worker(WorkerState*' | |||||
879 | assert(!state->destBuffers); |
|
903 | assert(!state->destBuffers); | |
880 | assert(0 == state->destCount); |
|
904 | assert(0 == state->destCount); | |
881 |
|
905 | |||
882 | if (state->cParams) { |
|
|||
883 | ztopy_compression_parameters(state->cParams, &zparams.cParams); |
|
|||
884 | } |
|
|||
885 |
|
||||
886 | zparams.fParams = state->fParams; |
|
|||
887 |
|
||||
888 | /* |
|
906 | /* | |
889 | * The total size of the compressed data is unknown until we actually |
|
907 | * The total size of the compressed data is unknown until we actually | |
890 | * compress data. That means we can't pre-allocate the exact size we need. |
|
908 | * compress data. That means we can't pre-allocate the exact size we need. | |
891 |
* |
|
909 | * | |
892 | * There is a cost to every allocation and reallocation. So, it is in our |
|
910 | * There is a cost to every allocation and reallocation. So, it is in our | |
893 | * interest to minimize the number of allocations. |
|
911 | * interest to minimize the number of allocations. | |
894 | * |
|
912 | * | |
@@ -927,7 +945,8 b' static void compress_worker(WorkerState*' | |||||
927 |
|
945 | |||
928 | destBuffer->segmentsSize = remainingItems; |
|
946 | destBuffer->segmentsSize = remainingItems; | |
929 |
|
947 | |||
930 | allocationSize = roundpow2(state->totalSourceSize >> 4); |
|
948 | assert(state->totalSourceSize <= SIZE_MAX); | |
|
949 | allocationSize = roundpow2((size_t)state->totalSourceSize >> 4); | |||
931 |
|
950 | |||
932 | /* If the maximum size of the output is larger than that, round up. */ |
|
951 | /* If the maximum size of the output is larger than that, round up. */ | |
933 | boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize); |
|
952 | boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize); | |
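The sizing rule described in the comments above amounts to: start from one sixteenth of the total input, rounded up to a power of two, but never below the compressed bound of the item about to be written. A standalone sketch of that choice (roundpow2() is the power-of-two helper this code uses elsewhere):

    /* Illustrative only: choose the initial destination buffer size. */
    static size_t initial_allocation(size_t totalSourceSize, size_t firstSourceSize) {
        size_t allocationSize = roundpow2(totalSourceSize >> 4);
        size_t boundSize = ZSTD_compressBound(firstSourceSize);

        /* If even one worst-case frame would not fit, round up to hold it. */
        if (boundSize > allocationSize) {
            allocationSize = roundpow2(boundSize);
        }

        return allocationSize;
    }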
@@ -949,6 +968,8 b' static void compress_worker(WorkerState*' | |||||
949 | size_t sourceSize = sources[inputOffset].sourceSize; |
|
968 | size_t sourceSize = sources[inputOffset].sourceSize; | |
950 | size_t destAvailable; |
|
969 | size_t destAvailable; | |
951 | void* dest; |
|
970 | void* dest; | |
|
971 | ZSTD_outBuffer opOutBuffer; | |||
|
972 | ZSTD_inBuffer opInBuffer; | |||
952 |
|
973 | |||
953 | destAvailable = destBuffer->destSize - destOffset; |
|
974 | destAvailable = destBuffer->destSize - destOffset; | |
954 | boundSize = ZSTD_compressBound(sourceSize); |
|
975 | boundSize = ZSTD_compressBound(sourceSize); | |
@@ -1004,7 +1025,8 b' static void compress_worker(WorkerState*' | |||||
1004 | * We could dynamically update allocation size based on work done so far. |
|
1025 | * We could dynamically update allocation size based on work done so far. | |
1005 | * For now, keep is simple. |
|
1026 | * For now, keep is simple. | |
1006 | */ |
|
1027 | */ | |
1007 |
a |
|
1028 | assert(state->totalSourceSize <= SIZE_MAX); | |
|
1029 | allocationSize = roundpow2((size_t)state->totalSourceSize >> 4); | |||
1008 |
|
1030 | |||
1009 | if (boundSize > allocationSize) { |
|
1031 | if (boundSize > allocationSize) { | |
1010 | allocationSize = roundpow2(boundSize); |
|
1032 | allocationSize = roundpow2(boundSize); | |
@@ -1032,19 +1054,15 @@ static void compress_worker(WorkerState*
 
         dest = (char*)destBuffer->dest + destOffset;
 
-        if (state->cdict) {
-            zresult = ZSTD_compress_usingCDict(state->cctx, dest, destAvailable,
-                source, sourceSize, state->cdict);
-        }
-        else {
-            if (!state->cParams) {
-                zparams.cParams = ZSTD_getCParams(state->cLevel, sourceSize, 0);
-            }
+        opInBuffer.src = source;
+        opInBuffer.size = sourceSize;
+        opInBuffer.pos = 0;
 
-            zresult = ZSTD_compress_advanced(state->cctx, dest, destAvailable,
-                source, sourceSize, NULL, 0, zparams);
-        }
+        opOutBuffer.dst = dest;
+        opOutBuffer.size = destAvailable;
+        opOutBuffer.pos = 0;
 
+        zresult = ZSTD_CCtx_setPledgedSrcSize(state->cctx, sourceSize);
         if (ZSTD_isError(zresult)) {
             state->error = WorkerError_zstd;
             state->zresult = zresult;
@@ -1052,10 +1070,23 @@ static void compress_worker(WorkerState*
             break;
         }
 
+        zresult = ZSTD_compress_generic(state->cctx, &opOutBuffer, &opInBuffer, ZSTD_e_end);
+        if (ZSTD_isError(zresult)) {
+            state->error = WorkerError_zstd;
+            state->zresult = zresult;
+            state->errorOffset = inputOffset;
+            break;
+        }
+        else if (zresult) {
+            state->error = WorkerError_nospace;
+            state->errorOffset = inputOffset;
+            break;
+        }
+
         destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset;
-        destBuffer->segments[inputOffset - currentBufferStartOffset].length = zresult;
+        destBuffer->segments[inputOffset - currentBufferStartOffset].length = opOutBuffer.pos;
 
-        destOffset += zresult;
+        destOffset += opOutBuffer.pos;
         remainingItems--;
     }
 
@@ -1072,15 +1103,14 b' static void compress_worker(WorkerState*' | |||||
1072 | } |
|
1103 | } | |
1073 |
|
1104 | |||
1074 | ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor, |
|
1105 | ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor, | |
1075 |
DataSources* sources, |
|
1106 | DataSources* sources, Py_ssize_t threadCount) { | |
1076 | ZSTD_parameters zparams; |
|
|||
1077 | unsigned long long bytesPerWorker; |
|
1107 | unsigned long long bytesPerWorker; | |
1078 | POOL_ctx* pool = NULL; |
|
1108 | POOL_ctx* pool = NULL; | |
1079 | WorkerState* workerStates = NULL; |
|
1109 | WorkerState* workerStates = NULL; | |
1080 | Py_ssize_t i; |
|
1110 | Py_ssize_t i; | |
1081 | unsigned long long workerBytes = 0; |
|
1111 | unsigned long long workerBytes = 0; | |
1082 | Py_ssize_t workerStartOffset = 0; |
|
1112 | Py_ssize_t workerStartOffset = 0; | |
1083 | size_t currentThread = 0; |
|
1113 | Py_ssize_t currentThread = 0; | |
1084 | int errored = 0; |
|
1114 | int errored = 0; | |
1085 | Py_ssize_t segmentsCount = 0; |
|
1115 | Py_ssize_t segmentsCount = 0; | |
1086 | Py_ssize_t segmentIndex; |
|
1116 | Py_ssize_t segmentIndex; | |
@@ -1093,34 +1123,12 b' ZstdBufferWithSegmentsCollection* compre' | |||||
1093 | assert(threadCount >= 1); |
|
1123 | assert(threadCount >= 1); | |
1094 |
|
1124 | |||
1095 | /* More threads than inputs makes no sense. */ |
|
1125 | /* More threads than inputs makes no sense. */ | |
1096 |
threadCount = sources->sourcesSize < threadCount ? |
|
1126 | threadCount = sources->sourcesSize < threadCount ? sources->sourcesSize | |
1097 | : threadCount; |
|
1127 | : threadCount; | |
1098 |
|
1128 | |||
1099 | /* TODO lower thread count when input size is too small and threads would add |
|
1129 | /* TODO lower thread count when input size is too small and threads would add | |
1100 | overhead. */ |
|
1130 | overhead. */ | |
1101 |
|
1131 | |||
1102 | /* |
|
|||
1103 | * When dictionaries are used, parameters are derived from the size of the |
|
|||
1104 | * first element. |
|
|||
1105 | * |
|
|||
1106 | * TODO come up with a better mechanism. |
|
|||
1107 | */ |
|
|||
1108 | memset(&zparams, 0, sizeof(zparams)); |
|
|||
1109 | if (compressor->cparams) { |
|
|||
1110 | ztopy_compression_parameters(compressor->cparams, &zparams.cParams); |
|
|||
1111 | } |
|
|||
1112 | else { |
|
|||
1113 | zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, |
|
|||
1114 | sources->sources[0].sourceSize, |
|
|||
1115 | compressor->dict ? compressor->dict->dictSize : 0); |
|
|||
1116 | } |
|
|||
1117 |
|
||||
1118 | zparams.fParams = compressor->fparams; |
|
|||
1119 |
|
||||
1120 | if (0 != populate_cdict(compressor, &zparams)) { |
|
|||
1121 | return NULL; |
|
|||
1122 | } |
|
|||
1123 |
|
||||
1124 | workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState)); |
|
1132 | workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState)); | |
1125 | if (NULL == workerStates) { |
|
1133 | if (NULL == workerStates) { | |
1126 | PyErr_NoMemory(); |
|
1134 | PyErr_NoMemory(); | |
@@ -1140,16 +1148,42 @@ ZstdBufferWithSegmentsCollection* compre
     bytesPerWorker = sources->totalSourceSize / threadCount;
 
     for (i = 0; i < threadCount; i++) {
+        size_t zresult;
+
         workerStates[i].cctx = ZSTD_createCCtx();
         if (!workerStates[i].cctx) {
             PyErr_NoMemory();
             goto finally;
         }
 
-        workerStates[i].cdict = compressor->cdict;
-        workerStates[i].cLevel = compressor->compressionLevel;
-        workerStates[i].cParams = compressor->cparams;
-        workerStates[i].fParams = compressor->fparams;
+        zresult = ZSTD_CCtx_setParametersUsingCCtxParams(workerStates[i].cctx,
+            compressor->params);
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "could not set compression parameters: %s",
+                ZSTD_getErrorName(zresult));
+            goto finally;
+        }
+
+        if (compressor->dict) {
+            if (compressor->dict->cdict) {
+                zresult = ZSTD_CCtx_refCDict(workerStates[i].cctx, compressor->dict->cdict);
+            }
+            else {
+                zresult = ZSTD_CCtx_loadDictionary_advanced(
+                    workerStates[i].cctx,
+                    compressor->dict->dictData,
+                    compressor->dict->dictSize,
+                    ZSTD_dlm_byRef,
+                    compressor->dict->dictType);
+            }
+
+            if (ZSTD_isError(zresult)) {
+                PyErr_Format(ZstdError, "could not load compression dictionary: %s",
+                    ZSTD_getErrorName(zresult));
+                goto finally;
+            }
+
+        }
 
         workerStates[i].sources = sources->sources;
         workerStates[i].sourcesSize = sources->sourcesSize;
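Each worker thread now gets its own ZSTD_CCtx configured from the shared compressor state rather than a copy of raw parameters. Condensed into a standalone helper (field names follow the ones used in this diff; the helper itself is illustrative):

    /* Sketch: configure one worker's context from shared compressor state. */
    static int setup_worker_cctx(ZSTD_CCtx* cctx, ZstdCompressor* compressor) {
        size_t zresult = ZSTD_CCtx_setParametersUsingCCtxParams(cctx, compressor->params);
        if (ZSTD_isError(zresult)) {
            return -1;
        }

        if (compressor->dict) {
            /* Prefer a precomputed ZSTD_CDict; otherwise load the raw dictionary
               by reference so every worker shares the same bytes. */
            if (compressor->dict->cdict) {
                zresult = ZSTD_CCtx_refCDict(cctx, compressor->dict->cdict);
            }
            else {
                zresult = ZSTD_CCtx_loadDictionary_advanced(cctx,
                    compressor->dict->dictData, compressor->dict->dictSize,
                    ZSTD_dlm_byRef, compressor->dict->dictType);
            }
            if (ZSTD_isError(zresult)) {
                return -1;
            }
        }

        return 0;
    }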
@@ -1221,6 +1255,13 b' ZstdBufferWithSegmentsCollection* compre' | |||||
1221 | workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult)); |
|
1255 | workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult)); | |
1222 | errored = 1; |
|
1256 | errored = 1; | |
1223 | break; |
|
1257 | break; | |
|
1258 | ||||
|
1259 | case WorkerError_nospace: | |||
|
1260 | PyErr_Format(ZstdError, "error compressing item %zd: not enough space in output", | |||
|
1261 | workerStates[i].errorOffset); | |||
|
1262 | errored = 1; | |||
|
1263 | break; | |||
|
1264 | ||||
1224 | default: |
|
1265 | default: | |
1225 | ; |
|
1266 | ; | |
1226 | } |
|
1267 | } | |
@@ -1341,12 +1382,6 b' static ZstdBufferWithSegmentsCollection*' | |||||
1341 | Py_ssize_t sourceCount = 0; |
|
1382 | Py_ssize_t sourceCount = 0; | |
1342 | ZstdBufferWithSegmentsCollection* result = NULL; |
|
1383 | ZstdBufferWithSegmentsCollection* result = NULL; | |
1343 |
|
1384 | |||
1344 | if (self->mtcctx) { |
|
|||
1345 | PyErr_SetString(ZstdError, |
|
|||
1346 | "function cannot be called on ZstdCompressor configured for multi-threaded compression"); |
|
|||
1347 | return NULL; |
|
|||
1348 | } |
|
|||
1349 |
|
||||
1350 | memset(&sources, 0, sizeof(sources)); |
|
1385 | memset(&sources, 0, sizeof(sources)); | |
1351 |
|
1386 | |||
1352 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist, |
|
1387 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist, | |
@@ -1372,8 +1407,14 b' static ZstdBufferWithSegmentsCollection*' | |||||
1372 | } |
|
1407 | } | |
1373 |
|
1408 | |||
1374 | for (i = 0; i < buffer->segmentCount; i++) { |
|
1409 | for (i = 0; i < buffer->segmentCount; i++) { | |
|
1410 | if (buffer->segments[i].length > SIZE_MAX) { | |||
|
1411 | PyErr_Format(PyExc_ValueError, | |||
|
1412 | "buffer segment %zd is too large for this platform", i); | |||
|
1413 | goto finally; | |||
|
1414 | } | |||
|
1415 | ||||
1375 | sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset; |
|
1416 | sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset; | |
1376 | sources.sources[i].sourceSize = buffer->segments[i].length; |
|
1417 | sources.sources[i].sourceSize = (size_t)buffer->segments[i].length; | |
1377 | sources.totalSourceSize += buffer->segments[i].length; |
|
1418 | sources.totalSourceSize += buffer->segments[i].length; | |
1378 | } |
|
1419 | } | |
1379 |
|
1420 | |||
@@ -1397,8 +1438,15 b' static ZstdBufferWithSegmentsCollection*' | |||||
1397 | buffer = collection->buffers[i]; |
|
1438 | buffer = collection->buffers[i]; | |
1398 |
|
1439 | |||
1399 | for (j = 0; j < buffer->segmentCount; j++) { |
|
1440 | for (j = 0; j < buffer->segmentCount; j++) { | |
|
1441 | if (buffer->segments[j].length > SIZE_MAX) { | |||
|
1442 | PyErr_Format(PyExc_ValueError, | |||
|
1443 | "buffer segment %zd in buffer %zd is too large for this platform", | |||
|
1444 | j, i); | |||
|
1445 | goto finally; | |||
|
1446 | } | |||
|
1447 | ||||
1400 | sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset; |
|
1448 | sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset; | |
1401 | sources.sources[offset].sourceSize = buffer->segments[j].length; |
|
1449 | sources.sources[offset].sourceSize = (size_t)buffer->segments[j].length; | |
1402 | sources.totalSourceSize += buffer->segments[j].length; |
|
1450 | sources.totalSourceSize += buffer->segments[j].length; | |
1403 |
|
1451 | |||
1404 | offset++; |
|
1452 | offset++; | |
@@ -1416,11 +1464,6 b' static ZstdBufferWithSegmentsCollection*' | |||||
1416 | goto finally; |
|
1464 | goto finally; | |
1417 | } |
|
1465 | } | |
1418 |
|
1466 | |||
1419 | /* |
|
|||
1420 | * It isn't clear whether the address referred to by Py_buffer.buf |
|
|||
1421 | * is still valid after PyBuffer_Release. We we hold a reference to all |
|
|||
1422 | * Py_buffer instances for the duration of the operation. |
|
|||
1423 | */ |
|
|||
1424 | dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer)); |
|
1467 | dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer)); | |
1425 | if (NULL == dataBuffers) { |
|
1468 | if (NULL == dataBuffers) { | |
1426 | PyErr_NoMemory(); |
|
1469 | PyErr_NoMemory(); | |
@@ -1459,6 +1502,11 b' static ZstdBufferWithSegmentsCollection*' | |||||
1459 | goto finally; |
|
1502 | goto finally; | |
1460 | } |
|
1503 | } | |
1461 |
|
1504 | |||
|
1505 | if (sources.totalSourceSize > SIZE_MAX) { | |||
|
1506 | PyErr_SetString(PyExc_ValueError, "sources are too large for this platform"); | |||
|
1507 | goto finally; | |||
|
1508 | } | |||
|
1509 | ||||
1462 | result = compress_from_datasources(self, &sources, threads); |
|
1510 | result = compress_from_datasources(self, &sources, threads); | |
1463 |
|
1511 | |||
1464 | finally: |
|
1512 | finally: | |
@@ -1482,12 +1530,24 @@ static PyMethodDef ZstdCompressor_method
         METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
     { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
         METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
-    { "read_from", (PyCFunction)ZstdCompressor_read_from,
-        METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_from__doc__ },
-    { "write_to", (PyCFunction)ZstdCompressor_write_to,
-        METH_VARARGS | METH_KEYWORDS, ZstdCompressor_write_to___doc__ },
+    { "stream_reader", (PyCFunction)ZstdCompressor_stream_reader,
+        METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_reader__doc__ },
+    { "stream_writer", (PyCFunction)ZstdCompressor_stream_writer,
+        METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
+    { "read_to_iter", (PyCFunction)ZstdCompressor_read_to_iter,
+        METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
+    /* TODO Remove deprecated API */
+    { "read_from", (PyCFunction)ZstdCompressor_read_to_iter,
+        METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
+    /* TODO remove deprecated API */
+    { "write_to", (PyCFunction)ZstdCompressor_stream_writer,
+        METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
     { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer,
         METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ },
+    { "memory_size", (PyCFunction)ZstdCompressor_memory_size,
+        METH_NOARGS, ZstdCompressor_memory_size__doc__ },
+    { "frame_progression", (PyCFunction)ZstdCompressor_frame_progression,
+        METH_NOARGS, ZstdCompressor_frame_progression__doc__ },
     { NULL, NULL }
 };
 
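Note how the old read_from and write_to names survive as deprecated aliases: two PyMethodDef entries simply point at the same C implementation. In isolation (method table name is illustrative), the pattern is:

    /* Sketch: exposing one C function under a new name and a deprecated alias. */
    static PyMethodDef example_methods[] = {
        { "read_to_iter", (PyCFunction)ZstdCompressor_read_to_iter,
            METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
        /* Deprecated alias kept for backwards compatibility. */
        { "read_from", (PyCFunction)ZstdCompressor_read_to_iter,
            METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
        { NULL, NULL }
    };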
@@ -21,10 +21,9 @@ static void ZstdCompressorIterator_deall
     Py_XDECREF(self->compressor);
     Py_XDECREF(self->reader);
 
-    if (self->buffer) {
-        PyBuffer_Release(self->buffer);
-        PyMem_FREE(self->buffer);
-        self->buffer = NULL;
+    if (self->buffer.buf) {
+        PyBuffer_Release(&self->buffer);
+        memset(&self->buffer, 0, sizeof(self->buffer));
     }
 
     if (self->output.dst) {
@@ -58,14 +57,8 b' feedcompressor:' | |||||
58 | /* If we have data left in the input, consume it. */ |
|
57 | /* If we have data left in the input, consume it. */ | |
59 | if (self->input.pos < self->input.size) { |
|
58 | if (self->input.pos < self->input.size) { | |
60 | Py_BEGIN_ALLOW_THREADS |
|
59 | Py_BEGIN_ALLOW_THREADS | |
61 | if (self->compressor->mtcctx) { |
|
60 | zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output, | |
62 | zresult = ZSTDMT_compressStream(self->compressor->mtcctx, |
|
61 | &self->input, ZSTD_e_continue); | |
63 | &self->output, &self->input); |
|
|||
64 | } |
|
|||
65 | else { |
|
|||
66 | zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, |
|
|||
67 | &self->input); |
|
|||
68 | } |
|
|||
69 | Py_END_ALLOW_THREADS |
|
62 | Py_END_ALLOW_THREADS | |
70 |
|
63 | |||
71 | /* Release the Python object holding the input buffer. */ |
|
64 | /* Release the Python object holding the input buffer. */ | |
@@ -107,14 +100,14 b' feedcompressor:' | |||||
107 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); |
|
100 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); | |
108 | } |
|
101 | } | |
109 | else { |
|
102 | else { | |
110 |
assert(self->buffer |
|
103 | assert(self->buffer.buf); | |
111 |
|
104 | |||
112 | /* Only support contiguous C arrays. */ |
|
105 | /* Only support contiguous C arrays. */ | |
113 |
assert(self->buffer |
|
106 | assert(self->buffer.strides == NULL && self->buffer.suboffsets == NULL); | |
114 |
assert(self->buffer |
|
107 | assert(self->buffer.itemsize == 1); | |
115 |
|
108 | |||
116 |
readBuffer = (char*)self->buffer |
|
109 | readBuffer = (char*)self->buffer.buf + self->bufferOffset; | |
117 |
bufferRemaining = self->buffer |
|
110 | bufferRemaining = self->buffer.len - self->bufferOffset; | |
118 | readSize = min(bufferRemaining, (Py_ssize_t)self->inSize); |
|
111 | readSize = min(bufferRemaining, (Py_ssize_t)self->inSize); | |
119 | self->bufferOffset += readSize; |
|
112 | self->bufferOffset += readSize; | |
120 | } |
|
113 | } | |
@@ -130,12 +123,12 @@ feedcompressor:
 
     /* EOF */
     if (0 == readSize) {
-        if (self->compressor->mtcctx) {
-            zresult = ZSTDMT_endStream(self->compressor->mtcctx, &self->output);
-        }
-        else {
-            zresult = ZSTD_endStream(self->compressor->cstream, &self->output);
-        }
+        self->input.src = NULL;
+        self->input.size = 0;
+        self->input.pos = 0;
+
+        zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
+            &self->input, ZSTD_e_end);
         if (ZSTD_isError(zresult)) {
             PyErr_Format(ZstdError, "error ending compression stream: %s",
                 ZSTD_getErrorName(zresult));
@@ -159,13 +152,8 b' feedcompressor:' | |||||
159 | self->input.pos = 0; |
|
152 | self->input.pos = 0; | |
160 |
|
153 | |||
161 | Py_BEGIN_ALLOW_THREADS |
|
154 | Py_BEGIN_ALLOW_THREADS | |
162 | if (self->compressor->mtcctx) { |
|
155 | zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output, | |
163 | zresult = ZSTDMT_compressStream(self->compressor->mtcctx, &self->output, |
|
156 | &self->input, ZSTD_e_continue); | |
164 | &self->input); |
|
|||
165 | } |
|
|||
166 | else { |
|
|||
167 | zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, &self->input); |
|
|||
168 | } |
|
|||
169 | Py_END_ALLOW_THREADS |
|
157 | Py_END_ALLOW_THREADS | |
170 |
|
158 | |||
171 | /* The input buffer currently points to memory managed by Python |
|
159 | /* The input buffer currently points to memory managed by Python |
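The iterator's feed loop above is the streaming counterpart of the one-shot call shown earlier: feed chunks with ZSTD_e_continue, then finish with ZSTD_e_end once the reader is exhausted. A compact sketch of that loop (read_more() is a hypothetical input source; error reporting trimmed):

    /* Sketch: streaming compression with the generic API. */
    static int stream_compress(ZSTD_CCtx* cctx, FILE* out) {
        char inbuf[16384];
        char outbuf[16384];
        size_t n;

        while ((n = read_more(inbuf, sizeof(inbuf))) > 0) {   /* hypothetical */
            ZSTD_inBuffer in = { inbuf, n, 0 };
            while (in.pos < in.size) {
                ZSTD_outBuffer o = { outbuf, sizeof(outbuf), 0 };
                size_t zresult = ZSTD_compress_generic(cctx, &o, &in, ZSTD_e_continue);
                if (ZSTD_isError(zresult)) return -1;
                fwrite(outbuf, 1, o.pos, out);
            }
        }

        /* End of input: flush remaining data and close the frame. */
        for (;;) {
            ZSTD_inBuffer in = { NULL, 0, 0 };
            ZSTD_outBuffer o = { outbuf, sizeof(outbuf), 0 };
            size_t zresult = ZSTD_compress_generic(cctx, &o, &in, ZSTD_e_end);
            if (ZSTD_isError(zresult)) return -1;
            fwrite(outbuf, 1, o.pos, out);
            if (0 == zresult) break;   /* frame fully flushed */
        }
        return 0;
    }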
@@ -52,6 +52,11 b' void constants_module_init(PyObject* mod' | |||||
52 | PyErr_Format(PyExc_ValueError, "could not create frame header object"); |
|
52 | PyErr_Format(PyExc_ValueError, "could not create frame header object"); | |
53 | } |
|
53 | } | |
54 |
|
54 | |||
|
55 | PyModule_AddObject(mod, "CONTENTSIZE_UNKNOWN", | |||
|
56 | PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_UNKNOWN)); | |||
|
57 | PyModule_AddObject(mod, "CONTENTSIZE_ERROR", | |||
|
58 | PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_ERROR)); | |||
|
59 | ||||
55 | PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel()); |
|
60 | PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel()); | |
56 | PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE", |
|
61 | PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE", | |
57 | (long)ZSTD_CStreamInSize()); |
|
62 | (long)ZSTD_CStreamInSize()); | |
@@ -75,7 +80,9 b' void constants_module_init(PyObject* mod' | |||||
75 | PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN); |
|
80 | PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN); | |
76 | PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX); |
|
81 | PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX); | |
77 | PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN); |
|
82 | PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN); | |
78 |
PyModule_AddIntConstant(mod, " |
|
83 | PyModule_AddIntConstant(mod, "LDM_MINMATCH_MIN", ZSTD_LDM_MINMATCH_MIN); | |
|
84 | PyModule_AddIntConstant(mod, "LDM_MINMATCH_MAX", ZSTD_LDM_MINMATCH_MAX); | |||
|
85 | PyModule_AddIntConstant(mod, "LDM_BUCKETSIZELOG_MAX", ZSTD_LDM_BUCKETSIZELOG_MAX); | |||
79 |
|
86 | |||
80 | PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast); |
|
87 | PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast); | |
81 | PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast); |
|
88 | PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast); | |
@@ -84,4 +91,12 b' void constants_module_init(PyObject* mod' | |||||
84 | PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2); |
|
91 | PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2); | |
85 | PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2); |
|
92 | PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2); | |
86 | PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt); |
|
93 | PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt); | |
|
94 | PyModule_AddIntConstant(mod, "STRATEGY_BTULTRA", ZSTD_btultra); | |||
|
95 | ||||
|
96 | PyModule_AddIntConstant(mod, "DICT_TYPE_AUTO", ZSTD_dct_auto); | |||
|
97 | PyModule_AddIntConstant(mod, "DICT_TYPE_RAWCONTENT", ZSTD_dct_rawContent); | |||
|
98 | PyModule_AddIntConstant(mod, "DICT_TYPE_FULLDICT", ZSTD_dct_fullDict); | |||
|
99 | ||||
|
100 | PyModule_AddIntConstant(mod, "FORMAT_ZSTD1", ZSTD_f_zstd1); | |||
|
101 | PyModule_AddIntConstant(mod, "FORMAT_ZSTD1_MAGICLESS", ZSTD_f_zstd1_magicless); | |||
87 | } |
|
102 | } |
@@ -27,7 +27,7 b' static PyObject* ZstdDecompressionWriter' | |||||
27 | return NULL; |
|
27 | return NULL; | |
28 | } |
|
28 | } | |
29 |
|
29 | |||
30 |
if ( |
|
30 | if (ensure_dctx(self->decompressor, 1)) { | |
31 | return NULL; |
|
31 | return NULL; | |
32 | } |
|
32 | } | |
33 |
|
33 | |||
@@ -44,18 +44,17 b' static PyObject* ZstdDecompressionWriter' | |||||
44 | } |
|
44 | } | |
45 |
|
45 | |||
46 | static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) { |
|
46 | static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) { | |
47 | if (!self->decompressor->dstream) { |
|
47 | return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->decompressor->dctx)); | |
48 | PyErr_SetString(ZstdError, "cannot determine size of inactive decompressor; " |
|
|||
49 | "call when context manager is active"); |
|
|||
50 | return NULL; |
|
|||
51 | } |
|
|||
52 |
|
||||
53 | return PyLong_FromSize_t(ZSTD_sizeof_DStream(self->decompressor->dstream)); |
|
|||
54 | } |
|
48 | } | |
55 |
|
49 | |||
56 | static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args) { |
|
50 | static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args, PyObject* kwargs) { | |
57 | const char* source; |
|
51 | static char* kwlist[] = { | |
58 | Py_ssize_t sourceSize; |
|
52 | "data", | |
|
53 | NULL | |||
|
54 | }; | |||
|
55 | ||||
|
56 | PyObject* result = NULL; | |||
|
57 | Py_buffer source; | |||
59 | size_t zresult = 0; |
|
58 | size_t zresult = 0; | |
60 | ZSTD_inBuffer input; |
|
59 | ZSTD_inBuffer input; | |
61 | ZSTD_outBuffer output; |
|
60 | ZSTD_outBuffer output; | |
@@ -63,41 +62,47 b' static PyObject* ZstdDecompressionWriter' | |||||
63 | Py_ssize_t totalWrite = 0; |
|
62 | Py_ssize_t totalWrite = 0; | |
64 |
|
63 | |||
65 | #if PY_MAJOR_VERSION >= 3 |
|
64 | #if PY_MAJOR_VERSION >= 3 | |
66 |
if (!PyArg_ParseTuple(args, "y |
|
65 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write", | |
67 | #else |
|
66 | #else | |
68 |
if (!PyArg_ParseTuple(args, "s |
|
67 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write", | |
69 | #endif |
|
68 | #endif | |
|
69 | kwlist, &source)) { | |||
70 | return NULL; |
|
70 | return NULL; | |
71 | } |
|
71 | } | |
72 |
|
72 | |||
|
73 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |||
|
74 | PyErr_SetString(PyExc_ValueError, | |||
|
75 | "data buffer should be contiguous and have at most one dimension"); | |||
|
76 | goto finally; | |||
|
77 | } | |||
|
78 | ||||
73 | if (!self->entered) { |
|
79 | if (!self->entered) { | |
74 | PyErr_SetString(ZstdError, "write must be called from an active context manager"); |
|
80 | PyErr_SetString(ZstdError, "write must be called from an active context manager"); | |
75 | return NULL; |
|
81 | goto finally; | |
76 | } |
|
82 | } | |
77 |
|
83 | |||
78 | assert(self->decompressor->dstream); |
|
|||
79 |
|
||||
80 | output.dst = PyMem_Malloc(self->outSize); |
|
84 | output.dst = PyMem_Malloc(self->outSize); | |
81 | if (!output.dst) { |
|
85 | if (!output.dst) { | |
82 |
|
|
86 | PyErr_NoMemory(); | |
|
87 | goto finally; | |||
83 | } |
|
88 | } | |
84 | output.size = self->outSize; |
|
89 | output.size = self->outSize; | |
85 | output.pos = 0; |
|
90 | output.pos = 0; | |
86 |
|
91 | |||
87 | input.src = source; |
|
92 | input.src = source.buf; | |
88 |
input.size = source |
|
93 | input.size = source.len; | |
89 | input.pos = 0; |
|
94 | input.pos = 0; | |
90 |
|
95 | |||
91 |
while ((ssize_t)input.pos < source |
|
96 | while ((ssize_t)input.pos < source.len) { | |
92 | Py_BEGIN_ALLOW_THREADS |
|
97 | Py_BEGIN_ALLOW_THREADS | |
93 |
zresult = ZSTD_decompress |
|
98 | zresult = ZSTD_decompress_generic(self->decompressor->dctx, &output, &input); | |
94 | Py_END_ALLOW_THREADS |
|
99 | Py_END_ALLOW_THREADS | |
95 |
|
100 | |||
96 | if (ZSTD_isError(zresult)) { |
|
101 | if (ZSTD_isError(zresult)) { | |
97 | PyMem_Free(output.dst); |
|
102 | PyMem_Free(output.dst); | |
98 | PyErr_Format(ZstdError, "zstd decompress error: %s", |
|
103 | PyErr_Format(ZstdError, "zstd decompress error: %s", | |
99 | ZSTD_getErrorName(zresult)); |
|
104 | ZSTD_getErrorName(zresult)); | |
100 | return NULL; |
|
105 | goto finally; | |
101 | } |
|
106 | } | |
102 |
|
107 | |||
103 | if (output.pos) { |
|
108 | if (output.pos) { | |
@@ -115,7 +120,11 b' static PyObject* ZstdDecompressionWriter' | |||||
115 |
|
120 | |||
116 | PyMem_Free(output.dst); |
|
121 | PyMem_Free(output.dst); | |
117 |
|
122 | |||
118 |
re |
|
123 | result = PyLong_FromSsize_t(totalWrite); | |
|
124 | ||||
|
125 | finally: | |||
|
126 | PyBuffer_Release(&source); | |||
|
127 | return result; | |||
119 | } |
|
128 | } | |
120 |
|
129 | |||
121 | static PyMethodDef ZstdDecompressionWriter_methods[] = { |
|
130 | static PyMethodDef ZstdDecompressionWriter_methods[] = { | |
@@ -125,7 +134,7 b' static PyMethodDef ZstdDecompressionWrit' | |||||
125 | PyDoc_STR("Exit a decompression context.") }, |
|
134 | PyDoc_STR("Exit a decompression context.") }, | |
126 | { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS, |
|
135 | { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS, | |
127 | PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") }, |
|
136 | PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") }, | |
128 | { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS, |
|
137 | { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS | METH_KEYWORDS, | |
129 | PyDoc_STR("Compress data") }, |
|
138 | PyDoc_STR("Compress data") }, | |
130 | { NULL, NULL } |
|
139 | { NULL, NULL } | |
131 | }; |
|
140 | }; |
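The write() conversion above is the general recipe this series applies throughout: switch argument parsing from "y#"/"s#" (pointer plus length) to "y*"/"s*", which fills a Py_buffer that must be released on every exit path. In isolation (function name is illustrative):

    /* Sketch: accepting a bytes-like argument via a Py_buffer. */
    static PyObject* example_write(PyObject* self, PyObject* args, PyObject* kwargs) {
        static char* kwlist[] = { "data", NULL };
        PyObject* result = NULL;
        Py_buffer source;

    #if PY_MAJOR_VERSION >= 3
        if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write", kwlist, &source)) {
    #else
        if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write", kwlist, &source)) {
    #endif
            return NULL;   /* buffer was never filled; nothing to release */
        }

        if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
            PyErr_SetString(PyExc_ValueError,
                "data buffer should be contiguous and have at most one dimension");
            goto finally;
        }

        /* ... use source.buf / source.len ... */
        result = PyLong_FromSsize_t(source.len);

    finally:
        PyBuffer_Release(&source);
        return result;
    }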
@@ -20,56 +20,61 b' static void DecompressionObj_dealloc(Zst' | |||||
20 | PyObject_Del(self); |
|
20 | PyObject_Del(self); | |
21 | } |
|
21 | } | |
22 |
|
22 | |||
23 | static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args) { |
|
23 | static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args, PyObject* kwargs) { | |
24 | const char* source; |
|
24 | static char* kwlist[] = { | |
25 | Py_ssize_t sourceSize; |
|
25 | "data", | |
|
26 | NULL | |||
|
27 | }; | |||
|
28 | ||||
|
29 | Py_buffer source; | |||
26 | size_t zresult; |
|
30 | size_t zresult; | |
27 | ZSTD_inBuffer input; |
|
31 | ZSTD_inBuffer input; | |
28 | ZSTD_outBuffer output; |
|
32 | ZSTD_outBuffer output; | |
29 | size_t outSize = ZSTD_DStreamOutSize(); |
|
|||
30 | PyObject* result = NULL; |
|
33 | PyObject* result = NULL; | |
31 | Py_ssize_t resultSize = 0; |
|
34 | Py_ssize_t resultSize = 0; | |
32 |
|
35 | |||
33 | /* Constructor should ensure stream is populated. */ |
|
|||
34 | assert(self->decompressor->dstream); |
|
|||
35 |
|
||||
36 | if (self->finished) { |
|
36 | if (self->finished) { | |
37 | PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times"); |
|
37 | PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times"); | |
38 | return NULL; |
|
38 | return NULL; | |
39 | } |
|
39 | } | |
40 |
|
40 | |||
41 | #if PY_MAJOR_VERSION >= 3 |
|
41 | #if PY_MAJOR_VERSION >= 3 | |
42 |
if (!PyArg_ParseTuple(args, "y |
|
42 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:decompress", | |
43 | #else |
|
43 | #else | |
44 |
if (!PyArg_ParseTuple(args, "s |
|
44 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:decompress", | |
45 | #endif |
|
45 | #endif | |
46 |
|
|
46 | kwlist, &source)) { | |
47 | return NULL; |
|
47 | return NULL; | |
48 | } |
|
48 | } | |
49 |
|
49 | |||
50 | input.src = source; |
|
50 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |
51 | input.size = sourceSize; |
|
51 | PyErr_SetString(PyExc_ValueError, | |
|
52 | "data buffer should be contiguous and have at most one dimension"); | |||
|
53 | goto finally; | |||
|
54 | } | |||
|
55 | ||||
|
56 | input.src = source.buf; | |||
|
57 | input.size = source.len; | |||
52 | input.pos = 0; |
|
58 | input.pos = 0; | |
53 |
|
59 | |||
54 | output.dst = PyMem_Malloc(outSize); |
|
60 | output.dst = PyMem_Malloc(self->outSize); | |
55 | if (!output.dst) { |
|
61 | if (!output.dst) { | |
56 | PyErr_NoMemory(); |
|
62 | PyErr_NoMemory(); | |
57 | return NULL; |
|
63 | goto except; | |
58 | } |
|
64 | } | |
59 | output.size = outSize; |
|
65 | output.size = self->outSize; | |
60 | output.pos = 0; |
|
66 | output.pos = 0; | |
61 |
|
67 | |||
62 | /* Read input until exhausted. */ |
|
68 | /* Read input until exhausted. */ | |
63 | while (input.pos < input.size) { |
|
69 | while (input.pos < input.size) { | |
64 | Py_BEGIN_ALLOW_THREADS |
|
70 | Py_BEGIN_ALLOW_THREADS | |
65 |
zresult = ZSTD_decompress |
|
71 | zresult = ZSTD_decompress_generic(self->decompressor->dctx, &output, &input); | |
66 | Py_END_ALLOW_THREADS |
|
72 | Py_END_ALLOW_THREADS | |
67 |
|
73 | |||
68 | if (ZSTD_isError(zresult)) { |
|
74 | if (ZSTD_isError(zresult)) { | |
69 | PyErr_Format(ZstdError, "zstd decompressor error: %s", |
|
75 | PyErr_Format(ZstdError, "zstd decompressor error: %s", | |
70 | ZSTD_getErrorName(zresult)); |
|
76 | ZSTD_getErrorName(zresult)); | |
71 | result = NULL; |
|
77 | goto except; | |
72 | goto finally; |
|
|||
73 | } |
|
78 | } | |
74 |
|
79 | |||
75 | if (0 == zresult) { |
|
80 | if (0 == zresult) { | |
@@ -79,7 +84,8 b' static PyObject* DecompressionObj_decomp' | |||||
79 | if (output.pos) { |
|
84 | if (output.pos) { | |
80 | if (result) { |
|
85 | if (result) { | |
81 | resultSize = PyBytes_GET_SIZE(result); |
|
86 | resultSize = PyBytes_GET_SIZE(result); | |
82 |
if (-1 == |
|
87 | if (-1 == safe_pybytes_resize(&result, resultSize + output.pos)) { | |
|
88 | Py_XDECREF(result); | |||
83 | goto except; |
|
89 | goto except; | |
84 | } |
|
90 | } | |
85 |
|
91 | |||
@@ -108,13 +114,14 b' except:' | |||||
108 |
|
114 | |||
109 | finally: |
|
115 | finally: | |
110 | PyMem_Free(output.dst); |
|
116 | PyMem_Free(output.dst); | |
|
117 | PyBuffer_Release(&source); | |||
111 |
|
118 | |||
112 | return result; |
|
119 | return result; | |
113 | } |
|
120 | } | |
114 |
|
121 | |||
115 | static PyMethodDef DecompressionObj_methods[] = { |
|
122 | static PyMethodDef DecompressionObj_methods[] = { | |
116 | { "decompress", (PyCFunction)DecompressionObj_decompress, |
|
123 | { "decompress", (PyCFunction)DecompressionObj_decompress, | |
117 | METH_VARARGS, PyDoc_STR("decompress data") }, |
|
124 | METH_VARARGS | METH_KEYWORDS, PyDoc_STR("decompress data") }, | |
118 | { NULL, NULL } |
|
125 | { NULL, NULL } | |
119 | }; |
|
126 | }; | |
120 |
|
127 |
@@ -12,54 +12,40 @@
 extern PyObject* ZstdError;
 
 /**
-* Ensure the ZSTD_DStream on a ZstdDecompressor is initialized and reset.
-*
-* This should be called before starting a decompression operation with a
-* ZSTD_DStream on a ZstdDecompressor.
-*/
-int init_dstream(ZstdDecompressor* decompressor) {
-    void* dictData = NULL;
-    size_t dictSize = 0;
+* Ensure the ZSTD_DCtx on a decompressor is initiated and ready for a new operation.
+*/
+int ensure_dctx(ZstdDecompressor* decompressor, int loadDict) {
     size_t zresult;
 
-    /* Simple case of dstream already exists. Just reset it. */
-    if (decompressor->dstream) {
-        zresult = ZSTD_resetDStream(decompressor->dstream);
+    ZSTD_DCtx_reset(decompressor->dctx);
+
+    if (decompressor->maxWindowSize) {
+        zresult = ZSTD_DCtx_setMaxWindowSize(decompressor->dctx, decompressor->maxWindowSize);
         if (ZSTD_isError(zresult)) {
-            PyErr_Format(ZstdError, "could not reset DStream: %s",
+            PyErr_Format(ZstdError, "unable to set max window size: %s",
                 ZSTD_getErrorName(zresult));
-            return -1;
+            return 1;
         }
-
-        return 0;
     }
 
-    decompressor->dstream = ZSTD_createDStream();
-    if (!decompressor->dstream) {
-        PyErr_SetString(ZstdError, "could not create DStream");
-        return -1;
-    }
-
-    if (decompressor->dict) {
-        dictData = decompressor->dict->dictData;
-        dictSize = decompressor->dict->dictSize;
+    zresult = ZSTD_DCtx_setFormat(decompressor->dctx, decompressor->format);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "unable to set decoding format: %s",
+            ZSTD_getErrorName(zresult));
+        return 1;
     }
 
-    if (dictData) {
-        zresult = ZSTD_initDStream_usingDict(decompressor->dstream, dictData, dictSize);
-    }
-    else {
-        zresult = ZSTD_initDStream(decompressor->dstream);
-    }
-
-    if (ZSTD_isError(zresult)) {
-        /* Don't leave a reference to an invalid object. */
-        ZSTD_freeDStream(decompressor->dstream);
-        decompressor->dstream = NULL;
-
-        PyErr_Format(ZstdError, "could not initialize DStream: %s",
-            ZSTD_getErrorName(zresult));
-        return -1;
+    if (loadDict && decompressor->dict) {
+        if (ensure_ddict(decompressor->dict)) {
+            return 1;
+        }
+
+        zresult = ZSTD_DCtx_refDDict(decompressor->dctx, decompressor->dict->ddict);
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
+                ZSTD_getErrorName(zresult));
+            return 1;
+        }
     }
 
     return 0;
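ensure_dctx() above is also where the new max_window_size knob is applied. Limiting the window guards against frames that declare a very large window and would otherwise force a correspondingly large allocation during decompression. A minimal sketch of configuring a context that way (helper name is illustrative):

    /* Sketch: cap the decompression window before processing untrusted frames. */
    static ZSTD_DCtx* make_limited_dctx(size_t maxWindowSize) {
        ZSTD_DCtx* dctx = ZSTD_createDCtx();
        if (NULL == dctx) {
            return NULL;
        }

        if (ZSTD_isError(ZSTD_DCtx_setMaxWindowSize(dctx, maxWindowSize))) {
            ZSTD_freeDCtx(dctx);
            return NULL;
        }

        return dctx;
    }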
@@ -76,36 +62,46 b' PyDoc_STRVAR(Decompressor__doc__,' | |||||
76 | static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { |
|
62 | static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |
77 | static char* kwlist[] = { |
|
63 | static char* kwlist[] = { | |
78 | "dict_data", |
|
64 | "dict_data", | |
|
65 | "max_window_size", | |||
|
66 | "format", | |||
79 | NULL |
|
67 | NULL | |
80 | }; |
|
68 | }; | |
81 |
|
69 | |||
82 | ZstdCompressionDict* dict = NULL; |
|
70 | ZstdCompressionDict* dict = NULL; | |
|
71 | size_t maxWindowSize = 0; | |||
|
72 | ZSTD_format_e format = ZSTD_f_zstd1; | |||
83 |
|
73 | |||
84 | self->dctx = NULL; |
|
74 | self->dctx = NULL; | |
85 | self->dict = NULL; |
|
75 | self->dict = NULL; | |
86 | self->ddict = NULL; |
|
|||
87 |
|
76 | |||
88 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist, |
|
77 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!II:ZstdDecompressor", kwlist, | |
89 | &ZstdCompressionDictType, &dict)) { |
|
78 | &ZstdCompressionDictType, &dict, &maxWindowSize, &format)) { | |
90 | return -1; |
|
79 | return -1; | |
91 | } |
|
80 | } | |
92 |
|
81 | |||
93 | /* TODO lazily initialize the reference ZSTD_DCtx on first use since |
|
|||
94 | not instances of ZstdDecompressor will use a ZSTD_DCtx. */ |
|
|||
95 | self->dctx = ZSTD_createDCtx(); |
|
82 | self->dctx = ZSTD_createDCtx(); | |
96 | if (!self->dctx) { |
|
83 | if (!self->dctx) { | |
97 | PyErr_NoMemory(); |
|
84 | PyErr_NoMemory(); | |
98 | goto except; |
|
85 | goto except; | |
99 | } |
|
86 | } | |
100 |
|
87 | |||
|
88 | self->maxWindowSize = maxWindowSize; | |||
|
89 | self->format = format; | |||
|
90 | ||||
101 | if (dict) { |
|
91 | if (dict) { | |
102 | self->dict = dict; |
|
92 | self->dict = dict; | |
103 | Py_INCREF(dict); |
|
93 | Py_INCREF(dict); | |
104 | } |
|
94 | } | |
105 |
|
95 | |||
|
96 | if (ensure_dctx(self, 1)) { | |||
|
97 | goto except; | |||
|
98 | } | |||
|
99 | ||||
106 | return 0; |
|
100 | return 0; | |
107 |
|
101 | |||
108 | except: |
|
102 | except: | |
|
103 | Py_CLEAR(self->dict); | |||
|
104 | ||||
109 | if (self->dctx) { |
|
105 | if (self->dctx) { | |
110 | ZSTD_freeDCtx(self->dctx); |
|
106 | ZSTD_freeDCtx(self->dctx); | |
111 | self->dctx = NULL; |
|
107 | self->dctx = NULL; | |
@@ -117,16 +113,6 b' except:' | |||||
117 | static void Decompressor_dealloc(ZstdDecompressor* self) { |
|
113 | static void Decompressor_dealloc(ZstdDecompressor* self) { | |
118 | Py_CLEAR(self->dict); |
|
114 | Py_CLEAR(self->dict); | |
119 |
|
115 | |||
120 | if (self->ddict) { |
|
|||
121 | ZSTD_freeDDict(self->ddict); |
|
|||
122 | self->ddict = NULL; |
|
|||
123 | } |
|
|||
124 |
|
||||
125 | if (self->dstream) { |
|
|||
126 | ZSTD_freeDStream(self->dstream); |
|
|||
127 | self->dstream = NULL; |
|
|||
128 | } |
|
|||
129 |
|
||||
130 | if (self->dctx) { |
|
116 | if (self->dctx) { | |
131 | ZSTD_freeDCtx(self->dctx); |
|
117 | ZSTD_freeDCtx(self->dctx); | |
132 | self->dctx = NULL; |
|
118 | self->dctx = NULL; | |
@@ -135,6 +121,20 b' static void Decompressor_dealloc(ZstdDec' | |||||
135 | PyObject_Del(self); |
|
121 | PyObject_Del(self); | |
136 | } |
|
122 | } | |
137 |
|
123 | |||
|
124 | PyDoc_STRVAR(Decompressor_memory_size__doc__, | |||
|
125 | "memory_size() -- Size of decompression context, in bytes\n" | |||
|
126 | ); | |||
|
127 | ||||
|
128 | static PyObject* Decompressor_memory_size(ZstdDecompressor* self) { | |||
|
129 | if (self->dctx) { | |||
|
130 | return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->dctx)); | |||
|
131 | } | |||
|
132 | else { | |||
|
133 | PyErr_SetString(ZstdError, "no decompressor context found; this should never happen"); | |||
|
134 | return NULL; | |||
|
135 | } | |||
|
136 | } | |||
|
137 | ||||
138 | PyDoc_STRVAR(Decompressor_copy_stream__doc__, |
|
138 | PyDoc_STRVAR(Decompressor_copy_stream__doc__, | |
139 | "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n" |
|
139 | "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n" | |
140 | "\n" |
|
140 | "\n" | |
@@ -166,7 +166,7 b' static PyObject* Decompressor_copy_strea' | |||||
166 | Py_ssize_t totalWrite = 0; |
|
166 | Py_ssize_t totalWrite = 0; | |
167 | char* readBuffer; |
|
167 | char* readBuffer; | |
168 | Py_ssize_t readSize; |
|
168 | Py_ssize_t readSize; | |
169 | PyObject* readResult; |
|
169 | PyObject* readResult = NULL; | |
170 | PyObject* res = NULL; |
|
170 | PyObject* res = NULL; | |
171 | size_t zresult = 0; |
|
171 | size_t zresult = 0; | |
172 | PyObject* writeResult; |
|
172 | PyObject* writeResult; | |
@@ -191,7 +191,7 b' static PyObject* Decompressor_copy_strea' | |||||
191 | /* Prevent free on uninitialized memory in finally. */ |
|
191 | /* Prevent free on uninitialized memory in finally. */ | |
192 | output.dst = NULL; |
|
192 | output.dst = NULL; | |
193 |
|
193 | |||
194 | if ( |
|
194 | if (ensure_dctx(self, 1)) { | |
195 | res = NULL; |
|
195 | res = NULL; | |
196 | goto finally; |
|
196 | goto finally; | |
197 | } |
|
197 | } | |
@@ -229,7 +229,7 b' static PyObject* Decompressor_copy_strea' | |||||
229 |
|
229 | |||
230 | while (input.pos < input.size) { |
|
230 | while (input.pos < input.size) { | |
231 | Py_BEGIN_ALLOW_THREADS |
|
231 | Py_BEGIN_ALLOW_THREADS | |
232 | zresult = ZSTD_decompress |
|
232 | zresult = ZSTD_decompress_generic(self->dctx, &output, &input); | |
233 | Py_END_ALLOW_THREADS |
|
233 | Py_END_ALLOW_THREADS | |
234 |
|
234 | |||
235 | if (ZSTD_isError(zresult)) { |
|
235 | if (ZSTD_isError(zresult)) { | |
@@ -252,6 +252,8 b' static PyObject* Decompressor_copy_strea' | |||||
252 | output.pos = 0; |
|
252 | output.pos = 0; | |
253 | } |
|
253 | } | |
254 | } |
|
254 | } | |
|
255 | ||||
|
256 | Py_CLEAR(readResult); | |||
255 | } |
|
257 | } | |
256 |
|
258 | |||
257 | /* Source stream is exhausted. Finish up. */ |
|
259 | /* Source stream is exhausted. Finish up. */ | |
@@ -267,6 +269,8 b' finally:' | |||||
267 | PyMem_Free(output.dst); |
|
269 | PyMem_Free(output.dst); | |
268 | } |
|
270 | } | |
269 |
|
271 | |||
|
272 | Py_XDECREF(readResult); | |||
|
273 | ||||
270 | return res; |
|
274 | return res; | |
271 | } |
|
275 | } | |
272 |
|
276 | |||
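copy_stream() above now initializes the context through ensure_dctx(), decompresses with ZSTD_decompress_generic(), and releases each read result as soon as it is consumed. A usage sketch for the corresponding Python API, with in-memory streams standing in for file objects:

    import io
    import zstandard as zstd

    compressed = io.BytesIO(zstd.ZstdCompressor().compress(b"data " * 1000))
    plain = io.BytesIO()

    dctx = zstd.ZstdDecompressor()
    # Returns a (bytes_read, bytes_written) tuple.
    read_count, write_count = dctx.copy_stream(compressed, plain)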
@@ -300,98 +304,114 b' PyObject* Decompressor_decompress(ZstdDe' | |||||
300 | NULL |
|
304 | NULL | |
301 | }; |
|
305 | }; | |
302 |
|
306 | |||
303 | const char* source; |
|
307 | Py_buffer source; | |
304 | Py_ssize_t sourceSize; |
|
|||
305 | Py_ssize_t maxOutputSize = 0; |
|
308 | Py_ssize_t maxOutputSize = 0; | |
306 | unsigned long long decompressedSize; |
|
309 | unsigned long long decompressedSize; | |
307 | size_t destCapacity; |
|
310 | size_t destCapacity; | |
308 | PyObject* result = NULL; |
|
311 | PyObject* result = NULL; | |
309 | void* dictData = NULL; |
|
|||
310 | size_t dictSize = 0; |
|
|||
311 | size_t zresult; |
|
312 | size_t zresult; | |
|
313 | ZSTD_outBuffer outBuffer; | |||
|
314 | ZSTD_inBuffer inBuffer; | |||
312 |
|
315 | |||
313 | #if PY_MAJOR_VERSION >= 3 |
|
316 | #if PY_MAJOR_VERSION >= 3 | |
314 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y |
|
317 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|n:decompress", | |
315 | #else |
|
318 | #else | |
316 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s |
|
319 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|n:decompress", | |
317 | #endif |
|
320 | #endif | |
318 | kwlist, &source, & |
|
321 | kwlist, &source, &maxOutputSize)) { | |
319 | return NULL; |
|
322 | return NULL; | |
320 | } |
|
323 | } | |
321 |
|
324 | |||
322 | if (self->dict) { |
|
325 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |
323 | dictData = self->dict->dictData; |
|
326 | PyErr_SetString(PyExc_ValueError, | |
324 | dictSize = self->dict->dictSize; |
|
327 | "data buffer should be contiguous and have at most one dimension"); | |
|
328 | goto finally; | |||
325 | } |
|
329 | } | |
326 |
|
330 | |||
327 | if (dictData && !self->ddict) { |
|
331 | if (ensure_dctx(self, 1)) { | |
328 | Py_BEGIN_ALLOW_THREADS |
|
332 | goto finally; | |
329 | self->ddict = ZSTD_createDDict_byReference(dictData, dictSize); |
|
|||
330 | Py_END_ALLOW_THREADS |
|
|||
331 |
|
||||
332 | if (!self->ddict) { |
|
|||
333 | PyErr_SetString(ZstdError, "could not create decompression dict"); |
|
|||
334 | return NULL; |
|
|||
335 | } |
|
|||
336 | } |
|
333 | } | |
337 |
|
334 | |||
338 |
decompressedSize = ZSTD_get |
|
335 | decompressedSize = ZSTD_getFrameContentSize(source.buf, source.len); | |
339 | /* 0 returned if content size not in the zstd frame header */ |
|
336 | ||
340 | if ( |
|
337 | if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) { | |
|
338 | PyErr_SetString(ZstdError, "error determining content size from frame header"); | |||
|
339 | goto finally; | |||
|
340 | } | |||
|
341 | /* Special case of empty frame. */ | |||
|
342 | else if (0 == decompressedSize) { | |||
|
343 | result = PyBytes_FromStringAndSize("", 0); | |||
|
344 | goto finally; | |||
|
345 | } | |||
|
346 | /* Missing content size in frame header. */ | |||
|
347 | if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) { | |||
341 | if (0 == maxOutputSize) { |
|
348 | if (0 == maxOutputSize) { | |
342 | PyErr_SetString(ZstdError, " |
|
349 | PyErr_SetString(ZstdError, "could not determine content size in frame header"); | |
343 | "in frame header"); |
|
350 | goto finally; | |
344 | return NULL; |
|
|||
345 | } |
|
351 | } | |
346 | else { |
|
352 | ||
347 |
|
|
353 | result = PyBytes_FromStringAndSize(NULL, maxOutputSize); | |
348 |
|
|
354 | destCapacity = maxOutputSize; | |
|
355 | decompressedSize = 0; | |||
|
356 | } | |||
|
357 | /* Size is recorded in frame header. */ | |||
|
358 | else { | |||
|
359 | assert(SIZE_MAX >= PY_SSIZE_T_MAX); | |||
|
360 | if (decompressedSize > PY_SSIZE_T_MAX) { | |||
|
361 | PyErr_SetString(ZstdError, "frame is too large to decompress on this platform"); | |||
|
362 | goto finally; | |||
349 | } |
|
363 | } | |
350 | } |
|
364 | ||
351 | else { |
|
365 | result = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)decompressedSize); | |
352 | result = PyBytes_FromStringAndSize(NULL, decompressedSize); |
|
366 | destCapacity = (size_t)decompressedSize; | |
353 | destCapacity = decompressedSize; |
|
|||
354 | } |
|
367 | } | |
355 |
|
368 | |||
356 | if (!result) { |
|
369 | if (!result) { | |
357 | return NULL; |
|
370 | goto finally; | |
358 | } |
|
371 | } | |
359 |
|
372 | |||
|
373 | outBuffer.dst = PyBytes_AsString(result); | |||
|
374 | outBuffer.size = destCapacity; | |||
|
375 | outBuffer.pos = 0; | |||
|
376 | ||||
|
377 | inBuffer.src = source.buf; | |||
|
378 | inBuffer.size = source.len; | |||
|
379 | inBuffer.pos = 0; | |||
|
380 | ||||
360 | Py_BEGIN_ALLOW_THREADS |
|
381 | Py_BEGIN_ALLOW_THREADS | |
361 | if (self->ddict) { |
|
382 | zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer); | |
362 | zresult = ZSTD_decompress_usingDDict(self->dctx, |
|
|||
363 | PyBytes_AsString(result), destCapacity, |
|
|||
364 | source, sourceSize, self->ddict); |
|
|||
365 | } |
|
|||
366 | else { |
|
|||
367 | zresult = ZSTD_decompressDCtx(self->dctx, |
|
|||
368 | PyBytes_AsString(result), destCapacity, source, sourceSize); |
|
|||
369 | } |
|
|||
370 | Py_END_ALLOW_THREADS |
|
383 | Py_END_ALLOW_THREADS | |
371 |
|
384 | |||
372 | if (ZSTD_isError(zresult)) { |
|
385 | if (ZSTD_isError(zresult)) { | |
373 | PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult)); |
|
386 | PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult)); | |
374 | Py_ |
|
387 | Py_CLEAR(result); | |
375 | return NULL; |
|
388 | goto finally; | |
376 | } |
|
389 | } | |
377 | else if (decompressedSize && zresult != decompressedSize) { |
|
390 | else if (zresult) { | |
|
391 | PyErr_Format(ZstdError, "decompression error: did not decompress full frame"); | |||
|
392 | Py_CLEAR(result); | |||
|
393 | goto finally; | |||
|
394 | } | |||
|
395 | else if (decompressedSize && outBuffer.pos != decompressedSize) { | |||
378 | PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu", |
|
396 | PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu", | |
379 | zresult, decompressedSize); |
|
397 | zresult, decompressedSize); | |
380 | Py_ |
|
398 | Py_CLEAR(result); | |
381 | return NULL; |
|
399 | goto finally; | |
382 | } |
|
400 | } | |
383 | else if ( |
|
401 | else if (outBuffer.pos < destCapacity) { | |
384 | if ( |
|
402 | if (safe_pybytes_resize(&result, outBuffer.pos)) { | |
385 | Py_ |
|
403 | Py_CLEAR(result); | |
386 | return NULL; |
|
404 | goto finally; | |
387 | } |
|
405 | } | |
388 | } |
|
406 | } | |
389 |
|
407 | |||
|
408 | finally: | |||
|
409 | PyBuffer_Release(&source); | |||
390 | return result; |
|
410 | return result; | |
391 | } |
|
411 | } | |
392 |
|
412 | |||
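The reworked decompress() dispatches on ZSTD_getFrameContentSize(): a header error raises, an empty frame returns b'' immediately, an unknown content size requires max_output_size, and a recorded size drives a single exact-sized allocation checked against outBuffer.pos. A sketch of how that behaves from Python; whether a frame records its content size depends on how it was produced, so write_content_size is passed explicitly here:

    import zstandard as zstd

    data = b"x" * 10000
    with_size = zstd.ZstdCompressor(write_content_size=True).compress(data)
    without_size = zstd.ZstdCompressor(write_content_size=False).compress(data)

    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress(with_size) == data
    # Frames lacking an embedded content size need an explicit output bound.
    assert dctx.decompress(without_size, max_output_size=len(data)) == data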
393 | PyDoc_STRVAR(Decompressor_decompressobj__doc__, |
|
413 | PyDoc_STRVAR(Decompressor_decompressobj__doc__, | |
394 | "decompressobj()\n" |
|
414 | "decompressobj([write_size=default])\n" | |
395 | "\n" |
|
415 | "\n" | |
396 | "Incrementally feed data into a decompressor.\n" |
|
416 | "Incrementally feed data into a decompressor.\n" | |
397 | "\n" |
|
417 | "\n" | |
@@ -400,25 +420,43 b' PyDoc_STRVAR(Decompressor_decompressobj_' | |||||
400 | "callers can swap in the zstd decompressor while using the same API.\n" |
|
420 | "callers can swap in the zstd decompressor while using the same API.\n" | |
401 | ); |
|
421 | ); | |
402 |
|
422 | |||
403 | static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self) { |
|
423 | static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |
404 | ZstdDecompressionObj* result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL); |
|
424 | static char* kwlist[] = { | |
|
425 | "write_size", | |||
|
426 | NULL | |||
|
427 | }; | |||
|
428 | ||||
|
429 | ZstdDecompressionObj* result = NULL; | |||
|
430 | size_t outSize = ZSTD_DStreamOutSize(); | |||
|
431 | ||||
|
432 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|k:decompressobj", kwlist, &outSize)) { | |||
|
433 | return NULL; | |||
|
434 | } | |||
|
435 | ||||
|
436 | if (!outSize) { | |||
|
437 | PyErr_SetString(PyExc_ValueError, "write_size must be positive"); | |||
|
438 | return NULL; | |||
|
439 | } | |||
|
440 | ||||
|
441 | result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL); | |||
405 | if (!result) { |
|
442 | if (!result) { | |
406 | return NULL; |
|
443 | return NULL; | |
407 | } |
|
444 | } | |
408 |
|
445 | |||
409 | if ( |
|
446 | if (ensure_dctx(self, 1)) { | |
410 | Py_DECREF(result); |
|
447 | Py_DECREF(result); | |
411 | return NULL; |
|
448 | return NULL; | |
412 | } |
|
449 | } | |
413 |
|
450 | |||
414 | result->decompressor = self; |
|
451 | result->decompressor = self; | |
415 | Py_INCREF(result->decompressor); |
|
452 | Py_INCREF(result->decompressor); | |
|
453 | result->outSize = outSize; | |||
416 |
|
454 | |||
417 | return result; |
|
455 | return result; | |
418 | } |
|
456 | } | |
419 |
|
457 | |||
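decompressobj() now accepts a write_size keyword controlling the output buffer used per call, defaulting to ZSTD_DStreamOutSize(). Sketch of the incremental API with a frame fed in two pieces:

    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b"data " * 1000)
    half = len(frame) // 2

    dctx = zstd.ZstdDecompressor()
    dobj = dctx.decompressobj(write_size=64 * 1024)
    out = dobj.decompress(frame[:half]) + dobj.decompress(frame[half:])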
420 | PyDoc_STRVAR(Decompressor_read_ |
|
458 | PyDoc_STRVAR(Decompressor_read_to_iter__doc__, | |
421 | "read_ |
|
459 | "read_to_iter(reader[, read_size=default, write_size=default, skip_bytes=0])\n" | |
422 | "Read compressed data and return an iterator\n" |
|
460 | "Read compressed data and return an iterator\n" | |
423 | "\n" |
|
461 | "\n" | |
424 | "Returns an iterator of decompressed data chunks produced from reading from\n" |
|
462 | "Returns an iterator of decompressed data chunks produced from reading from\n" | |
@@ -437,7 +475,7 b' PyDoc_STRVAR(Decompressor_read_from__doc' | |||||
437 | "the source.\n" |
|
475 | "the source.\n" | |
438 | ); |
|
476 | ); | |
439 |
|
477 | |||
440 | static ZstdDecompressorIterator* Decompressor_read_ |
|
478 | static ZstdDecompressorIterator* Decompressor_read_to_iter(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |
441 | static char* kwlist[] = { |
|
479 | static char* kwlist[] = { | |
442 | "reader", |
|
480 | "reader", | |
443 | "read_size", |
|
481 | "read_size", | |
@@ -452,7 +490,7 b' static ZstdDecompressorIterator* Decompr' | |||||
452 | ZstdDecompressorIterator* result; |
|
490 | ZstdDecompressorIterator* result; | |
453 | size_t skipBytes = 0; |
|
491 | size_t skipBytes = 0; | |
454 |
|
492 | |||
455 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_ |
|
493 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_to_iter", kwlist, | |
456 | &reader, &inSize, &outSize, &skipBytes)) { |
|
494 | &reader, &inSize, &outSize, &skipBytes)) { | |
457 | return NULL; |
|
495 | return NULL; | |
458 | } |
|
496 | } | |
@@ -474,14 +512,7 b' static ZstdDecompressorIterator* Decompr' | |||||
474 | } |
|
512 | } | |
475 | else if (1 == PyObject_CheckBuffer(reader)) { |
|
513 | else if (1 == PyObject_CheckBuffer(reader)) { | |
476 | /* Object claims it is a buffer. Try to get a handle to it. */ |
|
514 | /* Object claims it is a buffer. Try to get a handle to it. */ | |
477 | result->buffer = PyMem_Malloc(sizeof(Py_buffer)); |
|
515 | if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) { | |
478 | if (!result->buffer) { |
|
|||
479 | goto except; |
|
|||
480 | } |
|
|||
481 |
|
||||
482 | memset(result->buffer, 0, sizeof(Py_buffer)); |
|
|||
483 |
|
||||
484 | if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) { |
|
|||
485 | goto except; |
|
516 | goto except; | |
486 | } |
|
517 | } | |
487 | } |
|
518 | } | |
@@ -498,7 +529,7 b' static ZstdDecompressorIterator* Decompr' | |||||
498 | result->outSize = outSize; |
|
529 | result->outSize = outSize; | |
499 | result->skipBytes = skipBytes; |
|
530 | result->skipBytes = skipBytes; | |
500 |
|
531 | |||
501 | if ( |
|
532 | if (ensure_dctx(self, 1)) { | |
502 | goto except; |
|
533 | goto except; | |
503 | } |
|
534 | } | |
504 |
|
535 | |||
@@ -511,13 +542,6 b' static ZstdDecompressorIterator* Decompr' | |||||
511 | goto finally; |
|
542 | goto finally; | |
512 |
|
543 | |||
513 | except: |
|
544 | except: | |
514 | Py_CLEAR(result->reader); |
|
|||
515 |
|
||||
516 | if (result->buffer) { |
|
|||
517 | PyBuffer_Release(result->buffer); |
|
|||
518 | Py_CLEAR(result->buffer); |
|
|||
519 | } |
|
|||
520 |
|
||||
521 | Py_CLEAR(result); |
|
545 | Py_CLEAR(result); | |
522 |
|
546 | |||
523 | finally: |
|
547 | finally: | |
@@ -525,7 +549,62 b' finally:' | |||||
525 | return result; |
|
549 | return result; | |
526 | } |
|
550 | } | |
527 |
|
551 | |||
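read_from() is renamed to read_to_iter(); the old name stays registered as a deprecated alias in the method table further down. Iterator-style usage sketch:

    import io
    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b"data " * 10000)

    dctx = zstd.ZstdDecompressor()
    for chunk in dctx.read_to_iter(io.BytesIO(frame), read_size=8192):
        pass  # each chunk is a bytes object of decompressed output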
528 | PyDoc_STRVAR(Decompressor_ |
|
552 | PyDoc_STRVAR(Decompressor_stream_reader__doc__, | |
|
553 | "stream_reader(source, [read_size=default])\n" | |||
|
554 | "\n" | |||
|
555 | "Obtain an object that behaves like an I/O stream that can be used for\n" | |||
|
556 | "reading decompressed output from an object.\n" | |||
|
557 | "\n" | |||
|
558 | "The source object can be any object with a ``read(size)`` method or that\n" | |||
|
559 | "conforms to the buffer protocol.\n" | |||
|
560 | ); | |||
|
561 | ||||
|
562 | static ZstdDecompressionReader* Decompressor_stream_reader(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |||
|
563 | static char* kwlist[] = { | |||
|
564 | "source", | |||
|
565 | "read_size", | |||
|
566 | NULL | |||
|
567 | }; | |||
|
568 | ||||
|
569 | PyObject* source; | |||
|
570 | size_t readSize = ZSTD_DStreamInSize(); | |||
|
571 | ZstdDecompressionReader* result; | |||
|
572 | ||||
|
573 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:stream_reader", kwlist, | |||
|
574 | &source, &readSize)) { | |||
|
575 | return NULL; | |||
|
576 | } | |||
|
577 | ||||
|
578 | result = (ZstdDecompressionReader*)PyObject_CallObject((PyObject*)&ZstdDecompressionReaderType, NULL); | |||
|
579 | if (NULL == result) { | |||
|
580 | return NULL; | |||
|
581 | } | |||
|
582 | ||||
|
583 | if (PyObject_HasAttrString(source, "read")) { | |||
|
584 | result->reader = source; | |||
|
585 | Py_INCREF(source); | |||
|
586 | result->readSize = readSize; | |||
|
587 | } | |||
|
588 | else if (1 == PyObject_CheckBuffer(source)) { | |||
|
589 | if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) { | |||
|
590 | Py_CLEAR(result); | |||
|
591 | return NULL; | |||
|
592 | } | |||
|
593 | } | |||
|
594 | else { | |||
|
595 | PyErr_SetString(PyExc_TypeError, | |||
|
596 | "must pass an object with a read() method or that conforms to the buffer protocol"); | |||
|
597 | Py_CLEAR(result); | |||
|
598 | return NULL; | |||
|
599 | } | |||
|
600 | ||||
|
601 | result->decompressor = self; | |||
|
602 | Py_INCREF(self); | |||
|
603 | ||||
|
604 | return result; | |||
|
605 | } | |||
|
606 | ||||
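stream_reader() is a new API: the source may provide a read(size) method or the buffer protocol, and the returned object is consumed like a file. Usage sketch:

    import io
    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b"data " * 10000)

    dctx = zstd.ZstdDecompressor()
    with dctx.stream_reader(io.BytesIO(frame), read_size=16384) as reader:
        while True:
            chunk = reader.read(8192)
            if not chunk:
                break
            # process the decompressed chunk here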
|
607 | PyDoc_STRVAR(Decompressor_stream_writer__doc__, | |||
529 | "Create a context manager to write decompressed data to an object.\n" |
|
608 | "Create a context manager to write decompressed data to an object.\n" | |
530 | "\n" |
|
609 | "\n" | |
531 | "The passed object must have a ``write()`` method.\n" |
|
610 | "The passed object must have a ``write()`` method.\n" | |
@@ -538,7 +617,7 b' PyDoc_STRVAR(Decompressor_write_to__doc_' | |||||
538 | "streaming decompressor.\n" |
|
617 | "streaming decompressor.\n" | |
539 | ); |
|
618 | ); | |
540 |
|
619 | |||
541 | static ZstdDecompressionWriter* Decompressor_ |
|
620 | static ZstdDecompressionWriter* Decompressor_stream_writer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |
542 | static char* kwlist[] = { |
|
621 | static char* kwlist[] = { | |
543 | "writer", |
|
622 | "writer", | |
544 | "write_size", |
|
623 | "write_size", | |
@@ -549,7 +628,7 b' static ZstdDecompressionWriter* Decompre' | |||||
549 | size_t outSize = ZSTD_DStreamOutSize(); |
|
628 | size_t outSize = ZSTD_DStreamOutSize(); | |
550 | ZstdDecompressionWriter* result; |
|
629 | ZstdDecompressionWriter* result; | |
551 |
|
630 | |||
552 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k: |
|
631 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:stream_writer", kwlist, | |
553 | &writer, &outSize)) { |
|
632 | &writer, &outSize)) { | |
554 | return NULL; |
|
633 | return NULL; | |
555 | } |
|
634 | } | |
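write_to() is likewise renamed to stream_writer(), keeping the write_size keyword. Sketch of decompressing by writing compressed bytes into the wrapping writer:

    import io
    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b"data " * 10000)
    out = io.BytesIO()

    dctx = zstd.ZstdDecompressor()
    with dctx.stream_writer(out) as writer:
        writer.write(frame)
    # out now holds the decompressed bytes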
@@ -579,7 +658,7 b' PyDoc_STRVAR(Decompressor_decompress_con' | |||||
579 | "Decompress a series of chunks using the content dictionary chaining technique\n" |
|
658 | "Decompress a series of chunks using the content dictionary chaining technique\n" | |
580 | ); |
|
659 | ); | |
581 |
|
660 | |||
582 | static PyObject* Decompressor_decompress_content_dict_chain( |
|
661 | static PyObject* Decompressor_decompress_content_dict_chain(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |
583 | static char* kwlist[] = { |
|
662 | static char* kwlist[] = { | |
584 | "frames", |
|
663 | "frames", | |
585 | NULL |
|
664 | NULL | |
@@ -592,9 +671,8 b' static PyObject* Decompressor_decompress' | |||||
592 | PyObject* chunk; |
|
671 | PyObject* chunk; | |
593 | char* chunkData; |
|
672 | char* chunkData; | |
594 | Py_ssize_t chunkSize; |
|
673 | Py_ssize_t chunkSize; | |
595 | ZSTD_DCtx* dctx = NULL; |
|
|||
596 | size_t zresult; |
|
674 | size_t zresult; | |
597 | ZSTD_frame |
|
675 | ZSTD_frameHeader frameHeader; | |
598 | void* buffer1 = NULL; |
|
676 | void* buffer1 = NULL; | |
599 | size_t buffer1Size = 0; |
|
677 | size_t buffer1Size = 0; | |
600 | size_t buffer1ContentSize = 0; |
|
678 | size_t buffer1ContentSize = 0; | |
@@ -603,6 +681,8 b' static PyObject* Decompressor_decompress' | |||||
603 | size_t buffer2ContentSize = 0; |
|
681 | size_t buffer2ContentSize = 0; | |
604 | void* destBuffer = NULL; |
|
682 | void* destBuffer = NULL; | |
605 | PyObject* result = NULL; |
|
683 | PyObject* result = NULL; | |
|
684 | ZSTD_outBuffer outBuffer; | |||
|
685 | ZSTD_inBuffer inBuffer; | |||
606 |
|
686 | |||
607 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain", |
|
687 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain", | |
608 | kwlist, &PyList_Type, &chunks)) { |
|
688 | kwlist, &PyList_Type, &chunks)) { | |
@@ -624,7 +704,7 b' static PyObject* Decompressor_decompress' | |||||
624 |
|
704 | |||
625 | /* We require that all chunks be zstd frames and that they have content size set. */ |
|
705 | /* We require that all chunks be zstd frames and that they have content size set. */ | |
626 | PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); |
|
706 | PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); | |
627 | zresult = ZSTD_getFrame |
|
707 | zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize); | |
628 | if (ZSTD_isError(zresult)) { |
|
708 | if (ZSTD_isError(zresult)) { | |
629 | PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame"); |
|
709 | PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame"); | |
630 | return NULL; |
|
710 | return NULL; | |
@@ -634,32 +714,56 b' static PyObject* Decompressor_decompress' | |||||
634 | return NULL; |
|
714 | return NULL; | |
635 | } |
|
715 | } | |
636 |
|
716 | |||
637 | if ( |
|
717 | if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) { | |
638 | PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame"); |
|
718 | PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame"); | |
639 | return NULL; |
|
719 | return NULL; | |
640 | } |
|
720 | } | |
641 |
|
721 | |||
642 | dctx = ZSTD_createDCtx(); |
|
722 | assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize); | |
643 | if (!dctx) { |
|
723 | ||
644 | PyErr_NoMemory(); |
|
724 | /* We check against PY_SSIZE_T_MAX here because we ultimately cast the | |
|
725 | * result to a Python object and its length can be no greater than | |||
|
726 | * Py_ssize_t. In theory, we could have an intermediate frame that is | |||
|
727 | * larger. But a) why would this API be used for frames that large b) | |||
|
728 | * it isn't worth the complexity to support. */ | |||
|
729 | assert(SIZE_MAX >= PY_SSIZE_T_MAX); | |||
|
730 | if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) { | |||
|
731 | PyErr_SetString(PyExc_ValueError, | |||
|
732 | "chunk 0 is too large to decompress on this platform"); | |||
|
733 | return NULL; | |||
|
734 | } | |||
|
735 | ||||
|
736 | if (ensure_dctx(self, 0)) { | |||
645 | goto finally; |
|
737 | goto finally; | |
646 | } |
|
738 | } | |
647 |
|
739 | |||
648 | buffer1Size = |
|
740 | buffer1Size = (size_t)frameHeader.frameContentSize; | |
649 | buffer1 = PyMem_Malloc(buffer1Size); |
|
741 | buffer1 = PyMem_Malloc(buffer1Size); | |
650 | if (!buffer1) { |
|
742 | if (!buffer1) { | |
651 | goto finally; |
|
743 | goto finally; | |
652 | } |
|
744 | } | |
653 |
|
745 | |||
|
746 | outBuffer.dst = buffer1; | |||
|
747 | outBuffer.size = buffer1Size; | |||
|
748 | outBuffer.pos = 0; | |||
|
749 | ||||
|
750 | inBuffer.src = chunkData; | |||
|
751 | inBuffer.size = chunkSize; | |||
|
752 | inBuffer.pos = 0; | |||
|
753 | ||||
654 | Py_BEGIN_ALLOW_THREADS |
|
754 | Py_BEGIN_ALLOW_THREADS | |
655 | zresult = ZSTD_decompress |
|
755 | zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer); | |
656 | Py_END_ALLOW_THREADS |
|
756 | Py_END_ALLOW_THREADS | |
657 | if (ZSTD_isError(zresult)) { |
|
757 | if (ZSTD_isError(zresult)) { | |
658 | PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult)); |
|
758 | PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult)); | |
659 | goto finally; |
|
759 | goto finally; | |
660 | } |
|
760 | } | |
|
761 | else if (zresult) { | |||
|
762 | PyErr_Format(ZstdError, "chunk 0 did not decompress full frame"); | |||
|
763 | goto finally; | |||
|
764 | } | |||
661 |
|
765 | |||
662 | buffer1ContentSize = |
|
766 | buffer1ContentSize = outBuffer.pos; | |
663 |
|
767 | |||
664 | /* Special case of a simple chain. */ |
|
768 | /* Special case of a simple chain. */ | |
665 | if (1 == chunksLen) { |
|
769 | if (1 == chunksLen) { | |
@@ -668,7 +772,7 b' static PyObject* Decompressor_decompress' | |||||
668 | } |
|
772 | } | |
669 |
|
773 | |||
670 | /* This should ideally look at next chunk. But this is slightly simpler. */ |
|
774 | /* This should ideally look at next chunk. But this is slightly simpler. */ | |
671 | buffer2Size = |
|
775 | buffer2Size = (size_t)frameHeader.frameContentSize; | |
672 | buffer2 = PyMem_Malloc(buffer2Size); |
|
776 | buffer2 = PyMem_Malloc(buffer2Size); | |
673 | if (!buffer2) { |
|
777 | if (!buffer2) { | |
674 | goto finally; |
|
778 | goto finally; | |
@@ -688,7 +792,7 b' static PyObject* Decompressor_decompress' | |||||
688 | } |
|
792 | } | |
689 |
|
793 | |||
690 | PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); |
|
794 | PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); | |
691 | zresult = ZSTD_getFrame |
|
795 | zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize); | |
692 | if (ZSTD_isError(zresult)) { |
|
796 | if (ZSTD_isError(zresult)) { | |
693 | PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex); |
|
797 | PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex); | |
694 | goto finally; |
|
798 | goto finally; | |
@@ -698,18 +802,30 b' static PyObject* Decompressor_decompress' | |||||
698 | goto finally; |
|
802 | goto finally; | |
699 | } |
|
803 | } | |
700 |
|
804 | |||
701 | if ( |
|
805 | if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) { | |
702 | PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex); |
|
806 | PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex); | |
703 | goto finally; |
|
807 | goto finally; | |
704 | } |
|
808 | } | |
705 |
|
809 | |||
|
810 | assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize); | |||
|
811 | ||||
|
812 | if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) { | |||
|
813 | PyErr_Format(PyExc_ValueError, | |||
|
814 | "chunk %zd is too large to decompress on this platform", chunkIndex); | |||
|
815 | goto finally; | |||
|
816 | } | |||
|
817 | ||||
|
818 | inBuffer.src = chunkData; | |||
|
819 | inBuffer.size = chunkSize; | |||
|
820 | inBuffer.pos = 0; | |||
|
821 | ||||
706 | parity = chunkIndex % 2; |
|
822 | parity = chunkIndex % 2; | |
707 |
|
823 | |||
708 | /* This could definitely be abstracted to reduce code duplication. */ |
|
824 | /* This could definitely be abstracted to reduce code duplication. */ | |
709 | if (parity) { |
|
825 | if (parity) { | |
710 | /* Resize destination buffer to hold larger content. */ |
|
826 | /* Resize destination buffer to hold larger content. */ | |
711 | if (buffer2Size < frame |
|
827 | if (buffer2Size < frameHeader.frameContentSize) { | |
712 | buffer2Size = |
|
828 | buffer2Size = (size_t)frameHeader.frameContentSize; | |
713 | destBuffer = PyMem_Realloc(buffer2, buffer2Size); |
|
829 | destBuffer = PyMem_Realloc(buffer2, buffer2Size); | |
714 | if (!destBuffer) { |
|
830 | if (!destBuffer) { | |
715 | goto finally; |
|
831 | goto finally; | |
@@ -718,19 +834,38 b' static PyObject* Decompressor_decompress' | |||||
718 | } |
|
834 | } | |
719 |
|
835 | |||
720 | Py_BEGIN_ALLOW_THREADS |
|
836 | Py_BEGIN_ALLOW_THREADS | |
721 | zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size, |
|
837 | zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx, | |
722 |
|
|
838 | buffer1, buffer1ContentSize, ZSTD_dct_rawContent); | |
|
839 | Py_END_ALLOW_THREADS | |||
|
840 | if (ZSTD_isError(zresult)) { | |||
|
841 | PyErr_Format(ZstdError, | |||
|
842 | "failed to load prefix dictionary at chunk %zd", chunkIndex); | |||
|
843 | goto finally; | |||
|
844 | } | |||
|
845 | ||||
|
846 | outBuffer.dst = buffer2; | |||
|
847 | outBuffer.size = buffer2Size; | |||
|
848 | outBuffer.pos = 0; | |||
|
849 | ||||
|
850 | Py_BEGIN_ALLOW_THREADS | |||
|
851 | zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer); | |||
723 | Py_END_ALLOW_THREADS |
|
852 | Py_END_ALLOW_THREADS | |
724 | if (ZSTD_isError(zresult)) { |
|
853 | if (ZSTD_isError(zresult)) { | |
725 | PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", |
|
854 | PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", | |
726 | chunkIndex, ZSTD_getErrorName(zresult)); |
|
855 | chunkIndex, ZSTD_getErrorName(zresult)); | |
727 | goto finally; |
|
856 | goto finally; | |
728 | } |
|
857 | } | |
729 | buffer2ContentSize = zresult; |
|
858 | else if (zresult) { | |
|
859 | PyErr_Format(ZstdError, "chunk %zd did not decompress full frame", | |||
|
860 | chunkIndex); | |||
|
861 | goto finally; | |||
|
862 | } | |||
|
863 | ||||
|
864 | buffer2ContentSize = outBuffer.pos; | |||
730 | } |
|
865 | } | |
731 | else { |
|
866 | else { | |
732 | if (buffer1Size < frame |
|
867 | if (buffer1Size < frameHeader.frameContentSize) { | |
733 | buffer1Size = |
|
868 | buffer1Size = (size_t)frameHeader.frameContentSize; | |
734 | destBuffer = PyMem_Realloc(buffer1, buffer1Size); |
|
869 | destBuffer = PyMem_Realloc(buffer1, buffer1Size); | |
735 | if (!destBuffer) { |
|
870 | if (!destBuffer) { | |
736 | goto finally; |
|
871 | goto finally; | |
@@ -739,15 +874,34 b' static PyObject* Decompressor_decompress' | |||||
739 | } |
|
874 | } | |
740 |
|
875 | |||
741 | Py_BEGIN_ALLOW_THREADS |
|
876 | Py_BEGIN_ALLOW_THREADS | |
742 | zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size, |
|
877 | zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx, | |
743 |
|
|
878 | buffer2, buffer2ContentSize, ZSTD_dct_rawContent); | |
|
879 | Py_END_ALLOW_THREADS | |||
|
880 | if (ZSTD_isError(zresult)) { | |||
|
881 | PyErr_Format(ZstdError, | |||
|
882 | "failed to load prefix dictionary at chunk %zd", chunkIndex); | |||
|
883 | goto finally; | |||
|
884 | } | |||
|
885 | ||||
|
886 | outBuffer.dst = buffer1; | |||
|
887 | outBuffer.size = buffer1Size; | |||
|
888 | outBuffer.pos = 0; | |||
|
889 | ||||
|
890 | Py_BEGIN_ALLOW_THREADS | |||
|
891 | zresult = ZSTD_decompress_generic(self->dctx, &outBuffer, &inBuffer); | |||
744 | Py_END_ALLOW_THREADS |
|
892 | Py_END_ALLOW_THREADS | |
745 | if (ZSTD_isError(zresult)) { |
|
893 | if (ZSTD_isError(zresult)) { | |
746 | PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", |
|
894 | PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", | |
747 | chunkIndex, ZSTD_getErrorName(zresult)); |
|
895 | chunkIndex, ZSTD_getErrorName(zresult)); | |
748 | goto finally; |
|
896 | goto finally; | |
749 | } |
|
897 | } | |
750 | buffer1ContentSize = zresult; |
|
898 | else if (zresult) { | |
|
899 | PyErr_Format(ZstdError, "chunk %zd did not decompress full frame", | |||
|
900 | chunkIndex); | |||
|
901 | goto finally; | |||
|
902 | } | |||
|
903 | ||||
|
904 | buffer1ContentSize = outBuffer.pos; | |||
751 | } |
|
905 | } | |
752 | } |
|
906 | } | |
753 |
|
907 | |||
@@ -762,17 +916,13 b' finally:' | |||||
762 | PyMem_Free(buffer1); |
|
916 | PyMem_Free(buffer1); | |
763 | } |
|
917 | } | |
764 |
|
918 | |||
765 | if (dctx) { |
|
|||
766 | ZSTD_freeDCtx(dctx); |
|
|||
767 | } |
|
|||
768 |
|
||||
769 | return result; |
|
919 | return result; | |
770 | } |
|
920 | } | |
771 |
|
921 | |||
772 | typedef struct { |
|
922 | typedef struct { | |
773 | void* sourceData; |
|
923 | void* sourceData; | |
774 | size_t sourceSize; |
|
924 | size_t sourceSize; | |
775 |
|
|
925 | size_t destSize; | |
776 | } FramePointer; |
|
926 | } FramePointer; | |
777 |
|
927 | |||
778 | typedef struct { |
|
928 | typedef struct { | |
@@ -806,7 +956,6 b' typedef struct {' | |||||
806 |
|
956 | |||
807 | /* Compression state and settings. */ |
|
957 | /* Compression state and settings. */ | |
808 | ZSTD_DCtx* dctx; |
|
958 | ZSTD_DCtx* dctx; | |
809 | ZSTD_DDict* ddict; |
|
|||
810 | int requireOutputSizes; |
|
959 | int requireOutputSizes; | |
811 |
|
960 | |||
812 | /* Output storage. */ |
|
961 | /* Output storage. */ | |
@@ -838,6 +987,14 b' static void decompress_worker(WorkerStat' | |||||
838 | assert(0 == state->destCount); |
|
987 | assert(0 == state->destCount); | |
839 | assert(state->endOffset - state->startOffset >= 0); |
|
988 | assert(state->endOffset - state->startOffset >= 0); | |
840 |
|
989 | |||
|
990 | /* We could get here due to the way work is allocated. Ideally we wouldn't | |||
|
991 | get here. But that would require a bit of a refactor in the caller. */ | |||
|
992 | if (state->totalSourceSize > SIZE_MAX) { | |||
|
993 | state->error = WorkerError_memory; | |||
|
994 | state->errorOffset = 0; | |||
|
995 | return; | |||
|
996 | } | |||
|
997 | ||||
841 | /* |
|
998 | /* | |
842 | * We need to allocate a buffer to hold decompressed data. How we do this |
|
999 | * We need to allocate a buffer to hold decompressed data. How we do this | |
843 | * depends on what we know about the output. The following scenarios are |
|
1000 | * depends on what we know about the output. The following scenarios are | |
@@ -853,14 +1010,34 b' static void decompress_worker(WorkerStat' | |||||
853 | /* Resolve ouput segments. */ |
|
1010 | /* Resolve ouput segments. */ | |
854 | for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) { |
|
1011 | for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) { | |
855 | FramePointer* fp = &framePointers[frameIndex]; |
|
1012 | FramePointer* fp = &framePointers[frameIndex]; | |
|
1013 | unsigned long long decompressedSize; | |||
856 |
|
1014 | |||
857 | if (0 == fp->destSize) { |
|
1015 | if (0 == fp->destSize) { | |
858 |
|
|
1016 | decompressedSize = ZSTD_getFrameContentSize(fp->sourceData, fp->sourceSize); | |
859 | if (0 == fp->destSize && state->requireOutputSizes) { |
|
1017 | ||
|
1018 | if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) { | |||
860 | state->error = WorkerError_unknownSize; |
|
1019 | state->error = WorkerError_unknownSize; | |
861 | state->errorOffset = frameIndex; |
|
1020 | state->errorOffset = frameIndex; | |
862 | return; |
|
1021 | return; | |
863 | } |
|
1022 | } | |
|
1023 | else if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) { | |||
|
1024 | if (state->requireOutputSizes) { | |||
|
1025 | state->error = WorkerError_unknownSize; | |||
|
1026 | state->errorOffset = frameIndex; | |||
|
1027 | return; | |||
|
1028 | } | |||
|
1029 | ||||
|
1030 | /* This will fail the assert for .destSize > 0 below. */ | |||
|
1031 | decompressedSize = 0; | |||
|
1032 | } | |||
|
1033 | ||||
|
1034 | if (decompressedSize > SIZE_MAX) { | |||
|
1035 | state->error = WorkerError_memory; | |||
|
1036 | state->errorOffset = frameIndex; | |||
|
1037 | return; | |||
|
1038 | } | |||
|
1039 | ||||
|
1040 | fp->destSize = (size_t)decompressedSize; | |||
864 | } |
|
1041 | } | |
865 |
|
1042 | |||
866 | totalOutputSize += fp->destSize; |
|
1043 | totalOutputSize += fp->destSize; | |
@@ -878,7 +1055,7 b' static void decompress_worker(WorkerStat' | |||||
878 |
|
1055 | |||
879 | assert(framePointers[state->startOffset].destSize > 0); /* For now. */ |
|
1056 | assert(framePointers[state->startOffset].destSize > 0); /* For now. */ | |
880 |
|
1057 | |||
881 | allocationSize = roundpow2(state->totalSourceSize); |
|
1058 | allocationSize = roundpow2((size_t)state->totalSourceSize); | |
882 |
|
1059 | |||
883 | if (framePointers[state->startOffset].destSize > allocationSize) { |
|
1060 | if (framePointers[state->startOffset].destSize > allocationSize) { | |
884 | allocationSize = roundpow2(framePointers[state->startOffset].destSize); |
|
1061 | allocationSize = roundpow2(framePointers[state->startOffset].destSize); | |
@@ -902,6 +1079,8 b' static void decompress_worker(WorkerStat' | |||||
902 | destBuffer->segmentsSize = remainingItems; |
|
1079 | destBuffer->segmentsSize = remainingItems; | |
903 |
|
1080 | |||
904 | for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) { |
|
1081 | for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) { | |
|
1082 | ZSTD_outBuffer outBuffer; | |||
|
1083 | ZSTD_inBuffer inBuffer; | |||
905 | const void* source = framePointers[frameIndex].sourceData; |
|
1084 | const void* source = framePointers[frameIndex].sourceData; | |
906 | const size_t sourceSize = framePointers[frameIndex].sourceSize; |
|
1085 | const size_t sourceSize = framePointers[frameIndex].sourceSize; | |
907 | void* dest; |
|
1086 | void* dest; | |
@@ -956,7 +1135,7 b' static void decompress_worker(WorkerStat' | |||||
956 | /* Don't take any chances will non-NULL pointers. */ |
|
1135 | /* Don't take any chances will non-NULL pointers. */ | |
957 | memset(destBuffer, 0, sizeof(DestBuffer)); |
|
1136 | memset(destBuffer, 0, sizeof(DestBuffer)); | |
958 |
|
1137 | |||
959 | allocationSize = roundpow2(state->totalSourceSize); |
|
1138 | allocationSize = roundpow2((size_t)state->totalSourceSize); | |
960 |
|
1139 | |||
961 | if (decompressedSize > allocationSize) { |
|
1140 | if (decompressedSize > allocationSize) { | |
962 | allocationSize = roundpow2(decompressedSize); |
|
1141 | allocationSize = roundpow2(decompressedSize); | |
@@ -985,31 +1164,31 b' static void decompress_worker(WorkerStat' | |||||
985 |
|
1164 | |||
986 | dest = (char*)destBuffer->dest + destOffset; |
|
1165 | dest = (char*)destBuffer->dest + destOffset; | |
987 |
|
1166 | |||
988 | if (state->ddict) { |
|
1167 | outBuffer.dst = dest; | |
989 | zresult = ZSTD_decompress_usingDDict(state->dctx, dest, decompressedSize, |
|
1168 | outBuffer.size = decompressedSize; | |
990 | source, sourceSize, state->ddict); |
|
1169 | outBuffer.pos = 0; | |
991 | } |
|
|||
992 | else { |
|
|||
993 | zresult = ZSTD_decompressDCtx(state->dctx, dest, decompressedSize, |
|
|||
994 | source, sourceSize); |
|
|||
995 | } |
|
|||
996 |
|
1170 | |||
|
1171 | inBuffer.src = source; | |||
|
1172 | inBuffer.size = sourceSize; | |||
|
1173 | inBuffer.pos = 0; | |||
|
1174 | ||||
|
1175 | zresult = ZSTD_decompress_generic(state->dctx, &outBuffer, &inBuffer); | |||
997 | if (ZSTD_isError(zresult)) { |
|
1176 | if (ZSTD_isError(zresult)) { | |
998 | state->error = WorkerError_zstd; |
|
1177 | state->error = WorkerError_zstd; | |
999 | state->zresult = zresult; |
|
1178 | state->zresult = zresult; | |
1000 | state->errorOffset = frameIndex; |
|
1179 | state->errorOffset = frameIndex; | |
1001 | return; |
|
1180 | return; | |
1002 | } |
|
1181 | } | |
1003 | else if (zresult != decompressedSize) { |
|
1182 | else if (zresult || outBuffer.pos != decompressedSize) { | |
1004 | state->error = WorkerError_sizeMismatch; |
|
1183 | state->error = WorkerError_sizeMismatch; | |
1005 | state->zresult = |
|
1184 | state->zresult = outBuffer.pos; | |
1006 | state->errorOffset = frameIndex; |
|
1185 | state->errorOffset = frameIndex; | |
1007 | return; |
|
1186 | return; | |
1008 | } |
|
1187 | } | |
1009 |
|
1188 | |||
1010 | destBuffer->segments[localOffset].offset = destOffset; |
|
1189 | destBuffer->segments[localOffset].offset = destOffset; | |
1011 | destBuffer->segments[localOffset].length = |
|
1190 | destBuffer->segments[localOffset].length = outBuffer.pos; | |
1012 | destOffset += |
|
1191 | destOffset += outBuffer.pos; | |
1013 | localOffset++; |
|
1192 | localOffset++; | |
1014 | remainingItems--; |
|
1193 | remainingItems--; | |
1015 | } |
|
1194 | } | |
@@ -1027,9 +1206,7 b' static void decompress_worker(WorkerStat' | |||||
1027 | } |
|
1206 | } | |
1028 |
|
1207 | |||
1029 | ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames, |
|
1208 | ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames, | |
1030 |
|
|
1209 | Py_ssize_t threadCount) { | |
1031 | void* dictData = NULL; |
|
|||
1032 | size_t dictSize = 0; |
|
|||
1033 | Py_ssize_t i = 0; |
|
1210 | Py_ssize_t i = 0; | |
1034 | int errored = 0; |
|
1211 | int errored = 0; | |
1035 | Py_ssize_t segmentsCount; |
|
1212 | Py_ssize_t segmentsCount; | |
@@ -1039,7 +1216,7 b' ZstdBufferWithSegmentsCollection* decomp' | |||||
1039 | ZstdBufferWithSegmentsCollection* result = NULL; |
|
1216 | ZstdBufferWithSegmentsCollection* result = NULL; | |
1040 | FramePointer* framePointers = frames->frames; |
|
1217 | FramePointer* framePointers = frames->frames; | |
1041 | unsigned long long workerBytes = 0; |
|
1218 | unsigned long long workerBytes = 0; | |
1042 |
|
|
1219 | Py_ssize_t currentThread = 0; | |
1043 | Py_ssize_t workerStartOffset = 0; |
|
1220 | Py_ssize_t workerStartOffset = 0; | |
1044 | POOL_ctx* pool = NULL; |
|
1221 | POOL_ctx* pool = NULL; | |
1045 | WorkerState* workerStates = NULL; |
|
1222 | WorkerState* workerStates = NULL; | |
@@ -1049,24 +1226,14 b' ZstdBufferWithSegmentsCollection* decomp' | |||||
1049 | assert(threadCount >= 1); |
|
1226 | assert(threadCount >= 1); | |
1050 |
|
1227 | |||
1051 | /* More threads than inputs makes no sense under any conditions. */ |
|
1228 | /* More threads than inputs makes no sense under any conditions. */ | |
1052 | threadCount = frames->framesSize < threadCount ? |
|
1229 | threadCount = frames->framesSize < threadCount ? frames->framesSize | |
1053 | : threadCount; |
|
1230 | : threadCount; | |
1054 |
|
1231 | |||
1055 | /* TODO lower thread count if input size is too small and threads would just |
|
1232 | /* TODO lower thread count if input size is too small and threads would just | |
1056 | add overhead. */ |
|
1233 | add overhead. */ | |
1057 |
|
1234 | |||
1058 | if (decompressor->dict) { |
|
1235 | if (decompressor->dict) { | |
1059 | dictData = decompressor->dict->dictData; |
|
1236 | if (ensure_ddict(decompressor->dict)) { | |
1060 | dictSize = decompressor->dict->dictSize; |
|
|||
1061 | } |
|
|||
1062 |
|
||||
1063 | if (dictData && !decompressor->ddict) { |
|
|||
1064 | Py_BEGIN_ALLOW_THREADS |
|
|||
1065 | decompressor->ddict = ZSTD_createDDict_byReference(dictData, dictSize); |
|
|||
1066 | Py_END_ALLOW_THREADS |
|
|||
1067 |
|
||||
1068 | if (!decompressor->ddict) { |
|
|||
1069 | PyErr_SetString(ZstdError, "could not create decompression dict"); |
|
|||
1070 | return NULL; |
|
1237 | return NULL; | |
1071 | } |
|
1238 | } | |
1072 | } |
|
1239 | } | |
@@ -1091,7 +1258,14 b' ZstdBufferWithSegmentsCollection* decomp' | |||||
1091 |
|
1258 | |||
1092 | bytesPerWorker = frames->compressedSize / threadCount; |
|
1259 | bytesPerWorker = frames->compressedSize / threadCount; | |
1093 |
|
1260 | |||
|
1261 | if (bytesPerWorker > SIZE_MAX) { | |||
|
1262 | PyErr_SetString(ZstdError, "too much data per worker for this platform"); | |||
|
1263 | goto finally; | |||
|
1264 | } | |||
|
1265 | ||||
1094 | for (i = 0; i < threadCount; i++) { |
|
1266 | for (i = 0; i < threadCount; i++) { | |
|
1267 | size_t zresult; | |||
|
1268 | ||||
1095 | workerStates[i].dctx = ZSTD_createDCtx(); |
|
1269 | workerStates[i].dctx = ZSTD_createDCtx(); | |
1096 | if (NULL == workerStates[i].dctx) { |
|
1270 | if (NULL == workerStates[i].dctx) { | |
1097 | PyErr_NoMemory(); |
|
1271 | PyErr_NoMemory(); | |
@@ -1100,7 +1274,15 b' ZstdBufferWithSegmentsCollection* decomp' | |||||
1100 |
|
1274 | |||
1101 | ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx); |
|
1275 | ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx); | |
1102 |
|
1276 | |||
1103 |
|
|
1277 | if (decompressor->dict) { | |
|
1278 | zresult = ZSTD_DCtx_refDDict(workerStates[i].dctx, decompressor->dict->ddict); | |||
|
1279 | if (zresult) { | |||
|
1280 | PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s", | |||
|
1281 | ZSTD_getErrorName(zresult)); | |||
|
1282 | goto finally; | |||
|
1283 | } | |||
|
1284 | } | |||
|
1285 | ||||
1104 | workerStates[i].framePointers = framePointers; |
|
1286 | workerStates[i].framePointers = framePointers; | |
1105 | workerStates[i].requireOutputSizes = 1; |
|
1287 | workerStates[i].requireOutputSizes = 1; | |
1106 | } |
|
1288 | } | |
@@ -1178,7 +1360,7 b' ZstdBufferWithSegmentsCollection* decomp' | |||||
1178 | break; |
|
1360 | break; | |
1179 |
|
1361 | |||
1180 | case WorkerError_sizeMismatch: |
|
1362 | case WorkerError_sizeMismatch: | |
1181 | PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected % |
|
1363 | PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %zu", | |
1182 | workerStates[i].errorOffset, workerStates[i].zresult, |
|
1364 | workerStates[i].errorOffset, workerStates[i].zresult, | |
1183 | framePointers[workerStates[i].errorOffset].destSize); |
|
1365 | framePointers[workerStates[i].errorOffset].destSize); | |
1184 | errored = 1; |
|
1366 | errored = 1; | |
@@ -1388,9 +1570,21 b' static ZstdBufferWithSegmentsCollection*' | |||||
1388 | decompressedSize = frameSizesP[i]; |
|
1570 | decompressedSize = frameSizesP[i]; | |
1389 | } |
|
1571 | } | |
1390 |
|
1572 | |||
|
1573 | if (sourceSize > SIZE_MAX) { | |||
|
1574 | PyErr_Format(PyExc_ValueError, | |||
|
1575 | "item %zd is too large for this platform", i); | |||
|
1576 | goto finally; | |||
|
1577 | } | |||
|
1578 | ||||
|
1579 | if (decompressedSize > SIZE_MAX) { | |||
|
1580 | PyErr_Format(PyExc_ValueError, | |||
|
1581 | "decompressed size of item %zd is too large for this platform", i); | |||
|
1582 | goto finally; | |||
|
1583 | } | |||
|
1584 | ||||
1391 | framePointers[i].sourceData = sourceData; |
|
1585 | framePointers[i].sourceData = sourceData; | |
1392 | framePointers[i].sourceSize = sourceSize; |
|
1586 | framePointers[i].sourceSize = (size_t)sourceSize; | |
1393 | framePointers[i].destSize = decompressedSize; |
|
1587 | framePointers[i].destSize = (size_t)decompressedSize; | |
1394 | } |
|
1588 | } | |
1395 | } |
|
1589 | } | |
1396 | else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) { |
|
1590 | else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) { | |
@@ -1419,17 +1613,33 b' static ZstdBufferWithSegmentsCollection*' | |||||
1419 | buffer = collection->buffers[i]; |
|
1613 | buffer = collection->buffers[i]; | |
1420 |
|
1614 | |||
1421 | for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) { |
|
1615 | for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) { | |
|
1616 | unsigned long long decompressedSize = frameSizesP ? frameSizesP[offset] : 0; | |||
|
1617 | ||||
1422 | if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) { |
|
1618 | if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) { | |
1423 | PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", |
|
1619 | PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", | |
1424 | offset); |
|
1620 | offset); | |
1425 | goto finally; |
|
1621 | goto finally; | |
1426 | } |
|
1622 | } | |
1427 |
|
1623 | |||
|
1624 | if (buffer->segments[segmentIndex].length > SIZE_MAX) { | |||
|
1625 | PyErr_Format(PyExc_ValueError, | |||
|
1626 | "item %zd in buffer %zd is too large for this platform", | |||
|
1627 | segmentIndex, i); | |||
|
1628 | goto finally; | |||
|
1629 | } | |||
|
1630 | ||||
|
1631 | if (decompressedSize > SIZE_MAX) { | |||
|
1632 | PyErr_Format(PyExc_ValueError, | |||
|
1633 | "decompressed size of item %zd in buffer %zd is too large for this platform", | |||
|
1634 | segmentIndex, i); | |||
|
1635 | goto finally; | |||
|
1636 | } | |||
|
1637 | ||||
1428 | totalInputSize += buffer->segments[segmentIndex].length; |
|
1638 | totalInputSize += buffer->segments[segmentIndex].length; | |
1429 |
|
1639 | |||
1430 | framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset; |
|
1640 | framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset; | |
1431 | framePointers[offset].sourceSize = buffer->segments[segmentIndex].length; |
|
1641 | framePointers[offset].sourceSize = (size_t)buffer->segments[segmentIndex].length; | |
1432 | framePointers[offset].destSize = |
|
1642 | framePointers[offset].destSize = (size_t)decompressedSize; | |
1433 |
|
1643 | |||
1434 | offset++; |
|
1644 | offset++; | |
1435 | } |
|
1645 | } | |
@@ -1450,11 +1660,6 b' static ZstdBufferWithSegmentsCollection*' | |||||
1450 | goto finally; |
|
1660 | goto finally; | |
1451 | } |
|
1661 | } | |
1452 |
|
1662 | |||
1453 | /* |
|
|||
1454 | * It is not clear whether Py_buffer.buf is still valid after |
|
|||
1455 | * PyBuffer_Release. So, we hold a reference to all Py_buffer instances |
|
|||
1456 | * for the duration of the operation. |
|
|||
1457 | */ |
|
|||
1458 | frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer)); |
|
1663 | frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer)); | |
1459 | if (NULL == frameBuffers) { |
|
1664 | if (NULL == frameBuffers) { | |
1460 | PyErr_NoMemory(); |
|
1665 | PyErr_NoMemory(); | |
@@ -1465,6 +1670,8 b' static ZstdBufferWithSegmentsCollection*' | |||||
1465 |
|
1670 | |||
1466 | /* Do a pass to assemble info about our input buffers and output sizes. */ |
|
1671 | /* Do a pass to assemble info about our input buffers and output sizes. */ | |
1467 | for (i = 0; i < frameCount; i++) { |
|
1672 | for (i = 0; i < frameCount; i++) { | |
|
1673 | unsigned long long decompressedSize = frameSizesP ? frameSizesP[i] : 0; | |||
|
1674 | ||||
1468 | if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i), |
|
1675 | if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i), | |
1469 | &frameBuffers[i], PyBUF_CONTIG_RO)) { |
|
1676 | &frameBuffers[i], PyBUF_CONTIG_RO)) { | |
1470 | PyErr_Clear(); |
|
1677 | PyErr_Clear(); | |
@@ -1472,11 +1679,17 b' static ZstdBufferWithSegmentsCollection*' | |||||
1472 | goto finally; |
|
1679 | goto finally; | |
1473 | } |
|
1680 | } | |
1474 |
|
1681 | |||
|
1682 | if (decompressedSize > SIZE_MAX) { | |||
|
1683 | PyErr_Format(PyExc_ValueError, | |||
|
1684 | "decompressed size of item %zd is too large for this platform", i); | |||
|
1685 | goto finally; | |||
|
1686 | } | |||
|
1687 | ||||
1475 | totalInputSize += frameBuffers[i].len; |
|
1688 | totalInputSize += frameBuffers[i].len; | |
1476 |
|
1689 | |||
1477 | framePointers[i].sourceData = frameBuffers[i].buf; |
|
1690 | framePointers[i].sourceData = frameBuffers[i].buf; | |
1478 | framePointers[i].sourceSize = frameBuffers[i].len; |
|
1691 | framePointers[i].sourceSize = frameBuffers[i].len; | |
1479 | framePointers[i].destSize = |
|
1692 | framePointers[i].destSize = (size_t)decompressedSize; | |
1480 | } |
|
1693 | } | |
1481 | } |
|
1694 | } | |
1482 | else { |
|
1695 | else { | |
@@ -1514,16 +1727,26 b' static PyMethodDef Decompressor_methods[' | |||||
1514 | Decompressor_copy_stream__doc__ }, |
|
1727 | Decompressor_copy_stream__doc__ }, | |
1515 | { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS, |
|
1728 | { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS, | |
1516 | Decompressor_decompress__doc__ }, |
|
1729 | Decompressor_decompress__doc__ }, | |
1517 | { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_ |
|
1730 | { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_VARARGS | METH_KEYWORDS, | |
1518 | Decompressor_decompressobj__doc__ }, |
|
1731 | Decompressor_decompressobj__doc__ }, | |
1519 | { "read_ |
|
1732 | { "read_to_iter", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS, | |
1520 | Decompressor_read_ |
|
1733 | Decompressor_read_to_iter__doc__ }, | |
1521 | { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS, |
|
1734 | /* TODO Remove deprecated API */ | |
1522 | Decompressor_write_to__doc__ }, |
|
1735 | { "read_from", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS, | |
|
1736 | Decompressor_read_to_iter__doc__ }, | |||
|
1737 | { "stream_reader", (PyCFunction)Decompressor_stream_reader, | |||
|
1738 | METH_VARARGS | METH_KEYWORDS, Decompressor_stream_reader__doc__ }, | |||
|
1739 | { "stream_writer", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS, | |||
|
1740 | Decompressor_stream_writer__doc__ }, | |||
|
1741 | /* TODO remove deprecated API */ | |||
|
1742 | { "write_to", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS, | |||
|
1743 | Decompressor_stream_writer__doc__ }, | |||
1523 | { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain, |
|
1744 | { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain, | |
1524 | METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ }, |
|
1745 | METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ }, | |
1525 | { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer, |
|
1746 | { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer, | |
1526 | METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ }, |
|
1747 | METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ }, | |
|
1748 | { "memory_size", (PyCFunction)Decompressor_memory_size, METH_NOARGS, | |||
|
1749 | Decompressor_memory_size__doc__ }, | |||
1527 | { NULL, NULL } |
|
1750 | { NULL, NULL } | |
1528 | }; |
|
1751 | }; | |
1529 |
|
1752 |
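The method table now exposes the renamed APIs (read_to_iter, stream_reader, stream_writer), keeps read_from and write_to as deprecated aliases, and registers memory_size(). The multi_decompress_to_buffer() worker changes above serve frames whose content sizes are known; an illustrative sketch, where the threads keyword and the segment tobytes() accessor are as exposed by the C backend:

    import zstandard as zstd

    cctx = zstd.ZstdCompressor(write_content_size=True)
    frames = [cctx.compress(("message %d" % i).encode("ascii") * 64) for i in range(4)]

    dctx = zstd.ZstdDecompressor()
    result = dctx.multi_decompress_to_buffer(frames, threads=2)
    first = result[0].tobytes()  # decompressed bytes of the first frame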
@@ -20,10 +20,9 b' static void ZstdDecompressorIterator_dea' | |||||
20 | Py_XDECREF(self->decompressor); |
|
20 | Py_XDECREF(self->decompressor); | |
21 | Py_XDECREF(self->reader); |
|
21 | Py_XDECREF(self->reader); | |
22 |
|
22 | |||
23 | if (self->buffer) { |
|
23 | if (self->buffer.buf) { | |
24 | PyBuffer_Release(self->buffer); |
|
24 | PyBuffer_Release(&self->buffer); | |
25 | PyMem_FREE(self->buffer); |
|
25 | memset(&self->buffer, 0, sizeof(self->buffer)); | |
26 | self->buffer = NULL; |
|
|||
27 | } |
|
26 | } | |
28 |
|
27 | |||
29 | if (self->input.src) { |
|
28 | if (self->input.src) { | |
@@ -45,8 +44,6 b' static DecompressorIteratorResult read_d' | |||||
45 | DecompressorIteratorResult result; |
|
44 | DecompressorIteratorResult result; | |
46 | size_t oldInputPos = self->input.pos; |
|
45 | size_t oldInputPos = self->input.pos; | |
47 |
|
46 | |||
48 | assert(self->decompressor->dstream); |
|
|||
49 |
|
||||
50 | result.chunk = NULL; |
|
47 | result.chunk = NULL; | |
51 |
|
48 | |||
52 | chunk = PyBytes_FromStringAndSize(NULL, self->outSize); |
|
49 | chunk = PyBytes_FromStringAndSize(NULL, self->outSize); | |
@@ -60,7 +57,7 b' static DecompressorIteratorResult read_d' | |||||
60 | self->output.pos = 0; |
|
57 | self->output.pos = 0; | |
61 |
|
58 | |||
62 | Py_BEGIN_ALLOW_THREADS |
|
59 | Py_BEGIN_ALLOW_THREADS | |
63 | zresult = ZSTD_decompress |
|
60 | zresult = ZSTD_decompress_generic(self->decompressor->dctx, &self->output, &self->input); | |
64 | Py_END_ALLOW_THREADS |
|
61 | Py_END_ALLOW_THREADS | |
65 |
|
62 | |||
66 | /* We're done with the pointer. Nullify to prevent anyone from getting a |
|
63 | /* We're done with the pointer. Nullify to prevent anyone from getting a | |
@@ -86,7 +83,8 b' static DecompressorIteratorResult read_d' | |||||
86 | /* If it produced output data, return it. */ |
|
83 | /* If it produced output data, return it. */ | |
87 | if (self->output.pos) { |
|
84 | if (self->output.pos) { | |
88 | if (self->output.pos < self->outSize) { |
|
85 | if (self->output.pos < self->outSize) { | |
89 | if ( |
|
86 | if (safe_pybytes_resize(&chunk, self->output.pos)) { | |
|
87 | Py_XDECREF(chunk); | |||
90 | result.errored = 1; |
|
88 | result.errored = 1; | |
91 | return result; |
|
89 | return result; | |
92 | } |
|
90 | } | |
@@ -137,15 +135,15 b' read_from_source:' | |||||
137 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); |
|
135 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); | |
138 | } |
|
136 | } | |
139 | else { |
|
137 | else { | |
140 | assert(self->buffer |
|
138 | assert(self->buffer.buf); | |
141 |
|
139 | |||
142 | /* Only support contiguous C arrays for now */ |
|
140 | /* Only support contiguous C arrays for now */ | |
143 | assert(self->buffer |
|
141 | assert(self->buffer.strides == NULL && self->buffer.suboffsets == NULL); | |
144 | assert(self->buffer |
|
142 | assert(self->buffer.itemsize == 1); | |
145 |
|
143 | |||
146 | /* TODO avoid memcpy() below */ |
|
144 | /* TODO avoid memcpy() below */ | |
147 | readBuffer = (char *)self->buffer |
|
145 | readBuffer = (char *)self->buffer.buf + self->bufferOffset; | |
148 | bufferRemaining = self->buffer |
|
146 | bufferRemaining = self->buffer.len - self->bufferOffset; | |
149 | readSize = min(bufferRemaining, (Py_ssize_t)self->inSize); |
|
147 | readSize = min(bufferRemaining, (Py_ssize_t)self->inSize); | |
150 | self->bufferOffset += readSize; |
|
148 | self->bufferOffset += readSize; | |
151 | } |
|
149 | } |
@@ -13,50 +13,56 b' extern PyObject* ZstdError;' | |||||
13 | PyDoc_STRVAR(FrameParameters__doc__, |
|
13 | PyDoc_STRVAR(FrameParameters__doc__, | |
14 | "FrameParameters: information about a zstd frame"); |
|
14 | "FrameParameters: information about a zstd frame"); | |
15 |
|
15 | |||
16 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args) { |
|
16 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs) { | |
17 | const char* source; |
|
17 | static char* kwlist[] = { | |
18 | Py_ssize_t sourceSize; |
|
18 | "data", | |
19 | ZSTD_frameParams params; |
|
19 | NULL | |
|
20 | }; | |||
|
21 | ||||
|
22 | Py_buffer source; | |||
|
23 | ZSTD_frameHeader header; | |||
20 | FrameParametersObject* result = NULL; |
|
24 | FrameParametersObject* result = NULL; | |
21 | size_t zresult; |
|
25 | size_t zresult; | |
22 |
|
26 | |||
23 | #if PY_MAJOR_VERSION >= 3 |
|
27 | #if PY_MAJOR_VERSION >= 3 | |
24 | if (!PyArg_ParseTuple(args, "y |
|
28 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:get_frame_parameters", | |
25 | #else |
|
29 | #else | |
26 | if (!PyArg_ParseTuple(args, "s |
|
30 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:get_frame_parameters", | |
27 | #endif |
|
31 | #endif | |
28 |
|
|
32 | kwlist, &source)) { | |
29 | return NULL; |
|
33 | return NULL; | |
30 | } |
|
34 | } | |
31 |
|
35 | |||
32 | /* Needed for Python 2 to reject unicode */ |
|
36 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |
33 | if (!PyBytes_Check(PyTuple_GET_ITEM(args, 0))) { |
|
37 | PyErr_SetString(PyExc_ValueError, | |
34 | PyErr_SetString(PyExc_TypeError, "argument must be bytes"); |
|
38 | "data buffer should be contiguous and have at most one dimension"); | |
35 | return NULL; |
|
39 | goto finally; | |
36 | } |
|
40 | } | |
37 |
|
41 | |||
38 | zresult = ZSTD_getFrame |
|
42 | zresult = ZSTD_getFrameHeader(&header, source.buf, source.len); | |
39 |
|
43 | |||
40 | if (ZSTD_isError(zresult)) { |
|
44 | if (ZSTD_isError(zresult)) { | |
41 | PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult)); |
|
45 | PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult)); | |
42 | return NULL; |
|
46 | goto finally; | |
43 | } |
|
47 | } | |
44 |
|
48 | |||
45 | if (zresult) { |
|
49 | if (zresult) { | |
46 | PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult); |
|
50 | PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult); | |
47 | return NULL; |
|
51 | goto finally; | |
48 | } |
|
52 | } | |
49 |
|
53 | |||
50 | result = PyObject_New(FrameParametersObject, &FrameParametersType); |
|
54 | result = PyObject_New(FrameParametersObject, &FrameParametersType); | |
51 | if (!result) { |
|
55 | if (!result) { | |
52 | return NULL; |
|
56 | goto finally; | |
53 | } |
|
57 | } | |
54 |
|
58 | |||
55 | result->frameContentSize = |
|
59 | result->frameContentSize = header.frameContentSize; | |
56 | result->windowSize = |
|
60 | result->windowSize = header.windowSize; | |
57 | result->dictID = |
|
61 | result->dictID = header.dictID; | |
58 | result->checksumFlag = |
|
62 | result->checksumFlag = header.checksumFlag ? 1 : 0; | |
59 |
|
63 | |||
|
64 | finally: | |||
|
65 | PyBuffer_Release(&source); | |||
60 | return result; |
|
66 | return result; | |
61 | } |
|
67 | } | |
62 |
|
68 | |||
@@ -68,7 +74,7 b' static PyMemberDef FrameParameters_membe' | |||||
68 | { "content_size", T_ULONGLONG, |
|
74 | { "content_size", T_ULONGLONG, | |
69 | offsetof(FrameParametersObject, frameContentSize), READONLY, |
|
75 | offsetof(FrameParametersObject, frameContentSize), READONLY, | |
70 | "frame content size" }, |
|
76 | "frame content size" }, | |
71 | { "window_size", T_U |
|
77 | { "window_size", T_ULONGLONG, | |
72 | offsetof(FrameParametersObject, windowSize), READONLY, |
|
78 | offsetof(FrameParametersObject, windowSize), READONLY, | |
73 | "window size" }, |
|
79 | "window size" }, | |
74 | { "dict_id", T_UINT, |
|
80 | { "dict_id", T_UINT, |
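
With get_frame_parameters() now accepting any contiguous buffer via the data keyword and reading the header with ZSTD_getFrameHeader(), frame inspection from Python looks like this (a short sketch; the attribute values match what the updated tests assert later in this change):

    import zstandard as zstd

    frame = zstd.ZstdCompressor(level=1).compress(b'foobar' * 256)

    params = zstd.get_frame_parameters(frame)
    print(params.content_size)   # 1536 here; zstd.CONTENTSIZE_UNKNOWN if not written
    print(params.window_size)    # now a 64-bit value, per the T_ULONGLONG member above
    print(params.dict_id)        # 0 when no dictionary was used
    print(params.has_checksum)   # False unless the compressor wrote a checksum
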
@@ -12,12 +12,10 b'' | |||||
12 |
|
12 | |||
13 | #define ZSTD_STATIC_LINKING_ONLY |
|
13 | #define ZSTD_STATIC_LINKING_ONLY | |
14 | #define ZDICT_STATIC_LINKING_ONLY |
|
14 | #define ZDICT_STATIC_LINKING_ONLY | |
15 | #include |
|
15 | #include <zstd.h> | |
16 | #include |
|
16 | #include <zdict.h> | |
17 | #include "zdict.h" |
|
|||
18 | #include "zstdmt_compress.h" |
|
|||
19 |
|
17 | |||
20 | #define PYTHON_ZSTANDARD_VERSION "0. |
|
18 | #define PYTHON_ZSTANDARD_VERSION "0.9.0" | |
21 |
|
19 | |||
22 | typedef enum { |
|
20 | typedef enum { | |
23 | compressorobj_flush_finish, |
|
21 | compressorobj_flush_finish, | |
@@ -25,22 +23,38 b' typedef enum {' | |||||
25 | } CompressorObj_Flush; |
|
23 | } CompressorObj_Flush; | |
26 |
|
24 | |||
27 | /* |
|
25 | /* | |
28 | Represents a CompressionParameters type. |
|
26 | Represents a ZstdCompressionParameters type. | |
29 |
|
27 | |||
30 | This type |
|
28 | This type holds all the low-level compression parameters that can be set. | |
31 | */ |
|
29 | */ | |
32 | typedef struct { |
|
30 | typedef struct { | |
33 | PyObject_HEAD |
|
31 | PyObject_HEAD | |
|
32 | ZSTD_CCtx_params* params; | |||
|
33 | unsigned format; | |||
|
34 | int compressionLevel; | |||
34 | unsigned windowLog; |
|
35 | unsigned windowLog; | |
|
36 | unsigned hashLog; | |||
35 | unsigned chainLog; |
|
37 | unsigned chainLog; | |
36 | unsigned hashLog; |
|
|||
37 | unsigned searchLog; |
|
38 | unsigned searchLog; | |
38 | unsigned |
|
39 | unsigned minMatch; | |
39 | unsigned targetLength; |
|
40 | unsigned targetLength; | |
40 | ZSTD_strategy strategy; |
|
41 | unsigned compressionStrategy; | |
41 | } CompressionParametersObject; |
|
42 | unsigned contentSizeFlag; | |
|
43 | unsigned checksumFlag; | |||
|
44 | unsigned dictIDFlag; | |||
|
45 | unsigned threads; | |||
|
46 | unsigned jobSize; | |||
|
47 | unsigned overlapSizeLog; | |||
|
48 | unsigned compressLiterals; | |||
|
49 | unsigned forceMaxWindow; | |||
|
50 | unsigned enableLongDistanceMatching; | |||
|
51 | unsigned ldmHashLog; | |||
|
52 | unsigned ldmMinMatch; | |||
|
53 | unsigned ldmBucketSizeLog; | |||
|
54 | unsigned ldmHashEveryLog; | |||
|
55 | } ZstdCompressionParametersObject; | |||
42 |
|
56 | |||
43 | extern PyTypeObject CompressionParametersType; |
|
57 | extern PyTypeObject ZstdCompressionParametersType; | |
44 |
|
58 | |||
45 | /* |
|
59 | /* | |
46 | Represents a FrameParameters type. |
|
60 | Represents a FrameParameters type. | |
@@ -50,7 +64,7 b' extern PyTypeObject CompressionParameter' | |||||
50 | typedef struct { |
|
64 | typedef struct { | |
51 | PyObject_HEAD |
|
65 | PyObject_HEAD | |
52 | unsigned long long frameContentSize; |
|
66 | unsigned long long frameContentSize; | |
53 | unsigned windowSize; |
|
67 | unsigned long long windowSize; | |
54 | unsigned dictID; |
|
68 | unsigned dictID; | |
55 | char checksumFlag; |
|
69 | char checksumFlag; | |
56 | } FrameParametersObject; |
|
70 | } FrameParametersObject; | |
@@ -69,10 +83,14 b' typedef struct {' | |||||
69 | void* dictData; |
|
83 | void* dictData; | |
70 | /* Size of dictionary data. */ |
|
84 | /* Size of dictionary data. */ | |
71 | size_t dictSize; |
|
85 | size_t dictSize; | |
|
86 | ZSTD_dictContentType_e dictType; | |||
72 | /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */ |
|
87 | /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */ | |
73 | unsigned k; |
|
88 | unsigned k; | |
74 | /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */ |
|
89 | /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */ | |
75 | unsigned d; |
|
90 | unsigned d; | |
|
91 | /* Digested dictionary, suitable for reuse. */ | |||
|
92 | ZSTD_CDict* cdict; | |||
|
93 | ZSTD_DDict* ddict; | |||
76 | } ZstdCompressionDict; |
|
94 | } ZstdCompressionDict; | |
77 |
|
95 | |||
78 | extern PyTypeObject ZstdCompressionDictType; |
|
96 | extern PyTypeObject ZstdCompressionDictType; | |
@@ -83,29 +101,15 b' extern PyTypeObject ZstdCompressionDictT' | |||||
83 | typedef struct { |
|
101 | typedef struct { | |
84 | PyObject_HEAD |
|
102 | PyObject_HEAD | |
85 |
|
103 | |||
86 | /* Configured compression level. Should be always set. */ |
|
|||
87 | int compressionLevel; |
|
|||
88 | /* Number of threads to use for operations. */ |
|
104 | /* Number of threads to use for operations. */ | |
89 | unsigned int threads; |
|
105 | unsigned int threads; | |
90 | /* Pointer to compression dictionary to use. NULL if not using dictionary |
|
106 | /* Pointer to compression dictionary to use. NULL if not using dictionary | |
91 | compression. */ |
|
107 | compression. */ | |
92 | ZstdCompressionDict* dict; |
|
108 | ZstdCompressionDict* dict; | |
93 | /* Compression context to use. Populated during object construction. |
|
109 | /* Compression context to use. Populated during object construction. */ | |
94 | if using multi-threaded compression. */ |
|
|||
95 | ZSTD_CCtx* cctx; |
|
110 | ZSTD_CCtx* cctx; | |
96 | /* Multi-threaded compression context to use. Populated during object |
|
111 | /* Compression parameters in use. */ | |
97 | construction. NULL if not using multi-threaded compression. */ |
|
112 | ZSTD_CCtx_params* params; | |
98 | ZSTDMT_CCtx* mtcctx; |
|
|||
99 | /* Digest compression dictionary. NULL initially. Populated on first use. */ |
|
|||
100 | ZSTD_CDict* cdict; |
|
|||
101 | /* Low-level compression parameter control. NULL unless passed to |
|
|||
102 | constructor. Takes precedence over `compressionLevel` if defined. */ |
|
|||
103 | CompressionParametersObject* cparams; |
|
|||
104 | /* Controls zstd frame options. */ |
|
|||
105 | ZSTD_frameParameters fparams; |
|
|||
106 | /* Holds state for streaming compression. Shared across all invocation. |
|
|||
107 | Populated on first use. */ |
|
|||
108 | ZSTD_CStream* cstream; |
|
|||
109 | } ZstdCompressor; |
|
113 | } ZstdCompressor; | |
110 |
|
114 | |||
111 | extern PyTypeObject ZstdCompressorType; |
|
115 | extern PyTypeObject ZstdCompressorType; | |
@@ -125,9 +129,10 b' typedef struct {' | |||||
125 |
|
129 | |||
126 | ZstdCompressor* compressor; |
|
130 | ZstdCompressor* compressor; | |
127 | PyObject* writer; |
|
131 | PyObject* writer; | |
128 | Py_ssize_t sourceSize; |
|
132 | unsigned long long sourceSize; | |
129 | size_t outSize; |
|
133 | size_t outSize; | |
130 | int entered; |
|
134 | int entered; | |
|
135 | unsigned long long bytesCompressed; | |||
131 | } ZstdCompressionWriter; |
|
136 | } ZstdCompressionWriter; | |
132 |
|
137 | |||
133 | extern PyTypeObject ZstdCompressionWriterType; |
|
138 | extern PyTypeObject ZstdCompressionWriterType; | |
@@ -137,9 +142,8 b' typedef struct {' | |||||
137 |
|
142 | |||
138 | ZstdCompressor* compressor; |
|
143 | ZstdCompressor* compressor; | |
139 | PyObject* reader; |
|
144 | PyObject* reader; | |
140 | Py_buffer |
|
145 | Py_buffer buffer; | |
141 | Py_ssize_t bufferOffset; |
|
146 | Py_ssize_t bufferOffset; | |
142 | Py_ssize_t sourceSize; |
|
|||
143 | size_t inSize; |
|
147 | size_t inSize; | |
144 | size_t outSize; |
|
148 | size_t outSize; | |
145 |
|
149 | |||
@@ -155,11 +159,32 b' extern PyTypeObject ZstdCompressorIterat' | |||||
155 | typedef struct { |
|
159 | typedef struct { | |
156 | PyObject_HEAD |
|
160 | PyObject_HEAD | |
157 |
|
161 | |||
|
162 | ZstdCompressor* compressor; | |||
|
163 | PyObject* reader; | |||
|
164 | Py_buffer buffer; | |||
|
165 | unsigned long long sourceSize; | |||
|
166 | size_t readSize; | |||
|
167 | ||||
|
168 | int entered; | |||
|
169 | int closed; | |||
|
170 | unsigned long long bytesCompressed; | |||
|
171 | ||||
|
172 | ZSTD_inBuffer input; | |||
|
173 | ZSTD_outBuffer output; | |||
|
174 | int finishedInput; | |||
|
175 | int finishedOutput; | |||
|
176 | PyObject* readResult; | |||
|
177 | } ZstdCompressionReader; | |||
|
178 | ||||
|
179 | extern PyTypeObject ZstdCompressionReaderType; | |||
|
180 | ||||
|
181 | typedef struct { | |||
|
182 | PyObject_HEAD | |||
|
183 | ||||
158 | ZSTD_DCtx* dctx; |
|
184 | ZSTD_DCtx* dctx; | |
159 |
|
||||
160 | ZstdCompressionDict* dict; |
|
185 | ZstdCompressionDict* dict; | |
161 | ZSTD_DDict* ddict; |
|
186 | size_t maxWindowSize; | |
162 | ZSTD_DStream* dstream; |
|
187 | ZSTD_format_e format; | |
163 | } ZstdDecompressor; |
|
188 | } ZstdDecompressor; | |
164 |
|
189 | |||
165 | extern PyTypeObject ZstdDecompressorType; |
|
190 | extern PyTypeObject ZstdDecompressorType; | |
@@ -168,6 +193,7 b' typedef struct {' | |||||
168 | PyObject_HEAD |
|
193 | PyObject_HEAD | |
169 |
|
194 | |||
170 | ZstdDecompressor* decompressor; |
|
195 | ZstdDecompressor* decompressor; | |
|
196 | size_t outSize; | |||
171 | int finished; |
|
197 | int finished; | |
172 | } ZstdDecompressionObj; |
|
198 | } ZstdDecompressionObj; | |
173 |
|
199 | |||
@@ -176,6 +202,40 b' extern PyTypeObject ZstdDecompressionObj' | |||||
176 | typedef struct { |
|
202 | typedef struct { | |
177 | PyObject_HEAD |
|
203 | PyObject_HEAD | |
178 |
|
204 | |||
|
205 | /* Parent decompressor to which this object is associated. */ | |||
|
206 | ZstdDecompressor* decompressor; | |||
|
207 | /* Object to read() from (if reading from a stream). */ | |||
|
208 | PyObject* reader; | |||
|
209 | /* Size for read() operations on reader. */ | |||
|
210 | size_t readSize; | |||
|
211 | /* Buffer to read from (if reading from a buffer). */ | |||
|
212 | Py_buffer buffer; | |||
|
213 | ||||
|
214 | /* Whether the context manager is active. */ | |||
|
215 | int entered; | |||
|
216 | /* Whether we've closed the stream. */ | |||
|
217 | int closed; | |||
|
218 | ||||
|
219 | /* Number of bytes decompressed and returned to user. */ | |||
|
220 | unsigned long long bytesDecompressed; | |||
|
221 | ||||
|
222 | /* Tracks data going into decompressor. */ | |||
|
223 | ZSTD_inBuffer input; | |||
|
224 | ||||
|
225 | /* Holds output from read() operation on reader. */ | |||
|
226 | PyObject* readResult; | |||
|
227 | ||||
|
228 | /* Whether all input has been sent to the decompressor. */ | |||
|
229 | int finishedInput; | |||
|
230 | /* Whether all output has been flushed from the decompressor. */ | |||
|
231 | int finishedOutput; | |||
|
232 | } ZstdDecompressionReader; | |||
|
233 | ||||
|
234 | extern PyTypeObject ZstdDecompressionReaderType; | |||
|
235 | ||||
|
236 | typedef struct { | |||
|
237 | PyObject_HEAD | |||
|
238 | ||||
179 | ZstdDecompressor* decompressor; |
|
239 | ZstdDecompressor* decompressor; | |
180 | PyObject* writer; |
|
240 | PyObject* writer; | |
181 | size_t outSize; |
|
241 | size_t outSize; | |
@@ -189,7 +249,7 b' typedef struct {' | |||||
189 |
|
249 | |||
190 | ZstdDecompressor* decompressor; |
|
250 | ZstdDecompressor* decompressor; | |
191 | PyObject* reader; |
|
251 | PyObject* reader; | |
192 | Py_buffer |
|
252 | Py_buffer buffer; | |
193 | Py_ssize_t bufferOffset; |
|
253 | Py_ssize_t bufferOffset; | |
194 | size_t inSize; |
|
254 | size_t inSize; | |
195 | size_t outSize; |
|
255 | size_t outSize; | |
@@ -209,6 +269,9 b' typedef struct {' | |||||
209 | } DecompressorIteratorResult; |
|
269 | } DecompressorIteratorResult; | |
210 |
|
270 | |||
211 | typedef struct { |
|
271 | typedef struct { | |
|
272 | /* The public API is that these are 64-bit unsigned integers. So these can't | |||
|
273 | * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may | |||
|
274 | * be nonsensical for this platform. */ | |||
212 | unsigned long long offset; |
|
275 | unsigned long long offset; | |
213 | unsigned long long length; |
|
276 | unsigned long long length; | |
214 | } BufferSegment; |
|
277 | } BufferSegment; | |
@@ -270,16 +333,14 b' typedef struct {' | |||||
270 |
|
333 | |||
271 | extern PyTypeObject ZstdBufferWithSegmentsCollectionType; |
|
334 | extern PyTypeObject ZstdBufferWithSegmentsCollectionType; | |
272 |
|
335 | |||
273 | void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams); |
|
336 | int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value); | |
274 | CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args); |
|
337 | int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj); | |
275 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args); |
|
338 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs); | |
276 | PyObject* estimate_compression_context_size(PyObject* self, PyObject* args); |
|
339 | int ensure_ddict(ZstdCompressionDict* dict); | |
277 | int |
|
340 | int ensure_dctx(ZstdDecompressor* decompressor, int loadDict); | |
278 | int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize); |
|
|||
279 | int init_dstream(ZstdDecompressor* decompressor); |
|
|||
280 | ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs); |
|
341 | ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs); | |
281 | ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs); |
|
|||
282 | ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize); |
|
342 | ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize); | |
283 | Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*); |
|
343 | Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*); | |
284 | int cpu_count(void); |
|
344 | int cpu_count(void); | |
285 | size_t roundpow2(size_t); |
|
345 | size_t roundpow2(size_t); | |
|
346 | int safe_pybytes_resize(PyObject** obj, Py_ssize_t size); |
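
The header now centres compression configuration on ZSTD_CCtx_params: ZstdCompressionParametersObject carries every advanced knob (window/hash/chain logs, LDM, threads, job size) and ZstdCompressor simply holds a cctx plus a params object. A sketch of the resulting Python-level API, using only calls the updated test suite exercises:

    import zstandard as zstd

    # Derive explicit low-level parameters from a level, overriding a few
    # fields, then hand them to a compressor via compression_params=.
    params = zstd.ZstdCompressionParameters.from_level(3,
                                                       source_size=1048576,
                                                       threads=2)
    print(params.window_log)

    cctx = zstd.ZstdCompressor(compression_params=params)
    compressed = cctx.compress(b'data to compress' * 4096)
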
@@ -27,6 +27,11 b" SOURCES = ['zstd/%s' % p for p in (" | |||||
27 | 'compress/fse_compress.c', |
|
27 | 'compress/fse_compress.c', | |
28 | 'compress/huf_compress.c', |
|
28 | 'compress/huf_compress.c', | |
29 | 'compress/zstd_compress.c', |
|
29 | 'compress/zstd_compress.c', | |
|
30 | 'compress/zstd_double_fast.c', | |||
|
31 | 'compress/zstd_fast.c', | |||
|
32 | 'compress/zstd_lazy.c', | |||
|
33 | 'compress/zstd_ldm.c', | |||
|
34 | 'compress/zstd_opt.c', | |||
30 | 'compress/zstdmt_compress.c', |
|
35 | 'compress/zstdmt_compress.c', | |
31 | 'decompress/huf_decompress.c', |
|
36 | 'decompress/huf_decompress.c', | |
32 | 'decompress/zstd_decompress.c', |
|
37 | 'decompress/zstd_decompress.c', | |
@@ -38,7 +43,6 b" SOURCES = ['zstd/%s' % p for p in (" | |||||
38 | # Headers whose preprocessed output will be fed into cdef(). |
|
43 | # Headers whose preprocessed output will be fed into cdef(). | |
39 | HEADERS = [os.path.join(HERE, 'zstd', *p) for p in ( |
|
44 | HEADERS = [os.path.join(HERE, 'zstd', *p) for p in ( | |
40 | ('zstd.h',), |
|
45 | ('zstd.h',), | |
41 | ('compress', 'zstdmt_compress.h'), |
|
|||
42 | ('dictBuilder', 'zdict.h'), |
|
46 | ('dictBuilder', 'zdict.h'), | |
43 | )] |
|
47 | )] | |
44 |
|
48 | |||
@@ -80,7 +84,9 b' else:' | |||||
80 | def preprocess(path): |
|
84 | def preprocess(path): | |
81 | with open(path, 'rb') as fh: |
|
85 | with open(path, 'rb') as fh: | |
82 | lines = [] |
|
86 | lines = [] | |
83 |
|
|
87 | it = iter(fh) | |
|
88 | ||||
|
89 | for l in it: | |||
84 | # zstd.h includes <stddef.h>, which is also included by cffi's |
|
90 | # zstd.h includes <stddef.h>, which is also included by cffi's | |
85 | # boilerplate. This can lead to duplicate declarations. So we strip |
|
91 | # boilerplate. This can lead to duplicate declarations. So we strip | |
86 | # this include from the preprocessor invocation. |
|
92 | # this include from the preprocessor invocation. | |
@@ -137,18 +143,21 b' def normalize_output(output):' | |||||
137 |
|
143 | |||
138 |
|
144 | |||
139 | ffi = cffi.FFI() |
|
145 | ffi = cffi.FFI() | |
|
146 | # zstd.h uses a possible undefined MIN(). Define it until | |||
|
147 | # https://github.com/facebook/zstd/issues/976 is fixed. | |||
140 | # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning |
|
148 | # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning | |
141 | # when cffi uses the function. Since we statically link against zstd, even |
|
149 | # when cffi uses the function. Since we statically link against zstd, even | |
142 | # if we use the deprecated functions it shouldn't be a huge problem. |
|
150 | # if we use the deprecated functions it shouldn't be a huge problem. | |
143 | ffi.set_source('_zstd_cffi', ''' |
|
151 | ffi.set_source('_zstd_cffi', ''' | |
144 | #include "mem.h" |
|
152 | #define MIN(a,b) ((a)<(b) ? (a) : (b)) | |
145 | #define ZSTD_STATIC_LINKING_ONLY |
|
153 | #define ZSTD_STATIC_LINKING_ONLY | |
146 | #include |
|
154 | #include <zstd.h> | |
147 | #define ZDICT_STATIC_LINKING_ONLY |
|
155 | #define ZDICT_STATIC_LINKING_ONLY | |
148 | #define ZDICT_DISABLE_DEPRECATE_WARNINGS |
|
156 | #define ZDICT_DISABLE_DEPRECATE_WARNINGS | |
149 | #include |
|
157 | #include <zdict.h> | |
150 | #include "zstdmt_compress.h" |
|
158 | ''', sources=SOURCES, | |
151 |
|
|
159 | include_dirs=INCLUDE_DIRS, | |
|
160 | extra_compile_args=['-DZSTD_MULTITHREAD']) | |||
152 |
|
161 | |||
153 | DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ') |
|
162 | DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ') | |
154 |
|
163 |
@@ -5,6 +5,7 b'' | |||||
5 | # This software may be modified and distributed under the terms |
|
5 | # This software may be modified and distributed under the terms | |
6 | # of the BSD license. See the LICENSE file for details. |
|
6 | # of the BSD license. See the LICENSE file for details. | |
7 |
|
7 | |||
|
8 | import os | |||
8 | import sys |
|
9 | import sys | |
9 | from setuptools import setup |
|
10 | from setuptools import setup | |
10 |
|
11 | |||
@@ -16,14 +17,32 b' except ImportError:' | |||||
16 | import setup_zstd |
|
17 | import setup_zstd | |
17 |
|
18 | |||
18 | SUPPORT_LEGACY = False |
|
19 | SUPPORT_LEGACY = False | |
|
20 | SYSTEM_ZSTD = False | |||
|
21 | WARNINGS_AS_ERRORS = False | |||
19 |
|
22 | |||
20 | if "--legacy" in sys.argv: |
|
23 | if os.environ.get('ZSTD_WARNINGS_AS_ERRORS', ''): | |
|
24 | WARNINGS_AS_ERRORS = True | |||
|
25 | ||||
|
26 | if '--legacy' in sys.argv: | |||
21 | SUPPORT_LEGACY = True |
|
27 | SUPPORT_LEGACY = True | |
22 | sys.argv.remove( |
|
28 | sys.argv.remove('--legacy') | |
|
29 | ||||
|
30 | if '--system-zstd' in sys.argv: | |||
|
31 | SYSTEM_ZSTD = True | |||
|
32 | sys.argv.remove('--system-zstd') | |||
|
33 | ||||
|
34 | if '--warnings-as-errors' in sys.argv: | |||
|
35 | WARNINGS_AS_ERRORS = True | |||
|
36 | sys.argv.remove('--warnings-as-errors') | |
23 |
|
37 | |||
24 | # Code for obtaining the Extension instance is in its own module to |
|
38 | # Code for obtaining the Extension instance is in its own module to | |
25 | # facilitate reuse in other projects. |
|
39 | # facilitate reuse in other projects. | |
26 | extensions = [setup_zstd.get_c_extension(SUPPORT_LEGACY, 'zstd')] |
|
40 | extensions = [ | |
|
41 | setup_zstd.get_c_extension(name='zstd', | |||
|
42 | support_legacy=SUPPORT_LEGACY, | |||
|
43 | system_zstd=SYSTEM_ZSTD, | |||
|
44 | warnings_as_errors=WARNINGS_AS_ERRORS), | |||
|
45 | ] | |||
27 |
|
46 | |||
28 | install_requires = [] |
|
47 | install_requires = [] | |
29 |
|
48 | |||
@@ -31,8 +50,11 b' if cffi:' | |||||
31 | import make_cffi |
|
50 | import make_cffi | |
32 | extensions.append(make_cffi.ffi.distutils_extension()) |
|
51 | extensions.append(make_cffi.ffi.distutils_extension()) | |
33 |
|
52 | |||
34 | # Need change in 1. |
|
53 | # Need change in 1.10 for ffi.from_buffer() to handle all buffer types | |
35 | install_requires.append('cffi>=1.8') |
|
54 | # (like memoryview). | |
|
55 | # Need feature in 1.11 for ffi.gc() to declare size of objects so we avoid | |||
|
56 | # garbage collection pitfalls. | |||
|
57 | install_requires.append('cffi>=1.11') | |||
36 |
|
58 | |||
37 | version = None |
|
59 | version = None | |
38 |
|
60 | |||
@@ -62,14 +84,13 b' setup(' | |||||
62 | 'Intended Audience :: Developers', |
|
84 | 'Intended Audience :: Developers', | |
63 | 'License :: OSI Approved :: BSD License', |
|
85 | 'License :: OSI Approved :: BSD License', | |
64 | 'Programming Language :: C', |
|
86 | 'Programming Language :: C', | |
65 | 'Programming Language :: Python :: 2.6', |
|
|||
66 | 'Programming Language :: Python :: 2.7', |
|
87 | 'Programming Language :: Python :: 2.7', | |
67 | 'Programming Language :: Python :: 3.3', |
|
|||
68 | 'Programming Language :: Python :: 3.4', |
|
88 | 'Programming Language :: Python :: 3.4', | |
69 | 'Programming Language :: Python :: 3.5', |
|
89 | 'Programming Language :: Python :: 3.5', | |
70 | 'Programming Language :: Python :: 3.6', |
|
90 | 'Programming Language :: Python :: 3.6', | |
71 | ], |
|
91 | ], | |
72 | keywords='zstandard zstd compression', |
|
92 | keywords='zstandard zstd compression', | |
|
93 | packages=['zstandard'], | |||
73 | ext_modules=extensions, |
|
94 | ext_modules=extensions, | |
74 | test_suite='tests', |
|
95 | test_suite='tests', | |
75 | install_requires=install_requires, |
|
96 | install_requires=install_requires, |
@@ -4,7 +4,10 b'' | |||||
4 | # This software may be modified and distributed under the terms |
|
4 | # This software may be modified and distributed under the terms | |
5 | # of the BSD license. See the LICENSE file for details. |
|
5 | # of the BSD license. See the LICENSE file for details. | |
6 |
|
6 | |||
|
7 | import distutils.ccompiler | |||
7 | import os |
|
8 | import os | |
|
9 | import sys | |||
|
10 | ||||
8 | from distutils.extension import Extension |
|
11 | from distutils.extension import Extension | |
9 |
|
12 | |||
10 |
|
13 | |||
@@ -19,6 +22,11 b" zstd_sources = ['zstd/%s' % p for p in (" | |||||
19 | 'compress/fse_compress.c', |
|
22 | 'compress/fse_compress.c', | |
20 | 'compress/huf_compress.c', |
|
23 | 'compress/huf_compress.c', | |
21 | 'compress/zstd_compress.c', |
|
24 | 'compress/zstd_compress.c', | |
|
25 | 'compress/zstd_double_fast.c', | |||
|
26 | 'compress/zstd_fast.c', | |||
|
27 | 'compress/zstd_lazy.c', | |||
|
28 | 'compress/zstd_ldm.c', | |||
|
29 | 'compress/zstd_opt.c', | |||
22 | 'compress/zstdmt_compress.c', |
|
30 | 'compress/zstdmt_compress.c', | |
23 | 'decompress/huf_decompress.c', |
|
31 | 'decompress/huf_decompress.c', | |
24 | 'decompress/zstd_decompress.c', |
|
32 | 'decompress/zstd_decompress.c', | |
@@ -41,7 +49,6 b" zstd_sources_legacy = ['zstd/%s' % p for" | |||||
41 | )] |
|
49 | )] | |
42 |
|
50 | |||
43 | zstd_includes = [ |
|
51 | zstd_includes = [ | |
44 | 'c-ext', |
|
|||
45 | 'zstd', |
|
52 | 'zstd', | |
46 | 'zstd/common', |
|
53 | 'zstd/common', | |
47 | 'zstd/compress', |
|
54 | 'zstd/compress', | |
@@ -54,7 +61,14 b' zstd_includes_legacy = [' | |||||
54 | 'zstd/legacy', |
|
61 | 'zstd/legacy', | |
55 | ] |
|
62 | ] | |
56 |
|
63 | |||
|
64 | ext_includes = [ | |||
|
65 | 'c-ext', | |||
|
66 | 'zstd/common', | |||
|
67 | ] | |||
|
68 | ||||
57 | ext_sources = [ |
|
69 | ext_sources = [ | |
|
70 | 'zstd/common/pool.c', | |||
|
71 | 'zstd/common/threading.c', | |||
58 | 'zstd.c', |
|
72 | 'zstd.c', | |
59 | 'c-ext/bufferutil.c', |
|
73 | 'c-ext/bufferutil.c', | |
60 | 'c-ext/compressiondict.c', |
|
74 | 'c-ext/compressiondict.c', | |
@@ -62,11 +76,13 b' ext_sources = [' | |||||
62 | 'c-ext/compressor.c', |
|
76 | 'c-ext/compressor.c', | |
63 | 'c-ext/compressoriterator.c', |
|
77 | 'c-ext/compressoriterator.c', | |
64 | 'c-ext/compressionparams.c', |
|
78 | 'c-ext/compressionparams.c', | |
|
79 | 'c-ext/compressionreader.c', | |||
65 | 'c-ext/compressionwriter.c', |
|
80 | 'c-ext/compressionwriter.c', | |
66 | 'c-ext/constants.c', |
|
81 | 'c-ext/constants.c', | |
67 | 'c-ext/decompressobj.c', |
|
82 | 'c-ext/decompressobj.c', | |
68 | 'c-ext/decompressor.c', |
|
83 | 'c-ext/decompressor.c', | |
69 | 'c-ext/decompressoriterator.c', |
|
84 | 'c-ext/decompressoriterator.c', | |
|
85 | 'c-ext/decompressionreader.c', | |||
70 | 'c-ext/decompressionwriter.c', |
|
86 | 'c-ext/decompressionwriter.c', | |
71 | 'c-ext/frameparams.c', |
|
87 | 'c-ext/frameparams.c', | |
72 | ] |
|
88 | ] | |
@@ -76,27 +92,67 b' zstd_depends = [' | |||||
76 | ] |
|
92 | ] | |
77 |
|
93 | |||
78 |
|
94 | |||
79 | def get_c_extension(support_legacy=False, name='zstd' |
|
95 | def get_c_extension(support_legacy=False, system_zstd=False, name='zstd', | |
|
96 | warnings_as_errors=False): | |||
80 | """Obtain a distutils.extension.Extension for the C extension.""" |
|
97 | """Obtain a distutils.extension.Extension for the C extension.""" | |
81 | root = os.path.abspath(os.path.dirname(__file__)) |
|
98 | root = os.path.abspath(os.path.dirname(__file__)) | |
82 |
|
99 | |||
83 | sources = [os.path.join(root, p) for p in |
|
100 | sources = set([os.path.join(root, p) for p in ext_sources]) | |
84 | if support_legacy: |
|
101 | if not system_zstd: | |
85 | sources. |
|
102 | sources.update([os.path.join(root, p) for p in zstd_sources]) | |
|
103 | if support_legacy: | |||
|
104 | sources.update([os.path.join(root, p) for p in zstd_sources_legacy]) | |||
|
105 | sources = list(sources) | |||
86 |
|
106 | |||
87 | include_dirs = [os.path.join(root, d) for d in |
|
107 | include_dirs = set([os.path.join(root, d) for d in ext_includes]) | |
88 | if support_legacy: |
|
108 | if not system_zstd: | |
89 | include_dirs. |
|
109 | include_dirs.update([os.path.join(root, d) for d in zstd_includes]) | |
|
110 | if support_legacy: | |||
|
111 | include_dirs.update([os.path.join(root, d) for d in zstd_includes_legacy]) | |||
|
112 | include_dirs = list(include_dirs) | |||
90 |
|
113 | |||
91 | depends = [os.path.join(root, p) for p in zstd_depends] |
|
114 | depends = [os.path.join(root, p) for p in zstd_depends] | |
92 |
|
115 | |||
|
116 | compiler = distutils.ccompiler.new_compiler() | |||
|
117 | ||||
|
118 | # Needed for MSVC. | |||
|
119 | if hasattr(compiler, 'initialize'): | |||
|
120 | compiler.initialize() | |||
|
121 | ||||
|
122 | if compiler.compiler_type == 'unix': | |||
|
123 | compiler_type = 'unix' | |||
|
124 | elif compiler.compiler_type == 'msvc': | |||
|
125 | compiler_type = 'msvc' | |||
|
126 | else: | |||
|
127 | raise Exception('unhandled compiler type: %s' % | |||
|
128 | compiler.compiler_type) | |||
|
129 | ||||
93 | extra_args = ['-DZSTD_MULTITHREAD'] |
|
130 | extra_args = ['-DZSTD_MULTITHREAD'] | |
94 |
|
131 | |||
95 | if support_legacy: |
|
132 | if not system_zstd: | |
|
133 | extra_args.append('-DZSTDLIB_VISIBILITY=') | |||
|
134 | extra_args.append('-DZDICTLIB_VISIBILITY=') | |||
|
135 | extra_args.append('-DZSTDERRORLIB_VISIBILITY=') | |||
|
136 | ||||
|
137 | if compiler_type == 'unix': | |||
|
138 | extra_args.append('-fvisibility=hidden') | |||
|
139 | ||||
|
140 | if not system_zstd and support_legacy: | |||
96 | extra_args.append('-DZSTD_LEGACY_SUPPORT=1') |
|
141 | extra_args.append('-DZSTD_LEGACY_SUPPORT=1') | |
97 |
|
142 | |||
|
143 | if warnings_as_errors: | |||
|
144 | if compiler_type == 'unix': | |||
|
145 | extra_args.append('-Werror') | |||
|
146 | elif compiler_type == 'msvc': | |||
|
147 | extra_args.append('/WX') | |||
|
148 | else: | |||
|
149 | assert False | |||
|
150 | ||||
|
151 | libraries = ['zstd'] if system_zstd else [] | |||
|
152 | ||||
98 | # TODO compile with optimizations. |
|
153 | # TODO compile with optimizations. | |
99 | return Extension(name, sources, |
|
154 | return Extension(name, sources, | |
100 | include_dirs=include_dirs, |
|
155 | include_dirs=include_dirs, | |
101 | depends=depends, |
|
156 | depends=depends, | |
102 | extra_compile_args=extra_args |
|
157 | extra_compile_args=extra_args, | |
|
158 | libraries=libraries) |
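
setup.py above already drives this helper; since the module exists to facilitate reuse in other projects, a downstream setup.py that wants to link against a system libzstd might call it roughly like this (illustrative only; example-project is a placeholder name):

    from setuptools import setup

    import setup_zstd

    # system_zstd=True skips the bundled sources and links against the
    # installed zstd library instead; warnings_as_errors maps to -Werror
    # or /WX depending on the compiler detected above.
    ext = setup_zstd.get_c_extension(name='zstd',
                                     support_legacy=False,
                                     system_zstd=True,
                                     warnings_as_errors=False)

    setup(name='example-project', ext_modules=[ext])
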
@@ -1,16 +1,48 b'' | |||||
|
1 | import imp | |||
1 | import inspect |
|
2 | import inspect | |
2 | import io |
|
3 | import io | |
3 | import os |
|
4 | import os | |
4 | import types |
|
5 | import types | |
5 |
|
6 | |||
|
7 | try: | |||
|
8 | import hypothesis | |||
|
9 | except ImportError: | |||
|
10 | hypothesis = None | |||
|
11 | ||||
6 |
|
12 | |||
7 | def make_cffi(cls): |
|
13 | def make_cffi(cls): | |
8 | """Decorator to add CFFI versions of each test method.""" |
|
14 | """Decorator to add CFFI versions of each test method.""" | |
9 |
|
15 | |||
|
16 | # The module containing this class definition should | |||
|
17 | # `import zstandard as zstd`. Otherwise things may blow up. | |||
|
18 | mod = inspect.getmodule(cls) | |||
|
19 | if not hasattr(mod, 'zstd'): | |||
|
20 | raise Exception('test module does not contain "zstd" symbol') | |||
|
21 | ||||
|
22 | if not hasattr(mod.zstd, 'backend'): | |||
|
23 | raise Exception('zstd symbol does not have "backend" attribute; did ' | |||
|
24 | 'you `import zstandard as zstd`?') | |||
|
25 | ||||
|
26 | # If `import zstandard` already chose the cffi backend, there is nothing | |||
|
27 | # for us to do: we only add the cffi variation if the default backend | |||
|
28 | # is the C extension. | |||
|
29 | if mod.zstd.backend == 'cffi': | |||
|
30 | return cls | |||
|
31 | ||||
|
32 | old_env = dict(os.environ) | |||
|
33 | os.environ['PYTHON_ZSTANDARD_IMPORT_POLICY'] = 'cffi' | |||
10 | try: |
|
34 | try: | |
11 | import zstd_cffi |
|
35 | try: | |
12 | except ImportError: |
|
36 | mod_info = imp.find_module('zstandard') | |
13 | return cls |
|
37 | mod = imp.load_module('zstandard_cffi', *mod_info) | |
|
38 | except ImportError: | |||
|
39 | return cls | |||
|
40 | finally: | |||
|
41 | os.environ.clear() | |||
|
42 | os.environ.update(old_env) | |||
|
43 | ||||
|
44 | if mod.backend != 'cffi': | |||
|
45 | raise Exception('got the zstandard %s backend instead of cffi' % mod.backend) | |||
14 |
|
46 | |||
15 | # If CFFI version is available, dynamically construct test methods |
|
47 | # If CFFI version is available, dynamically construct test methods | |
16 | # that use it. |
|
48 | # that use it. | |
@@ -29,13 +61,13 b' def make_cffi(cls):' | |||||
29 | # the function object and install it in a new attribute. |
|
61 | # the function object and install it in a new attribute. | |
30 | if isinstance(fn, types.FunctionType): |
|
62 | if isinstance(fn, types.FunctionType): | |
31 | globs = dict(fn.__globals__) |
|
63 | globs = dict(fn.__globals__) | |
32 | globs['zstd'] = |
|
64 | globs['zstd'] = mod | |
33 | new_fn = types.FunctionType(fn.__code__, globs, name, |
|
65 | new_fn = types.FunctionType(fn.__code__, globs, name, | |
34 | fn.__defaults__, fn.__closure__) |
|
66 | fn.__defaults__, fn.__closure__) | |
35 | new_method = new_fn |
|
67 | new_method = new_fn | |
36 | else: |
|
68 | else: | |
37 | globs = dict(fn.__func__.func_globals) |
|
69 | globs = dict(fn.__func__.func_globals) | |
38 | globs['zstd'] = |
|
70 | globs['zstd'] = mod | |
39 | new_fn = types.FunctionType(fn.__func__.func_code, globs, name, |
|
71 | new_fn = types.FunctionType(fn.__func__.func_code, globs, name, | |
40 | fn.__func__.func_defaults, |
|
72 | fn.__func__.func_defaults, | |
41 | fn.__func__.func_closure) |
|
73 | fn.__func__.func_closure) | |
@@ -86,3 +118,34 b' def random_input_data():' | |||||
86 | pass |
|
118 | pass | |
87 |
|
119 | |||
88 | return _source_files |
|
120 | return _source_files | |
|
121 | ||||
|
122 | ||||
|
123 | def generate_samples(): | |||
|
124 | inputs = [ | |||
|
125 | b'foo', | |||
|
126 | b'bar', | |||
|
127 | b'abcdef', | |||
|
128 | b'sometext', | |||
|
129 | b'baz', | |||
|
130 | ] | |||
|
131 | ||||
|
132 | samples = [] | |||
|
133 | ||||
|
134 | for i in range(128): | |||
|
135 | samples.append(inputs[i % 5]) | |||
|
136 | samples.append(inputs[i % 5] * (i + 3)) | |||
|
137 | samples.append(inputs[-(i % 5)] * (i + 2)) | |||
|
138 | ||||
|
139 | return samples | |||
|
140 | ||||
|
141 | ||||
|
142 | if hypothesis: | |||
|
143 | default_settings = hypothesis.settings() | |||
|
144 | hypothesis.settings.register_profile('default', default_settings) | |||
|
145 | ||||
|
146 | ci_settings = hypothesis.settings(max_examples=2500, | |||
|
147 | max_iterations=2500) | |||
|
148 | hypothesis.settings.register_profile('ci', ci_settings) | |||
|
149 | ||||
|
150 | hypothesis.settings.load_profile( | |||
|
151 | os.environ.get('HYPOTHESIS_PROFILE', 'default')) |
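
A test module that wants the cffi variants generated by this decorator therefore needs the `import zstandard as zstd` binding the code above checks for; roughly (TestExample is illustrative, the round trip mirrors the existing compressor tests):

    import unittest

    import zstandard as zstd

    from .common import make_cffi


    @make_cffi
    class TestExample(unittest.TestCase):
        def test_round_trip(self):
            cctx = zstd.ZstdCompressor(level=1)
            dctx = zstd.ZstdDecompressor()
            data = b'foo bar foobar' * 64
            self.assertEqual(dctx.decompress(cctx.compress(data)), data)

Setting HYPOTHESIS_PROFILE=ci in the environment selects the larger example budget registered above.
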
@@ -1,11 +1,7 b'' | |||||
1 | import struct |
|
1 | import struct | |
|
2 | import unittest | |||
2 |
|
3 | |||
3 | try: |
|
4 | import zstandard as zstd | |
4 | import unittest2 as unittest |
|
|||
5 | except ImportError: |
|
|||
6 | import unittest |
|
|||
7 |
|
||||
8 | import zstd |
|
|||
9 |
|
5 | |||
10 | ss = struct.Struct('=QQ') |
|
6 | ss = struct.Struct('=QQ') | |
11 |
|
7 |
@@ -2,13 +2,10 b' import hashlib' | |||||
2 | import io |
|
2 | import io | |
3 | import struct |
|
3 | import struct | |
4 | import sys |
|
4 | import sys | |
|
5 | import tarfile | |||
|
6 | import unittest | |||
5 |
|
7 | |||
6 | try: |
|
8 | import zstandard as zstd | |
7 | import unittest2 as unittest |
|
|||
8 | except ImportError: |
|
|||
9 | import unittest |
|
|||
10 |
|
||||
11 | import zstd |
|
|||
12 |
|
9 | |||
13 | from .common import ( |
|
10 | from .common import ( | |
14 | make_cffi, |
|
11 | make_cffi, | |
@@ -23,7 +20,8 b' else:' | |||||
23 |
|
20 | |||
24 |
|
21 | |||
25 | def multithreaded_chunk_size(level, source_size=0): |
|
22 | def multithreaded_chunk_size(level, source_size=0): | |
26 | params = zstd. |
|
23 | params = zstd.ZstdCompressionParameters.from_level(level, | |
|
24 | source_size=source_size) | |||
27 |
|
25 | |||
28 | return 1 << (params.window_log + 2) |
|
26 | return 1 << (params.window_log + 2) | |
29 |
|
27 | |||
@@ -32,67 +30,82 b' def multithreaded_chunk_size(level, sour' | |||||
32 | class TestCompressor(unittest.TestCase): |
|
30 | class TestCompressor(unittest.TestCase): | |
33 | def test_level_bounds(self): |
|
31 | def test_level_bounds(self): | |
34 | with self.assertRaises(ValueError): |
|
32 | with self.assertRaises(ValueError): | |
35 | zstd.ZstdCompressor(level= |
|
33 | zstd.ZstdCompressor(level=23) | |
36 |
|
34 | |||
37 | with self.assertRaises(ValueError): |
|
35 | def test_memory_size(self): | |
38 |
|
|
36 | cctx = zstd.ZstdCompressor(level=1) | |
|
37 | self.assertGreater(cctx.memory_size(), 100) | |||
39 |
|
38 | |||
40 |
|
39 | |||
41 | @make_cffi |
|
40 | @make_cffi | |
42 | class TestCompressor_compress(unittest.TestCase): |
|
41 | class TestCompressor_compress(unittest.TestCase): | |
43 | def test_multithreaded_unsupported(self): |
|
|||
44 | samples = [] |
|
|||
45 | for i in range(128): |
|
|||
46 | samples.append(b'foo' * 64) |
|
|||
47 | samples.append(b'bar' * 64) |
|
|||
48 |
|
||||
49 | d = zstd.train_dictionary(8192, samples) |
|
|||
50 |
|
||||
51 | cctx = zstd.ZstdCompressor(dict_data=d, threads=2) |
|
|||
52 |
|
||||
53 | with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both dictionaries and multi-threaded compression'): |
|
|||
54 | cctx.compress(b'foo') |
|
|||
55 |
|
||||
56 | params = zstd.get_compression_parameters(3) |
|
|||
57 | cctx = zstd.ZstdCompressor(compression_params=params, threads=2) |
|
|||
58 | with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both compression parameters and multi-threaded compression'): |
|
|||
59 | cctx.compress(b'foo') |
|
|||
60 |
|
||||
61 | def test_compress_empty(self): |
|
42 | def test_compress_empty(self): | |
62 | cctx = zstd.ZstdCompressor(level=1) |
|
43 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
63 | result = cctx.compress(b'') |
|
44 | result = cctx.compress(b'') | |
64 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
45 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
65 | params = zstd.get_frame_parameters(result) |
|
46 | params = zstd.get_frame_parameters(result) | |
66 | self.assertEqual(params.content_size, |
|
47 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
67 | self.assertEqual(params.window_size, 524288) |
|
48 | self.assertEqual(params.window_size, 524288) | |
68 | self.assertEqual(params.dict_id, 0) |
|
49 | self.assertEqual(params.dict_id, 0) | |
69 | self.assertFalse(params.has_checksum, 0) |
|
50 | self.assertFalse(params.has_checksum, 0) | |
70 |
|
51 | |||
71 | # TODO should be temporary until https://github.com/facebook/zstd/issues/506 |
|
52 | cctx = zstd.ZstdCompressor() | |
72 | # is fixed. |
|
53 | result = cctx.compress(b'') | |
73 | cctx = zstd.ZstdCompressor(write_content_size=True) |
|
54 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00') | |
74 | with self.assertRaises(ValueError): |
|
55 | params = zstd.get_frame_parameters(result) | |
75 | cctx.compress(b'') |
|
56 | self.assertEqual(params.content_size, 0) | |
|
57 | ||||
|
58 | def test_input_types(self): | |||
|
59 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |||
|
60 | expected = b'\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f' | |||
76 |
|
61 | |||
77 | cctx.compress(b'', allow_empty=True) |
|
62 | mutable_array = bytearray(3) | |
|
63 | mutable_array[:] = b'foo' | |||
|
64 | ||||
|
65 | sources = [ | |||
|
66 | memoryview(b'foo'), | |||
|
67 | bytearray(b'foo'), | |||
|
68 | mutable_array, | |||
|
69 | ] | |||
|
70 | ||||
|
71 | for source in sources: | |||
|
72 | self.assertEqual(cctx.compress(source), expected) | |||
78 |
|
73 | |||
79 | def test_compress_large(self): |
|
74 | def test_compress_large(self): | |
80 | chunks = [] |
|
75 | chunks = [] | |
81 | for i in range(255): |
|
76 | for i in range(255): | |
82 | chunks.append(struct.Struct('>B').pack(i) * 16384) |
|
77 | chunks.append(struct.Struct('>B').pack(i) * 16384) | |
83 |
|
78 | |||
84 | cctx = zstd.ZstdCompressor(level=3) |
|
79 | cctx = zstd.ZstdCompressor(level=3, write_content_size=False) | |
85 | result = cctx.compress(b''.join(chunks)) |
|
80 | result = cctx.compress(b''.join(chunks)) | |
86 | self.assertEqual(len(result), 999) |
|
81 | self.assertEqual(len(result), 999) | |
87 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') |
|
82 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') | |
88 |
|
83 | |||
89 | # This matches the test for read_ |
|
84 | # This matches the test for read_to_iter() below. | |
90 | cctx = zstd.ZstdCompressor(level=1) |
|
85 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
91 | result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o') |
|
86 | result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o') | |
92 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00' |
|
87 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00' | |
93 | b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0' |
|
88 | b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0' | |
94 | b'\x02\x09\x00\x00\x6f') |
|
89 | b'\x02\x09\x00\x00\x6f') | |
95 |
|
90 | |||
|
91 | def test_negative_level(self): | |||
|
92 | cctx = zstd.ZstdCompressor(level=-4) | |||
|
93 | result = cctx.compress(b'foo' * 256) | |||
|
94 | ||||
|
95 | def test_no_magic(self): | |||
|
96 | params = zstd.ZstdCompressionParameters.from_level( | |||
|
97 | 1, format=zstd.FORMAT_ZSTD1) | |||
|
98 | cctx = zstd.ZstdCompressor(compression_params=params) | |||
|
99 | magic = cctx.compress(b'foobar') | |||
|
100 | ||||
|
101 | params = zstd.ZstdCompressionParameters.from_level( | |||
|
102 | 1, format=zstd.FORMAT_ZSTD1_MAGICLESS) | |||
|
103 | cctx = zstd.ZstdCompressor(compression_params=params) | |||
|
104 | no_magic = cctx.compress(b'foobar') | |||
|
105 | ||||
|
106 | self.assertEqual(magic[0:4], b'\x28\xb5\x2f\xfd') | |||
|
107 | self.assertEqual(magic[4:], no_magic) | |||
|
108 | ||||
96 | def test_write_checksum(self): |
|
109 | def test_write_checksum(self): | |
97 | cctx = zstd.ZstdCompressor(level=1) |
|
110 | cctx = zstd.ZstdCompressor(level=1) | |
98 | no_checksum = cctx.compress(b'foobar') |
|
111 | no_checksum = cctx.compress(b'foobar') | |
@@ -109,15 +122,15 b' class TestCompressor_compress(unittest.T' | |||||
109 |
|
122 | |||
110 | def test_write_content_size(self): |
|
123 | def test_write_content_size(self): | |
111 | cctx = zstd.ZstdCompressor(level=1) |
|
124 | cctx = zstd.ZstdCompressor(level=1) | |
|
125 | with_size = cctx.compress(b'foobar' * 256) | |||
|
126 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |||
112 | no_size = cctx.compress(b'foobar' * 256) |
|
127 | no_size = cctx.compress(b'foobar' * 256) | |
113 | cctx = zstd.ZstdCompressor(level=1, write_content_size=True) |
|
|||
114 | with_size = cctx.compress(b'foobar' * 256) |
|
|||
115 |
|
128 | |||
116 | self.assertEqual(len(with_size), len(no_size) + 1) |
|
129 | self.assertEqual(len(with_size), len(no_size) + 1) | |
117 |
|
130 | |||
118 | no_params = zstd.get_frame_parameters(no_size) |
|
131 | no_params = zstd.get_frame_parameters(no_size) | |
119 | with_params = zstd.get_frame_parameters(with_size) |
|
132 | with_params = zstd.get_frame_parameters(with_size) | |
120 | self.assertEqual(no_params.content_size, |
|
133 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
121 | self.assertEqual(with_params.content_size, 1536) |
|
134 | self.assertEqual(with_params.content_size, 1536) | |
122 |
|
135 | |||
123 | def test_no_dict_id(self): |
|
136 | def test_no_dict_id(self): | |
@@ -140,7 +153,7 b' class TestCompressor_compress(unittest.T' | |||||
140 | no_params = zstd.get_frame_parameters(no_dict_id) |
|
153 | no_params = zstd.get_frame_parameters(no_dict_id) | |
141 | with_params = zstd.get_frame_parameters(with_dict_id) |
|
154 | with_params = zstd.get_frame_parameters(with_dict_id) | |
142 | self.assertEqual(no_params.dict_id, 0) |
|
155 | self.assertEqual(no_params.dict_id, 0) | |
143 | self.assertEqual(with_params.dict_id, 1 |
|
156 | self.assertEqual(with_params.dict_id, 1387616518) | |
144 |
|
157 | |||
145 | def test_compress_dict_multiple(self): |
|
158 | def test_compress_dict_multiple(self): | |
146 | samples = [] |
|
159 | samples = [] | |
@@ -156,6 +169,21 b' class TestCompressor_compress(unittest.T' | |||||
156 | for i in range(32): |
|
169 | for i in range(32): | |
157 | cctx.compress(b'foo bar foobar foo bar foobar') |
|
170 | cctx.compress(b'foo bar foobar foo bar foobar') | |
158 |
|
171 | |||
|
172 | def test_dict_precompute(self): | |||
|
173 | samples = [] | |||
|
174 | for i in range(128): | |||
|
175 | samples.append(b'foo' * 64) | |||
|
176 | samples.append(b'bar' * 64) | |||
|
177 | samples.append(b'foobar' * 64) | |||
|
178 | ||||
|
179 | d = zstd.train_dictionary(8192, samples) | |||
|
180 | d.precompute_compress(level=1) | |||
|
181 | ||||
|
182 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |||
|
183 | ||||
|
184 | for i in range(32): | |||
|
185 | cctx.compress(b'foo bar foobar foo bar foobar') | |||
|
186 | ||||
159 | def test_multithreaded(self): |
|
187 | def test_multithreaded(self): | |
160 | chunk_size = multithreaded_chunk_size(1) |
|
188 | chunk_size = multithreaded_chunk_size(1) | |
161 | source = b''.join([b'x' * chunk_size, b'y' * chunk_size]) |
|
189 | source = b''.join([b'x' * chunk_size, b'y' * chunk_size]) | |
@@ -171,16 +199,65 b' class TestCompressor_compress(unittest.T' | |||||
171 | dctx = zstd.ZstdDecompressor() |
|
199 | dctx = zstd.ZstdDecompressor() | |
172 | self.assertEqual(dctx.decompress(compressed), source) |
|
200 | self.assertEqual(dctx.decompress(compressed), source) | |
173 |
|
201 | |||
|
202 | def test_multithreaded_dict(self): | |||
|
203 | samples = [] | |||
|
204 | for i in range(128): | |||
|
205 | samples.append(b'foo' * 64) | |||
|
206 | samples.append(b'bar' * 64) | |||
|
207 | samples.append(b'foobar' * 64) | |||
|
208 | ||||
|
209 | d = zstd.train_dictionary(1024, samples) | |||
|
210 | ||||
|
211 | cctx = zstd.ZstdCompressor(dict_data=d, threads=2) | |||
|
212 | ||||
|
213 | result = cctx.compress(b'foo') | |||
|
214 | params = zstd.get_frame_parameters(result) | |
|
215 | self.assertEqual(params.content_size, 3) | |
|
216 | self.assertEqual(params.dict_id, d.dict_id()) | |||
|
217 | ||||
|
218 | self.assertEqual(result, | |||
|
219 | b'\x28\xb5\x2f\xfd\x23\x06\x59\xb5\x52\x03\x19\x00\x00' | |||
|
220 | b'\x66\x6f\x6f') | |||
|
221 | ||||
|
222 | def test_multithreaded_compression_params(self): | |||
|
223 | params = zstd.ZstdCompressionParameters.from_level(0, threads=2) | |||
|
224 | cctx = zstd.ZstdCompressor(compression_params=params) | |||
|
225 | ||||
|
226 | result = cctx.compress(b'foo') | |||
|
227 | params = zstd.get_frame_parameters(result) | |
|
228 | self.assertEqual(params.content_size, 3) | |
|
229 | ||||
|
230 | self.assertEqual(result, | |||
|
231 | b'\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f') | |||
|
232 | ||||
174 |
|
233 | |||
175 | @make_cffi |
|
234 | @make_cffi | |
176 | class TestCompressor_compressobj(unittest.TestCase): |
|
235 | class TestCompressor_compressobj(unittest.TestCase): | |
177 | def test_compressobj_empty(self): |
|
236 | def test_compressobj_empty(self): | |
178 | cctx = zstd.ZstdCompressor(level=1) |
|
237 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
179 | cobj = cctx.compressobj() |
|
238 | cobj = cctx.compressobj() | |
180 | self.assertEqual(cobj.compress(b''), b'') |
|
239 | self.assertEqual(cobj.compress(b''), b'') | |
181 | self.assertEqual(cobj.flush(), |
|
240 | self.assertEqual(cobj.flush(), | |
182 | b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
241 | b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
183 |
|
242 | |||
|
243 | def test_input_types(self): | |||
|
244 | expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f' | |||
|
245 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |||
|
246 | ||||
|
247 | mutable_array = bytearray(3) | |||
|
248 | mutable_array[:] = b'foo' | |||
|
249 | ||||
|
250 | sources = [ | |||
|
251 | memoryview(b'foo'), | |||
|
252 | bytearray(b'foo'), | |||
|
253 | mutable_array, | |||
|
254 | ] | |||
|
255 | ||||
|
256 | for source in sources: | |||
|
257 | cobj = cctx.compressobj() | |||
|
258 | self.assertEqual(cobj.compress(source), b'') | |||
|
259 | self.assertEqual(cobj.flush(), expected) | |||
|
260 | ||||
184 | def test_compressobj_large(self): |
|
261 | def test_compressobj_large(self): | |
185 | chunks = [] |
|
262 | chunks = [] | |
186 | for i in range(255): |
|
263 | for i in range(255): | |
@@ -194,7 +271,7 b' class TestCompressor_compressobj(unittes' | |||||
194 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') |
|
271 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') | |
195 |
|
272 | |||
196 | params = zstd.get_frame_parameters(result) |
|
273 | params = zstd.get_frame_parameters(result) | |
197 | self.assertEqual(params.content_size, |
|
274 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
198 | self.assertEqual(params.window_size, 1048576) |
|
275 | self.assertEqual(params.window_size, 1048576) | |
199 | self.assertEqual(params.dict_id, 0) |
|
276 | self.assertEqual(params.dict_id, 0) | |
200 | self.assertFalse(params.has_checksum) |
|
277 | self.assertFalse(params.has_checksum) | |
@@ -209,8 +286,8 b' class TestCompressor_compressobj(unittes' | |||||
209 |
|
286 | |||
210 | no_params = zstd.get_frame_parameters(no_checksum) |
|
287 | no_params = zstd.get_frame_parameters(no_checksum) | |
211 | with_params = zstd.get_frame_parameters(with_checksum) |
|
288 | with_params = zstd.get_frame_parameters(with_checksum) | |
212 | self.assertEqual(no_params.content_size, |
|
289 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
213 | self.assertEqual(with_params.content_size, |
|
290 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
214 | self.assertEqual(no_params.dict_id, 0) |
|
291 | self.assertEqual(no_params.dict_id, 0) | |
215 | self.assertEqual(with_params.dict_id, 0) |
|
292 | self.assertEqual(with_params.dict_id, 0) | |
216 | self.assertFalse(no_params.has_checksum) |
|
293 | self.assertFalse(no_params.has_checksum) | |
@@ -221,14 +298,14 b' class TestCompressor_compressobj(unittes' | |||||
221 | def test_write_content_size(self): |
|
298 | def test_write_content_size(self): | |
222 | cctx = zstd.ZstdCompressor(level=1) |
|
299 | cctx = zstd.ZstdCompressor(level=1) | |
223 | cobj = cctx.compressobj(size=len(b'foobar' * 256)) |
|
300 | cobj = cctx.compressobj(size=len(b'foobar' * 256)) | |
|
301 | with_size = cobj.compress(b'foobar' * 256) + cobj.flush() | |||
|
302 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |||
|
303 | cobj = cctx.compressobj(size=len(b'foobar' * 256)) | |||
224 | no_size = cobj.compress(b'foobar' * 256) + cobj.flush() |
|
304 | no_size = cobj.compress(b'foobar' * 256) + cobj.flush() | |
225 | cctx = zstd.ZstdCompressor(level=1, write_content_size=True) |
|
|||
226 | cobj = cctx.compressobj(size=len(b'foobar' * 256)) |
|
|||
227 | with_size = cobj.compress(b'foobar' * 256) + cobj.flush() |
|
|||
228 |
|
305 | |||
229 | no_params = zstd.get_frame_parameters(no_size) |
|
306 | no_params = zstd.get_frame_parameters(no_size) | |
230 | with_params = zstd.get_frame_parameters(with_size) |
|
307 | with_params = zstd.get_frame_parameters(with_size) | |
231 | self.assertEqual(no_params.content_size, |
|
308 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
232 | self.assertEqual(with_params.content_size, 1536) |
|
309 | self.assertEqual(with_params.content_size, 1536) | |
233 | self.assertEqual(no_params.dict_id, 0) |
|
310 | self.assertEqual(no_params.dict_id, 0) | |
234 | self.assertEqual(with_params.dict_id, 0) |
|
311 | self.assertEqual(with_params.dict_id, 0) | |
@@ -300,6 +377,34 b' class TestCompressor_compressobj(unittes' | |||||
300 |
|
377 | |||
301 | self.assertEqual(len(compressed), 295) |
|
378 | self.assertEqual(len(compressed), 295) | |
302 |
|
379 | |||
|
380 | def test_frame_progression(self): | |||
|
381 | cctx = zstd.ZstdCompressor() | |||
|
382 | ||||
|
383 | self.assertEqual(cctx.frame_progression(), (0, 0, 0)) | |||
|
384 | ||||
|
385 | cobj = cctx.compressobj() | |||
|
386 | ||||
|
387 | cobj.compress(b'foobar') | |||
|
388 | self.assertEqual(cctx.frame_progression(), (6, 0, 0)) | |||
|
389 | ||||
|
390 | cobj.flush() | |||
|
391 | self.assertEqual(cctx.frame_progression(), (6, 6, 15)) | |||
|
392 | ||||
|
393 | def test_bad_size(self): | |||
|
394 | cctx = zstd.ZstdCompressor() | |||
|
395 | ||||
|
396 | cobj = cctx.compressobj(size=2) | |||
|
397 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): | |||
|
398 | cobj.compress(b'foo') | |||
|
399 | ||||
|
400 | # Try another operation on this instance. | |||
|
401 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): | |||
|
402 | cobj.compress(b'aa') | |||
|
403 | ||||
|
404 | # Try another operation on the compressor. | |||
|
405 | cctx.compressobj(size=4) | |||
|
406 | cctx.compress(b'foobar') | |||
|
407 | ||||
303 |
|
408 | |||
304 | @make_cffi |
|
409 | @make_cffi | |
305 | class TestCompressor_copy_stream(unittest.TestCase): |
|
410 | class TestCompressor_copy_stream(unittest.TestCase): | |
@@ -323,7 +428,7 b' class TestCompressor_copy_stream(unittes' | |||||
323 | source = io.BytesIO() |
|
428 | source = io.BytesIO() | |
324 | dest = io.BytesIO() |
|
429 | dest = io.BytesIO() | |
325 |
|
430 | |||
326 | cctx = zstd.ZstdCompressor(level=1) |
|
431 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
327 | r, w = cctx.copy_stream(source, dest) |
|
432 | r, w = cctx.copy_stream(source, dest) | |
328 | self.assertEqual(int(r), 0) |
|
433 | self.assertEqual(int(r), 0) | |
329 | self.assertEqual(w, 9) |
|
434 | self.assertEqual(w, 9) | |
@@ -345,7 +450,7 b' class TestCompressor_copy_stream(unittes' | |||||
345 | self.assertEqual(w, 999) |
|
450 | self.assertEqual(w, 999) | |
346 |
|
451 | |||
347 | params = zstd.get_frame_parameters(dest.getvalue()) |
|
452 | params = zstd.get_frame_parameters(dest.getvalue()) | |
348 | self.assertEqual(params.content_size, |
|
453 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
349 | self.assertEqual(params.window_size, 1048576) |
|
454 | self.assertEqual(params.window_size, 1048576) | |
350 | self.assertEqual(params.dict_id, 0) |
|
455 | self.assertEqual(params.dict_id, 0) | |
351 | self.assertFalse(params.has_checksum) |
|
456 | self.assertFalse(params.has_checksum) | |
@@ -367,8 +472,8 b' class TestCompressor_copy_stream(unittes' | |||||
367 |
|
472 | |||
368 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) |
|
473 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) | |
369 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) |
|
474 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) | |
370 | self.assertEqual(no_params.content_size, |
|
475 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
371 | self.assertEqual(with_params.content_size, |
|
476 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
372 | self.assertEqual(no_params.dict_id, 0) |
|
477 | self.assertEqual(no_params.dict_id, 0) | |
373 | self.assertEqual(with_params.dict_id, 0) |
|
478 | self.assertEqual(with_params.dict_id, 0) | |
374 | self.assertFalse(no_params.has_checksum) |
|
479 | self.assertFalse(no_params.has_checksum) | |
@@ -378,12 +483,12 b' class TestCompressor_copy_stream(unittes' | |||||
378 | source = io.BytesIO(b'foobar' * 256) |
|
483 | source = io.BytesIO(b'foobar' * 256) | |
379 | no_size = io.BytesIO() |
|
484 | no_size = io.BytesIO() | |
380 |
|
485 | |||
381 | cctx = zstd.ZstdCompressor(level=1) |
|
486 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
382 | cctx.copy_stream(source, no_size) |
|
487 | cctx.copy_stream(source, no_size) | |
383 |
|
488 | |||
384 | source.seek(0) |
|
489 | source.seek(0) | |
385 | with_size = io.BytesIO() |
|
490 | with_size = io.BytesIO() | |
386 | cctx = zstd.ZstdCompressor(level=1 |
|
491 | cctx = zstd.ZstdCompressor(level=1) | |
387 | cctx.copy_stream(source, with_size) |
|
492 | cctx.copy_stream(source, with_size) | |
388 |
|
493 | |||
389 | # Source content size is unknown, so no content size written. |
|
494 | # Source content size is unknown, so no content size written. | |
@@ -400,7 +505,7 b' class TestCompressor_copy_stream(unittes' | |||||
400 |
|
505 | |||
401 | no_params = zstd.get_frame_parameters(no_size.getvalue()) |
|
506 | no_params = zstd.get_frame_parameters(no_size.getvalue()) | |
402 | with_params = zstd.get_frame_parameters(with_size.getvalue()) |
|
507 | with_params = zstd.get_frame_parameters(with_size.getvalue()) | |
403 | self.assertEqual(no_params.content_size, |
|
508 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
404 | self.assertEqual(with_params.content_size, 1536) |
|
509 | self.assertEqual(with_params.content_size, 1536) | |
405 | self.assertEqual(no_params.dict_id, 0) |
|
510 | self.assertEqual(no_params.dict_id, 0) | |
406 | self.assertEqual(with_params.dict_id, 0) |
|
511 | self.assertEqual(with_params.dict_id, 0) | |
@@ -426,19 +531,18 b' class TestCompressor_copy_stream(unittes' | |||||
426 | source.seek(0) |
|
531 | source.seek(0) | |
427 |
|
532 | |||
428 | dest = io.BytesIO() |
|
533 | dest = io.BytesIO() | |
429 | cctx = zstd.ZstdCompressor(threads=2) |
|
534 | cctx = zstd.ZstdCompressor(threads=2, write_content_size=False) | |
430 | r, w = cctx.copy_stream(source, dest) |
|
535 | r, w = cctx.copy_stream(source, dest) | |
431 | self.assertEqual(r, 3145728) |
|
536 | self.assertEqual(r, 3145728) | |
432 | self.assertEqual(w, 295) |
|
537 | self.assertEqual(w, 295) | |
433 |
|
538 | |||
434 | params = zstd.get_frame_parameters(dest.getvalue()) |
|
539 | params = zstd.get_frame_parameters(dest.getvalue()) | |
435 | self.assertEqual(params.content_size, |
|
540 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
436 | self.assertEqual(params.dict_id, 0) |
|
541 | self.assertEqual(params.dict_id, 0) | |
437 | self.assertFalse(params.has_checksum) |
|
542 | self.assertFalse(params.has_checksum) | |
438 |
|
543 | |||
439 | # Writing content size and checksum works. |
|
544 | # Writing content size and checksum works. | |
440 | cctx = zstd.ZstdCompressor(threads=2, write_c |
|
545 | cctx = zstd.ZstdCompressor(threads=2, write_checksum=True) | |
441 | write_checksum=True) |
|
|||
442 | dest = io.BytesIO() |
|
546 | dest = io.BytesIO() | |
443 | source.seek(0) |
|
547 | source.seek(0) | |
444 | cctx.copy_stream(source, dest, size=len(source.getvalue())) |
|
548 | cctx.copy_stream(source, dest, size=len(source.getvalue())) | |
@@ -448,31 +552,227 b' class TestCompressor_copy_stream(unittes' | |||||
448 | self.assertEqual(params.dict_id, 0) |
|
552 | self.assertEqual(params.dict_id, 0) | |
449 | self.assertTrue(params.has_checksum) |
|
553 | self.assertTrue(params.has_checksum) | |
450 |
|
554 | |||
|
555 | def test_bad_size(self): | |||
|
556 | source = io.BytesIO() | |||
|
557 | source.write(b'a' * 32768) | |||
|
558 | source.write(b'b' * 32768) | |||
|
559 | source.seek(0) | |||
451 |
|
560 | |||
452 | def compress(data, level): |
|
561 | dest = io.BytesIO() | |
453 | buffer = io.BytesIO() |
|
562 | ||
454 | cctx = zstd.ZstdCompressor( |
|
563 | cctx = zstd.ZstdCompressor() | |
455 | with cctx.write_to(buffer) as compressor: |
|
564 | ||
456 | compressor.write(data) |
|
565 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): | |
457 | return buffer.getvalue() |
|
566 | cctx.copy_stream(source, dest, size=42) | |
|
567 | ||||
|
568 | # Try another operation on this compressor. | |||
|
569 | source.seek(0) | |||
|
570 | dest = io.BytesIO() | |||
|
571 | cctx.copy_stream(source, dest) | |||
458 |
|
572 | |||
459 |
|
573 | |||
460 | @make_cffi |
|
574 | @make_cffi | |
461 | class TestCompressor_ |
|
575 | class TestCompressor_stream_reader(unittest.TestCase): | |
|
576 | def test_context_manager(self): | |||
|
577 | cctx = zstd.ZstdCompressor() | |||
|
578 | ||||
|
579 | reader = cctx.stream_reader(b'foo' * 60) | |||
|
580 | with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): | |||
|
581 | reader.read(10) | |||
|
582 | ||||
|
583 | with cctx.stream_reader(b'foo') as reader: | |||
|
584 | with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'): | |||
|
585 | with reader as reader2: | |||
|
586 | pass | |||
|
587 | ||||
|
588 | def test_not_implemented(self): | |||
|
589 | cctx = zstd.ZstdCompressor() | |||
|
590 | ||||
|
591 | with cctx.stream_reader(b'foo' * 60) as reader: | |||
|
592 | with self.assertRaises(io.UnsupportedOperation): | |||
|
593 | reader.readline() | |||
|
594 | ||||
|
595 | with self.assertRaises(io.UnsupportedOperation): | |||
|
596 | reader.readlines() | |||
|
597 | ||||
|
598 | # This could probably be implemented someday. | |||
|
599 | with self.assertRaises(NotImplementedError): | |||
|
600 | reader.readall() | |||
|
601 | ||||
|
602 | with self.assertRaises(io.UnsupportedOperation): | |||
|
603 | iter(reader) | |||
|
604 | ||||
|
605 | with self.assertRaises(io.UnsupportedOperation): | |||
|
606 | next(reader) | |||
|
607 | ||||
|
608 | with self.assertRaises(OSError): | |||
|
609 | reader.writelines([]) | |||
|
610 | ||||
|
611 | with self.assertRaises(OSError): | |||
|
612 | reader.write(b'foo') | |||
|
613 | ||||
|
614 | def test_constant_methods(self): | |||
|
615 | cctx = zstd.ZstdCompressor() | |||
|
616 | ||||
|
617 | with cctx.stream_reader(b'boo') as reader: | |||
|
618 | self.assertTrue(reader.readable()) | |||
|
619 | self.assertFalse(reader.writable()) | |||
|
620 | self.assertFalse(reader.seekable()) | |||
|
621 | self.assertFalse(reader.isatty()) | |||
|
622 | self.assertIsNone(reader.flush()) | |||
|
623 | ||||
|
624 | def test_read_closed(self): | |||
|
625 | cctx = zstd.ZstdCompressor() | |||
|
626 | ||||
|
627 | with cctx.stream_reader(b'foo' * 60) as reader: | |||
|
628 | reader.close() | |||
|
629 | with self.assertRaisesRegexp(ValueError, 'stream is closed'): | |||
|
630 | reader.read(10) | |||
|
631 | ||||
|
632 | def test_read_bad_size(self): | |||
|
633 | cctx = zstd.ZstdCompressor() | |||
|
634 | ||||
|
635 | with cctx.stream_reader(b'foo') as reader: | |||
|
636 | with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'): | |||
|
637 | reader.read(-1) | |||
|
638 | ||||
|
639 | with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'): | |||
|
640 | reader.read(0) | |||
|
641 | ||||
|
642 | def test_read_buffer(self): | |||
|
643 | cctx = zstd.ZstdCompressor() | |||
|
644 | ||||
|
645 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) | |||
|
646 | frame = cctx.compress(source) | |||
|
647 | ||||
|
648 | with cctx.stream_reader(source) as reader: | |||
|
649 | self.assertEqual(reader.tell(), 0) | |||
|
650 | ||||
|
651 | # We should get entire frame in one read. | |||
|
652 | result = reader.read(8192) | |||
|
653 | self.assertEqual(result, frame) | |||
|
654 | self.assertEqual(reader.tell(), len(result)) | |||
|
655 | self.assertEqual(reader.read(), b'') | |||
|
656 | self.assertEqual(reader.tell(), len(result)) | |||
|
657 | ||||
|
658 | def test_read_buffer_small_chunks(self): | |||
|
659 | cctx = zstd.ZstdCompressor() | |||
|
660 | ||||
|
661 | source = b'foo' * 60 | |||
|
662 | chunks = [] | |||
|
663 | ||||
|
664 | with cctx.stream_reader(source) as reader: | |||
|
665 | self.assertEqual(reader.tell(), 0) | |||
|
666 | ||||
|
667 | while True: | |||
|
668 | chunk = reader.read(1) | |||
|
669 | if not chunk: | |||
|
670 | break | |||
|
671 | ||||
|
672 | chunks.append(chunk) | |||
|
673 | self.assertEqual(reader.tell(), sum(map(len, chunks))) | |||
|
674 | ||||
|
675 | self.assertEqual(b''.join(chunks), cctx.compress(source)) | |||
|
676 | ||||
|
677 | def test_read_stream(self): | |||
|
678 | cctx = zstd.ZstdCompressor() | |||
|
679 | ||||
|
680 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) | |||
|
681 | frame = cctx.compress(source) | |||
|
682 | ||||
|
683 | with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: | |||
|
684 | self.assertEqual(reader.tell(), 0) | |||
|
685 | ||||
|
686 | chunk = reader.read(8192) | |||
|
687 | self.assertEqual(chunk, frame) | |||
|
688 | self.assertEqual(reader.tell(), len(chunk)) | |||
|
689 | self.assertEqual(reader.read(), b'') | |||
|
690 | self.assertEqual(reader.tell(), len(chunk)) | |||
|
691 | ||||
|
692 | def test_read_stream_small_chunks(self): | |||
|
693 | cctx = zstd.ZstdCompressor() | |||
|
694 | ||||
|
695 | source = b'foo' * 60 | |||
|
696 | chunks = [] | |||
|
697 | ||||
|
698 | with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: | |||
|
699 | self.assertEqual(reader.tell(), 0) | |||
|
700 | ||||
|
701 | while True: | |||
|
702 | chunk = reader.read(1) | |||
|
703 | if not chunk: | |||
|
704 | break | |||
|
705 | ||||
|
706 | chunks.append(chunk) | |||
|
707 | self.assertEqual(reader.tell(), sum(map(len, chunks))) | |||
|
708 | ||||
|
709 | self.assertEqual(b''.join(chunks), cctx.compress(source)) | |||
|
710 | ||||
|
711 | def test_read_after_exit(self): | |||
|
712 | cctx = zstd.ZstdCompressor() | |||
|
713 | ||||
|
714 | with cctx.stream_reader(b'foo' * 60) as reader: | |||
|
715 | while reader.read(8192): | |||
|
716 | pass | |||
|
717 | ||||
|
718 | with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): | |||
|
719 | reader.read(10) | |||
|
720 | ||||
|
721 | def test_bad_size(self): | |||
|
722 | cctx = zstd.ZstdCompressor() | |||
|
723 | ||||
|
724 | source = io.BytesIO(b'foobar') | |||
|
725 | ||||
|
726 | with cctx.stream_reader(source, size=2) as reader: | |||
|
727 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): | |||
|
728 | reader.read(10) | |||
|
729 | ||||
|
730 | # Try another compression operation. | |||
|
731 | with cctx.stream_reader(source, size=42): | |||
|
732 | pass | |||
|
733 | ||||
|
734 | ||||
|
735 | @make_cffi | |||
|
736 | class TestCompressor_stream_writer(unittest.TestCase): | |||
462 | def test_empty(self): |
|
737 | def test_empty(self): | |
463 | result = compress(b'', 1) |
|
738 | buffer = io.BytesIO() | |
|
739 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |||
|
740 | with cctx.stream_writer(buffer) as compressor: | |||
|
741 | compressor.write(b'') | |||
|
742 | ||||
|
743 | result = buffer.getvalue() | |||
464 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
744 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
465 |
|
745 | |||
466 | params = zstd.get_frame_parameters(result) |
|
746 | params = zstd.get_frame_parameters(result) | |
467 |
self.assertEqual(params.content_size, |
|
747 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
468 | self.assertEqual(params.window_size, 524288) |
|
748 | self.assertEqual(params.window_size, 524288) | |
469 | self.assertEqual(params.dict_id, 0) |
|
749 | self.assertEqual(params.dict_id, 0) | |
470 | self.assertFalse(params.has_checksum) |
|
750 | self.assertFalse(params.has_checksum) | |
471 |
|
751 | |||
|
752 | def test_input_types(self): | |||
|
753 | expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f' | |||
|
754 | cctx = zstd.ZstdCompressor(level=1) | |||
|
755 | ||||
|
756 | mutable_array = bytearray(3) | |||
|
757 | mutable_array[:] = b'foo' | |||
|
758 | ||||
|
759 | sources = [ | |||
|
760 | memoryview(b'foo'), | |||
|
761 | bytearray(b'foo'), | |||
|
762 | mutable_array, | |||
|
763 | ] | |||
|
764 | ||||
|
765 | for source in sources: | |||
|
766 | buffer = io.BytesIO() | |||
|
767 | with cctx.stream_writer(buffer) as compressor: | |||
|
768 | compressor.write(source) | |||
|
769 | ||||
|
770 | self.assertEqual(buffer.getvalue(), expected) | |||
|
771 | ||||
472 | def test_multiple_compress(self): |
|
772 | def test_multiple_compress(self): | |
473 | buffer = io.BytesIO() |
|
773 | buffer = io.BytesIO() | |
474 | cctx = zstd.ZstdCompressor(level=5) |
|
774 | cctx = zstd.ZstdCompressor(level=5) | |
475 | with cctx. |
|
775 | with cctx.stream_writer(buffer) as compressor: | |
476 | self.assertEqual(compressor.write(b'foo'), 0) |
|
776 | self.assertEqual(compressor.write(b'foo'), 0) | |
477 | self.assertEqual(compressor.write(b'bar'), 0) |
|
777 | self.assertEqual(compressor.write(b'bar'), 0) | |
478 | self.assertEqual(compressor.write(b'x' * 8192), 0) |
|
778 | self.assertEqual(compressor.write(b'x' * 8192), 0) | |
@@ -491,35 +791,40 b' class TestCompressor_write_to(unittest.T' | |||||
491 |
|
791 | |||
492 | d = zstd.train_dictionary(8192, samples) |
|
792 | d = zstd.train_dictionary(8192, samples) | |
493 |
|
793 | |||
|
794 | h = hashlib.sha1(d.as_bytes()).hexdigest() | |||
|
795 | self.assertEqual(h, '3040faa0ddc37d50e71a4dd28052cb8db5d9d027') | |||
|
796 | ||||
494 | buffer = io.BytesIO() |
|
797 | buffer = io.BytesIO() | |
495 | cctx = zstd.ZstdCompressor(level=9, dict_data=d) |
|
798 | cctx = zstd.ZstdCompressor(level=9, dict_data=d) | |
496 | with cctx. |
|
799 | with cctx.stream_writer(buffer) as compressor: | |
497 | self.assertEqual(compressor.write(b'foo'), 0) |
|
800 | self.assertEqual(compressor.write(b'foo'), 0) | |
498 | self.assertEqual(compressor.write(b'bar'), 0) |
|
801 | self.assertEqual(compressor.write(b'bar'), 0) | |
499 | self.assertEqual(compressor.write(b'foo' * 16384), |
|
802 | self.assertEqual(compressor.write(b'foo' * 16384), 0) | |
500 |
|
803 | |||
501 | compressed = buffer.getvalue() |
|
804 | compressed = buffer.getvalue() | |
502 |
|
805 | |||
503 | params = zstd.get_frame_parameters(compressed) |
|
806 | params = zstd.get_frame_parameters(compressed) | |
504 | self.assertEqual(params.content_size, |
|
807 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
505 | self.assertEqual(params.window_size, |
|
808 | self.assertEqual(params.window_size, 2097152) | |
506 | self.assertEqual(params.dict_id, d.dict_id()) |
|
809 | self.assertEqual(params.dict_id, d.dict_id()) | |
507 | self.assertFalse(params.has_checksum) |
|
810 | self.assertFalse(params.has_checksum) | |
508 |
|
811 | self.assertEqual(compressed, | ||
509 | self.assertEqual(compressed[0:32], |
|
812 | b'\x28\xb5\x2f\xfd\x03\x58\x06\x59\xb5\x52\x5d\x00' | |
510 | b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00' |
|
813 | b'\x00\x00\x02\xfc\x3d\x3f\xd9\xb0\x51\x03\x45\x89') | |
511 | b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54' |
|
|||
512 | b'\x00\x00\x18\x6f\x6f\x66\x01\x00') |
|
|||
513 |
|
||||
514 | h = hashlib.sha1(compressed).hexdigest() |
|
|||
515 | self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92') |
|
|||
516 |
|
814 | |||
517 | def test_compression_params(self): |
|
815 | def test_compression_params(self): | |
518 | params = zstd.CompressionParameters( |
|
816 | params = zstd.ZstdCompressionParameters( | |
|
817 | window_log=20, | |||
|
818 | chain_log=6, | |||
|
819 | hash_log=12, | |||
|
820 | min_match=5, | |||
|
821 | search_log=4, | |||
|
822 | target_length=10, | |||
|
823 | compression_strategy=zstd.STRATEGY_FAST) | |||
519 |
|
824 | |||
520 | buffer = io.BytesIO() |
|
825 | buffer = io.BytesIO() | |
521 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
826 | cctx = zstd.ZstdCompressor(compression_params=params) | |
522 | with cctx. |
|
827 | with cctx.stream_writer(buffer) as compressor: | |
523 | self.assertEqual(compressor.write(b'foo'), 0) |
|
828 | self.assertEqual(compressor.write(b'foo'), 0) | |
524 | self.assertEqual(compressor.write(b'bar'), 0) |
|
829 | self.assertEqual(compressor.write(b'bar'), 0) | |
525 | self.assertEqual(compressor.write(b'foobar' * 16384), 0) |
|
830 | self.assertEqual(compressor.write(b'foobar' * 16384), 0) | |
@@ -527,29 +832,29 b' class TestCompressor_write_to(unittest.T' | |||||
527 | compressed = buffer.getvalue() |
|
832 | compressed = buffer.getvalue() | |
528 |
|
833 | |||
529 | params = zstd.get_frame_parameters(compressed) |
|
834 | params = zstd.get_frame_parameters(compressed) | |
530 | self.assertEqual(params.content_size, |
|
835 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
531 | self.assertEqual(params.window_size, 1048576) |
|
836 | self.assertEqual(params.window_size, 1048576) | |
532 | self.assertEqual(params.dict_id, 0) |
|
837 | self.assertEqual(params.dict_id, 0) | |
533 | self.assertFalse(params.has_checksum) |
|
838 | self.assertFalse(params.has_checksum) | |
534 |
|
839 | |||
535 | h = hashlib.sha1(compressed).hexdigest() |
|
840 | h = hashlib.sha1(compressed).hexdigest() | |
536 | self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99') |
|
841 | self.assertEqual(h, '2a8111d72eb5004cdcecbdac37da9f26720d30ef') | |
537 |
|
842 | |||
538 | def test_write_checksum(self): |
|
843 | def test_write_checksum(self): | |
539 | no_checksum = io.BytesIO() |
|
844 | no_checksum = io.BytesIO() | |
540 | cctx = zstd.ZstdCompressor(level=1) |
|
845 | cctx = zstd.ZstdCompressor(level=1) | |
541 | with cctx. |
|
846 | with cctx.stream_writer(no_checksum) as compressor: | |
542 | self.assertEqual(compressor.write(b'foobar'), 0) |
|
847 | self.assertEqual(compressor.write(b'foobar'), 0) | |
543 |
|
848 | |||
544 | with_checksum = io.BytesIO() |
|
849 | with_checksum = io.BytesIO() | |
545 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
850 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) | |
546 | with cctx. |
|
851 | with cctx.stream_writer(with_checksum) as compressor: | |
547 | self.assertEqual(compressor.write(b'foobar'), 0) |
|
852 | self.assertEqual(compressor.write(b'foobar'), 0) | |
548 |
|
853 | |||
549 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) |
|
854 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) | |
550 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) |
|
855 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) | |
551 | self.assertEqual(no_params.content_size, |
|
856 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
552 | self.assertEqual(with_params.content_size, |
|
857 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
553 | self.assertEqual(no_params.dict_id, 0) |
|
858 | self.assertEqual(no_params.dict_id, 0) | |
554 | self.assertEqual(with_params.dict_id, 0) |
|
859 | self.assertEqual(with_params.dict_id, 0) | |
555 | self.assertFalse(no_params.has_checksum) |
|
860 | self.assertFalse(no_params.has_checksum) | |
@@ -560,13 +865,13 b' class TestCompressor_write_to(unittest.T' | |||||
560 |
|
865 | |||
561 | def test_write_content_size(self): |
|
866 | def test_write_content_size(self): | |
562 | no_size = io.BytesIO() |
|
867 | no_size = io.BytesIO() | |
563 | cctx = zstd.ZstdCompressor(level=1) |
|
868 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
564 | with cctx. |
|
869 | with cctx.stream_writer(no_size) as compressor: | |
565 | self.assertEqual(compressor.write(b'foobar' * 256), 0) |
|
870 | self.assertEqual(compressor.write(b'foobar' * 256), 0) | |
566 |
|
871 | |||
567 | with_size = io.BytesIO() |
|
872 | with_size = io.BytesIO() | |
568 | cctx = zstd.ZstdCompressor(level=1 |
|
873 | cctx = zstd.ZstdCompressor(level=1) | |
569 | with cctx. |
|
874 | with cctx.stream_writer(with_size) as compressor: | |
570 | self.assertEqual(compressor.write(b'foobar' * 256), 0) |
|
875 | self.assertEqual(compressor.write(b'foobar' * 256), 0) | |
571 |
|
876 | |||
572 | # Source size is not known in streaming mode, so header not |
|
877 | # Source size is not known in streaming mode, so header not | |
@@ -576,12 +881,12 b' class TestCompressor_write_to(unittest.T' | |||||
576 |
|
881 | |||
577 | # Declaring size will write the header. |
|
882 | # Declaring size will write the header. | |
578 | with_size = io.BytesIO() |
|
883 | with_size = io.BytesIO() | |
579 | with cctx. |
|
884 | with cctx.stream_writer(with_size, size=len(b'foobar' * 256)) as compressor: | |
580 | self.assertEqual(compressor.write(b'foobar' * 256), 0) |
|
885 | self.assertEqual(compressor.write(b'foobar' * 256), 0) | |
581 |
|
886 | |||
582 | no_params = zstd.get_frame_parameters(no_size.getvalue()) |
|
887 | no_params = zstd.get_frame_parameters(no_size.getvalue()) | |
583 | with_params = zstd.get_frame_parameters(with_size.getvalue()) |
|
888 | with_params = zstd.get_frame_parameters(with_size.getvalue()) | |
584 | self.assertEqual(no_params.content_size, |
|
889 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
585 | self.assertEqual(with_params.content_size, 1536) |
|
890 | self.assertEqual(with_params.content_size, 1536) | |
586 | self.assertEqual(no_params.dict_id, 0) |
|
891 | self.assertEqual(no_params.dict_id, 0) | |
587 | self.assertEqual(with_params.dict_id, 0) |
|
892 | self.assertEqual(with_params.dict_id, 0) | |
@@ -602,18 +907,22 b' class TestCompressor_write_to(unittest.T' | |||||
602 |
|
907 | |||
603 | with_dict_id = io.BytesIO() |
|
908 | with_dict_id = io.BytesIO() | |
604 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
909 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |
605 | with cctx. |
|
910 | with cctx.stream_writer(with_dict_id) as compressor: | |
606 | self.assertEqual(compressor.write(b'foobarfoobar'), 0) |
|
911 | self.assertEqual(compressor.write(b'foobarfoobar'), 0) | |
607 |
|
912 | |||
|
913 | self.assertEqual(with_dict_id.getvalue()[4:5], b'\x03') | |||
|
914 | ||||
608 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) |
|
915 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) | |
609 | no_dict_id = io.BytesIO() |
|
916 | no_dict_id = io.BytesIO() | |
610 | with cctx. |
|
917 | with cctx.stream_writer(no_dict_id) as compressor: | |
611 | self.assertEqual(compressor.write(b'foobarfoobar'), 0) |
|
918 | self.assertEqual(compressor.write(b'foobarfoobar'), 0) | |
612 |
|
919 | |||
|
920 | self.assertEqual(no_dict_id.getvalue()[4:5], b'\x00') | |||
|
921 | ||||
613 | no_params = zstd.get_frame_parameters(no_dict_id.getvalue()) |
|
922 | no_params = zstd.get_frame_parameters(no_dict_id.getvalue()) | |
614 | with_params = zstd.get_frame_parameters(with_dict_id.getvalue()) |
|
923 | with_params = zstd.get_frame_parameters(with_dict_id.getvalue()) | |
615 | self.assertEqual(no_params.content_size, |
|
924 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
616 | self.assertEqual(with_params.content_size, |
|
925 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
617 | self.assertEqual(no_params.dict_id, 0) |
|
926 | self.assertEqual(no_params.dict_id, 0) | |
618 | self.assertEqual(with_params.dict_id, d.dict_id()) |
|
927 | self.assertEqual(with_params.dict_id, d.dict_id()) | |
619 | self.assertFalse(no_params.has_checksum) |
|
928 | self.assertFalse(no_params.has_checksum) | |
@@ -625,7 +934,8 b' class TestCompressor_write_to(unittest.T' | |||||
625 | def test_memory_size(self): |
|
934 | def test_memory_size(self): | |
626 | cctx = zstd.ZstdCompressor(level=3) |
|
935 | cctx = zstd.ZstdCompressor(level=3) | |
627 | buffer = io.BytesIO() |
|
936 | buffer = io.BytesIO() | |
628 | with cctx. |
|
937 | with cctx.stream_writer(buffer) as compressor: | |
|
938 | compressor.write(b'foo') | |||
629 | size = compressor.memory_size() |
|
939 | size = compressor.memory_size() | |
630 |
|
940 | |||
631 | self.assertGreater(size, 100000) |
|
941 | self.assertGreater(size, 100000) | |
@@ -633,7 +943,7 b' class TestCompressor_write_to(unittest.T' | |||||
633 | def test_write_size(self): |
|
943 | def test_write_size(self): | |
634 | cctx = zstd.ZstdCompressor(level=3) |
|
944 | cctx = zstd.ZstdCompressor(level=3) | |
635 | dest = OpCountingBytesIO() |
|
945 | dest = OpCountingBytesIO() | |
636 | with cctx. |
|
946 | with cctx.stream_writer(dest, write_size=1) as compressor: | |
637 | self.assertEqual(compressor.write(b'foo'), 0) |
|
947 | self.assertEqual(compressor.write(b'foo'), 0) | |
638 | self.assertEqual(compressor.write(b'bar'), 0) |
|
948 | self.assertEqual(compressor.write(b'bar'), 0) | |
639 | self.assertEqual(compressor.write(b'foobar'), 0) |
|
949 | self.assertEqual(compressor.write(b'foobar'), 0) | |
@@ -643,7 +953,7 b' class TestCompressor_write_to(unittest.T' | |||||
643 | def test_flush_repeated(self): |
|
953 | def test_flush_repeated(self): | |
644 | cctx = zstd.ZstdCompressor(level=3) |
|
954 | cctx = zstd.ZstdCompressor(level=3) | |
645 | dest = OpCountingBytesIO() |
|
955 | dest = OpCountingBytesIO() | |
646 | with cctx. |
|
956 | with cctx.stream_writer(dest) as compressor: | |
647 | self.assertEqual(compressor.write(b'foo'), 0) |
|
957 | self.assertEqual(compressor.write(b'foo'), 0) | |
648 | self.assertEqual(dest._write_count, 0) |
|
958 | self.assertEqual(dest._write_count, 0) | |
649 | self.assertEqual(compressor.flush(), 12) |
|
959 | self.assertEqual(compressor.flush(), 12) | |
@@ -659,7 +969,7 b' class TestCompressor_write_to(unittest.T' | |||||
659 | def test_flush_empty_block(self): |
|
969 | def test_flush_empty_block(self): | |
660 | cctx = zstd.ZstdCompressor(level=3, write_checksum=True) |
|
970 | cctx = zstd.ZstdCompressor(level=3, write_checksum=True) | |
661 | dest = OpCountingBytesIO() |
|
971 | dest = OpCountingBytesIO() | |
662 | with cctx. |
|
972 | with cctx.stream_writer(dest) as compressor: | |
663 | self.assertEqual(compressor.write(b'foobar' * 8192), 0) |
|
973 | self.assertEqual(compressor.write(b'foobar' * 8192), 0) | |
664 | count = dest._write_count |
|
974 | count = dest._write_count | |
665 | offset = dest.tell() |
|
975 | offset = dest.tell() | |
@@ -680,50 +990,89 b' class TestCompressor_write_to(unittest.T' | |||||
680 | def test_multithreaded(self): |
|
990 | def test_multithreaded(self): | |
681 | dest = io.BytesIO() |
|
991 | dest = io.BytesIO() | |
682 | cctx = zstd.ZstdCompressor(threads=2) |
|
992 | cctx = zstd.ZstdCompressor(threads=2) | |
683 | with cctx. |
|
993 | with cctx.stream_writer(dest) as compressor: | |
684 | compressor.write(b'a' * 1048576) |
|
994 | compressor.write(b'a' * 1048576) | |
685 | compressor.write(b'b' * 1048576) |
|
995 | compressor.write(b'b' * 1048576) | |
686 | compressor.write(b'c' * 1048576) |
|
996 | compressor.write(b'c' * 1048576) | |
687 |
|
997 | |||
688 | self.assertEqual(len(dest.getvalue()), 295) |
|
998 | self.assertEqual(len(dest.getvalue()), 295) | |
689 |
|
999 | |||
|
1000 | def test_tell(self): | |||
|
1001 | dest = io.BytesIO() | |||
|
1002 | cctx = zstd.ZstdCompressor() | |||
|
1003 | with cctx.stream_writer(dest) as compressor: | |||
|
1004 | self.assertEqual(compressor.tell(), 0) | |||
|
1005 | ||||
|
1006 | for i in range(256): | |||
|
1007 | compressor.write(b'foo' * (i + 1)) | |||
|
1008 | self.assertEqual(compressor.tell(), dest.tell()) | |||
|
1009 | ||||
|
1010 | def test_bad_size(self): | |||
|
1011 | cctx = zstd.ZstdCompressor() | |||
|
1012 | ||||
|
1013 | dest = io.BytesIO() | |||
|
1014 | ||||
|
1015 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): | |||
|
1016 | with cctx.stream_writer(dest, size=2) as compressor: | |||
|
1017 | compressor.write(b'foo') | |||
|
1018 | ||||
|
1019 | # Test another operation. | |||
|
1020 | with cctx.stream_writer(dest, size=42): | |||
|
1021 | pass | |||
|
1022 | ||||
|
1023 | def test_tarfile_compat(self): | |||
|
1024 | raise unittest.SkipTest('not yet fully working') | |||
|
1025 | ||||
|
1026 | dest = io.BytesIO() | |||
|
1027 | cctx = zstd.ZstdCompressor() | |||
|
1028 | with cctx.stream_writer(dest) as compressor: | |||
|
1029 | with tarfile.open('tf', mode='w', fileobj=compressor) as tf: | |||
|
1030 | tf.add(__file__, 'test_compressor.py') | |||
|
1031 | ||||
|
1032 | dest.seek(0) | |||
|
1033 | ||||
|
1034 | dctx = zstd.ZstdDecompressor() | |||
|
1035 | with dctx.stream_reader(dest) as reader: | |||
|
1036 | with tarfile.open(mode='r:', fileobj=reader) as tf: | |||
|
1037 | for member in tf: | |||
|
1038 | self.assertEqual(member.name, 'test_compressor.py') | |||
690 |
|
1039 | |||
691 | @make_cffi |
|
1040 | @make_cffi | |
692 | class TestCompressor_read_ |
|
1041 | class TestCompressor_read_to_iter(unittest.TestCase): | |
693 | def test_type_validation(self): |
|
1042 | def test_type_validation(self): | |
694 | cctx = zstd.ZstdCompressor() |
|
1043 | cctx = zstd.ZstdCompressor() | |
695 |
|
1044 | |||
696 | # Object with read() works. |
|
1045 | # Object with read() works. | |
697 | for chunk in cctx.read_ |
|
1046 | for chunk in cctx.read_to_iter(io.BytesIO()): | |
698 | pass |
|
1047 | pass | |
699 |
|
1048 | |||
700 | # Buffer protocol works. |
|
1049 | # Buffer protocol works. | |
701 | for chunk in cctx.read_ |
|
1050 | for chunk in cctx.read_to_iter(b'foobar'): | |
702 | pass |
|
1051 | pass | |
703 |
|
1052 | |||
704 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): |
|
1053 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): | |
705 | for chunk in cctx.read_ |
|
1054 | for chunk in cctx.read_to_iter(True): | |
706 | pass |
|
1055 | pass | |
707 |
|
1056 | |||
708 | def test_read_empty(self): |
|
1057 | def test_read_empty(self): | |
709 | cctx = zstd.ZstdCompressor(level=1) |
|
1058 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
710 |
|
1059 | |||
711 | source = io.BytesIO() |
|
1060 | source = io.BytesIO() | |
712 | it = cctx.read_ |
|
1061 | it = cctx.read_to_iter(source) | |
713 | chunks = list(it) |
|
1062 | chunks = list(it) | |
714 | self.assertEqual(len(chunks), 1) |
|
1063 | self.assertEqual(len(chunks), 1) | |
715 | compressed = b''.join(chunks) |
|
1064 | compressed = b''.join(chunks) | |
716 | self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
1065 | self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
717 |
|
1066 | |||
718 | # And again with the buffer protocol. |
|
1067 | # And again with the buffer protocol. | |
719 | it = cctx.read_ |
|
1068 | it = cctx.read_to_iter(b'') | |
720 | chunks = list(it) |
|
1069 | chunks = list(it) | |
721 | self.assertEqual(len(chunks), 1) |
|
1070 | self.assertEqual(len(chunks), 1) | |
722 | compressed2 = b''.join(chunks) |
|
1071 | compressed2 = b''.join(chunks) | |
723 | self.assertEqual(compressed2, compressed) |
|
1072 | self.assertEqual(compressed2, compressed) | |
724 |
|
1073 | |||
725 | def test_read_large(self): |
|
1074 | def test_read_large(self): | |
726 | cctx = zstd.ZstdCompressor(level=1) |
|
1075 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
727 |
|
1076 | |||
728 | source = io.BytesIO() |
|
1077 | source = io.BytesIO() | |
729 | source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) |
|
1078 | source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) | |
@@ -732,7 +1081,7 b' class TestCompressor_read_from(unittest.' | |||||
732 |
|
1081 | |||
733 | # Creating an iterator should not perform any compression until |
|
1082 | # Creating an iterator should not perform any compression until | |
734 | # first read. |
|
1083 | # first read. | |
735 | it = cctx.read_ |
|
1084 | it = cctx.read_to_iter(source, size=len(source.getvalue())) | |
736 | self.assertEqual(source.tell(), 0) |
|
1085 | self.assertEqual(source.tell(), 0) | |
737 |
|
1086 | |||
738 | # We should have exactly 2 output chunks. |
|
1087 | # We should have exactly 2 output chunks. | |
@@ -758,21 +1107,28 b' class TestCompressor_read_from(unittest.' | |||||
758 | self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) |
|
1107 | self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) | |
759 |
|
1108 | |||
760 | params = zstd.get_frame_parameters(b''.join(chunks)) |
|
1109 | params = zstd.get_frame_parameters(b''.join(chunks)) | |
761 | self.assertEqual(params.content_size, |
|
1110 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
762 | self.assertEqual(params.window_size, 262144) |
|
1111 | self.assertEqual(params.window_size, 262144) | |
763 | self.assertEqual(params.dict_id, 0) |
|
1112 | self.assertEqual(params.dict_id, 0) | |
764 | self.assertFalse(params.has_checksum) |
|
1113 | self.assertFalse(params.has_checksum) | |
765 |
|
1114 | |||
766 | # Now check the buffer protocol. |
|
1115 | # Now check the buffer protocol. | |
767 | it = cctx.read_ |
|
1116 | it = cctx.read_to_iter(source.getvalue()) | |
768 | chunks = list(it) |
|
1117 | chunks = list(it) | |
769 | self.assertEqual(len(chunks), 2) |
|
1118 | self.assertEqual(len(chunks), 2) | |
|
1119 | ||||
|
1120 | params = zstd.get_frame_parameters(b''.join(chunks)) | |||
|
1121 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |||
|
1122 | #self.assertEqual(params.window_size, 262144) | |||
|
1123 | self.assertEqual(params.dict_id, 0) | |||
|
1124 | self.assertFalse(params.has_checksum) | |||
|
1125 | ||||
770 | self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) |
|
1126 | self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) | |
771 |
|
1127 | |||
772 | def test_read_write_size(self): |
|
1128 | def test_read_write_size(self): | |
773 | source = OpCountingBytesIO(b'foobarfoobar') |
|
1129 | source = OpCountingBytesIO(b'foobarfoobar') | |
774 | cctx = zstd.ZstdCompressor(level=3) |
|
1130 | cctx = zstd.ZstdCompressor(level=3) | |
775 | for chunk in cctx.read_ |
|
1131 | for chunk in cctx.read_to_iter(source, read_size=1, write_size=1): | |
776 | self.assertEqual(len(chunk), 1) |
|
1132 | self.assertEqual(len(chunk), 1) | |
777 |
|
1133 | |||
778 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) |
|
1134 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) | |
@@ -786,17 +1142,22 b' class TestCompressor_read_from(unittest.' | |||||
786 |
|
1142 | |||
787 | cctx = zstd.ZstdCompressor(threads=2) |
|
1143 | cctx = zstd.ZstdCompressor(threads=2) | |
788 |
|
1144 | |||
789 | compressed = b''.join(cctx.read_ |
|
1145 | compressed = b''.join(cctx.read_to_iter(source)) | |
790 | self.assertEqual(len(compressed), 295) |
|
1146 | self.assertEqual(len(compressed), 295) | |
791 |
|
1147 | |||
|
1148 | def test_bad_size(self): | |||
|
1149 | cctx = zstd.ZstdCompressor() | |||
|
1150 | ||||
|
1151 | source = io.BytesIO(b'a' * 42) | |||
|
1152 | ||||
|
1153 | with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): | |||
|
1154 | b''.join(cctx.read_to_iter(source, size=2)) | |||
|
1155 | ||||
|
1156 | # Test another operation on errored compressor. | |||
|
1157 | b''.join(cctx.read_to_iter(source)) | |||
|
1158 | ||||
792 |
|
1159 | |||
793 | class TestCompressor_multi_compress_to_buffer(unittest.TestCase): |
|
1160 | class TestCompressor_multi_compress_to_buffer(unittest.TestCase): | |
794 | def test_multithreaded_unsupported(self): |
|
|||
795 | cctx = zstd.ZstdCompressor(threads=2) |
|
|||
796 |
|
||||
797 | with self.assertRaisesRegexp(zstd.ZstdError, 'function cannot be called on ZstdCompressor configured for multi-threaded compression'): |
|
|||
798 | cctx.multi_compress_to_buffer([b'foo']) |
|
|||
799 |
|
||||
800 | def test_invalid_inputs(self): |
|
1161 | def test_invalid_inputs(self): | |
801 | cctx = zstd.ZstdCompressor() |
|
1162 | cctx = zstd.ZstdCompressor() | |
802 |
|
1163 | |||
@@ -819,7 +1180,7 b' class TestCompressor_multi_compress_to_b' | |||||
819 | cctx.multi_compress_to_buffer([b'', b'', b'']) |
|
1180 | cctx.multi_compress_to_buffer([b'', b'', b'']) | |
820 |
|
1181 | |||
821 | def test_list_input(self): |
|
1182 | def test_list_input(self): | |
822 | cctx = zstd.ZstdCompressor( |
|
1183 | cctx = zstd.ZstdCompressor(write_checksum=True) | |
823 |
|
1184 | |||
824 | original = [b'foo' * 12, b'bar' * 6] |
|
1185 | original = [b'foo' * 12, b'bar' * 6] | |
825 | frames = [cctx.compress(c) for c in original] |
|
1186 | frames = [cctx.compress(c) for c in original] | |
@@ -834,7 +1195,7 b' class TestCompressor_multi_compress_to_b' | |||||
834 | self.assertEqual(b[1].tobytes(), frames[1]) |
|
1195 | self.assertEqual(b[1].tobytes(), frames[1]) | |
835 |
|
1196 | |||
836 | def test_buffer_with_segments_input(self): |
|
1197 | def test_buffer_with_segments_input(self): | |
837 | cctx = zstd.ZstdCompressor( |
|
1198 | cctx = zstd.ZstdCompressor(write_checksum=True) | |
838 |
|
1199 | |||
839 | original = [b'foo' * 4, b'bar' * 6] |
|
1200 | original = [b'foo' * 4, b'bar' * 6] | |
840 | frames = [cctx.compress(c) for c in original] |
|
1201 | frames = [cctx.compress(c) for c in original] | |
@@ -852,7 +1213,7 b' class TestCompressor_multi_compress_to_b' | |||||
852 | self.assertEqual(result[1].tobytes(), frames[1]) |
|
1213 | self.assertEqual(result[1].tobytes(), frames[1]) | |
853 |
|
1214 | |||
854 | def test_buffer_with_segments_collection_input(self): |
|
1215 | def test_buffer_with_segments_collection_input(self): | |
855 | cctx = zstd.ZstdCompressor( |
|
1216 | cctx = zstd.ZstdCompressor(write_checksum=True) | |
856 |
|
1217 | |||
857 | original = [ |
|
1218 | original = [ | |
858 | b'foo1', |
|
1219 | b'foo1', | |
@@ -886,10 +1247,10 b' class TestCompressor_multi_compress_to_b' | |||||
886 | def test_multiple_threads(self): |
|
1247 | def test_multiple_threads(self): | |
887 | # threads argument will cause multi-threaded ZSTD APIs to be used, which will |
|
1248 | # threads argument will cause multi-threaded ZSTD APIs to be used, which will | |
888 | # make output different. |
|
1249 | # make output different. | |
889 | refcctx = zstd.ZstdCompressor( |
|
1250 | refcctx = zstd.ZstdCompressor(write_checksum=True) | |
890 | reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)] |
|
1251 | reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)] | |
891 |
|
1252 | |||
892 | cctx = zstd.ZstdCompressor( |
|
1253 | cctx = zstd.ZstdCompressor(write_checksum=True) | |
893 |
|
1254 | |||
894 | frames = [] |
|
1255 | frames = [] | |
895 | frames.extend(b'x' * 64 for i in range(256)) |
|
1256 | frames.extend(b'x' * 64 for i in range(256)) |
@@ -1,10 +1,6 b'' | |||||
1 | import io |
|
1 | import io | |
2 | import os |
|
2 | import os | |
3 |
|
3 | import unittest | ||
4 | try: |
|
|||
5 | import unittest2 as unittest |
|
|||
6 | except ImportError: |
|
|||
7 | import unittest |
|
|||
8 |
|
4 | |||
9 | try: |
|
5 | try: | |
10 | import hypothesis |
|
6 | import hypothesis | |
@@ -12,7 +8,7 b' try:' | |||||
12 | except ImportError: |
|
8 | except ImportError: | |
13 | raise unittest.SkipTest('hypothesis not available') |
|
9 | raise unittest.SkipTest('hypothesis not available') | |
14 |
|
10 | |||
15 | import zstd |
|
11 | import zstandard as zstd | |
16 |
|
12 | |||
17 | from . common import ( |
|
13 | from . common import ( | |
18 | make_cffi, |
|
14 | make_cffi, | |
@@ -22,7 +18,57 b' from . common import (' | |||||
22 |
|
18 | |||
23 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
19 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |
24 | @make_cffi |
|
20 | @make_cffi | |
25 | class TestCompressor_ |
|
21 | class TestCompressor_stream_reader_fuzzing(unittest.TestCase): | |
|
22 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
23 | level=strategies.integers(min_value=1, max_value=5), | |||
|
24 | source_read_size=strategies.integers(1, 16384), | |||
|
25 | read_sizes=strategies.data()) | |||
|
26 | def test_stream_source_read_variance(self, original, level, source_read_size, | |||
|
27 | read_sizes): | |||
|
28 | refctx = zstd.ZstdCompressor(level=level) | |||
|
29 | ref_frame = refctx.compress(original) | |||
|
30 | ||||
|
31 | cctx = zstd.ZstdCompressor(level=level) | |||
|
32 | with cctx.stream_reader(io.BytesIO(original), size=len(original), | |||
|
33 | read_size=source_read_size) as reader: | |||
|
34 | chunks = [] | |||
|
35 | while True: | |||
|
36 | read_size = read_sizes.draw(strategies.integers(1, 16384)) | |||
|
37 | chunk = reader.read(read_size) | |||
|
38 | ||||
|
39 | if not chunk: | |||
|
40 | break | |||
|
41 | chunks.append(chunk) | |||
|
42 | ||||
|
43 | self.assertEqual(b''.join(chunks), ref_frame) | |||
|
44 | ||||
|
45 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
46 | level=strategies.integers(min_value=1, max_value=5), | |||
|
47 | source_read_size=strategies.integers(1, 16384), | |||
|
48 | read_sizes=strategies.data()) | |||
|
49 | def test_buffer_source_read_variance(self, original, level, source_read_size, | |||
|
50 | read_sizes): | |||
|
51 | ||||
|
52 | refctx = zstd.ZstdCompressor(level=level) | |||
|
53 | ref_frame = refctx.compress(original) | |||
|
54 | ||||
|
55 | cctx = zstd.ZstdCompressor(level=level) | |||
|
56 | with cctx.stream_reader(original, size=len(original), | |||
|
57 | read_size=source_read_size) as reader: | |||
|
58 | chunks = [] | |||
|
59 | while True: | |||
|
60 | read_size = read_sizes.draw(strategies.integers(1, 16384)) | |||
|
61 | chunk = reader.read(read_size) | |||
|
62 | if not chunk: | |||
|
63 | break | |||
|
64 | chunks.append(chunk) | |||
|
65 | ||||
|
66 | self.assertEqual(b''.join(chunks), ref_frame) | |||
|
67 | ||||
|
68 | ||||
|
69 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
|
70 | @make_cffi | |||
|
71 | class TestCompressor_stream_writer_fuzzing(unittest.TestCase): | |||
26 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
72 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
27 | level=strategies.integers(min_value=1, max_value=5), |
|
73 | level=strategies.integers(min_value=1, max_value=5), | |
28 | write_size=strategies.integers(min_value=1, max_value=1048576)) |
|
74 | write_size=strategies.integers(min_value=1, max_value=1048576)) | |
@@ -32,7 +78,7 b' class TestCompressor_write_to_fuzzing(un' | |||||
32 |
|
78 | |||
33 | cctx = zstd.ZstdCompressor(level=level) |
|
79 | cctx = zstd.ZstdCompressor(level=level) | |
34 | b = io.BytesIO() |
|
80 | b = io.BytesIO() | |
35 | with cctx. |
|
81 | with cctx.stream_writer(b, size=len(original), write_size=write_size) as compressor: | |
36 | compressor.write(original) |
|
82 | compressor.write(original) | |
37 |
|
83 | |||
38 | self.assertEqual(b.getvalue(), ref_frame) |
|
84 | self.assertEqual(b.getvalue(), ref_frame) | |
@@ -62,13 +108,12 b' class TestCompressor_copy_stream_fuzzing' | |||||
62 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
108 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |
63 | @make_cffi |
|
109 | @make_cffi | |
64 | class TestCompressor_compressobj_fuzzing(unittest.TestCase): |
|
110 | class TestCompressor_compressobj_fuzzing(unittest.TestCase): | |
|
111 | @hypothesis.settings( | |||
|
112 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |||
65 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
113 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
66 | level=strategies.integers(min_value=1, max_value=5), |
|
114 | level=strategies.integers(min_value=1, max_value=5), | |
67 | chunk_sizes=strategies. |
|
115 | chunk_sizes=strategies.data()) | |
68 | strategies.integers(min_value=1, max_value=4096))) |
|
|||
69 | def test_random_input_sizes(self, original, level, chunk_sizes): |
|
116 | def test_random_input_sizes(self, original, level, chunk_sizes): | |
70 | chunk_sizes = iter(chunk_sizes) |
|
|||
71 |
|
||||
72 | refctx = zstd.ZstdCompressor(level=level) |
|
117 | refctx = zstd.ZstdCompressor(level=level) | |
73 | ref_frame = refctx.compress(original) |
|
118 | ref_frame = refctx.compress(original) | |
74 |
|
119 | |||
@@ -78,7 +123,7 b' class TestCompressor_compressobj_fuzzing' | |||||
78 | chunks = [] |
|
123 | chunks = [] | |
79 | i = 0 |
|
124 | i = 0 | |
80 | while True: |
|
125 | while True: | |
81 | chunk_size = |
|
126 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) | |
82 | source = original[i:i + chunk_size] |
|
127 | source = original[i:i + chunk_size] | |
83 | if not source: |
|
128 | if not source: | |
84 | break |
|
129 | break | |
@@ -93,7 +138,7 b' class TestCompressor_compressobj_fuzzing' | |||||
93 |
|
138 | |||
94 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
139 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |
95 | @make_cffi |
|
140 | @make_cffi | |
96 |
class TestCompressor_read_ |
|
141 | class TestCompressor_read_to_iter_fuzzing(unittest.TestCase): | |
97 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
142 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
98 | level=strategies.integers(min_value=1, max_value=5), |
|
143 | level=strategies.integers(min_value=1, max_value=5), | |
99 | read_size=strategies.integers(min_value=1, max_value=4096), |
|
144 | read_size=strategies.integers(min_value=1, max_value=4096), | |
@@ -105,8 +150,9 b' class TestCompressor_read_from_fuzzing(u' | |||||
105 | source = io.BytesIO(original) |
|
150 | source = io.BytesIO(original) | |
106 |
|
151 | |||
107 | cctx = zstd.ZstdCompressor(level=level) |
|
152 | cctx = zstd.ZstdCompressor(level=level) | |
108 | chunks = list(cctx.read_ |
|
153 | chunks = list(cctx.read_to_iter(source, size=len(original), | |
109 |
|
|
154 | read_size=read_size, | |
|
155 | write_size=write_size)) | |||
110 |
|
156 | |||
111 | self.assertEqual(b''.join(chunks), ref_frame) |
|
157 | self.assertEqual(b''.join(chunks), ref_frame) | |
112 |
|
158 | |||
@@ -125,7 +171,6 b' class TestCompressor_multi_compress_to_b' | |||||
125 | kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0]) |
|
171 | kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0]) | |
126 |
|
172 | |||
127 | cctx = zstd.ZstdCompressor(level=1, |
|
173 | cctx = zstd.ZstdCompressor(level=1, | |
128 | write_content_size=True, |
|
|||
129 | write_checksum=True, |
|
174 | write_checksum=True, | |
130 | **kwargs) |
|
175 | **kwargs) | |
131 |
|
176 |
@@ -1,9 +1,7 b'' | |||||
1 | try: |
|
1 | import sys | |
2 |
|
|
2 | import unittest | |
3 | except ImportError: |
|
|||
4 | import unittest |
|
|||
5 |
|
3 | |||
6 | import zstd |
|
4 | import zstandard as zstd | |
7 |
|
5 | |||
8 | from . common import ( |
|
6 | from . common import ( | |
9 | make_cffi, |
|
7 | make_cffi, | |
@@ -12,52 +10,104 b' from . common import (' | |||||
12 |
|
10 | |||
13 | @make_cffi |
|
11 | @make_cffi | |
14 | class TestCompressionParameters(unittest.TestCase): |
|
12 | class TestCompressionParameters(unittest.TestCase): | |
15 | def test_ |
|
13 | def test_bounds(self): | |
16 | with self.assertRaises(TypeError): |
|
14 | zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MIN, | |
17 | zstd.CompressionParameters() |
|
15 | chain_log=zstd.CHAINLOG_MIN, | |
18 |
|
16 | hash_log=zstd.HASHLOG_MIN, | ||
19 | with self.assertRaises(TypeError): |
|
17 | search_log=zstd.SEARCHLOG_MIN, | |
20 | zstd.CompressionParameters(0, 1) |
|
18 | min_match=zstd.SEARCHLENGTH_MIN + 1, | |
|
19 | target_length=zstd.TARGETLENGTH_MIN, | |||
|
20 | compression_strategy=zstd.STRATEGY_FAST) | |||
21 |
|
21 | |||
22 | def test_bounds(self): |
|
22 | zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MAX, | |
23 | zstd.CompressionParameters(zstd.WINDOWLOG_MIN, |
|
23 | chain_log=zstd.CHAINLOG_MAX, | |
24 |
zstd. |
|
24 | hash_log=zstd.HASHLOG_MAX, | |
25 |
zstd. |
|
25 | search_log=zstd.SEARCHLOG_MAX, | |
26 |
zstd.SEARCHL |
|
26 | min_match=zstd.SEARCHLENGTH_MAX - 1, | |
27 |
zstd.S |
|
27 | compression_strategy=zstd.STRATEGY_BTULTRA) | |
28 | zstd.TARGETLENGTH_MIN, |
|
|||
29 | zstd.STRATEGY_FAST) |
|
|||
30 |
|
28 | |||
31 | zstd.CompressionParameters(zstd.WINDOWLOG_MAX, |
|
29 | def test_from_level(self): | |
32 | zstd.CHAINLOG_MAX, |
|
30 | p = zstd.ZstdCompressionParameters.from_level(1) | |
33 | zstd.HASHLOG_MAX, |
|
|||
34 | zstd.SEARCHLOG_MAX, |
|
|||
35 | zstd.SEARCHLENGTH_MAX - 1, |
|
|||
36 | zstd.TARGETLENGTH_MAX, |
|
|||
37 | zstd.STRATEGY_BTOPT) |
|
|||
38 |
|
||||
39 | def test_get_compression_parameters(self): |
|
|||
40 | p = zstd.get_compression_parameters(1) |
|
|||
41 | self.assertIsInstance(p, zstd.CompressionParameters) |
|
31 | self.assertIsInstance(p, zstd.CompressionParameters) | |
42 |
|
32 | |||
43 | self.assertEqual(p.window_log, 19) |
|
33 | self.assertEqual(p.window_log, 19) | |
44 |
|
34 | |||
|
35 | p = zstd.ZstdCompressionParameters.from_level(-4) | |||
|
36 | self.assertEqual(p.window_log, 19) | |||
|
37 | self.assertEqual(p.compress_literals, 0) | |||
|
38 | ||||
45 | def test_members(self): |
|
39 | def test_members(self): | |
46 | p = zstd.CompressionParameters(10, |
|
40 | p = zstd.ZstdCompressionParameters(window_log=10, | |
|
41 | chain_log=6, | |||
|
42 | hash_log=7, | |||
|
43 | search_log=4, | |||
|
44 | min_match=5, | |||
|
45 | target_length=8, | |||
|
46 | compression_strategy=1) | |||
47 | self.assertEqual(p.window_log, 10) |
|
47 | self.assertEqual(p.window_log, 10) | |
48 | self.assertEqual(p.chain_log, 6) |
|
48 | self.assertEqual(p.chain_log, 6) | |
49 | self.assertEqual(p.hash_log, 7) |
|
49 | self.assertEqual(p.hash_log, 7) | |
50 | self.assertEqual(p.search_log, 4) |
|
50 | self.assertEqual(p.search_log, 4) | |
51 | self.assertEqual(p. |
|
51 | self.assertEqual(p.min_match, 5) | |
52 | self.assertEqual(p.target_length, 8) |
|
52 | self.assertEqual(p.target_length, 8) | |
53 | self.assertEqual(p.strategy, 1) |
|
53 | self.assertEqual(p.compression_strategy, 1) | |
|
54 | ||||
|
55 | p = zstd.ZstdCompressionParameters(compression_level=2) | |||
|
56 | self.assertEqual(p.compression_level, 2) | |||
|
57 | ||||
|
58 | p = zstd.ZstdCompressionParameters(threads=4) | |||
|
59 | self.assertEqual(p.threads, 4) | |||
|
60 | ||||
|
61 | p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576, | |||
|
62 | overlap_size_log=6) | |||
|
63 | self.assertEqual(p.threads, 2) | |||
|
64 | self.assertEqual(p.job_size, 1048576) | |||
|
65 | self.assertEqual(p.overlap_size_log, 6) | |||
|
66 | ||||
|
67 | p = zstd.ZstdCompressionParameters(compression_level=2) | |||
|
68 | self.assertEqual(p.compress_literals, 1) | |||
|
69 | ||||
|
70 | p = zstd.ZstdCompressionParameters(compress_literals=False) | |||
|
71 | self.assertEqual(p.compress_literals, 0) | |||
|
72 | ||||
|
73 | p = zstd.ZstdCompressionParameters(compression_level=-1) | |||
|
74 | self.assertEqual(p.compression_level, -1) | |||
|
75 | self.assertEqual(p.compress_literals, 0) | |||
|
76 | ||||
|
77 | p = zstd.ZstdCompressionParameters(compression_level=-2, compress_literals=True) | |||
|
78 | self.assertEqual(p.compression_level, -2) | |||
|
79 | self.assertEqual(p.compress_literals, 1) | |||
|
80 | ||||
|
81 | p = zstd.ZstdCompressionParameters(force_max_window=True) | |||
|
82 | self.assertEqual(p.force_max_window, 1) | |||
|
83 | ||||
|
84 | p = zstd.ZstdCompressionParameters(enable_ldm=True) | |||
|
85 | self.assertEqual(p.enable_ldm, 1) | |||
|
86 | ||||
|
87 | p = zstd.ZstdCompressionParameters(ldm_hash_log=7) | |||
|
88 | self.assertEqual(p.ldm_hash_log, 7) | |||
|
89 | ||||
|
90 | p = zstd.ZstdCompressionParameters(ldm_min_match=6) | |||
|
91 | self.assertEqual(p.ldm_min_match, 6) | |||
|
92 | ||||
|
93 | p = zstd.ZstdCompressionParameters(ldm_bucket_size_log=7) | |||
|
94 | self.assertEqual(p.ldm_bucket_size_log, 7) | |||
|
95 | ||||
|
96 | p = zstd.ZstdCompressionParameters(ldm_hash_every_log=8) | |||
|
97 | self.assertEqual(p.ldm_hash_every_log, 8) | |||
54 |
|
98 | |||
55 | def test_estimated_compression_context_size(self): |
|
99 | def test_estimated_compression_context_size(self): | |
56 | p = zstd.CompressionParameters(20, |
|
100 | p = zstd.ZstdCompressionParameters(window_log=20, | |
|
101 | chain_log=16, | |||
|
102 | hash_log=17, | |||
|
103 | search_log=1, | |||
|
104 | min_match=5, | |||
|
105 | target_length=16, | |||
|
106 | compression_strategy=zstd.STRATEGY_DFAST) | |||
57 |
|
107 | |||
58 | # 32-bit has slightly different values from 64-bit. |
|
108 | # 32-bit has slightly different values from 64-bit. | |
59 | self.assertAlmostEqual(p.estimated_compression_context_size(), 12 |
|
109 | self.assertAlmostEqual(p.estimated_compression_context_size(), 1294072, | |
60 | delta= |
|
110 | delta=250) | |
61 |
|
111 | |||
62 |
|
112 | |||
63 | @make_cffi |
|
113 | @make_cffi | |
@@ -66,8 +116,18 b' class TestFrameParameters(unittest.TestC' | |||||
66 | with self.assertRaises(TypeError): |
|
116 | with self.assertRaises(TypeError): | |
67 | zstd.get_frame_parameters(None) |
|
117 | zstd.get_frame_parameters(None) | |
68 |
|
118 | |||
69 | with self.assertRaises(TypeError): |
|
119 | # Python 3 doesn't appear to convert unicode to Py_buffer. | |
70 | zstd.get_frame_parameters(u'foobarbaz') |
|
120 | if sys.version_info[0] >= 3: | |
|
121 | with self.assertRaises(TypeError): | |||
|
122 | zstd.get_frame_parameters(u'foobarbaz') | |||
|
123 | else: | |||
|
124 | # CPython will convert unicode to Py_buffer. But CFFI won't. | |||
|
125 | if zstd.backend == 'cffi': | |||
|
126 | with self.assertRaises(TypeError): | |||
|
127 | zstd.get_frame_parameters(u'foobarbaz') | |||
|
128 | else: | |||
|
129 | with self.assertRaises(zstd.ZstdError): | |||
|
130 | zstd.get_frame_parameters(u'foobarbaz') | |||
71 |
|
131 | |||
72 | def test_invalid_input_sizes(self): |
|
132 | def test_invalid_input_sizes(self): | |
73 | with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'): |
|
133 | with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'): | |
@@ -82,21 +142,21 b' class TestFrameParameters(unittest.TestC' | |||||
82 |
|
142 | |||
83 | def test_attributes(self): |
|
143 | def test_attributes(self): | |
84 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00') |
|
144 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00') | |
85 | self.assertEqual(params.content_size, |
|
145 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
86 | self.assertEqual(params.window_size, 1024) |
|
146 | self.assertEqual(params.window_size, 1024) | |
87 | self.assertEqual(params.dict_id, 0) |
|
147 | self.assertEqual(params.dict_id, 0) | |
88 | self.assertFalse(params.has_checksum) |
|
148 | self.assertFalse(params.has_checksum) | |
89 |
|
149 | |||
90 | # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte. |
|
150 | # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte. | |
91 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff') |
|
151 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff') | |
92 | self.assertEqual(params.content_size, |
|
152 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
93 | self.assertEqual(params.window_size, 1024) |
|
153 | self.assertEqual(params.window_size, 1024) | |
94 | self.assertEqual(params.dict_id, 255) |
|
154 | self.assertEqual(params.dict_id, 255) | |
95 | self.assertFalse(params.has_checksum) |
|
155 | self.assertFalse(params.has_checksum) | |
96 |
|
156 | |||
97 | # Lowest 3rd bit indicates if checksum is present. |
|
157 | # Lowest 3rd bit indicates if checksum is present. | |
98 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00') |
|
158 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00') | |
99 | self.assertEqual(params.content_size, |
|
159 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
100 | self.assertEqual(params.window_size, 1024) |
|
160 | self.assertEqual(params.window_size, 1024) | |
101 | self.assertEqual(params.dict_id, 0) |
|
161 | self.assertEqual(params.dict_id, 0) | |
102 | self.assertTrue(params.has_checksum) |
|
162 | self.assertTrue(params.has_checksum) | |
@@ -110,7 +170,7 b' class TestFrameParameters(unittest.TestC' | |||||
110 |
|
170 | |||
111 | # Window descriptor is 2nd byte after frame header. |
|
171 | # Window descriptor is 2nd byte after frame header. | |
112 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40') |
|
172 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40') | |
113 | self.assertEqual(params.content_size, |
|
173 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
114 | self.assertEqual(params.window_size, 262144) |
|
174 | self.assertEqual(params.window_size, 262144) | |
115 | self.assertEqual(params.dict_id, 0) |
|
175 | self.assertEqual(params.dict_id, 0) | |
116 | self.assertFalse(params.has_checksum) |
|
176 | self.assertFalse(params.has_checksum) | |
@@ -121,3 +181,22 b' class TestFrameParameters(unittest.TestC' | |||||
121 | self.assertEqual(params.window_size, 262144) |
|
181 | self.assertEqual(params.window_size, 262144) | |
122 | self.assertEqual(params.dict_id, 15) |
|
182 | self.assertEqual(params.dict_id, 15) | |
123 | self.assertTrue(params.has_checksum) |
|
183 | self.assertTrue(params.has_checksum) | |
|
184 | ||||
|
185 | def test_input_types(self): | |||
|
186 | v = zstd.FRAME_HEADER + b'\x00\x00' | |||
|
187 | ||||
|
188 | mutable_array = bytearray(len(v)) | |||
|
189 | mutable_array[:] = v | |||
|
190 | ||||
|
191 | sources = [ | |||
|
192 | memoryview(v), | |||
|
193 | bytearray(v), | |||
|
194 | mutable_array, | |||
|
195 | ] | |||
|
196 | ||||
|
197 | for source in sources: | |||
|
198 | params = zstd.get_frame_parameters(source) | |||
|
199 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |||
|
200 | self.assertEqual(params.window_size, 1024) | |||
|
201 | self.assertEqual(params.dict_id, 0) | |||
|
202 | self.assertFalse(params.has_checksum) |
@@ -1,10 +1,7 b'' | |||||
1 | import io |
|
1 | import io | |
2 | import os |
|
2 | import os | |
3 |
|
3 | import sys | ||
4 | try: |
|
4 | import unittest | |
5 | import unittest2 as unittest |
|
|||
6 | except ImportError: |
|
|||
7 | import unittest |
|
|||
8 |
|
5 | |||
9 | try: |
|
6 | try: | |
10 | import hypothesis |
|
7 | import hypothesis | |
@@ -12,7 +9,7 b' try:' | |||||
12 | except ImportError: |
|
9 | except ImportError: | |
13 | raise unittest.SkipTest('hypothesis not available') |
|
10 | raise unittest.SkipTest('hypothesis not available') | |
14 |
|
11 | |||
15 | import zstd |
|
12 | import zstandard as zstd | |
16 |
|
13 | |||
17 | from .common import ( |
|
14 | from .common import ( | |
18 | make_cffi, |
|
15 | make_cffi, | |
@@ -28,16 +25,17 b' s_hashlog = strategies.integers(min_valu' | |||||
28 | s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN, |
|
25 | s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN, | |
29 | max_value=zstd.SEARCHLOG_MAX) |
|
26 | max_value=zstd.SEARCHLOG_MAX) | |
30 | s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN, |
|
27 | s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN, | |
31 |
|
|
28 | max_value=zstd.SEARCHLENGTH_MAX) | |
32 | s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN, |
|
29 | s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN, | |
33 |
|
|
30 | max_value=2**32) | |
34 | s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST, |
|
31 | s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST, | |
35 | zstd.STRATEGY_DFAST, |
|
32 | zstd.STRATEGY_DFAST, | |
36 | zstd.STRATEGY_GREEDY, |
|
33 | zstd.STRATEGY_GREEDY, | |
37 | zstd.STRATEGY_LAZY, |
|
34 | zstd.STRATEGY_LAZY, | |
38 | zstd.STRATEGY_LAZY2, |
|
35 | zstd.STRATEGY_LAZY2, | |
39 | zstd.STRATEGY_BTLAZY2, |
|
36 | zstd.STRATEGY_BTLAZY2, | |
40 | zstd.STRATEGY_BTOPT |
|
37 | zstd.STRATEGY_BTOPT, | |
|
38 | zstd.STRATEGY_BTULTRA)) | |||
41 |
|
39 | |||
42 |
|
40 | |||
43 | @make_cffi |
|
41 | @make_cffi | |
@@ -47,24 +45,17 b' class TestCompressionParametersHypothesi' | |||||
47 | s_searchlength, s_targetlength, s_strategy) |
|
45 | s_searchlength, s_targetlength, s_strategy) | |
48 | def test_valid_init(self, windowlog, chainlog, hashlog, searchlog, |
|
46 | def test_valid_init(self, windowlog, chainlog, hashlog, searchlog, | |
49 | searchlength, targetlength, strategy): |
|
47 | searchlength, targetlength, strategy): | |
50 | # ZSTD_checkCParams moves the goal posts on us from what's advertised |
|
48 | zstd.ZstdCompressionParameters(window_log=windowlog, | |
51 | # in the constants. So move along with them. |
|
49 | chain_log=chainlog, | |
52 | if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY): |
|
50 | hash_log=hashlog, | |
53 | searchlength += 1 |
|
51 | search_log=searchlog, | |
54 | elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST: |
|
52 | min_match=searchlength, | |
55 | searchlength -= 1 |
|
53 | target_length=targetlength, | |
56 |
|
54 | compression_strategy=strategy) | ||
57 | p = zstd.CompressionParameters(windowlog, chainlog, hashlog, |
|
|||
58 | searchlog, searchlength, |
|
|||
59 | targetlength, strategy) |
|
|||
60 |
|
||||
61 | cctx = zstd.ZstdCompressor(compression_params=p) |
|
|||
62 | with cctx.write_to(io.BytesIO()): |
|
|||
63 | pass |
|
|||
64 |
|
55 | |||
65 | @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, |
|
56 | @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, | |
66 | s_searchlength, s_targetlength, s_strategy) |
|
57 | s_searchlength, s_targetlength, s_strategy) | |
67 | def test_estimate_compression_context_size(self, windowlog, chainlog, |
|
58 | def test_estimated_compression_context_size(self, windowlog, chainlog, | |
68 | hashlog, searchlog, |
|
59 | hashlog, searchlog, | |
69 | searchlength, targetlength, |
|
60 | searchlength, targetlength, | |
70 | strategy): |
|
61 | strategy): | |
@@ -73,7 +64,12 b' class TestCompressionParametersHypothesi' | |||||
73 | elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST: |
|
64 | elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST: | |
74 | searchlength -= 1 |
|
65 | searchlength -= 1 | |
75 |
|
66 | |||
76 | p = zstd.CompressionParameters(windowlog, |
|
67 | p = zstd.ZstdCompressionParameters(window_log=windowlog, | |
77 | searchlog, searchlength, |
|
68 | chain_log=chainlog, | |
78 | targetlength, strategy) |
|
69 | hash_log=hashlog, | |
79 | size = zstd.estimate_compression_context_size(p) |
|
70 | search_log=searchlog, | |
|
71 | min_match=searchlength, | |||
|
72 | target_length=targetlength, | |||
|
73 | compression_strategy=strategy) | |||
|
74 | size = p.estimated_compression_context_size() | |||
|
75 |
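Note: the hunk above moves this test from the positional zstd.CompressionParameters constructor to keyword arguments on zstd.ZstdCompressionParameters, and the context-size estimate becomes a method on the parameters object rather than a module-level function. A minimal sketch of the new calling convention exercised by these tests (the numeric values here are illustrative, not taken from the diff):

import zstandard as zstd

params = zstd.ZstdCompressionParameters(window_log=10,
                                        chain_log=6,
                                        hash_log=6,
                                        search_log=1,
                                        min_match=5,
                                        target_length=16,
                                        compression_strategy=zstd.STRATEGY_DFAST)

# Previously zstd.estimate_compression_context_size(p); now a method.
size = params.estimated_compression_context_size()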
@@ -1,16 +1,14 b'' | |||||
1 | import io |
|
1 | import io | |
|
2 | import os | |||
2 | import random |
|
3 | import random | |
3 | import struct |
|
4 | import struct | |
4 | import sys |
|
5 | import sys | |
|
6 | import unittest | |||
5 |
|
7 | |||
6 | try: |
|
8 | import zstandard as zstd | |
7 | import unittest2 as unittest |
|
|||
8 | except ImportError: |
|
|||
9 | import unittest |
|
|||
10 |
|
||||
11 | import zstd |
|
|||
12 |
|
9 | |||
13 | from .common import ( |
|
10 | from .common import ( | |
|
11 | generate_samples, | |||
14 | make_cffi, |
|
12 | make_cffi, | |
15 | OpCountingBytesIO, |
|
13 | OpCountingBytesIO, | |
16 | ) |
|
14 | ) | |
@@ -23,35 +21,124 b' else:' | |||||
23 |
|
21 | |||
24 |
|
22 | |||
25 | @make_cffi |
|
23 | @make_cffi | |
|
24 | class TestFrameHeaderSize(unittest.TestCase): | |||
|
25 | def test_empty(self): | |||
|
26 | with self.assertRaisesRegexp( | |||
|
27 | zstd.ZstdError, 'could not determine frame header size: Src size ' | |||
|
28 | 'is incorrect'): | |||
|
29 | zstd.frame_header_size(b'') | |||
|
30 | ||||
|
31 | def test_too_small(self): | |||
|
32 | with self.assertRaisesRegexp( | |||
|
33 | zstd.ZstdError, 'could not determine frame header size: Src size ' | |||
|
34 | 'is incorrect'): | |||
|
35 | zstd.frame_header_size(b'foob') | |||
|
36 | ||||
|
37 | def test_basic(self): | |||
|
38 | # It doesn't matter that it isn't a valid frame. | |||
|
39 | self.assertEqual(zstd.frame_header_size(b'long enough but no magic'), 6) | |||
|
40 | ||||
|
41 | ||||
|
42 | @make_cffi | |||
|
43 | class TestFrameContentSize(unittest.TestCase): | |||
|
44 | def test_empty(self): | |||
|
45 | with self.assertRaisesRegexp(zstd.ZstdError, | |||
|
46 | 'error when determining content size'): | |||
|
47 | zstd.frame_content_size(b'') | |||
|
48 | ||||
|
49 | def test_too_small(self): | |||
|
50 | with self.assertRaisesRegexp(zstd.ZstdError, | |||
|
51 | 'error when determining content size'): | |||
|
52 | zstd.frame_content_size(b'foob') | |||
|
53 | ||||
|
54 | def test_bad_frame(self): | |||
|
55 | with self.assertRaisesRegexp(zstd.ZstdError, | |||
|
56 | 'error when determining content size'): | |||
|
57 | zstd.frame_content_size(b'invalid frame header') | |||
|
58 | ||||
|
59 | def test_unknown(self): | |||
|
60 | cctx = zstd.ZstdCompressor(write_content_size=False) | |||
|
61 | frame = cctx.compress(b'foobar') | |||
|
62 | ||||
|
63 | self.assertEqual(zstd.frame_content_size(frame), -1) | |||
|
64 | ||||
|
65 | def test_empty(self): | |||
|
66 | cctx = zstd.ZstdCompressor() | |||
|
67 | frame = cctx.compress(b'') | |||
|
68 | ||||
|
69 | self.assertEqual(zstd.frame_content_size(frame), 0) | |||
|
70 | ||||
|
71 | def test_basic(self): | |||
|
72 | cctx = zstd.ZstdCompressor() | |||
|
73 | frame = cctx.compress(b'foobar') | |||
|
74 | ||||
|
75 | self.assertEqual(zstd.frame_content_size(frame), 6) | |||
|
76 | ||||
|
77 | ||||
|
78 | @make_cffi | |||
|
79 | class TestDecompressor(unittest.TestCase): | |||
|
80 | def test_memory_size(self): | |||
|
81 | dctx = zstd.ZstdDecompressor() | |||
|
82 | ||||
|
83 | self.assertGreater(dctx.memory_size(), 100) | |||
|
84 | ||||
|
85 | ||||
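Note: TestFrameHeaderSize and TestFrameContentSize above cover the module-level frame inspection helpers added in this version. A small usage sketch derived only from what the tests assert (the b'foobar' payload is illustrative):

import zstandard as zstd

frame = zstd.ZstdCompressor().compress(b'foobar')

# Size in bytes of the frame header at the start of the compressed data.
header_size = zstd.frame_header_size(frame)

# Decompressed size recorded in the frame header; the tests expect -1
# when the frame was produced with write_content_size=False.
assert zstd.frame_content_size(frame) == 6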
|
86 | @make_cffi | |||
26 | class TestDecompressor_decompress(unittest.TestCase): |
|
87 | class TestDecompressor_decompress(unittest.TestCase): | |
27 | def test_empty_input(self): |
|
88 | def test_empty_input(self): | |
28 | dctx = zstd.ZstdDecompressor() |
|
89 | dctx = zstd.ZstdDecompressor() | |
29 |
|
90 | |||
30 | with self.assertRaisesRegexp(zstd.ZstdError, ' |
|
91 | with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'): | |
31 | dctx.decompress(b'') |
|
92 | dctx.decompress(b'') | |
32 |
|
93 | |||
33 | def test_invalid_input(self): |
|
94 | def test_invalid_input(self): | |
34 | dctx = zstd.ZstdDecompressor() |
|
95 | dctx = zstd.ZstdDecompressor() | |
35 |
|
96 | |||
36 | with self.assertRaisesRegexp(zstd.ZstdError, ' |
|
97 | with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'): | |
37 | dctx.decompress(b'foobar') |
|
98 | dctx.decompress(b'foobar') | |
38 |
|
99 | |||
|
100 | def test_input_types(self): | |||
|
101 | cctx = zstd.ZstdCompressor(level=1) | |||
|
102 | compressed = cctx.compress(b'foo') | |||
|
103 | ||||
|
104 | mutable_array = bytearray(len(compressed)) | |||
|
105 | mutable_array[:] = compressed | |||
|
106 | ||||
|
107 | sources = [ | |||
|
108 | memoryview(compressed), | |||
|
109 | bytearray(compressed), | |||
|
110 | mutable_array, | |||
|
111 | ] | |||
|
112 | ||||
|
113 | dctx = zstd.ZstdDecompressor() | |||
|
114 | for source in sources: | |||
|
115 | self.assertEqual(dctx.decompress(source), b'foo') | |||
|
116 | ||||
39 | def test_no_content_size_in_frame(self): |
|
117 | def test_no_content_size_in_frame(self): | |
40 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
118 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
41 | compressed = cctx.compress(b'foobar') |
|
119 | compressed = cctx.compress(b'foobar') | |
42 |
|
120 | |||
43 | dctx = zstd.ZstdDecompressor() |
|
121 | dctx = zstd.ZstdDecompressor() | |
44 | with self.assertRaisesRegexp(zstd.ZstdError, ' |
|
122 | with self.assertRaisesRegexp(zstd.ZstdError, 'could not determine content size in frame header'): | |
45 | dctx.decompress(compressed) |
|
123 | dctx.decompress(compressed) | |
46 |
|
124 | |||
47 | def test_content_size_present(self): |
|
125 | def test_content_size_present(self): | |
48 | cctx = zstd.ZstdCompressor( |
|
126 | cctx = zstd.ZstdCompressor() | |
49 | compressed = cctx.compress(b'foobar') |
|
127 | compressed = cctx.compress(b'foobar') | |
50 |
|
128 | |||
51 | dctx = zstd.ZstdDecompressor() |
|
129 | dctx = zstd.ZstdDecompressor() | |
52 | decompressed = dctx.decompress(compressed) |
|
130 | decompressed = dctx.decompress(compressed) | |
53 | self.assertEqual(decompressed, b'foobar') |
|
131 | self.assertEqual(decompressed, b'foobar') | |
54 |
|
132 | |||
|
133 | def test_empty_roundtrip(self): | |||
|
134 | cctx = zstd.ZstdCompressor() | |||
|
135 | compressed = cctx.compress(b'') | |||
|
136 | ||||
|
137 | dctx = zstd.ZstdDecompressor() | |||
|
138 | decompressed = dctx.decompress(compressed) | |||
|
139 | ||||
|
140 | self.assertEqual(decompressed, b'') | |||
|
141 | ||||
55 | def test_max_output_size(self): |
|
142 | def test_max_output_size(self): | |
56 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
143 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
57 | source = b'foobar' * 256 |
|
144 | source = b'foobar' * 256 | |
@@ -63,7 +150,8 b' class TestDecompressor_decompress(unitte' | |||||
63 | self.assertEqual(decompressed, source) |
|
150 | self.assertEqual(decompressed, source) | |
64 |
|
151 | |||
65 | # Input size - 1 fails |
|
152 | # Input size - 1 fails | |
66 | with self.assertRaisesRegexp(zstd.ZstdError, |
|
153 | with self.assertRaisesRegexp(zstd.ZstdError, | |
|
154 | 'decompression error: did not decompress full frame'): | |||
67 | dctx.decompress(compressed, max_output_size=len(source) - 1) |
|
155 | dctx.decompress(compressed, max_output_size=len(source) - 1) | |
68 |
|
156 | |||
69 | # Input size + 1 works |
|
157 | # Input size + 1 works | |
@@ -94,7 +182,7 b' class TestDecompressor_decompress(unitte' | |||||
94 | d = zstd.train_dictionary(8192, samples) |
|
182 | d = zstd.train_dictionary(8192, samples) | |
95 |
|
183 | |||
96 | orig = b'foobar' * 16384 |
|
184 | orig = b'foobar' * 16384 | |
97 | cctx = zstd.ZstdCompressor(level=1, dict_data=d |
|
185 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |
98 | compressed = cctx.compress(orig) |
|
186 | compressed = cctx.compress(orig) | |
99 |
|
187 | |||
100 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
188 | dctx = zstd.ZstdDecompressor(dict_data=d) | |
@@ -113,7 +201,7 b' class TestDecompressor_decompress(unitte' | |||||
113 |
|
201 | |||
114 | sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192) |
|
202 | sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192) | |
115 | compressed = [] |
|
203 | compressed = [] | |
116 | cctx = zstd.ZstdCompressor(level=1, dict_data=d |
|
204 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |
117 | for source in sources: |
|
205 | for source in sources: | |
118 | compressed.append(cctx.compress(source)) |
|
206 | compressed.append(cctx.compress(source)) | |
119 |
|
207 | |||
@@ -122,6 +210,21 b' class TestDecompressor_decompress(unitte' | |||||
122 | decompressed = dctx.decompress(compressed[i]) |
|
210 | decompressed = dctx.decompress(compressed[i]) | |
123 | self.assertEqual(decompressed, sources[i]) |
|
211 | self.assertEqual(decompressed, sources[i]) | |
124 |
|
212 | |||
|
213 | def test_max_window_size(self): | |||
|
214 | with open(__file__, 'rb') as fh: | |||
|
215 | source = fh.read() | |||
|
216 | ||||
|
217 | # If we write a content size, the decompressor engages single pass | |||
|
218 | # mode and the window size doesn't come into play. | |||
|
219 | cctx = zstd.ZstdCompressor(write_content_size=False) | |||
|
220 | frame = cctx.compress(source) | |||
|
221 | ||||
|
222 | dctx = zstd.ZstdDecompressor(max_window_size=1) | |||
|
223 | ||||
|
224 | with self.assertRaisesRegexp( | |||
|
225 | zstd.ZstdError, 'decompression error: Frame requires too much memory'): | |||
|
226 | dctx.decompress(frame, max_output_size=len(source)) | |||
|
227 | ||||
125 |
|
228 | |||
126 | @make_cffi |
|
229 | @make_cffi | |
127 | class TestDecompressor_copy_stream(unittest.TestCase): |
|
230 | class TestDecompressor_copy_stream(unittest.TestCase): | |
@@ -186,6 +289,211 b' class TestDecompressor_copy_stream(unitt' | |||||
186 |
|
289 | |||
187 |
|
290 | |||
188 | @make_cffi |
|
291 | @make_cffi | |
|
292 | class TestDecompressor_stream_reader(unittest.TestCase): | |||
|
293 | def test_context_manager(self): | |||
|
294 | dctx = zstd.ZstdDecompressor() | |||
|
295 | ||||
|
296 | reader = dctx.stream_reader(b'foo') | |||
|
297 | with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): | |||
|
298 | reader.read(1) | |||
|
299 | ||||
|
300 | with dctx.stream_reader(b'foo') as reader: | |||
|
301 | with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'): | |||
|
302 | with reader as reader2: | |||
|
303 | pass | |||
|
304 | ||||
|
305 | def test_not_implemented(self): | |||
|
306 | dctx = zstd.ZstdDecompressor() | |||
|
307 | ||||
|
308 | with dctx.stream_reader(b'foo') as reader: | |||
|
309 | with self.assertRaises(NotImplementedError): | |||
|
310 | reader.readline() | |||
|
311 | ||||
|
312 | with self.assertRaises(NotImplementedError): | |||
|
313 | reader.readlines() | |||
|
314 | ||||
|
315 | with self.assertRaises(NotImplementedError): | |||
|
316 | reader.readall() | |||
|
317 | ||||
|
318 | with self.assertRaises(NotImplementedError): | |||
|
319 | iter(reader) | |||
|
320 | ||||
|
321 | with self.assertRaises(NotImplementedError): | |||
|
322 | next(reader) | |||
|
323 | ||||
|
324 | with self.assertRaises(io.UnsupportedOperation): | |||
|
325 | reader.write(b'foo') | |||
|
326 | ||||
|
327 | with self.assertRaises(io.UnsupportedOperation): | |||
|
328 | reader.writelines([]) | |||
|
329 | ||||
|
330 | def test_constant_methods(self): | |||
|
331 | dctx = zstd.ZstdDecompressor() | |||
|
332 | ||||
|
333 | with dctx.stream_reader(b'foo') as reader: | |||
|
334 | self.assertTrue(reader.readable()) | |||
|
335 | self.assertFalse(reader.writable()) | |||
|
336 | self.assertTrue(reader.seekable()) | |||
|
337 | self.assertFalse(reader.isatty()) | |||
|
338 | self.assertIsNone(reader.flush()) | |||
|
339 | ||||
|
340 | def test_read_closed(self): | |||
|
341 | dctx = zstd.ZstdDecompressor() | |||
|
342 | ||||
|
343 | with dctx.stream_reader(b'foo') as reader: | |||
|
344 | reader.close() | |||
|
345 | with self.assertRaisesRegexp(ValueError, 'stream is closed'): | |||
|
346 | reader.read(1) | |||
|
347 | ||||
|
348 | def test_bad_read_size(self): | |||
|
349 | dctx = zstd.ZstdDecompressor() | |||
|
350 | ||||
|
351 | with dctx.stream_reader(b'foo') as reader: | |||
|
352 | with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'): | |||
|
353 | reader.read(-1) | |||
|
354 | ||||
|
355 | with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'): | |||
|
356 | reader.read(0) | |||
|
357 | ||||
|
358 | def test_read_buffer(self): | |||
|
359 | cctx = zstd.ZstdCompressor() | |||
|
360 | ||||
|
361 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) | |||
|
362 | frame = cctx.compress(source) | |||
|
363 | ||||
|
364 | dctx = zstd.ZstdDecompressor() | |||
|
365 | ||||
|
366 | with dctx.stream_reader(frame) as reader: | |||
|
367 | self.assertEqual(reader.tell(), 0) | |||
|
368 | ||||
|
369 | # We should get entire frame in one read. | |||
|
370 | result = reader.read(8192) | |||
|
371 | self.assertEqual(result, source) | |||
|
372 | self.assertEqual(reader.tell(), len(source)) | |||
|
373 | ||||
|
374 | # Read after EOF should return empty bytes. | |||
|
375 | self.assertEqual(reader.read(), b'') | |||
|
376 | self.assertEqual(reader.tell(), len(result)) | |||
|
377 | ||||
|
378 | self.assertTrue(reader.closed()) | |||
|
379 | ||||
|
380 | def test_read_buffer_small_chunks(self): | |||
|
381 | cctx = zstd.ZstdCompressor() | |||
|
382 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) | |||
|
383 | frame = cctx.compress(source) | |||
|
384 | ||||
|
385 | dctx = zstd.ZstdDecompressor() | |||
|
386 | chunks = [] | |||
|
387 | ||||
|
388 | with dctx.stream_reader(frame, read_size=1) as reader: | |||
|
389 | while True: | |||
|
390 | chunk = reader.read(1) | |||
|
391 | if not chunk: | |||
|
392 | break | |||
|
393 | ||||
|
394 | chunks.append(chunk) | |||
|
395 | self.assertEqual(reader.tell(), sum(map(len, chunks))) | |||
|
396 | ||||
|
397 | self.assertEqual(b''.join(chunks), source) | |||
|
398 | ||||
|
399 | def test_read_stream(self): | |||
|
400 | cctx = zstd.ZstdCompressor() | |||
|
401 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) | |||
|
402 | frame = cctx.compress(source) | |||
|
403 | ||||
|
404 | dctx = zstd.ZstdDecompressor() | |||
|
405 | with dctx.stream_reader(io.BytesIO(frame)) as reader: | |||
|
406 | self.assertEqual(reader.tell(), 0) | |||
|
407 | ||||
|
408 | chunk = reader.read(8192) | |||
|
409 | self.assertEqual(chunk, source) | |||
|
410 | self.assertEqual(reader.tell(), len(source)) | |||
|
411 | self.assertEqual(reader.read(), b'') | |||
|
412 | self.assertEqual(reader.tell(), len(source)) | |||
|
413 | ||||
|
414 | def test_read_stream_small_chunks(self): | |||
|
415 | cctx = zstd.ZstdCompressor() | |||
|
416 | source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) | |||
|
417 | frame = cctx.compress(source) | |||
|
418 | ||||
|
419 | dctx = zstd.ZstdDecompressor() | |||
|
420 | chunks = [] | |||
|
421 | ||||
|
422 | with dctx.stream_reader(io.BytesIO(frame), read_size=1) as reader: | |||
|
423 | while True: | |||
|
424 | chunk = reader.read(1) | |||
|
425 | if not chunk: | |||
|
426 | break | |||
|
427 | ||||
|
428 | chunks.append(chunk) | |||
|
429 | self.assertEqual(reader.tell(), sum(map(len, chunks))) | |||
|
430 | ||||
|
431 | self.assertEqual(b''.join(chunks), source) | |||
|
432 | ||||
|
433 | def test_read_after_exit(self): | |||
|
434 | cctx = zstd.ZstdCompressor() | |||
|
435 | frame = cctx.compress(b'foo' * 60) | |||
|
436 | ||||
|
437 | dctx = zstd.ZstdDecompressor() | |||
|
438 | ||||
|
439 | with dctx.stream_reader(frame) as reader: | |||
|
440 | while reader.read(16): | |||
|
441 | pass | |||
|
442 | ||||
|
443 | with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): | |||
|
444 | reader.read(10) | |||
|
445 | ||||
|
446 | def test_illegal_seeks(self): | |||
|
447 | cctx = zstd.ZstdCompressor() | |||
|
448 | frame = cctx.compress(b'foo' * 60) | |||
|
449 | ||||
|
450 | dctx = zstd.ZstdDecompressor() | |||
|
451 | ||||
|
452 | with dctx.stream_reader(frame) as reader: | |||
|
453 | with self.assertRaisesRegexp(ValueError, | |||
|
454 | 'cannot seek to negative position'): | |||
|
455 | reader.seek(-1, os.SEEK_SET) | |||
|
456 | ||||
|
457 | reader.read(1) | |||
|
458 | ||||
|
459 | with self.assertRaisesRegexp( | |||
|
460 | ValueError, 'cannot seek zstd decompression stream backwards'): | |||
|
461 | reader.seek(0, os.SEEK_SET) | |||
|
462 | ||||
|
463 | with self.assertRaisesRegexp( | |||
|
464 | ValueError, 'cannot seek zstd decompression stream backwards'): | |||
|
465 | reader.seek(-1, os.SEEK_CUR) | |||
|
466 | ||||
|
467 | with self.assertRaisesRegexp( | |||
|
468 | ValueError, | |||
|
469 | 'zstd decompression streams cannot be seeked with SEEK_END'): | |||
|
470 | reader.seek(0, os.SEEK_END) | |||
|
471 | ||||
|
472 | reader.close() | |||
|
473 | ||||
|
474 | with self.assertRaisesRegexp(ValueError, 'stream is closed'): | |||
|
475 | reader.seek(4, os.SEEK_SET) | |||
|
476 | ||||
|
477 | with self.assertRaisesRegexp( | |||
|
478 | zstd.ZstdError, 'seek\(\) must be called from an active context'): | |||
|
479 | reader.seek(0) | |||
|
480 | ||||
|
481 | def test_seek(self): | |||
|
482 | source = b'foobar' * 60 | |||
|
483 | cctx = zstd.ZstdCompressor() | |||
|
484 | frame = cctx.compress(source) | |||
|
485 | ||||
|
486 | dctx = zstd.ZstdDecompressor() | |||
|
487 | ||||
|
488 | with dctx.stream_reader(frame) as reader: | |||
|
489 | reader.seek(3) | |||
|
490 | self.assertEqual(reader.read(3), b'bar') | |||
|
491 | ||||
|
492 | reader.seek(4, os.SEEK_CUR) | |||
|
493 | self.assertEqual(reader.read(2), b'ar') | |||
|
494 | ||||
|
495 | ||||
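Note: the TestDecompressor_stream_reader tests above describe the read-only, forward-only file object returned by ZstdDecompressor.stream_reader(). A condensed usage sketch mirroring test_read_buffer and test_seek (the sample data is illustrative):

import zstandard as zstd

frame = zstd.ZstdCompressor().compress(b'foobar' * 60)

dctx = zstd.ZstdDecompressor()
with dctx.stream_reader(frame) as reader:   # also accepts a file-like source
    reader.seek(3)                          # only forward seeks are allowed
    assert reader.read(3) == b'bar'
    rest = reader.read(8192)                # b'' once the frame is exhausted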
|
496 | @make_cffi | |||
189 | class TestDecompressor_decompressobj(unittest.TestCase): |
|
497 | class TestDecompressor_decompressobj(unittest.TestCase): | |
190 | def test_simple(self): |
|
498 | def test_simple(self): | |
191 | data = zstd.ZstdCompressor(level=1).compress(b'foobar') |
|
499 | data = zstd.ZstdCompressor(level=1).compress(b'foobar') | |
@@ -194,6 +502,24 b' class TestDecompressor_decompressobj(uni' | |||||
194 | dobj = dctx.decompressobj() |
|
502 | dobj = dctx.decompressobj() | |
195 | self.assertEqual(dobj.decompress(data), b'foobar') |
|
503 | self.assertEqual(dobj.decompress(data), b'foobar') | |
196 |
|
504 | |||
|
505 | def test_input_types(self): | |||
|
506 | compressed = zstd.ZstdCompressor(level=1).compress(b'foo') | |||
|
507 | ||||
|
508 | dctx = zstd.ZstdDecompressor() | |||
|
509 | ||||
|
510 | mutable_array = bytearray(len(compressed)) | |||
|
511 | mutable_array[:] = compressed | |||
|
512 | ||||
|
513 | sources = [ | |||
|
514 | memoryview(compressed), | |||
|
515 | bytearray(compressed), | |||
|
516 | mutable_array, | |||
|
517 | ] | |||
|
518 | ||||
|
519 | for source in sources: | |||
|
520 | dobj = dctx.decompressobj() | |||
|
521 | self.assertEqual(dobj.decompress(source), b'foo') | |||
|
522 | ||||
197 | def test_reuse(self): |
|
523 | def test_reuse(self): | |
198 | data = zstd.ZstdCompressor(level=1).compress(b'foobar') |
|
524 | data = zstd.ZstdCompressor(level=1).compress(b'foobar') | |
199 |
|
525 | |||
@@ -204,22 +530,58 b' class TestDecompressor_decompressobj(uni' | |||||
204 | with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'): |
|
530 | with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'): | |
205 | dobj.decompress(data) |
|
531 | dobj.decompress(data) | |
206 |
|
532 | |||
|
533 | def test_bad_write_size(self): | |||
|
534 | dctx = zstd.ZstdDecompressor() | |||
|
535 | ||||
|
536 | with self.assertRaisesRegexp(ValueError, 'write_size must be positive'): | |||
|
537 | dctx.decompressobj(write_size=0) | |||
|
538 | ||||
|
539 | def test_write_size(self): | |||
|
540 | source = b'foo' * 64 + b'bar' * 128 | |||
|
541 | data = zstd.ZstdCompressor(level=1).compress(source) | |||
|
542 | ||||
|
543 | dctx = zstd.ZstdDecompressor() | |||
|
544 | ||||
|
545 | for i in range(128): | |||
|
546 | dobj = dctx.decompressobj(write_size=i + 1) | |||
|
547 | self.assertEqual(dobj.decompress(data), source) | |||
207 |
|
548 | |||
208 | def decompress_via_writer(data): |
|
549 | def decompress_via_writer(data): | |
209 | buffer = io.BytesIO() |
|
550 | buffer = io.BytesIO() | |
210 | dctx = zstd.ZstdDecompressor() |
|
551 | dctx = zstd.ZstdDecompressor() | |
211 | with dctx. |
|
552 | with dctx.stream_writer(buffer) as decompressor: | |
212 | decompressor.write(data) |
|
553 | decompressor.write(data) | |
213 | return buffer.getvalue() |
|
554 | return buffer.getvalue() | |
214 |
|
555 | |||
215 |
|
556 | |||
216 | @make_cffi |
|
557 | @make_cffi | |
217 | class TestDecompressor_ |
|
558 | class TestDecompressor_stream_writer(unittest.TestCase): | |
218 | def test_empty_roundtrip(self): |
|
559 | def test_empty_roundtrip(self): | |
219 | cctx = zstd.ZstdCompressor() |
|
560 | cctx = zstd.ZstdCompressor() | |
220 | empty = cctx.compress(b'') |
|
561 | empty = cctx.compress(b'') | |
221 | self.assertEqual(decompress_via_writer(empty), b'') |
|
562 | self.assertEqual(decompress_via_writer(empty), b'') | |
222 |
|
563 | |||
|
564 | def test_input_types(self): | |||
|
565 | cctx = zstd.ZstdCompressor(level=1) | |||
|
566 | compressed = cctx.compress(b'foo') | |||
|
567 | ||||
|
568 | mutable_array = bytearray(len(compressed)) | |||
|
569 | mutable_array[:] = compressed | |||
|
570 | ||||
|
571 | sources = [ | |||
|
572 | memoryview(compressed), | |||
|
573 | bytearray(compressed), | |||
|
574 | mutable_array, | |||
|
575 | ] | |||
|
576 | ||||
|
577 | dctx = zstd.ZstdDecompressor() | |||
|
578 | for source in sources: | |||
|
579 | buffer = io.BytesIO() | |||
|
580 | with dctx.stream_writer(buffer) as decompressor: | |||
|
581 | decompressor.write(source) | |||
|
582 | ||||
|
583 | self.assertEqual(buffer.getvalue(), b'foo') | |||
|
584 | ||||
223 | def test_large_roundtrip(self): |
|
585 | def test_large_roundtrip(self): | |
224 | chunks = [] |
|
586 | chunks = [] | |
225 | for i in range(255): |
|
587 | for i in range(255): | |
@@ -242,7 +604,7 b' class TestDecompressor_write_to(unittest' | |||||
242 |
|
604 | |||
243 | buffer = io.BytesIO() |
|
605 | buffer = io.BytesIO() | |
244 | dctx = zstd.ZstdDecompressor() |
|
606 | dctx = zstd.ZstdDecompressor() | |
245 | with dctx. |
|
607 | with dctx.stream_writer(buffer) as decompressor: | |
246 | pos = 0 |
|
608 | pos = 0 | |
247 | while pos < len(compressed): |
|
609 | while pos < len(compressed): | |
248 | pos2 = pos + 8192 |
|
610 | pos2 = pos + 8192 | |
@@ -262,14 +624,14 b' class TestDecompressor_write_to(unittest' | |||||
262 | orig = b'foobar' * 16384 |
|
624 | orig = b'foobar' * 16384 | |
263 | buffer = io.BytesIO() |
|
625 | buffer = io.BytesIO() | |
264 | cctx = zstd.ZstdCompressor(dict_data=d) |
|
626 | cctx = zstd.ZstdCompressor(dict_data=d) | |
265 | with cctx. |
|
627 | with cctx.stream_writer(buffer) as compressor: | |
266 | self.assertEqual(compressor.write(orig), |
|
628 | self.assertEqual(compressor.write(orig), 0) | |
267 |
|
629 | |||
268 | compressed = buffer.getvalue() |
|
630 | compressed = buffer.getvalue() | |
269 | buffer = io.BytesIO() |
|
631 | buffer = io.BytesIO() | |
270 |
|
632 | |||
271 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
633 | dctx = zstd.ZstdDecompressor(dict_data=d) | |
272 | with dctx. |
|
634 | with dctx.stream_writer(buffer) as decompressor: | |
273 | self.assertEqual(decompressor.write(compressed), len(orig)) |
|
635 | self.assertEqual(decompressor.write(compressed), len(orig)) | |
274 |
|
636 | |||
275 | self.assertEqual(buffer.getvalue(), orig) |
|
637 | self.assertEqual(buffer.getvalue(), orig) | |
@@ -277,7 +639,7 b' class TestDecompressor_write_to(unittest' | |||||
277 | def test_memory_size(self): |
|
639 | def test_memory_size(self): | |
278 | dctx = zstd.ZstdDecompressor() |
|
640 | dctx = zstd.ZstdDecompressor() | |
279 | buffer = io.BytesIO() |
|
641 | buffer = io.BytesIO() | |
280 | with dctx. |
|
642 | with dctx.stream_writer(buffer) as decompressor: | |
281 | size = decompressor.memory_size() |
|
643 | size = decompressor.memory_size() | |
282 |
|
644 | |||
283 | self.assertGreater(size, 100000) |
|
645 | self.assertGreater(size, 100000) | |
@@ -286,7 +648,7 b' class TestDecompressor_write_to(unittest' | |||||
286 | source = zstd.ZstdCompressor().compress(b'foobarfoobar') |
|
648 | source = zstd.ZstdCompressor().compress(b'foobarfoobar') | |
287 | dest = OpCountingBytesIO() |
|
649 | dest = OpCountingBytesIO() | |
288 | dctx = zstd.ZstdDecompressor() |
|
650 | dctx = zstd.ZstdDecompressor() | |
289 | with dctx. |
|
651 | with dctx.stream_writer(dest, write_size=1) as decompressor: | |
290 | s = struct.Struct('>B') |
|
652 | s = struct.Struct('>B') | |
291 | for c in source: |
|
653 | for c in source: | |
292 | if not isinstance(c, str): |
|
654 | if not isinstance(c, str): | |
@@ -298,29 +660,29 b' class TestDecompressor_write_to(unittest' | |||||
298 |
|
660 | |||
299 |
|
661 | |||
300 | @make_cffi |
|
662 | @make_cffi | |
301 | class TestDecompressor_read_ |
|
663 | class TestDecompressor_read_to_iter(unittest.TestCase): | |
302 | def test_type_validation(self): |
|
664 | def test_type_validation(self): | |
303 | dctx = zstd.ZstdDecompressor() |
|
665 | dctx = zstd.ZstdDecompressor() | |
304 |
|
666 | |||
305 | # Object with read() works. |
|
667 | # Object with read() works. | |
306 | dctx.read_ |
|
668 | dctx.read_to_iter(io.BytesIO()) | |
307 |
|
669 | |||
308 | # Buffer protocol works. |
|
670 | # Buffer protocol works. | |
309 | dctx.read_ |
|
671 | dctx.read_to_iter(b'foobar') | |
310 |
|
672 | |||
311 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): |
|
673 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): | |
312 | b''.join(dctx.read_ |
|
674 | b''.join(dctx.read_to_iter(True)) | |
313 |
|
675 | |||
314 | def test_empty_input(self): |
|
676 | def test_empty_input(self): | |
315 | dctx = zstd.ZstdDecompressor() |
|
677 | dctx = zstd.ZstdDecompressor() | |
316 |
|
678 | |||
317 | source = io.BytesIO() |
|
679 | source = io.BytesIO() | |
318 | it = dctx.read_ |
|
680 | it = dctx.read_to_iter(source) | |
319 | # TODO this is arguably wrong. Should get an error about missing frame foo. |
|
681 | # TODO this is arguably wrong. Should get an error about missing frame foo. | |
320 | with self.assertRaises(StopIteration): |
|
682 | with self.assertRaises(StopIteration): | |
321 | next(it) |
|
683 | next(it) | |
322 |
|
684 | |||
323 | it = dctx.read_ |
|
685 | it = dctx.read_to_iter(b'') | |
324 | with self.assertRaises(StopIteration): |
|
686 | with self.assertRaises(StopIteration): | |
325 | next(it) |
|
687 | next(it) | |
326 |
|
688 | |||
@@ -328,11 +690,11 b' class TestDecompressor_read_from(unittes' | |||||
328 | dctx = zstd.ZstdDecompressor() |
|
690 | dctx = zstd.ZstdDecompressor() | |
329 |
|
691 | |||
330 | source = io.BytesIO(b'foobar') |
|
692 | source = io.BytesIO(b'foobar') | |
331 | it = dctx.read_ |
|
693 | it = dctx.read_to_iter(source) | |
332 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): |
|
694 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): | |
333 | next(it) |
|
695 | next(it) | |
334 |
|
696 | |||
335 | it = dctx.read_ |
|
697 | it = dctx.read_to_iter(b'foobar') | |
336 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): |
|
698 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): | |
337 | next(it) |
|
699 | next(it) | |
338 |
|
700 | |||
@@ -344,7 +706,7 b' class TestDecompressor_read_from(unittes' | |||||
344 | source.seek(0) |
|
706 | source.seek(0) | |
345 |
|
707 | |||
346 | dctx = zstd.ZstdDecompressor() |
|
708 | dctx = zstd.ZstdDecompressor() | |
347 | it = dctx.read_ |
|
709 | it = dctx.read_to_iter(source) | |
348 |
|
710 | |||
349 | # No chunks should be emitted since there is no data. |
|
711 | # No chunks should be emitted since there is no data. | |
350 | with self.assertRaises(StopIteration): |
|
712 | with self.assertRaises(StopIteration): | |
@@ -358,17 +720,17 b' class TestDecompressor_read_from(unittes' | |||||
358 | dctx = zstd.ZstdDecompressor() |
|
720 | dctx = zstd.ZstdDecompressor() | |
359 |
|
721 | |||
360 | with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'): |
|
722 | with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'): | |
361 | b''.join(dctx.read_ |
|
723 | b''.join(dctx.read_to_iter(b'', skip_bytes=1, read_size=1)) | |
362 |
|
724 | |||
363 | with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'): |
|
725 | with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'): | |
364 | b''.join(dctx.read_ |
|
726 | b''.join(dctx.read_to_iter(b'foobar', skip_bytes=10)) | |
365 |
|
727 | |||
366 | def test_skip_bytes(self): |
|
728 | def test_skip_bytes(self): | |
367 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
729 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
368 | compressed = cctx.compress(b'foobar') |
|
730 | compressed = cctx.compress(b'foobar') | |
369 |
|
731 | |||
370 | dctx = zstd.ZstdDecompressor() |
|
732 | dctx = zstd.ZstdDecompressor() | |
371 | output = b''.join(dctx.read_ |
|
733 | output = b''.join(dctx.read_to_iter(b'hdr' + compressed, skip_bytes=3)) | |
372 | self.assertEqual(output, b'foobar') |
|
734 | self.assertEqual(output, b'foobar') | |
373 |
|
735 | |||
374 | def test_large_output(self): |
|
736 | def test_large_output(self): | |
@@ -382,7 +744,7 b' class TestDecompressor_read_from(unittes' | |||||
382 | compressed.seek(0) |
|
744 | compressed.seek(0) | |
383 |
|
745 | |||
384 | dctx = zstd.ZstdDecompressor() |
|
746 | dctx = zstd.ZstdDecompressor() | |
385 | it = dctx.read_ |
|
747 | it = dctx.read_to_iter(compressed) | |
386 |
|
748 | |||
387 | chunks = [] |
|
749 | chunks = [] | |
388 | chunks.append(next(it)) |
|
750 | chunks.append(next(it)) | |
@@ -395,7 +757,7 b' class TestDecompressor_read_from(unittes' | |||||
395 | self.assertEqual(decompressed, source.getvalue()) |
|
757 | self.assertEqual(decompressed, source.getvalue()) | |
396 |
|
758 | |||
397 | # And again with buffer protocol. |
|
759 | # And again with buffer protocol. | |
398 | it = dctx.read_ |
|
760 | it = dctx.read_to_iter(compressed.getvalue()) | |
399 | chunks = [] |
|
761 | chunks = [] | |
400 | chunks.append(next(it)) |
|
762 | chunks.append(next(it)) | |
401 | chunks.append(next(it)) |
|
763 | chunks.append(next(it)) | |
@@ -406,12 +768,13 b' class TestDecompressor_read_from(unittes' | |||||
406 | decompressed = b''.join(chunks) |
|
768 | decompressed = b''.join(chunks) | |
407 | self.assertEqual(decompressed, source.getvalue()) |
|
769 | self.assertEqual(decompressed, source.getvalue()) | |
408 |
|
770 | |||
|
771 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
409 | def test_large_input(self): |
|
772 | def test_large_input(self): | |
410 | bytes = list(struct.Struct('>B').pack(i) for i in range(256)) |
|
773 | bytes = list(struct.Struct('>B').pack(i) for i in range(256)) | |
411 | compressed = io.BytesIO() |
|
774 | compressed = io.BytesIO() | |
412 | input_size = 0 |
|
775 | input_size = 0 | |
413 | cctx = zstd.ZstdCompressor(level=1) |
|
776 | cctx = zstd.ZstdCompressor(level=1) | |
414 | with cctx. |
|
777 | with cctx.stream_writer(compressed) as compressor: | |
415 | while True: |
|
778 | while True: | |
416 | compressor.write(random.choice(bytes)) |
|
779 | compressor.write(random.choice(bytes)) | |
417 | input_size += 1 |
|
780 | input_size += 1 | |
@@ -426,7 +789,7 b' class TestDecompressor_read_from(unittes' | |||||
426 | zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE) |
|
789 | zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE) | |
427 |
|
790 | |||
428 | dctx = zstd.ZstdDecompressor() |
|
791 | dctx = zstd.ZstdDecompressor() | |
429 | it = dctx.read_ |
|
792 | it = dctx.read_to_iter(compressed) | |
430 |
|
793 | |||
431 | chunks = [] |
|
794 | chunks = [] | |
432 | chunks.append(next(it)) |
|
795 | chunks.append(next(it)) | |
@@ -440,7 +803,7 b' class TestDecompressor_read_from(unittes' | |||||
440 | self.assertEqual(len(decompressed), input_size) |
|
803 | self.assertEqual(len(decompressed), input_size) | |
441 |
|
804 | |||
442 | # And again with buffer protocol. |
|
805 | # And again with buffer protocol. | |
443 | it = dctx.read_ |
|
806 | it = dctx.read_to_iter(compressed.getvalue()) | |
444 |
|
807 | |||
445 | chunks = [] |
|
808 | chunks = [] | |
446 | chunks.append(next(it)) |
|
809 | chunks.append(next(it)) | |
@@ -460,7 +823,7 b' class TestDecompressor_read_from(unittes' | |||||
460 | source = io.BytesIO() |
|
823 | source = io.BytesIO() | |
461 |
|
824 | |||
462 | compressed = io.BytesIO() |
|
825 | compressed = io.BytesIO() | |
463 | with cctx. |
|
826 | with cctx.stream_writer(compressed) as compressor: | |
464 | for i in range(256): |
|
827 | for i in range(256): | |
465 | chunk = b'\0' * 1024 |
|
828 | chunk = b'\0' * 1024 | |
466 | compressor.write(chunk) |
|
829 | compressor.write(chunk) | |
@@ -473,17 +836,34 b' class TestDecompressor_read_from(unittes' | |||||
473 | self.assertEqual(simple, source.getvalue()) |
|
836 | self.assertEqual(simple, source.getvalue()) | |
474 |
|
837 | |||
475 | compressed.seek(0) |
|
838 | compressed.seek(0) | |
476 | streamed = b''.join(dctx.read_ |
|
839 | streamed = b''.join(dctx.read_to_iter(compressed)) | |
477 | self.assertEqual(streamed, source.getvalue()) |
|
840 | self.assertEqual(streamed, source.getvalue()) | |
478 |
|
841 | |||
479 | def test_read_write_size(self): |
|
842 | def test_read_write_size(self): | |
480 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar')) |
|
843 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar')) | |
481 | dctx = zstd.ZstdDecompressor() |
|
844 | dctx = zstd.ZstdDecompressor() | |
482 | for chunk in dctx.read_ |
|
845 | for chunk in dctx.read_to_iter(source, read_size=1, write_size=1): | |
483 | self.assertEqual(len(chunk), 1) |
|
846 | self.assertEqual(len(chunk), 1) | |
484 |
|
847 | |||
485 | self.assertEqual(source._read_count, len(source.getvalue())) |
|
848 | self.assertEqual(source._read_count, len(source.getvalue())) | |
486 |
|
849 | |||
|
850 | def test_magic_less(self): | |||
|
851 | params = zstd.CompressionParameters.from_level( | |||
|
852 | 1, format=zstd.FORMAT_ZSTD1_MAGICLESS) | |||
|
853 | cctx = zstd.ZstdCompressor(compression_params=params) | |||
|
854 | frame = cctx.compress(b'foobar') | |||
|
855 | ||||
|
856 | self.assertNotEqual(frame[0:4], b'\x28\xb5\x2f\xfd') | |||
|
857 | ||||
|
858 | dctx = zstd.ZstdDecompressor() | |||
|
859 | with self.assertRaisesRegexp( | |||
|
860 | zstd.ZstdError, 'error determining content size from frame header'): | |||
|
861 | dctx.decompress(frame) | |||
|
862 | ||||
|
863 | dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS) | |||
|
864 | res = b''.join(dctx.read_to_iter(frame)) | |||
|
865 | self.assertEqual(res, b'foobar') | |||
|
866 | ||||
487 |
|
867 | |||
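Note: test_magic_less above exercises frames emitted without the 4-byte zstd magic number. A brief sketch of the round trip the test performs, using the same constants that appear in the diff:

import zstandard as zstd

params = zstd.CompressionParameters.from_level(1, format=zstd.FORMAT_ZSTD1_MAGICLESS)
cctx = zstd.ZstdCompressor(compression_params=params)
frame = cctx.compress(b'foobar')            # no \x28\xb5\x2f\xfd prefix

# A decompressor configured for the magicless format can stream it back.
dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS)
assert b''.join(dctx.read_to_iter(frame)) == b'foobar'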
488 | @make_cffi |
|
868 | @make_cffi | |
489 | class TestDecompressor_content_dict_chain(unittest.TestCase): |
|
869 | class TestDecompressor_content_dict_chain(unittest.TestCase): | |
@@ -511,19 +891,20 b' class TestDecompressor_content_dict_chai' | |||||
511 | with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'): |
|
891 | with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'): | |
512 | dctx.decompress_content_dict_chain([b'foo' * 8]) |
|
892 | dctx.decompress_content_dict_chain([b'foo' * 8]) | |
513 |
|
893 | |||
514 | no_size = zstd.ZstdCompressor().compress(b'foo' * 64) |
|
894 | no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64) | |
515 |
|
895 | |||
516 | with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'): |
|
896 | with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'): | |
517 | dctx.decompress_content_dict_chain([no_size]) |
|
897 | dctx.decompress_content_dict_chain([no_size]) | |
518 |
|
898 | |||
519 | # Corrupt first frame. |
|
899 | # Corrupt first frame. | |
520 | frame = zstd.ZstdCompressor( |
|
900 | frame = zstd.ZstdCompressor().compress(b'foo' * 64) | |
521 | frame = frame[0:12] + frame[15:] |
|
901 | frame = frame[0:12] + frame[15:] | |
522 | with self.assertRaisesRegexp(zstd.ZstdError, |
|
902 | with self.assertRaisesRegexp(zstd.ZstdError, | |
|
903 | 'chunk 0 did not decompress full frame'): | |||
523 | dctx.decompress_content_dict_chain([frame]) |
|
904 | dctx.decompress_content_dict_chain([frame]) | |
524 |
|
905 | |||
525 | def test_bad_subsequent_input(self): |
|
906 | def test_bad_subsequent_input(self): | |
526 | initial = zstd.ZstdCompressor( |
|
907 | initial = zstd.ZstdCompressor().compress(b'foo' * 64) | |
527 |
|
908 | |||
528 | dctx = zstd.ZstdDecompressor() |
|
909 | dctx = zstd.ZstdDecompressor() | |
529 |
|
910 | |||
@@ -539,17 +920,17 b' class TestDecompressor_content_dict_chai' | |||||
539 | with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'): |
|
920 | with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'): | |
540 | dctx.decompress_content_dict_chain([initial, b'foo' * 8]) |
|
921 | dctx.decompress_content_dict_chain([initial, b'foo' * 8]) | |
541 |
|
922 | |||
542 | no_size = zstd.ZstdCompressor().compress(b'foo' * 64) |
|
923 | no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64) | |
543 |
|
924 | |||
544 | with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'): |
|
925 | with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'): | |
545 | dctx.decompress_content_dict_chain([initial, no_size]) |
|
926 | dctx.decompress_content_dict_chain([initial, no_size]) | |
546 |
|
927 | |||
547 | # Corrupt second frame. |
|
928 | # Corrupt second frame. | |
548 | cctx = zstd.ZstdCompressor( |
|
929 | cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b'foo' * 64)) | |
549 | frame = cctx.compress(b'bar' * 64) |
|
930 | frame = cctx.compress(b'bar' * 64) | |
550 | frame = frame[0:12] + frame[15:] |
|
931 | frame = frame[0:12] + frame[15:] | |
551 |
|
932 | |||
552 | with self.assertRaisesRegexp(zstd.ZstdError, 'c |
|
933 | with self.assertRaisesRegexp(zstd.ZstdError, 'chunk 1 did not decompress full frame'): | |
553 | dctx.decompress_content_dict_chain([initial, frame]) |
|
934 | dctx.decompress_content_dict_chain([initial, frame]) | |
554 |
|
935 | |||
555 | def test_simple(self): |
|
936 | def test_simple(self): | |
@@ -562,10 +943,10 b' class TestDecompressor_content_dict_chai' | |||||
562 | ] |
|
943 | ] | |
563 |
|
944 | |||
564 | chunks = [] |
|
945 | chunks = [] | |
565 | chunks.append(zstd.ZstdCompressor( |
|
946 | chunks.append(zstd.ZstdCompressor().compress(original[0])) | |
566 | for i, chunk in enumerate(original[1:]): |
|
947 | for i, chunk in enumerate(original[1:]): | |
567 | d = zstd.ZstdCompressionDict(original[i]) |
|
948 | d = zstd.ZstdCompressionDict(original[i]) | |
568 | cctx = zstd.ZstdCompressor(dict_data=d |
|
949 | cctx = zstd.ZstdCompressor(dict_data=d) | |
569 | chunks.append(cctx.compress(chunk)) |
|
950 | chunks.append(cctx.compress(chunk)) | |
570 |
|
951 | |||
571 | for i in range(1, len(original)): |
|
952 | for i in range(1, len(original)): | |
@@ -594,7 +975,7 b' class TestDecompressor_multi_decompress_' | |||||
594 | dctx.multi_decompress_to_buffer([b'foobarbaz']) |
|
975 | dctx.multi_decompress_to_buffer([b'foobarbaz']) | |
595 |
|
976 | |||
596 | def test_list_input(self): |
|
977 | def test_list_input(self): | |
597 | cctx = zstd.ZstdCompressor( |
|
978 | cctx = zstd.ZstdCompressor() | |
598 |
|
979 | |||
599 | original = [b'foo' * 4, b'bar' * 6] |
|
980 | original = [b'foo' * 4, b'bar' * 6] | |
600 | frames = [cctx.compress(d) for d in original] |
|
981 | frames = [cctx.compress(d) for d in original] | |
@@ -614,7 +995,7 b' class TestDecompressor_multi_decompress_' | |||||
614 | self.assertEqual(len(result[1]), 18) |
|
995 | self.assertEqual(len(result[1]), 18) | |
615 |
|
996 | |||
616 | def test_list_input_frame_sizes(self): |
|
997 | def test_list_input_frame_sizes(self): | |
617 | cctx = zstd.ZstdCompressor( |
|
998 | cctx = zstd.ZstdCompressor() | |
618 |
|
999 | |||
619 | original = [b'foo' * 4, b'bar' * 6, b'baz' * 8] |
|
1000 | original = [b'foo' * 4, b'bar' * 6, b'baz' * 8] | |
620 | frames = [cctx.compress(d) for d in original] |
|
1001 | frames = [cctx.compress(d) for d in original] | |
@@ -630,7 +1011,7 b' class TestDecompressor_multi_decompress_' | |||||
630 | self.assertEqual(result[i].tobytes(), data) |
|
1011 | self.assertEqual(result[i].tobytes(), data) | |
631 |
|
1012 | |||
632 | def test_buffer_with_segments_input(self): |
|
1013 | def test_buffer_with_segments_input(self): | |
633 | cctx = zstd.ZstdCompressor( |
|
1014 | cctx = zstd.ZstdCompressor() | |
634 |
|
1015 | |||
635 | original = [b'foo' * 4, b'bar' * 6] |
|
1016 | original = [b'foo' * 4, b'bar' * 6] | |
636 | frames = [cctx.compress(d) for d in original] |
|
1017 | frames = [cctx.compress(d) for d in original] | |
@@ -669,7 +1050,7 b' class TestDecompressor_multi_decompress_' | |||||
669 | self.assertEqual(result[i].tobytes(), data) |
|
1050 | self.assertEqual(result[i].tobytes(), data) | |
670 |
|
1051 | |||
671 | def test_buffer_with_segments_collection_input(self): |
|
1052 | def test_buffer_with_segments_collection_input(self): | |
672 | cctx = zstd.ZstdCompressor( |
|
1053 | cctx = zstd.ZstdCompressor() | |
673 |
|
1054 | |||
674 | original = [ |
|
1055 | original = [ | |
675 | b'foo0' * 2, |
|
1056 | b'foo0' * 2, | |
@@ -711,8 +1092,18 b' class TestDecompressor_multi_decompress_' | |||||
711 | for i in range(5): |
|
1092 | for i in range(5): | |
712 | self.assertEqual(decompressed[i].tobytes(), original[i]) |
|
1093 | self.assertEqual(decompressed[i].tobytes(), original[i]) | |
713 |
|
1094 | |||
|
1095 | def test_dict(self): | |||
|
1096 | d = zstd.train_dictionary(16384, generate_samples(), k=64, d=16) | |||
|
1097 | ||||
|
1098 | cctx = zstd.ZstdCompressor(dict_data=d, level=1) | |||
|
1099 | frames = [cctx.compress(s) for s in generate_samples()] | |||
|
1100 | ||||
|
1101 | dctx = zstd.ZstdDecompressor(dict_data=d) | |||
|
1102 | result = dctx.multi_decompress_to_buffer(frames) | |||
|
1103 | self.assertEqual([o.tobytes() for o in result], generate_samples()) | |||
|
1104 | ||||
714 | def test_multiple_threads(self): |
|
1105 | def test_multiple_threads(self): | |
715 | cctx = zstd.ZstdCompressor( |
|
1106 | cctx = zstd.ZstdCompressor() | |
716 |
|
1107 | |||
717 | frames = [] |
|
1108 | frames = [] | |
718 | frames.extend(cctx.compress(b'x' * 64) for i in range(256)) |
|
1109 | frames.extend(cctx.compress(b'x' * 64) for i in range(256)) | |
@@ -727,15 +1118,22 b' class TestDecompressor_multi_decompress_' | |||||
727 | self.assertEqual(result[256].tobytes(), b'y' * 64) |
|
1118 | self.assertEqual(result[256].tobytes(), b'y' * 64) | |
728 |
|
1119 | |||
729 | def test_item_failure(self): |
|
1120 | def test_item_failure(self): | |
730 | cctx = zstd.ZstdCompressor( |
|
1121 | cctx = zstd.ZstdCompressor() | |
731 | frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)] |
|
1122 | frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)] | |
732 |
|
1123 | |||
733 | frames[1] = frames[1] + b'extra' |
|
1124 | frames[1] = frames[1][0:15] + b'extra' + frames[1][15:] | |
734 |
|
1125 | |||
735 | dctx = zstd.ZstdDecompressor() |
|
1126 | dctx = zstd.ZstdDecompressor() | |
736 |
|
1127 | |||
737 | with self.assertRaisesRegexp(zstd.ZstdError, |
|
1128 | with self.assertRaisesRegexp(zstd.ZstdError, | |
|
1129 | 'error decompressing item 1: (' | |||
|
1130 | 'Corrupted block|' | |||
|
1131 | 'Destination buffer is too small)'): | |||
738 | dctx.multi_decompress_to_buffer(frames) |
|
1132 | dctx.multi_decompress_to_buffer(frames) | |
739 |
|
1133 | |||
740 | with self.assertRaisesRegexp(zstd.ZstdError, |
|
1134 | with self.assertRaisesRegexp(zstd.ZstdError, | |
|
1135 | 'error decompressing item 1: (' | |||
|
1136 | 'Corrupted block|' | |||
|
1137 | 'Destination buffer is too small)'): | |||
741 | dctx.multi_decompress_to_buffer(frames, threads=2) |
|
1138 | dctx.multi_decompress_to_buffer(frames, threads=2) | |
|
1139 |
@@ -1,10 +1,6 b'' | |||||
1 | import io |
|
1 | import io | |
2 | import os |
|
2 | import os | |
3 |
|
3 | import unittest | ||
4 | try: |
|
|||
5 | import unittest2 as unittest |
|
|||
6 | except ImportError: |
|
|||
7 | import unittest |
|
|||
8 |
|
4 | |||
9 | try: |
|
5 | try: | |
10 | import hypothesis |
|
6 | import hypothesis | |
@@ -12,7 +8,7 b' try:' | |||||
12 | except ImportError: |
|
8 | except ImportError: | |
13 | raise unittest.SkipTest('hypothesis not available') |
|
9 | raise unittest.SkipTest('hypothesis not available') | |
14 |
|
10 | |||
15 | import zstd |
|
11 | import zstandard as zstd | |
16 |
|
12 | |||
17 | from . common import ( |
|
13 | from . common import ( | |
18 | make_cffi, |
|
14 | make_cffi, | |
@@ -22,15 +18,96 b' from . common import (' | |||||
22 |
|
18 | |||
23 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
19 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |
24 | @make_cffi |
|
20 | @make_cffi | |
25 | class TestDecompressor_ |
|
21 | class TestDecompressor_stream_reader_fuzzing(unittest.TestCase): | |
|
22 | @hypothesis.settings( | |||
|
23 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |||
|
24 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
25 | level=strategies.integers(min_value=1, max_value=5), | |||
|
26 | source_read_size=strategies.integers(1, 16384), | |||
|
27 | read_sizes=strategies.data()) | |||
|
28 | def test_stream_source_read_variance(self, original, level, source_read_size, | |||
|
29 | read_sizes): | |||
|
30 | cctx = zstd.ZstdCompressor(level=level) | |||
|
31 | frame = cctx.compress(original) | |||
|
32 | ||||
|
33 | dctx = zstd.ZstdDecompressor() | |||
|
34 | source = io.BytesIO(frame) | |||
|
35 | ||||
|
36 | chunks = [] | |||
|
37 | with dctx.stream_reader(source, read_size=source_read_size) as reader: | |||
|
38 | while True: | |||
|
39 | read_size = read_sizes.draw(strategies.integers(1, 16384)) | |||
|
40 | chunk = reader.read(read_size) | |||
|
41 | if not chunk: | |||
|
42 | break | |||
|
43 | ||||
|
44 | chunks.append(chunk) | |||
|
45 | ||||
|
46 | self.assertEqual(b''.join(chunks), original) | |||
|
47 | ||||
|
48 | @hypothesis.settings( | |||
|
49 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |||
|
50 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
51 | level=strategies.integers(min_value=1, max_value=5), | |||
|
52 | source_read_size=strategies.integers(1, 16384), | |||
|
53 | read_sizes=strategies.data()) | |||
|
54 | def test_buffer_source_read_variance(self, original, level, source_read_size, | |||
|
55 | read_sizes): | |||
|
56 | cctx = zstd.ZstdCompressor(level=level) | |||
|
57 | frame = cctx.compress(original) | |||
|
58 | ||||
|
59 | dctx = zstd.ZstdDecompressor() | |||
|
60 | chunks = [] | |||
|
61 | ||||
|
62 | with dctx.stream_reader(frame, read_size=source_read_size) as reader: | |||
|
63 | while True: | |||
|
64 | read_size = read_sizes.draw(strategies.integers(1, 16384)) | |||
|
65 | chunk = reader.read(read_size) | |||
|
66 | if not chunk: | |||
|
67 | break | |||
|
68 | ||||
|
69 | chunks.append(chunk) | |||
|
70 | ||||
|
71 | self.assertEqual(b''.join(chunks), original) | |||
|
72 | ||||
|
73 | @hypothesis.settings( | |||
|
74 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |||
|
75 | @hypothesis.given( | |||
|
76 | original=strategies.sampled_from(random_input_data()), | |||
|
77 | level=strategies.integers(min_value=1, max_value=5), | |||
|
78 | source_read_size=strategies.integers(1, 16384), | |||
|
79 | seek_amounts=strategies.data(), | |||
|
80 | read_sizes=strategies.data()) | |||
|
81 | def test_relative_seeks(self, original, level, source_read_size, seek_amounts, | |||
|
82 | read_sizes): | |||
|
83 | cctx = zstd.ZstdCompressor(level=level) | |||
|
84 | frame = cctx.compress(original) | |||
|
85 | ||||
|
86 | dctx = zstd.ZstdDecompressor() | |||
|
87 | ||||
|
88 | with dctx.stream_reader(frame, read_size=source_read_size) as reader: | |||
|
89 | while True: | |||
|
90 | amount = seek_amounts.draw(strategies.integers(0, 16384)) | |||
|
91 | reader.seek(amount, os.SEEK_CUR) | |||
|
92 | ||||
|
93 | offset = reader.tell() | |||
|
94 | read_amount = read_sizes.draw(strategies.integers(1, 16384)) | |||
|
95 | chunk = reader.read(read_amount) | |||
|
96 | ||||
|
97 | if not chunk: | |||
|
98 | break | |||
|
99 | ||||
|
100 | self.assertEqual(original[offset:offset + len(chunk)], chunk) | |||
|
101 | ||||
|
102 | ||||
|
103 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
|
104 | @make_cffi | |||
|
105 | class TestDecompressor_stream_writer_fuzzing(unittest.TestCase): | |||
26 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
106 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
27 | level=strategies.integers(min_value=1, max_value=5), |
|
107 | level=strategies.integers(min_value=1, max_value=5), | |
28 | write_size=strategies.integers(min_value=1, max_value=8192), |
|
108 | write_size=strategies.integers(min_value=1, max_value=8192), | |
29 | input_sizes=strategies. |
|
109 | input_sizes=strategies.data()) | |
30 | strategies.integers(min_value=1, max_value=4096))) |
|
|||
31 | def test_write_size_variance(self, original, level, write_size, input_sizes): |
|
110 | def test_write_size_variance(self, original, level, write_size, input_sizes): | |
32 | input_sizes = iter(input_sizes) |
|
|||
33 |
|
||||
34 | cctx = zstd.ZstdCompressor(level=level) |
|
111 | cctx = zstd.ZstdCompressor(level=level) | |
35 | frame = cctx.compress(original) |
|
112 | frame = cctx.compress(original) | |
36 |
|
113 | |||
@@ -38,9 +115,10 b' class TestDecompressor_write_to_fuzzing(' | |||||
38 | source = io.BytesIO(frame) |
|
115 | source = io.BytesIO(frame) | |
39 | dest = io.BytesIO() |
|
116 | dest = io.BytesIO() | |
40 |
|
117 | |||
41 | with dctx.write_to(dest, write_size=write_size) as decompressor:
118 | with dctx.stream_writer(dest, write_size=write_size) as decompressor:
42 | while True: |
|
119 | while True: | |
43 | chunk = source.read(next(input_sizes)) |
|
120 | input_size = input_sizes.draw(strategies.integers(1, 4096)) | |
|
121 | chunk = source.read(input_size) | |||
44 | if not chunk: |
|
122 | if not chunk: | |
45 | break |
|
123 | break | |
46 |
|
124 | |||
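This hunk captures two migrations at once: dctx.write_to() becomes dctx.stream_writer(), and the input-size strategy moves to strategies.data(), with sizes drawn lazily inside the test body. A self-contained sketch of that draw-inside-the-loop pattern (illustrative only; the test name and strategies are invented):

    import io

    import hypothesis
    import hypothesis.strategies as strategies
    import zstandard as zstd


    @hypothesis.given(original=strategies.binary(min_size=1, max_size=8192),
                      input_sizes=strategies.data())
    def test_stream_writer_roundtrip(original, input_sizes):
        frame = zstd.ZstdCompressor(level=1).compress(original)
        source = io.BytesIO(frame)
        dest = io.BytesIO()

        dctx = zstd.ZstdDecompressor()
        with dctx.stream_writer(dest) as decompressor:
            while True:
                # strategies.data() defers each draw until the test runs.
                read_size = input_sizes.draw(strategies.integers(1, 4096))
                chunk = source.read(read_size)
                if not chunk:
                    break
                decompressor.write(chunk)

        assert dest.getvalue() == original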
@@ -74,11 +152,8 b' class TestDecompressor_copy_stream_fuzzi' | |||||
74 | class TestDecompressor_decompressobj_fuzzing(unittest.TestCase): |
|
152 | class TestDecompressor_decompressobj_fuzzing(unittest.TestCase): | |
75 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
153 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
76 | level=strategies.integers(min_value=1, max_value=5), |
|
154 | level=strategies.integers(min_value=1, max_value=5), | |
77 | chunk_sizes=strategies.streaming(
78 | strategies.integers(min_value=1, max_value=4096)))
155 | chunk_sizes=strategies.data())
79 | def test_random_input_sizes(self, original, level, chunk_sizes): |
|
156 | def test_random_input_sizes(self, original, level, chunk_sizes): | |
80 | chunk_sizes = iter(chunk_sizes) |
|
|||
81 |
|
||||
82 | cctx = zstd.ZstdCompressor(level=level) |
|
157 | cctx = zstd.ZstdCompressor(level=level) | |
83 | frame = cctx.compress(original) |
|
158 | frame = cctx.compress(original) | |
84 |
|
159 | |||
@@ -89,7 +164,33 b' class TestDecompressor_decompressobj_fuz' | |||||
89 |
|
164 | |||
90 | chunks = [] |
|
165 | chunks = [] | |
91 | while True: |
|
166 | while True: | |
92 | chunk = source.read(next(chunk_sizes)) |
|
167 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) | |
|
168 | chunk = source.read(chunk_size) | |||
|
169 | if not chunk: | |||
|
170 | break | |||
|
171 | ||||
|
172 | chunks.append(dobj.decompress(chunk)) | |||
|
173 | ||||
|
174 | self.assertEqual(b''.join(chunks), original) | |||
|
175 | ||||
|
176 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
177 | level=strategies.integers(min_value=1, max_value=5), | |||
|
178 | write_size=strategies.integers(min_value=1, | |||
|
179 | max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |||
|
180 | chunk_sizes=strategies.data()) | |||
|
181 | def test_random_output_sizes(self, original, level, write_size, chunk_sizes): | |||
|
182 | cctx = zstd.ZstdCompressor(level=level) | |||
|
183 | frame = cctx.compress(original) | |||
|
184 | ||||
|
185 | source = io.BytesIO(frame) | |||
|
186 | ||||
|
187 | dctx = zstd.ZstdDecompressor() | |||
|
188 | dobj = dctx.decompressobj(write_size=write_size) | |||
|
189 | ||||
|
190 | chunks = [] | |||
|
191 | while True: | |||
|
192 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) | |||
|
193 | chunk = source.read(chunk_size) | |||
93 | if not chunk: |
|
194 | if not chunk: | |
94 | break |
|
195 | break | |
95 |
|
196 | |||
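test_random_output_sizes above exercises the new write_size argument to decompressobj(), which appears to control the size of the output buffer each decompress() call uses. A plain, non-randomized sketch (sizes are arbitrary, not from the patch):

    import io

    import zstandard as zstd

    original = b'sample payload ' * 4096
    frame = zstd.ZstdCompressor(level=3).compress(original)

    dctx = zstd.ZstdDecompressor()
    dobj = dctx.decompressobj(
        write_size=2 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)

    source = io.BytesIO(frame)
    chunks = []
    while True:
        chunk = source.read(4096)
        if not chunk:
            break
        chunks.append(dobj.decompress(chunk))

    assert b''.join(chunks) == original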
@@ -100,7 +201,7 b' class TestDecompressor_decompressobj_fuz' | |||||
100 |
|
201 | |||
101 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
202 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |
102 | @make_cffi |
|
203 | @make_cffi | |
103 | class TestDecompressor_read_from_fuzzing(unittest.TestCase):
204 | class TestDecompressor_read_to_iter_fuzzing(unittest.TestCase):
104 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
205 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
105 | level=strategies.integers(min_value=1, max_value=5), |
|
206 | level=strategies.integers(min_value=1, max_value=5), | |
106 | read_size=strategies.integers(min_value=1, max_value=4096), |
|
207 | read_size=strategies.integers(min_value=1, max_value=4096), | |
@@ -112,7 +213,7 b' class TestDecompressor_read_from_fuzzing' | |||||
112 | source = io.BytesIO(frame) |
|
213 | source = io.BytesIO(frame) | |
113 |
|
214 | |||
114 | dctx = zstd.ZstdDecompressor() |
|
215 | dctx = zstd.ZstdDecompressor() | |
115 | chunks = list(dctx.read_from(source, read_size=read_size, write_size=write_size))
216 | chunks = list(dctx.read_to_iter(source, read_size=read_size, write_size=write_size))
116 |
|
217 | |||
117 | self.assertEqual(b''.join(chunks), original) |
|
218 | self.assertEqual(b''.join(chunks), original) | |
118 |
|
219 |
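The read_from() to read_to_iter() change appears to be a straight rename at these call sites. For reference, a short sketch of the renamed API (data and sizes are illustrative):

    import io

    import zstandard as zstd

    original = b'chunked input ' * 2048
    frame = zstd.ZstdCompressor(level=3).compress(original)

    dctx = zstd.ZstdDecompressor()
    # Lazily yields decompressed chunks from a file-like source.
    chunks = list(dctx.read_to_iter(io.BytesIO(frame),
                                    read_size=8192,
                                    write_size=16384))

    assert b''.join(chunks) == original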
@@ -1,9 +1,6 b'' | |||||
1 | try: |
|
1 | import unittest | |
2 | import unittest2 as unittest |
|
|||
3 | except ImportError: |
|
|||
4 | import unittest |
|
|||
5 |
|
2 | |||
6 | import zstd |
|
3 | import zstandard as zstd | |
7 |
|
4 | |||
8 | from . common import ( |
|
5 | from . common import ( | |
9 | make_cffi, |
|
6 | make_cffi, | |
@@ -16,7 +13,3 b' class TestSizes(unittest.TestCase):' | |||||
16 | size = zstd.estimate_decompression_context_size() |
|
13 | size = zstd.estimate_decompression_context_size() | |
17 | self.assertGreater(size, 100000) |
|
14 | self.assertGreater(size, 100000) | |
18 |
|
15 | |||
19 | def test_compression_size(self): |
|
|||
20 | params = zstd.get_compression_parameters(3) |
|
|||
21 | size = zstd.estimate_compression_context_size(params) |
|
|||
22 | self.assertGreater(size, 100000) |
|
@@ -1,11 +1,8 b'' | |||||
1 | from __future__ import unicode_literals |
|
1 | from __future__ import unicode_literals | |
2 |
|
2 | |||
3 | try: |
|
3 | import unittest | |
4 | import unittest2 as unittest |
|
|||
5 | except ImportError: |
|
|||
6 | import unittest |
|
|||
7 |
|
4 | |||
8 | import zstd |
|
5 | import zstandard as zstd | |
9 |
|
6 | |||
10 | from . common import ( |
|
7 | from . common import ( | |
11 | make_cffi, |
|
8 | make_cffi, | |
@@ -15,7 +12,7 b' from . common import (' | |||||
15 | @make_cffi |
|
12 | @make_cffi | |
16 | class TestModuleAttributes(unittest.TestCase): |
|
13 | class TestModuleAttributes(unittest.TestCase): | |
17 | def test_version(self): |
|
14 | def test_version(self): | |
18 | self.assertEqual(zstd.ZSTD_VERSION, (1,
15 | self.assertEqual(zstd.ZSTD_VERSION, (1, 3, 4))
19 |
|
16 | |||
20 | def test_constants(self): |
|
17 | def test_constants(self): | |
21 | self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22) |
|
18 | self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22) | |
@@ -23,6 +20,8 b' class TestModuleAttributes(unittest.Test' | |||||
23 |
|
20 | |||
24 | def test_hasattr(self): |
|
21 | def test_hasattr(self): | |
25 | attrs = ( |
|
22 | attrs = ( | |
|
23 | 'CONTENTSIZE_UNKNOWN', | |||
|
24 | 'CONTENTSIZE_ERROR', | |||
26 | 'COMPRESSION_RECOMMENDED_INPUT_SIZE', |
|
25 | 'COMPRESSION_RECOMMENDED_INPUT_SIZE', | |
27 | 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE', |
|
26 | 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE', | |
28 | 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE', |
|
27 | 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE', | |
@@ -40,7 +39,9 b' class TestModuleAttributes(unittest.Test' | |||||
40 | 'SEARCHLENGTH_MIN', |
|
39 | 'SEARCHLENGTH_MIN', | |
41 | 'SEARCHLENGTH_MAX', |
|
40 | 'SEARCHLENGTH_MAX', | |
42 | 'TARGETLENGTH_MIN', |
|
41 | 'TARGETLENGTH_MIN', | |
43 | 'TARGETLENGTH_MAX',
42 | 'LDM_MINMATCH_MIN',
|
43 | 'LDM_MINMATCH_MAX', | |||
|
44 | 'LDM_BUCKETSIZELOG_MAX', | |||
44 | 'STRATEGY_FAST', |
|
45 | 'STRATEGY_FAST', | |
45 | 'STRATEGY_DFAST', |
|
46 | 'STRATEGY_DFAST', | |
46 | 'STRATEGY_GREEDY', |
|
47 | 'STRATEGY_GREEDY', | |
@@ -48,6 +49,10 b' class TestModuleAttributes(unittest.Test' | |||||
48 | 'STRATEGY_LAZY2', |
|
49 | 'STRATEGY_LAZY2', | |
49 | 'STRATEGY_BTLAZY2', |
|
50 | 'STRATEGY_BTLAZY2', | |
50 | 'STRATEGY_BTOPT', |
|
51 | 'STRATEGY_BTOPT', | |
|
52 | 'STRATEGY_BTULTRA', | |||
|
53 | 'DICT_TYPE_AUTO', | |||
|
54 | 'DICT_TYPE_RAWCONTENT', | |||
|
55 | 'DICT_TYPE_FULLDICT', | |||
51 | ) |
|
56 | ) | |
52 |
|
57 | |||
53 | for a in attrs: |
|
58 | for a in attrs: |
@@ -1,13 +1,11 b'' | |||||
|
1 | import struct | |||
1 | import sys |
|
2 | import sys | |
|
3 | import unittest | |||
2 |
|
4 | |||
3 | try: |
|
5 | import zstandard as zstd | |
4 | import unittest2 as unittest |
|
|||
5 | except ImportError: |
|
|||
6 | import unittest |
|
|||
7 |
|
||||
8 | import zstd |
|
|||
9 |
|
6 | |||
10 | from . common import ( |
|
7 | from . common import ( | |
|
8 | generate_samples, | |||
11 | make_cffi, |
|
9 | make_cffi, | |
12 | ) |
|
10 | ) | |
13 |
|
11 | |||
@@ -30,55 +28,18 b' class TestTrainDictionary(unittest.TestC' | |||||
30 | with self.assertRaises(ValueError): |
|
28 | with self.assertRaises(ValueError): | |
31 | zstd.train_dictionary(8192, [u'foo']) |
|
29 | zstd.train_dictionary(8192, [u'foo']) | |
32 |
|
30 | |||
33 | def test_basic(self):
31 | def test_no_params(self):
34 | samples = [] |
|
32 | d = zstd.train_dictionary(8192, generate_samples()) | |
35 | for i in range(128): |
|
33 | self.assertIsInstance(d.dict_id(), int_type) | |
36 | samples.append(b'foo' * 64) |
|
|||
37 | samples.append(b'bar' * 64) |
|
|||
38 | samples.append(b'foobar' * 64) |
|
|||
39 | samples.append(b'baz' * 64) |
|
|||
40 | samples.append(b'foobaz' * 64) |
|
|||
41 | samples.append(b'bazfoo' * 64) |
|
|||
42 |
|
34 | |||
43 | d = zstd.train_dictionary(8192, samples) |
|
35 | # The dictionary ID may be different across platforms. | |
44 | self.assertLessEqual(len(d), 8192) |
|
36 | expected = b'\x37\xa4\x30\xec' + struct.pack('<I', d.dict_id()) | |
45 |
|
||||
46 | dict_id = d.dict_id() |
|
|||
47 | self.assertIsInstance(dict_id, int_type) |
|
|||
48 |
|
37 | |||
49 | data = d.as_bytes() |
|
38 | data = d.as_bytes() | |
50 |
self.assertEqual(data[0: |
|
39 | self.assertEqual(data[0:8], expected) | |
51 |
|
||||
52 | def test_set_dict_id(self): |
|
|||
53 | samples = [] |
|
|||
54 | for i in range(128): |
|
|||
55 | samples.append(b'foo' * 64) |
|
|||
56 | samples.append(b'foobar' * 64) |
|
|||
57 |
|
||||
58 | d = zstd.train_dictionary(8192, samples, dict_id=42) |
|
|||
59 | self.assertEqual(d.dict_id(), 42) |
|
|||
60 |
|
||||
61 |
|
||||
62 | @make_cffi |
|
|||
63 | class TestTrainCoverDictionary(unittest.TestCase): |
|
|||
64 | def test_no_args(self): |
|
|||
65 | with self.assertRaises(TypeError): |
|
|||
66 | zstd.train_cover_dictionary() |
|
|||
67 |
|
||||
68 | def test_bad_args(self): |
|
|||
69 | with self.assertRaises(TypeError): |
|
|||
70 | zstd.train_cover_dictionary(8192, u'foo') |
|
|||
71 |
|
||||
72 | with self.assertRaises(ValueError): |
|
|||
73 | zstd.train_cover_dictionary(8192, [u'foo']) |
|
|||
74 |
|
40 | |||
75 | def test_basic(self): |
|
41 | def test_basic(self): | |
76 | samples = [] |
|
42 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16) | |
77 | for i in range(128): |
|
|||
78 | samples.append(b'foo' * 64) |
|
|||
79 | samples.append(b'foobar' * 64) |
|
|||
80 |
|
||||
81 | d = zstd.train_cover_dictionary(8192, samples, k=64, d=16) |
|
|||
82 | self.assertIsInstance(d.dict_id(), int_type) |
|
43 | self.assertIsInstance(d.dict_id(), int_type) | |
83 |
|
44 | |||
84 | data = d.as_bytes() |
|
45 | data = d.as_bytes() | |
@@ -88,23 +49,39 b' class TestTrainCoverDictionary(unittest.' | |||||
88 | self.assertEqual(d.d, 16) |
|
49 | self.assertEqual(d.d, 16) | |
89 |
|
50 | |||
90 | def test_set_dict_id(self): |
|
51 | def test_set_dict_id(self): | |
91 | samples = [] |
|
52 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16, | |
92 | for i in range(128): |
|
53 | dict_id=42) | |
93 | samples.append(b'foo' * 64) |
|
|||
94 | samples.append(b'foobar' * 64) |
|
|||
95 |
|
||||
96 | d = zstd.train_cover_dictionary(8192, samples, k=64, d=16, |
|
|||
97 | dict_id=42) |
|
|||
98 | self.assertEqual(d.dict_id(), 42) |
|
54 | self.assertEqual(d.dict_id(), 42) | |
99 |
|
55 | |||
100 | def test_optimize(self): |
|
56 | def test_optimize(self): | |
101 | samples = [] |
|
57 | d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1, | |
102 | for i in range(128): |
|
58 | d=16) | |
103 | samples.append(b'foo' * 64) |
|
59 | ||
104 | samples.append(b'foobar' * 64) |
|
60 | self.assertEqual(d.k, 50) | |
|
61 | self.assertEqual(d.d, 16) | |||
|
62 | ||||
|
63 | @make_cffi | |||
|
64 | class TestCompressionDict(unittest.TestCase): | |||
|
65 | def test_bad_mode(self): | |||
|
66 | with self.assertRaisesRegexp(ValueError, 'invalid dictionary load mode'): | |||
|
67 | zstd.ZstdCompressionDict(b'foo', dict_type=42) | |||
|
68 | ||||
|
69 | def test_bad_precompute_compress(self): | |||
|
70 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16) | |||
105 |
|
71 | |||
106 | d = zstd.train_cover_dictionary(8192, samples, optimize=True, |
|
72 | with self.assertRaisesRegexp(ValueError, 'must specify one of level or '): | |
107 | threads=-1, steps=1, d=16) |
|
73 | d.precompute_compress() | |
|
74 | ||||
|
75 | with self.assertRaisesRegexp(ValueError, 'must only specify one of level or '): | |||
|
76 | d.precompute_compress(level=3, | |||
|
77 | compression_params=zstd.CompressionParameters()) | |||
108 |
|
78 | |||
109 | self.assertEqual(d.k, 16) |
|
79 | def test_precompute_compress_rawcontent(self): | |
110 | self.assertEqual(d.d, 16) |
|
80 | d = zstd.ZstdCompressionDict(b'dictcontent' * 64, | |
|
81 | dict_type=zstd.DICT_TYPE_RAWCONTENT) | |||
|
82 | d.precompute_compress(level=1) | |||
|
83 | ||||
|
84 | d = zstd.ZstdCompressionDict(b'dictcontent' * 64, | |||
|
85 | dict_type=zstd.DICT_TYPE_FULLDICT) | |||
|
86 | with self.assertRaisesRegexp(zstd.ZstdError, 'unable to precompute dictionary'): | |||
|
87 | d.precompute_compress(level=1) |
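The new TestCompressionDict cases cover the explicit dictionary load mode (dict_type=) and precompute_compress(), which builds a dictionary's compression tables ahead of time. A hedged sketch of how the two combine (content, sizes, and level are placeholders, not taken from the patch):

    import zstandard as zstd

    data = b'dictcontent' * 16

    # Raw-content dictionaries are plain prefix bytes rather than a
    # structured zstd dictionary, so precomputation accepts arbitrary content.
    d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
                                 dict_type=zstd.DICT_TYPE_RAWCONTENT)
    d.precompute_compress(level=3)

    cctx = zstd.ZstdCompressor(dict_data=d)
    frame = cctx.compress(data)

    dctx = zstd.ZstdDecompressor(dict_data=d)
    assert dctx.decompress(frame, max_output_size=len(data)) == data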
@@ -20,12 +20,6 b'' | |||||
20 |
|
20 | |||
21 | PyObject *ZstdError; |
|
21 | PyObject *ZstdError; | |
22 |
|
22 | |||
23 | PyDoc_STRVAR(estimate_compression_context_size__doc__, |
|
|||
24 | "estimate_compression_context_size(compression_parameters)\n" |
|
|||
25 | "\n" |
|
|||
26 | "Give the amount of memory allocated for a compression context given a\n" |
|
|||
27 | "CompressionParameters instance"); |
|
|||
28 |
|
||||
29 | PyDoc_STRVAR(estimate_decompression_context_size__doc__, |
|
23 | PyDoc_STRVAR(estimate_decompression_context_size__doc__, | |
30 | "estimate_decompression_context_size()\n" |
|
24 | "estimate_decompression_context_size()\n" | |
31 | "\n" |
|
25 | "\n" | |
@@ -36,11 +30,101 b' static PyObject* estimate_decompression_' | |||||
36 | return PyLong_FromSize_t(ZSTD_estimateDCtxSize()); |
|
30 | return PyLong_FromSize_t(ZSTD_estimateDCtxSize()); | |
37 | } |
|
31 | } | |
38 |
|
32 | |||
39 |
PyDoc_STRVAR( |
|
33 | PyDoc_STRVAR(frame_content_size__doc__, | |
40 | "get_compression_parameters(compression_level[, source_size[, dict_size]])\n" |
|
34 | "frame_content_size(data)\n" | |
41 | "\n" |
|
35 | "\n" | |
42 | "Obtains a ``CompressionParameters`` instance from a compression level and\n" |
|
36 | "Obtain the decompressed size of a frame." | |
43 | "optional input size and dictionary size"); |
|
37 | ); | |
|
38 | ||||
|
39 | static PyObject* frame_content_size(PyObject* self, PyObject* args, PyObject* kwargs) { | |||
|
40 | static char* kwlist[] = { | |||
|
41 | "source", | |||
|
42 | NULL | |||
|
43 | }; | |||
|
44 | ||||
|
45 | Py_buffer source; | |||
|
46 | PyObject* result = NULL; | |||
|
47 | unsigned long long size; | |||
|
48 | ||||
|
49 | #if PY_MAJOR_VERSION >= 3 | |||
|
50 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_content_size", | |||
|
51 | #else | |||
|
52 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_content_size", | |||
|
53 | #endif | |||
|
54 | kwlist, &source)) { | |||
|
55 | return NULL; | |||
|
56 | } | |||
|
57 | ||||
|
58 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |||
|
59 | PyErr_SetString(PyExc_ValueError, | |||
|
60 | "data buffer should be contiguous and have at most one dimension"); | |||
|
61 | goto finally; | |||
|
62 | } | |||
|
63 | ||||
|
64 | size = ZSTD_getFrameContentSize(source.buf, source.len); | |||
|
65 | ||||
|
66 | if (size == ZSTD_CONTENTSIZE_ERROR) { | |||
|
67 | PyErr_SetString(ZstdError, "error when determining content size"); | |||
|
68 | } | |||
|
69 | else if (size == ZSTD_CONTENTSIZE_UNKNOWN) { | |||
|
70 | result = PyLong_FromLong(-1); | |||
|
71 | } | |||
|
72 | else { | |||
|
73 | result = PyLong_FromUnsignedLongLong(size); | |||
|
74 | } | |||
|
75 | ||||
|
76 | finally: | |||
|
77 | PyBuffer_Release(&source); | |||
|
78 | ||||
|
79 | return result; | |||
|
80 | } | |||
|
81 | ||||
|
82 | PyDoc_STRVAR(frame_header_size__doc__, | |||
|
83 | "frame_header_size(data)\n" | |||
|
84 | "\n" | |||
|
85 | "Obtain the size of a frame header.\n" | |||
|
86 | ); | |||
|
87 | ||||
|
88 | static PyObject* frame_header_size(PyObject* self, PyObject* args, PyObject* kwargs) { | |||
|
89 | static char* kwlist[] = { | |||
|
90 | "source", | |||
|
91 | NULL | |||
|
92 | }; | |||
|
93 | ||||
|
94 | Py_buffer source; | |||
|
95 | PyObject* result = NULL; | |||
|
96 | size_t zresult; | |||
|
97 | ||||
|
98 | #if PY_MAJOR_VERSION >= 3 | |||
|
99 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_header_size", | |||
|
100 | #else | |||
|
101 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_header_size", | |||
|
102 | #endif | |||
|
103 | kwlist, &source)) { | |||
|
104 | return NULL; | |||
|
105 | } | |||
|
106 | ||||
|
107 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |||
|
108 | PyErr_SetString(PyExc_ValueError, | |||
|
109 | "data buffer should be contiguous and have at most one dimension"); | |||
|
110 | goto finally; | |||
|
111 | } | |||
|
112 | ||||
|
113 | zresult = ZSTD_frameHeaderSize(source.buf, source.len); | |||
|
114 | if (ZSTD_isError(zresult)) { | |||
|
115 | PyErr_Format(ZstdError, "could not determine frame header size: %s", | |||
|
116 | ZSTD_getErrorName(zresult)); | |||
|
117 | } | |||
|
118 | else { | |||
|
119 | result = PyLong_FromSize_t(zresult); | |||
|
120 | } | |||
|
121 | ||||
|
122 | finally: | |||
|
123 | ||||
|
124 | PyBuffer_Release(&source); | |||
|
125 | ||||
|
126 | return result; | |||
|
127 | } | |||
44 |
|
128 | |||
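Both new helpers are thin wrappers: frame_content_size() maps ZSTD_CONTENTSIZE_UNKNOWN to -1 and raises ZstdError on malformed input, while frame_header_size() reports how many bytes the frame header occupies. A small sketch from the Python side (the payload is illustrative):

    import zstandard as zstd

    payload = b'x' * 1000
    frame = zstd.ZstdCompressor(level=3).compress(payload)

    # Decompressed size recorded in the frame, or -1 if it was not written.
    print(zstd.frame_content_size(frame))

    # Size in bytes of the frame header itself.
    print(zstd.frame_header_size(frame))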
45 | PyDoc_STRVAR(get_frame_parameters__doc__, |
|
129 | PyDoc_STRVAR(get_frame_parameters__doc__, | |
46 | "get_frame_parameters(data)\n" |
|
130 | "get_frame_parameters(data)\n" | |
@@ -48,43 +132,48 b' PyDoc_STRVAR(get_frame_parameters__doc__' | |||||
48 | "Obtains a ``FrameParameters`` instance by parsing data.\n"); |
|
132 | "Obtains a ``FrameParameters`` instance by parsing data.\n"); | |
49 |
|
133 | |||
50 | PyDoc_STRVAR(train_dictionary__doc__, |
|
134 | PyDoc_STRVAR(train_dictionary__doc__, | |
51 | "train_dictionary(dict_size, samples
135 | "train_dictionary(dict_size, samples, k=None, d=None, steps=None,\n"
52 | "\n" |
|
136 | " threads=None,notifications=0, dict_id=0, level=0)\n" | |
53 | "Train a dictionary from sample data.\n" |
|
|||
54 | "\n" |
|
|||
55 | "A compression dictionary of size ``dict_size`` will be created from the\n" |
|
|||
56 | "iterable of samples provided by ``samples``.\n" |
|
|||
57 | "\n" |
|
|||
58 | "The raw dictionary content will be returned\n"); |
|
|||
59 |
|
||||
60 | PyDoc_STRVAR(train_cover_dictionary__doc__, |
|
|||
61 | "train_cover_dictionary(dict_size, samples, k=None, d=None, notifications=0, dict_id=0, level=0)\n" |
|
|||
62 | "\n" |
|
137 | "\n" | |
63 | "Train a dictionary from sample data using the COVER algorithm.\n" |
|
138 | "Train a dictionary from sample data using the COVER algorithm.\n" | |
64 | "\n" |
|
139 | "\n" | |
65 | "This behaves like ``train_dictionary()`` except a different algorithm is\n" |
|
140 | "A compression dictionary of size ``dict_size`` will be created from the\n" | |
66 | "used to create the dictionary. The algorithm has 2 parameters: ``k`` and\n" |
|
141 | "iterable of ``samples``. The raw dictionary bytes will be returned.\n" | |
67 | "``d``. These control the *segment size* and *dmer size*. A reasonable range\n" |
|
142 | "\n" | |
68 | "for ``k`` is ``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n" |
|
143 | "The COVER algorithm has 2 parameters: ``k`` and ``d``. These control the\n" | |
|
144 | "*segment size* and *dmer size*. A reasonable range for ``k`` is\n" | |||
|
145 | "``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n" | |||
69 | "``d`` must be less than or equal to ``k``.\n" |
|
146 | "``d`` must be less than or equal to ``k``.\n" | |
|
147 | "\n" | |||
|
148 | "``steps`` can be specified to control the number of steps through potential\n" | |||
|
149 | "values of ``k`` and ``d`` to try. ``k`` and ``d`` will only be varied if\n" | |||
|
150 | "those arguments are not defined. i.e. if ``d`` is ``8``, then only ``k``\n" | |||
|
151 | "will be varied in this mode.\n" | |||
|
152 | "\n" | |||
|
153 | "``threads`` can specify how many threads to use to test various ``k`` and\n" | |||
|
154 | "``d`` values. ``-1`` will use as many threads as available CPUs. By default,\n" | |||
|
155 | "a single thread is used.\n" | |||
|
156 | "\n" | |||
|
157 | "When ``k`` and ``d`` are not defined, default values are used and the\n" | |||
|
158 | "algorithm will perform multiple iterations - or steps - to try to find\n" | |||
|
159 | "ideal parameters. If both ``k`` and ``d`` are specified, then those values\n" | |||
|
160 | "will be used. ``steps`` or ``threads`` triggers optimization mode to test\n" | |||
|
161 | "multiple ``k`` and ``d`` variations.\n" | |||
70 | ); |
|
162 | ); | |
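The reworked docstring folds the old COVER-specific entry point into train_dictionary() and documents two ways to drive it. A hedged Python sketch of both modes, mirroring the parameters used in the tests elsewhere in this series (the sample data is fabricated):

    import zstandard as zstd

    # Fabricated training samples; real use would feed representative inputs.
    samples = []
    for i in range(128):
        samples.append(b'foo' * 64)
        samples.append(b'foobar' * 64)
        samples.append(b'baz' * 64)

    # Explicit COVER parameters: use these k/d values as given.
    d1 = zstd.train_dictionary(8192, samples, k=64, d=16)

    # Optimization mode: fix d, let the trainer search k across one step,
    # using as many threads as there are CPUs.
    d2 = zstd.train_dictionary(8192, samples, threads=-1, steps=1, d=16)

    print(d1.dict_id(), d1.k, d1.d)
    print(d2.dict_id(), d2.k, d2.d)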
71 |
|
163 | |||
72 | static char zstd_doc[] = "Interface to zstandard"; |
|
164 | static char zstd_doc[] = "Interface to zstandard"; | |
73 |
|
165 | |||
74 | static PyMethodDef zstd_methods[] = { |
|
166 | static PyMethodDef zstd_methods[] = { | |
75 | /* TODO remove since it is a method on CompressionParameters. */ |
|
|||
76 | { "estimate_compression_context_size", (PyCFunction)estimate_compression_context_size, |
|
|||
77 | METH_VARARGS, estimate_compression_context_size__doc__ }, |
|
|||
78 | { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size, |
|
167 | { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size, | |
79 | METH_NOARGS, estimate_decompression_context_size__doc__ }, |
|
168 | METH_NOARGS, estimate_decompression_context_size__doc__ }, | |
80 | { "get_compression_parameters", (PyCFunction)get_compression_parameters, |
|
169 | { "frame_content_size", (PyCFunction)frame_content_size, | |
81 | METH_VARARGS, get_compression_parameters__doc__ }, |
|
170 | METH_VARARGS | METH_KEYWORDS, frame_content_size__doc__ }, | |
|
171 | { "frame_header_size", (PyCFunction)frame_header_size, | |||
|
172 | METH_VARARGS | METH_KEYWORDS, frame_header_size__doc__ }, | |||
82 | { "get_frame_parameters", (PyCFunction)get_frame_parameters, |
|
173 | { "get_frame_parameters", (PyCFunction)get_frame_parameters, | |
83 | METH_VARARGS, get_frame_parameters__doc__ }, |
|
174 | METH_VARARGS | METH_KEYWORDS, get_frame_parameters__doc__ }, | |
84 | { "train_dictionary", (PyCFunction)train_dictionary, |
|
175 | { "train_dictionary", (PyCFunction)train_dictionary, | |
85 | METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ }, |
|
176 | METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ }, | |
86 | { "train_cover_dictionary", (PyCFunction)train_cover_dictionary, |
|
|||
87 | METH_VARARGS | METH_KEYWORDS, train_cover_dictionary__doc__ }, |
|
|||
88 | { NULL, NULL } |
|
177 | { NULL, NULL } | |
89 | }; |
|
178 | }; | |
90 |
|
179 | |||
@@ -94,10 +183,12 b' void compressor_module_init(PyObject* mo' | |||||
94 | void compressionparams_module_init(PyObject* mod); |
|
183 | void compressionparams_module_init(PyObject* mod); | |
95 | void constants_module_init(PyObject* mod); |
|
184 | void constants_module_init(PyObject* mod); | |
96 | void compressiondict_module_init(PyObject* mod); |
|
185 | void compressiondict_module_init(PyObject* mod); | |
|
186 | void compressionreader_module_init(PyObject* mod); | |||
97 | void compressionwriter_module_init(PyObject* mod); |
|
187 | void compressionwriter_module_init(PyObject* mod); | |
98 | void compressoriterator_module_init(PyObject* mod); |
|
188 | void compressoriterator_module_init(PyObject* mod); | |
99 | void decompressor_module_init(PyObject* mod); |
|
189 | void decompressor_module_init(PyObject* mod); | |
100 | void decompressobj_module_init(PyObject* mod); |
|
190 | void decompressobj_module_init(PyObject* mod); | |
|
191 | void decompressionreader_module_init(PyObject *mod); | |||
101 | void decompressionwriter_module_init(PyObject* mod); |
|
192 | void decompressionwriter_module_init(PyObject* mod); | |
102 | void decompressoriterator_module_init(PyObject* mod); |
|
193 | void decompressoriterator_module_init(PyObject* mod); | |
103 | void frameparams_module_init(PyObject* mod); |
|
194 | void frameparams_module_init(PyObject* mod); | |
@@ -118,7 +209,7 b' void zstd_module_init(PyObject* m) {' | |||||
118 | We detect this mismatch here and refuse to load the module if this |
|
209 | We detect this mismatch here and refuse to load the module if this | |
119 | scenario is detected. |
|
210 | scenario is detected. | |
120 | */ |
|
211 | */ | |
121 | if (ZSTD_VERSION_NUMBER !=
212 | if (ZSTD_VERSION_NUMBER != 10304 || ZSTD_versionNumber() != 10304) {
122 | PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version"); |
|
213 | PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version"); | |
123 | return; |
|
214 | return; | |
124 | } |
|
215 | } | |
@@ -128,16 +219,24 b' void zstd_module_init(PyObject* m) {' | |||||
128 | compressiondict_module_init(m); |
|
219 | compressiondict_module_init(m); | |
129 | compressobj_module_init(m); |
|
220 | compressobj_module_init(m); | |
130 | compressor_module_init(m); |
|
221 | compressor_module_init(m); | |
|
222 | compressionreader_module_init(m); | |||
131 | compressionwriter_module_init(m); |
|
223 | compressionwriter_module_init(m); | |
132 | compressoriterator_module_init(m); |
|
224 | compressoriterator_module_init(m); | |
133 | constants_module_init(m); |
|
225 | constants_module_init(m); | |
134 | decompressor_module_init(m); |
|
226 | decompressor_module_init(m); | |
135 | decompressobj_module_init(m); |
|
227 | decompressobj_module_init(m); | |
|
228 | decompressionreader_module_init(m); | |||
136 | decompressionwriter_module_init(m); |
|
229 | decompressionwriter_module_init(m); | |
137 | decompressoriterator_module_init(m); |
|
230 | decompressoriterator_module_init(m); | |
138 | frameparams_module_init(m); |
|
231 | frameparams_module_init(m); | |
139 | } |
|
232 | } | |
140 |
|
233 | |||
|
234 | #if defined(__GNUC__) && (__GNUC__ >= 4) | |||
|
235 | # define PYTHON_ZSTD_VISIBILITY __attribute__ ((visibility ("default"))) | |||
|
236 | #else | |||
|
237 | # define PYTHON_ZSTD_VISIBILITY | |||
|
238 | #endif | |||
|
239 | ||||
141 | #if PY_MAJOR_VERSION >= 3 |
|
240 | #if PY_MAJOR_VERSION >= 3 | |
142 | static struct PyModuleDef zstd_module = { |
|
241 | static struct PyModuleDef zstd_module = { | |
143 | PyModuleDef_HEAD_INIT, |
|
242 | PyModuleDef_HEAD_INIT, | |
@@ -147,7 +246,7 b' static struct PyModuleDef zstd_module = ' | |||||
147 | zstd_methods |
|
246 | zstd_methods | |
148 | }; |
|
247 | }; | |
149 |
|
248 | |||
150 | PyMODINIT_FUNC PyInit_zstd(void) { |
|
249 | PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC PyInit_zstd(void) { | |
151 | PyObject *m = PyModule_Create(&zstd_module); |
|
250 | PyObject *m = PyModule_Create(&zstd_module); | |
152 | if (m) { |
|
251 | if (m) { | |
153 | zstd_module_init(m); |
|
252 | zstd_module_init(m); | |
@@ -159,7 +258,7 b' PyMODINIT_FUNC PyInit_zstd(void) {' | |||||
159 | return m; |
|
258 | return m; | |
160 | } |
|
259 | } | |
161 | #else |
|
260 | #else | |
162 | PyMODINIT_FUNC initzstd(void) { |
|
261 | PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC initzstd(void) { | |
163 | PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc); |
|
262 | PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc); | |
164 | if (m) { |
|
263 | if (m) { | |
165 | zstd_module_init(m); |
|
264 | zstd_module_init(m); | |
@@ -211,3 +310,33 b' size_t roundpow2(size_t i) {' | |||||
211 |
|
310 | |||
212 | return i; |
|
311 | return i; | |
213 | } |
|
312 | } | |
|
313 | ||||
|
314 | /* Safer version of _PyBytes_Resize(). | |||
|
315 | * | |||
|
316 | * _PyBytes_Resize() only works if the refcount is 1. In some scenarios, | |||
|
317 | * we can get an object with a refcount > 1, even if it was just created | |||
|
318 | * with PyBytes_FromStringAndSize()! That's because (at least) CPython | |||
|
319 | * pre-allocates PyBytes instances of size 1 for every possible byte value. | |||
|
320 | * | |||
|
321 | * If non-0 is returned, obj may or may not be NULL. | |||
|
322 | */ | |||
|
323 | int safe_pybytes_resize(PyObject** obj, Py_ssize_t size) { | |||
|
324 | PyObject* tmp; | |||
|
325 | ||||
|
326 | if ((*obj)->ob_refcnt == 1) { | |||
|
327 | return _PyBytes_Resize(obj, size); | |||
|
328 | } | |||
|
329 | ||||
|
330 | tmp = PyBytes_FromStringAndSize(NULL, size); | |||
|
331 | if (!tmp) { | |||
|
332 | return -1; | |||
|
333 | } | |||
|
334 | ||||
|
335 | memcpy(PyBytes_AS_STRING(tmp), PyBytes_AS_STRING(*obj), | |||
|
336 | PyBytes_GET_SIZE(*obj)); | |||
|
337 | ||||
|
338 | Py_DECREF(*obj); | |||
|
339 | *obj = tmp; | |||
|
340 | ||||
|
341 | return 0; | |||
|
342 | } No newline at end of file |
@@ -2,7 +2,7 b'' | |||||
2 | bitstream |
|
2 | bitstream | |
3 | Part of FSE library |
|
3 | Part of FSE library | |
4 | header file (to include) |
|
4 | header file (to include) | |
5 |
Copyright (C) 2013-201 |
|
5 | Copyright (C) 2013-2017, Yann Collet. | |
6 |
|
6 | |||
7 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
|
7 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |
8 |
|
8 | |||
@@ -39,7 +39,6 b'' | |||||
39 | extern "C" { |
|
39 | extern "C" { | |
40 | #endif |
|
40 | #endif | |
41 |
|
41 | |||
42 |
|
||||
43 | /* |
|
42 | /* | |
44 | * This API consists of small unitary functions, which must be inlined for best performance. |
|
43 | * This API consists of small unitary functions, which must be inlined for best performance. | |
45 | * Since link-time-optimization is not available for all compilers, |
|
44 | * Since link-time-optimization is not available for all compilers, | |
@@ -53,6 +52,18 b' extern "C" {' | |||||
53 | #include "error_private.h" /* error codes and messages */ |
|
52 | #include "error_private.h" /* error codes and messages */ | |
54 |
|
53 | |||
55 |
|
54 | |||
|
55 | /*-************************************* | |||
|
56 | * Debug | |||
|
57 | ***************************************/ | |||
|
58 | #if defined(BIT_DEBUG) && (BIT_DEBUG>=1) | |||
|
59 | # include <assert.h> | |||
|
60 | #else | |||
|
61 | # ifndef assert | |||
|
62 | # define assert(condition) ((void)0) | |||
|
63 | # endif | |||
|
64 | #endif | |||
|
65 | ||||
|
66 | ||||
56 | /*========================================= |
|
67 | /*========================================= | |
57 | * Target specific |
|
68 | * Target specific | |
58 | =========================================*/ |
|
69 | =========================================*/ | |
@@ -60,18 +71,22 b' extern "C" {' | |||||
60 | # include <immintrin.h> /* support for bextr (experimental) */ |
|
71 | # include <immintrin.h> /* support for bextr (experimental) */ | |
61 | #endif |
|
72 | #endif | |
62 |
|
73 | |||
|
74 | #define STREAM_ACCUMULATOR_MIN_32 25 | |||
|
75 | #define STREAM_ACCUMULATOR_MIN_64 57 | |||
|
76 | #define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) | |||
|
77 | ||||
63 |
|
78 | |||
64 | /*-****************************************** |
|
79 | /*-****************************************** | |
65 | * bitStream encoding API (write forward) |
|
80 | * bitStream encoding API (write forward) | |
66 | ********************************************/ |
|
81 | ********************************************/ | |
67 | /* bitStream can mix input from multiple sources. |
|
82 | /* bitStream can mix input from multiple sources. | |
68 |
|
|
83 | * A critical property of these streams is that they encode and decode in **reverse** direction. | |
69 |
|
|
84 | * So the first bit sequence you add will be the last to be read, like a LIFO stack. | |
70 | */ |
|
85 | */ | |
71 | typedef struct |
|
86 | typedef struct | |
72 | { |
|
87 | { | |
73 | size_t bitContainer; |
|
88 | size_t bitContainer; | |
74 |
|
|
89 | unsigned bitPos; | |
75 | char* startPtr; |
|
90 | char* startPtr; | |
76 | char* ptr; |
|
91 | char* ptr; | |
77 | char* endPtr; |
|
92 | char* endPtr; | |
@@ -109,6 +124,7 b' typedef struct' | |||||
109 | unsigned bitsConsumed; |
|
124 | unsigned bitsConsumed; | |
110 | const char* ptr; |
|
125 | const char* ptr; | |
111 | const char* start; |
|
126 | const char* start; | |
|
127 | const char* limitPtr; | |||
112 | } BIT_DStream_t; |
|
128 | } BIT_DStream_t; | |
113 |
|
129 | |||
114 | typedef enum { BIT_DStream_unfinished = 0, |
|
130 | typedef enum { BIT_DStream_unfinished = 0, | |
@@ -151,140 +167,178 b' MEM_STATIC size_t BIT_readBitsFast(BIT_D' | |||||
151 | /*-************************************************************** |
|
167 | /*-************************************************************** | |
152 | * Internal functions |
|
168 | * Internal functions | |
153 | ****************************************************************/ |
|
169 | ****************************************************************/ | |
154 | MEM_STATIC unsigned BIT_highbit32 (register U32 val)
170 | MEM_STATIC unsigned BIT_highbit32 (U32 val)
155 | { |
|
171 | { | |
|
172 | assert(val != 0); | |||
|
173 | { | |||
156 | # if defined(_MSC_VER) /* Visual */ |
|
174 | # if defined(_MSC_VER) /* Visual */ | |
157 | unsigned long r=0; |
|
175 | unsigned long r=0; | |
158 | _BitScanReverse ( &r, val ); |
|
176 | _BitScanReverse ( &r, val ); | |
159 | return (unsigned) r; |
|
177 | return (unsigned) r; | |
160 | # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ |
|
178 | # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ | |
161 | return 31 - __builtin_clz (val); |
|
179 | return 31 - __builtin_clz (val); | |
162 | # else /* Software version */ |
|
180 | # else /* Software version */ | |
163 | static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; |
|
181 | static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, | |
164 | U32 v = val; |
|
182 | 11, 14, 16, 18, 22, 25, 3, 30, | |
165 | v |= v >> 1; |
|
183 | 8, 12, 20, 28, 15, 17, 24, 7, | |
166 | v |= v >> 2; |
|
184 | 19, 27, 23, 6, 26, 5, 4, 31 }; | |
167 | v |= v >> 4; |
|
185 | U32 v = val; | |
168 |
v |= v >> |
|
186 | v |= v >> 1; | |
169 |
v |= v >> |
|
187 | v |= v >> 2; | |
170 | return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; |
|
188 | v |= v >> 4; | |
|
189 | v |= v >> 8; | |||
|
190 | v |= v >> 16; | |||
|
191 | return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; | |||
171 | # endif |
|
192 | # endif | |
|
193 | } | |||
172 | } |
|
194 | } | |
173 |
|
195 | |||
174 | /*===== Local Constants =====*/ |
|
196 | /*===== Local Constants =====*/ | |
175 | static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ |
|
197 | static const unsigned BIT_mask[] = { | |
176 |
|
198 | 0, 1, 3, 7, 0xF, 0x1F, | ||
|
199 | 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, | |||
|
200 | 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, | |||
|
201 | 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, | |||
|
202 | 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, | |||
|
203 | 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ | |||
|
204 | #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) | |||
177 |
|
205 | |||
178 | /*-************************************************************** |
|
206 | /*-************************************************************** | |
179 | * bitStream encoding |
|
207 | * bitStream encoding | |
180 | ****************************************************************/ |
|
208 | ****************************************************************/ | |
181 | /*! BIT_initCStream() : |
|
209 | /*! BIT_initCStream() : | |
182 |
* `dstCapacity` must be > sizeof( |
|
210 | * `dstCapacity` must be > sizeof(size_t) | |
183 | * @return : 0 if success, |
|
211 | * @return : 0 if success, | |
184 |
|
|
212 | * otherwise an error code (can be tested using ERR_isError()) */ | |
185 |
MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, |
|
213 | MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, | |
|
214 | void* startPtr, size_t dstCapacity) | |||
186 | { |
|
215 | { | |
187 | bitC->bitContainer = 0; |
|
216 | bitC->bitContainer = 0; | |
188 | bitC->bitPos = 0; |
|
217 | bitC->bitPos = 0; | |
189 | bitC->startPtr = (char*)startPtr; |
|
218 | bitC->startPtr = (char*)startPtr; | |
190 | bitC->ptr = bitC->startPtr; |
|
219 | bitC->ptr = bitC->startPtr; | |
191 |
bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC-> |
|
220 | bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); | |
192 |
if (dstCapacity <= sizeof(bitC-> |
|
221 | if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); | |
193 | return 0; |
|
222 | return 0; | |
194 | } |
|
223 | } | |
195 |
|
224 | |||
196 | /*! BIT_addBits() : |
|
225 | /*! BIT_addBits() : | |
197 |
|
|
226 | * can add up to 31 bits into `bitC`. | |
198 |
|
|
227 | * Note : does not check for register overflow ! */ | |
199 |
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, |
|
228 | MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, | |
|
229 | size_t value, unsigned nbBits) | |||
200 | { |
|
230 | { | |
|
231 | MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32); | |||
|
232 | assert(nbBits < BIT_MASK_SIZE); | |||
|
233 | assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); | |||
201 | bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; |
|
234 | bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; | |
202 | bitC->bitPos += nbBits; |
|
235 | bitC->bitPos += nbBits; | |
203 | } |
|
236 | } | |
204 |
|
237 | |||
205 | /*! BIT_addBitsFast() : |
|
238 | /*! BIT_addBitsFast() : | |
206 | * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ |
|
239 | * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ | |
207 |
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, |
|
240 | MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, | |
|
241 | size_t value, unsigned nbBits) | |||
208 | { |
|
242 | { | |
|
243 | assert((value>>nbBits) == 0); | |||
|
244 | assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); | |||
209 | bitC->bitContainer |= value << bitC->bitPos; |
|
245 | bitC->bitContainer |= value << bitC->bitPos; | |
210 | bitC->bitPos += nbBits; |
|
246 | bitC->bitPos += nbBits; | |
211 | } |
|
247 | } | |
212 |
|
248 | |||
213 | /*! BIT_flushBitsFast() : |
|
249 | /*! BIT_flushBitsFast() : | |
|
250 | * assumption : bitContainer has not overflowed | |||
214 | * unsafe version; does not check buffer overflow */ |
|
251 | * unsafe version; does not check buffer overflow */ | |
215 | MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) |
|
252 | MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) | |
216 | { |
|
253 | { | |
217 | size_t const nbBytes = bitC->bitPos >> 3; |
|
254 | size_t const nbBytes = bitC->bitPos >> 3; | |
|
255 | assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); | |||
218 | MEM_writeLEST(bitC->ptr, bitC->bitContainer); |
|
256 | MEM_writeLEST(bitC->ptr, bitC->bitContainer); | |
219 | bitC->ptr += nbBytes; |
|
257 | bitC->ptr += nbBytes; | |
|
258 | assert(bitC->ptr <= bitC->endPtr); | |||
220 | bitC->bitPos &= 7; |
|
259 | bitC->bitPos &= 7; | |
221 | bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ |
|
260 | bitC->bitContainer >>= nbBytes*8; | |
222 | } |
|
261 | } | |
223 |
|
262 | |||
224 | /*! BIT_flushBits() : |
|
263 | /*! BIT_flushBits() : | |
|
264 | * assumption : bitContainer has not overflowed | |||
225 | * safe version; check for buffer overflow, and prevents it. |
|
265 | * safe version; check for buffer overflow, and prevents it. | |
226 | * note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */ |
|
266 | * note : does not signal buffer overflow. | |
|
267 | * overflow will be revealed later on using BIT_closeCStream() */ | |||
227 | MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) |
|
268 | MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) | |
228 | { |
|
269 | { | |
229 | size_t const nbBytes = bitC->bitPos >> 3; |
|
270 | size_t const nbBytes = bitC->bitPos >> 3; | |
|
271 | assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); | |||
230 | MEM_writeLEST(bitC->ptr, bitC->bitContainer); |
|
272 | MEM_writeLEST(bitC->ptr, bitC->bitContainer); | |
231 | bitC->ptr += nbBytes; |
|
273 | bitC->ptr += nbBytes; | |
232 | if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; |
|
274 | if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; | |
233 | bitC->bitPos &= 7; |
|
275 | bitC->bitPos &= 7; | |
234 | bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ |
|
276 | bitC->bitContainer >>= nbBytes*8; | |
235 | } |
|
277 | } | |
236 |
|
278 | |||
237 | /*! BIT_closeCStream() : |
|
279 | /*! BIT_closeCStream() : | |
238 | * @return : size of CStream, in bytes, |
|
280 | * @return : size of CStream, in bytes, | |
239 |
|
|
281 | * or 0 if it could not fit into dstBuffer */ | |
240 | MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) |
|
282 | MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) | |
241 | { |
|
283 | { | |
242 | BIT_addBitsFast(bitC, 1, 1); /* endMark */ |
|
284 | BIT_addBitsFast(bitC, 1, 1); /* endMark */ | |
243 | BIT_flushBits(bitC); |
|
285 | BIT_flushBits(bitC); | |
244 |
|
286 | if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ | ||
245 | if (bitC->ptr >= bitC->endPtr) return 0; /* doesn't fit within authorized budget : cancel */ |
|
|||
246 |
|
||||
247 | return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); |
|
287 | return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); | |
248 | } |
|
288 | } | |
249 |
|
289 | |||
250 |
|
290 | |||
251 | /*-******************************************************** |
|
291 | /*-******************************************************** | |
252 | * bitStream decoding |
|
292 | * bitStream decoding | |
253 | **********************************************************/ |
|
293 | **********************************************************/ | |
254 | /*! BIT_initDStream() : |
|
294 | /*! BIT_initDStream() : | |
255 |
|
|
295 | * Initialize a BIT_DStream_t. | |
256 |
|
|
296 | * `bitD` : a pointer to an already allocated BIT_DStream_t structure. | |
257 |
|
|
297 | * `srcSize` must be the *exact* size of the bitStream, in bytes. | |
258 |
|
|
298 | * @return : size of stream (== srcSize), or an errorCode if a problem is detected | |
259 | */ |
|
299 | */ | |
260 | MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) |
|
300 | MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) | |
261 | { |
|
301 | { | |
262 | if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } |
|
302 | if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } | |
263 |
|
303 | |||
|
304 | bitD->start = (const char*)srcBuffer; | |||
|
305 | bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); | |||
|
306 | ||||
264 | if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ |
|
307 | if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ | |
265 | bitD->start = (const char*)srcBuffer; |
|
|||
266 | bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); |
|
308 | bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); | |
267 | bitD->bitContainer = MEM_readLEST(bitD->ptr); |
|
309 | bitD->bitContainer = MEM_readLEST(bitD->ptr); | |
268 | { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; |
|
310 | { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; | |
269 | bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ |
|
311 | bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ | |
270 | if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } |
|
312 | if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } | |
271 | } else { |
|
313 | } else { | |
272 | bitD->start = (const char*)srcBuffer; |
|
|||
273 | bitD->ptr = bitD->start; |
|
314 | bitD->ptr = bitD->start; | |
274 | bitD->bitContainer = *(const BYTE*)(bitD->start); |
|
315 | bitD->bitContainer = *(const BYTE*)(bitD->start); | |
275 | switch(srcSize) |
|
316 | switch(srcSize) | |
276 | { |
|
317 | { | |
277 |
|
|
318 | case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); | |
278 | case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); |
|
319 | /* fall-through */ | |
279 | case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); |
|
320 | ||
280 |
|
|
321 | case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); | |
281 | case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; |
|
322 | /* fall-through */ | |
282 | case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; |
|
323 | ||
283 | default:; |
|
324 | case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); | |
|
325 | /* fall-through */ | |||
|
326 | ||||
|
327 | case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; | |||
|
328 | /* fall-through */ | |||
|
329 | ||||
|
330 | case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; | |||
|
331 | /* fall-through */ | |||
|
332 | ||||
|
333 | case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; | |||
|
334 | /* fall-through */ | |||
|
335 | ||||
|
336 | default: break; | |||
284 | } |
|
337 | } | |
285 | { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; |
|
338 | { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; | |
286 | bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; |
|
339 | bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; | |
287 |
if (lastByte == 0) return ERROR( |
|
340 | if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ | |
|
341 | } | |||
288 | bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; |
|
342 | bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; | |
289 | } |
|
343 | } | |
290 |
|
344 | |||
@@ -306,12 +360,14 b' MEM_STATIC size_t BIT_getMiddleBits(size' | |||||
306 | # endif |
|
360 | # endif | |
307 | return _bextr_u32(bitContainer, start, nbBits); |
|
361 | return _bextr_u32(bitContainer, start, nbBits); | |
308 | #else |
|
362 | #else | |
|
363 | assert(nbBits < BIT_MASK_SIZE); | |||
309 | return (bitContainer >> start) & BIT_mask[nbBits]; |
|
364 | return (bitContainer >> start) & BIT_mask[nbBits]; | |
310 | #endif |
|
365 | #endif | |
311 | } |
|
366 | } | |
312 |
|
367 | |||
313 | MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) |
|
368 | MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) | |
314 | { |
|
369 | { | |
|
370 | assert(nbBits < BIT_MASK_SIZE); | |||
315 | return bitContainer & BIT_mask[nbBits]; |
|
371 | return bitContainer & BIT_mask[nbBits]; | |
316 | } |
|
372 | } | |
317 |
|
373 | |||
@@ -320,24 +376,24 b' MEM_STATIC size_t BIT_getLowerBits(size_' | |||||
320 | * local register is not modified. |
|
376 | * local register is not modified. | |
321 | * On 32-bits, maxNbBits==24. |
|
377 | * On 32-bits, maxNbBits==24. | |
322 | * On 64-bits, maxNbBits==56. |
|
378 | * On 64-bits, maxNbBits==56. | |
323 |
* |
|
379 | * @return : value extracted */ | |
324 | */ |
|
380 | MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) | |
325 | MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) |
|
|||
326 | { |
|
381 | { | |
327 | #if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */ |
|
382 | #if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */ | |
328 | return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); |
|
383 | return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); | |
329 | #else |
|
384 | #else | |
330 |
U32 const |
|
385 | U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; | |
331 |
return ((bitD->bitContainer << (bitD->bitsConsumed & |
|
386 | return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); | |
332 | #endif |
|
387 | #endif | |
333 | } |
|
388 | } | |
334 |
|
389 | |||
335 | /*! BIT_lookBitsFast() : |
|
390 | /*! BIT_lookBitsFast() : | |
336 |
|
|
391 | * unsafe version; only works if nbBits >= 1 */ | |
337 | MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) |
|
392 | MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) | |
338 | { |
|
393 | { | |
339 |
U32 const |
|
394 | U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; | |
340 | return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); |
|
395 | assert(nbBits >= 1); | |
|
396 | return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); | |||
341 | } |
|
397 | } | |
342 |
|
398 | |||
343 | MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) |
|
399 | MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) | |
@@ -348,8 +404,7 b' MEM_STATIC void BIT_skipBits(BIT_DStream' | |||||
348 | /*! BIT_readBits() : |
|
404 | /*! BIT_readBits() : | |
349 | * Read (consume) next n bits from local register and update. |
|
405 | * Read (consume) next n bits from local register and update. | |
350 | * Pay attention to not read more than nbBits contained into local register. |
|
406 | * Pay attention to not read more than nbBits contained into local register. | |
351 |
* |
|
407 | * @return : extracted value. */ | |
352 | */ |
|
|||
353 | MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) |
|
408 | MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) | |
354 | { |
|
409 | { | |
355 | size_t const value = BIT_lookBits(bitD, nbBits); |
|
410 | size_t const value = BIT_lookBits(bitD, nbBits); | |
@@ -358,25 +413,26 b' MEM_STATIC size_t BIT_readBits(BIT_DStre' | |||||
358 | } |
|
413 | } | |
359 |
|
414 | |||
360 | /*! BIT_readBitsFast() : |
|
415 | /*! BIT_readBitsFast() : | |
361 |
|
|
416 | * unsafe version; only works only if nbBits >= 1 */ | |
362 | MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) |
|
417 | MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) | |
363 | { |
|
418 | { | |
364 | size_t const value = BIT_lookBitsFast(bitD, nbBits); |
|
419 | size_t const value = BIT_lookBitsFast(bitD, nbBits); | |
|
420 | assert(nbBits >= 1); | |||
365 | BIT_skipBits(bitD, nbBits); |
|
421 | BIT_skipBits(bitD, nbBits); | |
366 | return value; |
|
422 | return value; | |
367 | } |
|
423 | } | |
368 |
|
424 | |||
369 | /*! BIT_reloadDStream() : |
|
425 | /*! BIT_reloadDStream() : | |
370 |
|
|
426 | * Refill `bitD` from buffer previously set in BIT_initDStream() . | |
371 |
|
|
427 | * This function is safe, it guarantees it will not read beyond src buffer. | |
372 |
|
|
428 | * @return : status of `BIT_DStream_t` internal register. | |
373 |
|
|
429 | * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ | |
374 | MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) |
|
430 | MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) | |
375 | { |
|
431 | { | |
376 |
|
|
432 | if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ | |
377 |
|
|
433 | return BIT_DStream_overflow; | |
378 |
|
434 | |||
379 |
if (bitD->ptr >= bitD-> |
|
435 | if (bitD->ptr >= bitD->limitPtr) { | |
380 | bitD->ptr -= bitD->bitsConsumed >> 3; |
|
436 | bitD->ptr -= bitD->bitsConsumed >> 3; | |
381 | bitD->bitsConsumed &= 7; |
|
437 | bitD->bitsConsumed &= 7; | |
382 | bitD->bitContainer = MEM_readLEST(bitD->ptr); |
|
438 | bitD->bitContainer = MEM_readLEST(bitD->ptr); | |
@@ -386,6 +442,7 b' MEM_STATIC BIT_DStream_status BIT_reload' | |||||
386 | if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; |
|
442 | if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; | |
387 | return BIT_DStream_completed; |
|
443 | return BIT_DStream_completed; | |
388 | } |
|
444 | } | |
|
445 | /* start < ptr < limitPtr */ | |||
389 | { U32 nbBytes = bitD->bitsConsumed >> 3; |
|
446 | { U32 nbBytes = bitD->bitsConsumed >> 3; | |
390 | BIT_DStream_status result = BIT_DStream_unfinished; |
|
447 | BIT_DStream_status result = BIT_DStream_unfinished; | |
391 | if (bitD->ptr - nbBytes < bitD->start) { |
|
448 | if (bitD->ptr - nbBytes < bitD->start) { | |
@@ -394,14 +451,14 b' MEM_STATIC BIT_DStream_status BIT_reload' | |||||
394 | } |
|
451 | } | |
395 | bitD->ptr -= nbBytes; |
|
452 | bitD->ptr -= nbBytes; | |
396 | bitD->bitsConsumed -= nbBytes*8; |
|
453 | bitD->bitsConsumed -= nbBytes*8; | |
397 | bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ |
|
454 | bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ | |
398 | return result; |
|
455 | return result; | |
399 | } |
|
456 | } | |
400 | } |
|
457 | } | |
401 |
|
458 | |||
402 | /*! BIT_endOfDStream() : |
|
459 | /*! BIT_endOfDStream() : | |
403 |
|
|
460 | * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). | |
404 | */ |
|
461 | */ | |
405 | MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) |
|
462 | MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) | |
406 | { |
|
463 | { | |
407 | return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); |
|
464 | return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); |
@@ -43,27 +43,21 b''
 #include "huf.h"


-/*-****************************************
-*  FSE Error Management
-******************************************/
+/*=== Version ===*/
+unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
+
+
+/*=== Error Management ===*/
 unsigned FSE_isError(size_t code) { return ERR_isError(code); }
-
 const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }

-
-/* **************************************************************
-*  HUF Error Management
-****************************************************************/
 unsigned HUF_isError(size_t code) { return ERR_isError(code); }
-
 const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }


 /*-**************************************************************
 *  FSE NCount encoding-decoding
 ****************************************************************/
-static short FSE_abs(short a) { return (short)(a<0 ? -a : a); }
-
 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
                  const void* headerBuffer, size_t hbSize)
 {
@@ -117,21 +111,21 b' size_t FSE_readNCount (short* normalized'
             } else {
                 bitStream >>= 2;
         }   }
-        {   short const max = (short)((2*threshold-1)-remaining);
-            short count;
+        {   int const max = (2*threshold-1) - remaining;
+            int count;

             if ((bitStream & (threshold-1)) < (U32)max) {
-                count = (short)(bitStream & (threshold-1));
-                bitCount   += nbBits-1;
+                count = bitStream & (threshold-1);
+                bitCount += nbBits-1;
             } else {
-                count = (short)(bitStream & (2*threshold-1));
+                count = bitStream & (2*threshold-1);
                 if (count >= threshold) count -= max;
-                bitCount   += nbBits;
+                bitCount += nbBits;
             }

             count--;   /* extra accuracy */
-            remaining -= FSE_abs(count);
-            normalizedCounter[charnum++] = count;
+            remaining -= count < 0 ? -count : count;   /* -1 means +1 */
+            normalizedCounter[charnum++] = (short)count;
             previous0 = !count;
             while (remaining < threshold) {
                 nbBits--;
@@ -1,10 +1,11 b''
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */

 /* The purpose of this file is to have a single list of error strings embedded in binary */
@@ -20,23 +21,27 b' const char* ERR_getErrorString(ERR_enum '
     case PREFIX(GENERIC): return "Error (generic)";
     case PREFIX(prefix_unknown): return "Unknown frame descriptor";
     case PREFIX(version_unsupported): return "Version not supported";
-    case PREFIX(parameter_unknown): return "Unknown parameter type";
     case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
-    case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
     case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
-    case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound";
+    case PREFIX(corruption_detected): return "Corrupted block detected";
+    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
+    case PREFIX(parameter_unsupported): return "Unsupported parameter";
+    case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
     case PREFIX(init_missing): return "Context should be init first";
     case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+    case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough";
     case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
-    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
-    case PREFIX(srcSize_wrong): return "Src size incorrect";
-    case PREFIX(corruption_detected): return "Corrupted block detected";
-    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
     case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
     case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
     case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
     case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
     case PREFIX(dictionary_wrong): return "Dictionary mismatch";
+    case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
+    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
+    case PREFIX(srcSize_wrong): return "Src size is incorrect";
+    /* following error codes are not stable and may be removed or changed in a future version */
+    case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
+    case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
     case PREFIX(maxCode):
     default: return notErrorCode;
     }
@@ -1,10 +1,11 b''
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */

 /* Note : this module is expected to remain private, do not expose it */
@@ -48,10 +49,9 b' typedef ZSTD_ErrorCode ERR_enum;'
 /*-****************************************
 *  Error codes handling
 ******************************************/
-#ifdef ERROR
-#  undef ERROR   /* reported already defined on VS 2015 (Rich Geldreich) */
-#endif
-#define ERROR(name) ((size_t)-PREFIX(name))
+#undef ERROR   /* reported already defined on VS 2015 (Rich Geldreich) */
+#define ERROR(name) ZSTD_ERROR(name)
+#define ZSTD_ERROR(name) ((size_t)-PREFIX(name))

 ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }

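After this reshuffle, ERROR(name) still expands to the error code negated and cast to size_t, so ERR_isError() can keep treating any value above ERROR(maxCode) as an error. A minimal sketch of that convention, using made-up enum values rather than the real ZSTD_ErrorCode list:

/* Illustration of the ((size_t)-code) error convention; the enum values are hypothetical. */
#include <stddef.h>
#include <stdio.h>

typedef enum { err_no_error = 0, err_srcSize_wrong = 72, err_maxCode = 120 } demo_error;

#define DEMO_ERROR(name)  ((size_t)-(err_##name))
static unsigned demo_isError(size_t code) { return code > DEMO_ERROR(maxCode); }

int main(void)
{
    size_t ok  = 42;                        /* an ordinary size result */
    size_t bad = DEMO_ERROR(srcSize_wrong); /* an encoded error */
    printf("%u %u\n", demo_isError(ok), demo_isError(bad));   /* prints: 0 1 */
    return 0;
}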
@@ -31,13 +31,14 b''
   You can contact the author at :
   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
 ****************************************************************** */
-#ifndef FSE_H
-#define FSE_H

 #if defined (__cplusplus)
 extern "C" {
 #endif

+#ifndef FSE_H
+#define FSE_H
+

 /*-*****************************************
 *  Dependencies
@@ -45,6 +46,32 b' extern "C" {'
 #include <stddef.h>    /* size_t, ptrdiff_t */


+/*-*****************************************
+*  FSE_PUBLIC_API : control library symbols visibility
+******************************************/
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define FSE_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define FSE_PUBLIC_API
+#endif
+
+/*------ Version ------*/
+#define FSE_VERSION_MAJOR    0
+#define FSE_VERSION_MINOR    9
+#define FSE_VERSION_RELEASE  0
+
+#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
+#define FSE_QUOTE(str) #str
+#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
+#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
+
+#define FSE_VERSION_NUMBER  (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
+FSE_PUBLIC_API unsigned FSE_versionNumber(void);   /**< library version number; to be used when checking dll version */
+
 /*-****************************************
 *  FSE simple functions
 ******************************************/
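The new version macros let a caller compare the library it linked against with the header it compiled against, which matters mostly for DLL builds. A hedged usage sketch:

/* Illustration: runtime vs compile-time version check. */
#include <stdio.h>
#include "fse.h"

int check_fse_version(void)
{
    unsigned const linked = FSE_versionNumber();   /* version of the linked library */
    if (linked != FSE_VERSION_NUMBER) {            /* version this code was compiled against */
        fprintf(stderr, "fse version mismatch: compiled %u, linked %u\n",
                (unsigned)FSE_VERSION_NUMBER, linked);
        return 1;
    }
    return 0;
}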
@@ -56,8 +83,8 b' extern "C" {'
     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
     if FSE_isError(return), compression failed (more details using FSE_getErrorName())
 */
-size_t FSE_compress(void* dst, size_t dstCapacity,
+FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
               const void* src, size_t srcSize);

 /*! FSE_decompress():
     Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
@@ -69,18 +96,18 b' size_t FSE_compress(void* dst, size_t ds'
     Why ? : making this distinction requires a header.
     Header management is intentionally delegated to the user layer, which can better manage special cases.
 */
-size_t FSE_decompress(void* dst, size_t dstCapacity,
+FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
                 const void* cSrc, size_t cSrcSize);


 /*-*****************************************
 *  Tool functions
 ******************************************/
-size_t FSE_compressBound(size_t size);       /* maximum compressed size */
+FSE_PUBLIC_API size_t FSE_compressBound(size_t size);       /* maximum compressed size */

 /* Error Management */
-unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
-const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+FSE_PUBLIC_API unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
+FSE_PUBLIC_API const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */


 /*-*****************************************
@@ -94,7 +121,7 b' const char* FSE_getErrorName(size_t code'
     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
     if FSE_isError(return), it's an error code.
 */
-size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);


 /*-*****************************************
@@ -127,50 +154,50 b' or to save and provide normalized distri'
 @return : the count of the most frequent symbol (which is not identified).
           if return == srcSize, there is only one symbol.
           Can also return an error code, which can be tested with FSE_isError(). */
-size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+FSE_PUBLIC_API size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);

 /*! FSE_optimalTableLog():
     dynamically downsize 'tableLog' when conditions are met.
     It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
     @return : recommended tableLog (necessarily <= 'maxTableLog') */
-unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);

 /*! FSE_normalizeCount():
     normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
     'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
     @return : tableLog,
               or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
+FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);

 /*! FSE_NCountWriteBound():
     Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
     Typically useful for allocation purpose. */
-size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);

 /*! FSE_writeNCount():
     Compactly save 'normalizedCounter' into 'buffer'.
     @return : size of the compressed table,
               or an errorCode, which can be tested using FSE_isError(). */
-size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);


 /*! Constructor and Destructor of FSE_CTable.
     Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
 typedef unsigned FSE_CTable;   /* don't allocate that. It's only meant to be more restrictive than void* */
-FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
-void  FSE_freeCTable (FSE_CTable* ct);
+FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API void        FSE_freeCTable (FSE_CTable* ct);

 /*! FSE_buildCTable():
     Builds `ct`, which must be already allocated, using FSE_createCTable().
     @return : 0, or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);

 /*! FSE_compress_usingCTable():
     Compress `src` using `ct` into `dst` which must be already allocated.
     @return : size of compressed data (<= `dstCapacity`),
               or 0 if compressed data could not fit into `dst`,
               or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
+FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);

 /*!
 Tutorial :
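The declarations above form the advanced compression pipeline: count symbols, normalize the distribution, serialize the table header, build the CTable, then encode. A sketch assembled only from the signatures shown in this hunk; error handling and the 0/1 special return values are deliberately ignored, so this is an illustration rather than production code:

/* Sketch of the count -> normalize -> writeNCount -> buildCTable -> encode flow. */
#include "fse.h"

size_t demo_fse_compress(void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize)
{
    unsigned count[256];
    short    norm[256];
    unsigned maxSymbolValue = 255;          /* in/out parameter of FSE_count() */
    unsigned tableLog = 12;                 /* hypothetical starting point */

    FSE_count(count, &maxSymbolValue, src, srcSize);
    tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
    FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue);

    {   FSE_CTable* const ct = FSE_createCTable(maxSymbolValue, tableLog);
        size_t const hSize = FSE_writeNCount(dst, dstCapacity, norm, maxSymbolValue, tableLog);
        size_t cSize;
        FSE_buildCTable(ct, norm, maxSymbolValue, tableLog);
        cSize = FSE_compress_usingCTable((char*)dst + hSize, dstCapacity - hSize,
                                         src, srcSize, ct);
        FSE_freeCTable(ct);
        return hSize + cSize;               /* error checks omitted for brevity */
    }
}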
@@ -223,25 +250,25 b' If there is an error, the function will '
 @return : size read from 'rBuffer',
           or an errorCode, which can be tested using FSE_isError().
           maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
-size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
+FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);

 /*! Constructor and Destructor of FSE_DTable.
     Note that its size depends on 'tableLog' */
 typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
-FSE_DTable* FSE_createDTable(unsigned tableLog);
-void        FSE_freeDTable(FSE_DTable* dt);
+FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
+FSE_PUBLIC_API void        FSE_freeDTable(FSE_DTable* dt);

 /*! FSE_buildDTable():
     Builds 'dt', which must be already allocated, using FSE_createDTable().
     return : 0, or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);

 /*! FSE_decompress_usingDTable():
     Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
     into `dst` which must be already allocated.
     @return : size of regenerated data (necessarily <= `dstCapacity`),
               or an errorCode, which can be tested using FSE_isError() */
-size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);

 /*!
 Tutorial :
@@ -271,8 +298,10 b' FSE_decompress_usingDTable() result will'
 If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
 */

+#endif  /* FSE_H */

-#ifdef FSE_STATIC_LINKING_ONLY
+#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
+#define FSE_H_FSE_STATIC_LINKING_ONLY

 /* *** Dependency *** */
 #include "bitstream.h"
@@ -290,6 +319,10 b' If there is an error, the function will '
 #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
 #define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))

+/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
+#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue)   (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
+#define FSE_DTABLE_SIZE(maxTableLog)                   (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))
+

 /* *****************************************
 *  FSE advanced API
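The new FSE_CTABLE_SIZE()/FSE_DTABLE_SIZE() macros give the table footprint in bytes (for malloc()), while the existing *_U32 variants remain the natural fit for statically sized arrays. A small sketch, with hypothetical tableLog and maxSymbolValue values:

/* Illustration: sizing FSE tables statically (U32 elements) or dynamically (bytes). */
#include <stdlib.h>
#define FSE_STATIC_LINKING_ONLY   /* these macros live in the static-linking-only section */
#include "fse.h"

#define DEMO_TABLELOG        12
#define DEMO_MAXSYMBOLVALUE 255

static FSE_DTable staticDTable[FSE_DTABLE_SIZE_U32(DEMO_TABLELOG)];

size_t demo_static_dtable_bytes(void) { return sizeof(staticDTable); }

FSE_CTable* demo_alloc_ctable(void)
{
    /* FSE_CTABLE_SIZE() already multiplies by sizeof(FSE_CTable) */
    return (FSE_CTable*)malloc(FSE_CTABLE_SIZE(DEMO_TABLELOG, DEMO_MAXSYMBOLVALUE));
}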
@@ -312,7 +345,7 b' size_t FSE_countFast(unsigned* count, un'
  */
 size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);

-/*! FSE_count_simple
+/*! FSE_count_simple() :
  * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
  * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
  */
@@ -327,7 +360,7 b' unsigned FSE_optimalTableLog_internal(un'
  * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
  * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
  */
-#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + (
+#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);

 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
@@ -351,6 +384,11 b' size_t FSE_buildDTable_rle (FSE_DTable* '
 size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
 /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */

+typedef enum {
+   FSE_repeat_none,  /**< Cannot use the previous table */
+   FSE_repeat_check, /**< Can use the previous table but it must be checked */
+   FSE_repeat_valid  /**< Can use the previous table and it is asumed to be valid */
+ } FSE_repeat;

 /* *****************************************
 *  FSE symbol compression API
@@ -524,9 +562,9 b' MEM_STATIC void FSE_initCState2(FSE_CSta'

 MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
 {
-    const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+    FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
     const U16* const stateTable = (const U16*)(statePtr->stateTable);
-    U32 nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
+    U32 const nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
     BIT_addBits(bitC,  statePtr->value, nbBitsOut);
     statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
 }
@@ -664,5 +702,3 b' MEM_STATIC unsigned FSE_endOfDState(cons'
 #if defined (__cplusplus)
 }
 #endif
-
-#endif  /* FSE_H */
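fse.h is now split into a stable public section and an experimental section that is only exposed when FSE_STATIC_LINKING_ONLY is defined before inclusion, the same pattern the library's own sources use in the next hunk's include block. Opting in looks like this:

/* Illustration: opting in to the experimental declarations of fse.h. */
#define FSE_STATIC_LINKING_ONLY   /* must be defined before the include */
#include "fse.h"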
@@ -34,35 +34,15 b''


 /* **************************************************************
-*  Compiler specifics
-****************************************************************/
-#ifdef _MSC_VER    /* Visual Studio */
-#  define FORCE_INLINE static __forceinline
-#  include <intrin.h>                    /* For Visual 2005 */
-#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
-#else
-#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-#    ifdef __GNUC__
-#      define FORCE_INLINE static inline __attribute__((always_inline))
-#    else
-#      define FORCE_INLINE static inline
-#    endif
-#  else
-#    define FORCE_INLINE static
-#  endif /* __STDC_VERSION__ */
-#endif
-
-
-/* **************************************************************
 *  Includes
 ****************************************************************/
 #include <stdlib.h>     /* malloc, free, qsort */
 #include <string.h>     /* memcpy, memset */
-#include <stdio.h>      /* printf (debug) */
 #include "bitstream.h"
+#include "compiler.h"
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
+#include "error_private.h"


 /* **************************************************************
@@ -159,8 +139,8 b' size_t FSE_buildDTable(FSE_DTable* dt, c'
     {   U32 u;
         for (u=0; u<tableSize; u++) {
             FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
-            U16 nextState = symbolNext[symbol]++;
-            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+            U32 const nextState = symbolNext[symbol]++;
+            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
             tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
     }   }

@@ -217,7 +197,7 b' size_t FSE_buildDTable_raw (FSE_DTable* '
     return 0;
 }

-FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
           void* dst, size_t maxDstSize,
     const void* cSrc, size_t cSrcSize,
     const FSE_DTable* dt, const unsigned fast)
@@ -31,81 +31,114 b''
   You can contact the author at :
   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
 ****************************************************************** */
-#ifndef HUF_H_298734234
-#define HUF_H_298734234

 #if defined (__cplusplus)
 extern "C" {
 #endif

+#ifndef HUF_H_298734234
+#define HUF_H_298734234

 /* *** Dependencies *** */
 #include <stddef.h>    /* size_t */


-/* *** simple functions *** */
-/**
-HUF_compress() :
-    Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
-    'dst' buffer must be already allocated.
-    Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
-    `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
-    @return : size of compressed data (<= `dstCapacity`).
-    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
-                     if return == 1, srcData is a single repeated byte symbol (RLE compression).
-                     if HUF_isError(return), compression failed (more details using HUF_getErrorName())
-*/
-size_t HUF_compress(void* dst, size_t dstCapacity,
-              const void* src, size_t srcSize);
+/* *** library symbols visibility *** */
+/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
+ *        HUF symbols remain "private" (internal symbols for library only).
+ *        Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define HUF_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define HUF_PUBLIC_API __declspec(dllimport)  /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
+#else
+#  define HUF_PUBLIC_API
+#endif
+
+
+/* ========================== */
+/* ***  simple functions  *** */
+/* ========================== */

-/**
-HUF_decompress() :
-    Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
-    into already allocated buffer 'dst', of minimum size 'dstSize'.
-    `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
-    Note : in contrast with FSE, HUF_decompress can regenerate
-           RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
-           because it knows size to regenerate.
-    @return : size of regenerated data (== originalSize),
-              or an error code, which can be tested using HUF_isError()
-*/
-size_t HUF_decompress(void* dst,  size_t originalSize,
-                const void* cSrc, size_t cSrcSize);
+/** HUF_compress() :
+ *  Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
+ *  'dst' buffer must be already allocated.
+ *  Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
+ *  `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
+ * @return : size of compressed data (<= `dstCapacity`).
+ *           Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+ *                            if HUF_isError(return), compression failed (more details using HUF_getErrorName())
+ */
+HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
+
+/** HUF_decompress() :
+ *  Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
+ *  into already allocated buffer 'dst', of minimum size 'dstSize'.
+ *  `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
+ *  Note : in contrast with FSE, HUF_decompress can regenerate
+ *         RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
+ *         because it knows size to regenerate (originalSize).
+ * @return : size of regenerated data (== originalSize),
+ *           or an error code, which can be tested using HUF_isError()
+ */
+HUF_PUBLIC_API size_t HUF_decompress(void* dst,  size_t originalSize,
+                               const void* cSrc, size_t cSrcSize);


 /* ***   Tool functions *** */
 #define HUF_BLOCKSIZE_MAX (128 * 1024)       /**< maximum input size for a single block compressed with HUF_compress */
-size_t HUF_compressBound(size_t size);
+HUF_PUBLIC_API size_t HUF_compressBound(size_t size);   /**< maximum compressed size (worst case) */

 /* Error Management */
-unsigned    HUF_isError(size_t code);
-const char* HUF_getErrorName(size_t code);
+HUF_PUBLIC_API unsigned    HUF_isError(size_t code);       /**< tells if a return value is an error code */
+HUF_PUBLIC_API const char* HUF_getErrorName(size_t code);  /**< provides error code string (useful for debugging) */


 /* ***   Advanced function   *** */

 /** HUF_compress2() :
- *   Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` .
- *   `tableLog` must be <= HUF_TABLELOG_MAX . */
-size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+ *  Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
+ *  `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
+ *  `tableLog` must be `<= HUF_TABLELOG_MAX` . */
+HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               unsigned maxSymbolValue, unsigned tableLog);

 /** HUF_compress4X_wksp() :
- *   Same as HUF_compress2(), but uses externally allocated `workSpace` */
-size_t HUF_compress4X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /**< `workSpace` must be a table of at least 1024 unsigned */
+ *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
+ *  `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
+#define HUF_WORKSPACE_SIZE (6 << 10)
+#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
+HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     unsigned maxSymbolValue, unsigned tableLog,
+                                     void* workSpace, size_t wkspSize);

-
+#endif   /* HUF_H_298734234 */

-#ifdef HUF_STATIC_LINKING_ONLY
+/* ******************************************************************
+ *  WARNING !!
+ *  The following section contains advanced and experimental definitions
+ *  which shall never be used in the context of a dynamic library,
+ *  because they are not guaranteed to remain stable in the future.
+ *  Only consider them in association with static linking.
+ * *****************************************************************/
+#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
+#define HUF_H_HUF_STATIC_LINKING_ONLY

 /* *** Dependencies *** */
 #include "mem.h"   /* U32 */


 /* *** Constants *** */
-#define HUF_TABLELOG_ABSOLUTEMAX  15   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
-#define HUF_TABLELOG_MAX  12   /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
-#define HUF_TABLELOG_DEFAULT  11   /* tableLog by default, when not specified */
-#define HUF_SYMBOLVALUE_MAX 255
+#define HUF_TABLELOG_MAX      12      /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_TABLELOG_DEFAULT  11      /* default tableLog value when none specified */
+#define HUF_SYMBOLVALUE_MAX  255
+
+#define HUF_TABLELOG_ABSOLUTEMAX  15  /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
 #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
 #  error "HUF_TABLELOG_MAX is too large !"
 #endif
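HUF_compress4X_wksp() now documents its workspace requirement through HUF_WORKSPACE_SIZE and HUF_WORKSPACE_SIZE_U32 instead of an informal "1024 unsigned" table. A hedged usage sketch; 255 and 11 below are simply the HUF_SYMBOLVALUE_MAX and HUF_TABLELOG_DEFAULT values from this header:

/* Illustration: calling HUF_compress4X_wksp() with a stack workspace of the documented size. */
#include "huf.h"

size_t demo_huf_compress(void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize)
{
    /* an array of unsigned is 4-byte aligned and HUF_WORKSPACE_SIZE bytes large */
    unsigned workSpace[HUF_WORKSPACE_SIZE / sizeof(unsigned)];
    return HUF_compress4X_wksp(dst, dstCapacity, src, srcSize,
                               255 /*maxSymbolValue*/, 11 /*tableLog*/,
                               workSpace, sizeof(workSpace));
}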
@@ -116,12 +149,14 b' size_t HUF_compress4X_wksp (void* dst, s'
 ******************************************/
 /* HUF buffer bounds */
 #define HUF_CTABLEBOUND 129
-#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic */
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true when incompressible is pre-filtered with fast heuristic */
 #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */

 /* static allocation of HUF's Compression Table */
+#define HUF_CTABLE_SIZE_U32(maxSymbolValue)   ((maxSymbolValue)+1)   /* Use tables of U32, for proper alignment */
+#define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
 #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
-    U32 name##hb[maxSymbolValue+1]; \
+    U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
     void* name##hv = &(name##hb); \
     HUF_CElt* name = (HUF_CElt*)(name##hv)   /* no final ; */

@@ -142,97 +177,151 b' size_t HUF_decompress4X4 (void* dst, siz'

 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< decodes RLE and uncompressed */
 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< single-symbol decoder */
 size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
+size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< double-symbols decoder */


 /* ****************************************
 *  HUF detailed API
-******************************************/
-/*!
-HUF_compress() does the following:
-1. count symbol occurrence from source[] into table count[] using FSE_count()
-2. (optional) refine tableLog using HUF_optimalTableLog()
-3. build Huffman table from count using HUF_buildCTable()
-4. save Huffman table to memory buffer using HUF_writeCTable()
-5. encode the data stream using HUF_compress4X_usingCTable()
+* ****************************************/

-The following API allows targeting specific sub-functions for advanced tasks.
-For example, it's possible to compress several blocks using the same 'CTable',
-or to save and regenerate 'CTable' using external methods.
-*/
-/* FSE_count() : find it within "fse.h" */
+/*! HUF_compress() does the following:
+ * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
+ * 2. (optional) refine tableLog using HUF_optimalTableLog()
+ * 3. build Huffman table from count using HUF_buildCTable()
+ * 4. save Huffman table to memory buffer using HUF_writeCTable()
+ * 5. encode the data stream using HUF_compress4X_usingCTable()
+ *
+ * The following API allows targeting specific sub-functions for advanced tasks.
+ * For example, it's possible to compress several blocks using the same 'CTable',
+ * or to save and regenerate 'CTable' using external methods.
+ */
 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
 typedef struct HUF_CElt_s HUF_CElt;   /* incomplete type */
-size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);
+size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);   /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);

+typedef enum {
+   HUF_repeat_none,  /**< Cannot use the previous table */
+   HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
+   HUF_repeat_valid  /**< Can use the previous table and it is asumed to be valid */
+ } HUF_repeat;
+/** HUF_compress4X_repeat() :
+*   Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+*   If it uses hufTable it does not modify hufTable or repeat.
+*   If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+*   If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
+                       const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned tableLog,
+                       void* workSpace, size_t wkspSize,    /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);

 /** HUF_buildCTable_wksp() :
  *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
+ *  `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
  */
+#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
+#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
 size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);

 /*! HUF_readStats() :
-    Read compact Huffman tree, saved by HUF_writeCTable().
-    `huffWeight` is destination buffer.
-    @return : size read from `src` , or an error Code .
-    Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
-size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
-                     U32* nbSymbolsPtr, U32* tableLogPtr,
+ *  Read compact Huffman tree, saved by HUF_writeCTable().
+ *  `huffWeight` is destination buffer.
+ * @return : size read from `src` , or an error Code .
+ *  Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
+                     U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
                      const void* src, size_t srcSize);

 /** HUF_readCTable() :
-*   Loading a CTable saved with HUF_writeCTable() */
-size_t HUF_readCTable (HUF_CElt* CTable, unsigned maxSymbolValue, const void* src, size_t srcSize);
+ *  Loading a CTable saved with HUF_writeCTable() */
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);


 /*
-HUF_decompress() does the following:
-1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
-2. build Huffman table from save, using HUF_readDTableXn()
-3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
+ * HUF_decompress() does the following:
+ * 1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
+ * 2. build Huffman table from save, using HUF_readDTableX?()
+ * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
 */

 /** HUF_selectDecoder() :
-*   Tells which decoder is likely to decode faster,
-*   based on a set of pre-computed metrics.
-*   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
-*   Assumption : 0 < dstSize <= 128 KB */
+ *  Tells which decoder is likely to decode faster,
+ *  based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
+ *  Assumption : 0 < dstSize <= 128 KB */
 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);

+/**
+ *  The minimum workspace size for the `workSpace` used in
+ *  HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp().
+ *
+ *  The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
+ *  HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.
+ *  Buffer overflow errors may potentially occur if code modifications result in
+ *  a required workspace size greater than that specified in the following
+ *  macro.
+ */
+#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
+#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
+
 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
 size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX4_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);

 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
 size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);


+/* ====================== */
 /* single stream variants */
+/* ====================== */

 size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
-size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /**< `workSpace` must be a table of at least 1024 unsigned */
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+/** HUF_compress1X_repeat() :
+ *  Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+ *  If it uses hufTable it does not modify hufTable or repeat.
+ *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+ *  If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
+                       const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned tableLog,
+                       void* workSpace, size_t wkspSize,   /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);

 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
|
301 | size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ | |
221 | size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ |
|
302 | size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ | |
222 |
|
303 | |||
223 | size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); |
|
304 | size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); | |
|
305 | size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); | |||
224 | size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ |
|
306 | size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ | |
|
307 | size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ | |||
225 | size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ |
|
308 | size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ | |
|
309 | size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ | |||
226 |
|
310 | |||
227 | size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ |
|
311 | size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ | |
228 | size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); |
|
312 | size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | |
229 | size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); |
|
313 | size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | |
230 |
|
314 | |||
|
315 | /* BMI2 variants. | |||
|
316 | * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. | |||
|
317 | */ | |||
|
318 | size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); | |||
|
319 | size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); | |||
|
320 | size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); | |||
|
321 | size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); | |||
|
322 | ||||
231 | #endif /* HUF_STATIC_LINKING_ONLY */ |
|
323 | #endif /* HUF_STATIC_LINKING_ONLY */ | |
232 |
|
324 | |||
233 |
|
||||
234 | #if defined (__cplusplus) |
|
325 | #if defined (__cplusplus) | |
235 | } |
|
326 | } | |
236 | #endif |
|
327 | #endif | |
237 |
|
||||
238 | #endif /* HUF_H_298734234 */ |
|
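The workspace macros added above make the scratch-memory contract of the `_wksp` entry points explicit. As a quick sanity check of the arithmetic (not part of the patch, and assuming HUF_SYMBOLVALUE_MAX is 255 as defined earlier in huf.h), HUF_CTABLE_WORKSPACE_SIZE_U32 works out to 2*255 + 1 + 1 = 512 unsigned values, i.e. 2048 bytes on common targets, while HUF_DECOMPRESS_WORKSPACE_SIZE is a flat (2 << 10) = 2048 bytes. A caller can therefore keep both scratch areas on the stack instead of allocating:

/* Illustrative sketch only: stack-allocated scratch areas sized by the new
 * macros. Assumes the static-linking-only section of huf.h is available and
 * that HUF_SYMBOLVALUE_MAX is 255 (defined earlier in huf.h). */
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#include "mem.h"    /* U32 */
#include <stdio.h>

static void show_workspace_sizes(void)
{
    unsigned ctableWksp[HUF_CTABLE_WORKSPACE_SIZE_U32];      /* for HUF_buildCTable_wksp() */
    U32      dtableWksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];  /* for HUF_readDTableX?_wksp() */

    printf("CTable workspace: %u bytes\n", (unsigned)sizeof(ctableWksp));  /* 2048 on typical targets */
    printf("DTable workspace: %u bytes\n", (unsigned)sizeof(dtableWksp));  /* 2048 = 2 << 10 */
}

The same pattern applies to the `_bmi2` variants: the caller detects BMI2 support once and threads the resulting 0/1 flag through these prototypes; the decoding calls themselves are otherwise unchanged.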
@@ -1,10 +1,11 @@
 /*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
 #ifndef MEM_H_MODULE
@@ -48,14 +49,13 @@ MEM_STATIC void MEM_check(void) { MEM_ST
 *****************************************************************/
 #if  !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
 # include <stdint.h>
   typedef  uint8_t BYTE;
   typedef uint16_t U16;
   typedef  int16_t S16;
   typedef uint32_t U32;
   typedef  int32_t S32;
   typedef uint64_t U64;
   typedef  int64_t S64;
-  typedef intptr_t iPtrDiff;
 #else
   typedef unsigned char      BYTE;
   typedef unsigned short     U16;
@@ -64,7 +64,6 @@ MEM_STATIC void MEM_check(void) { MEM_ST
   typedef   signed int       S32;
   typedef unsigned long long U64;
   typedef   signed long long S64;
-  typedef ptrdiff_t          iPtrDiff;
 #endif
 
 
@@ -76,19 +75,18 @@ MEM_STATIC void MEM_check(void) { MEM_ST
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The below switch allow to select different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
-* Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+* Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method is portable but violate C standard.
 *            It can generate buggy code on targets depending on alignment.
-*            In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+*            In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
 * Prefer these methods in priority order (0 > 1 > 2)
 */
 #ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
 #  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
 #    define MEM_FORCE_MEMORY_ACCESS 2
-#  elif defined(__INTEL_COMPILER) || \
-  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#  elif defined(__INTEL_COMPILER) || defined(__GNUC__)
 #    define MEM_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
@@ -109,7 +107,7 @@ Only use if no other choice to achieve b
 MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
 MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
 MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
 MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
 
 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
@@ -120,21 +118,27 @@ MEM_STATIC void MEM_write64(void* memPtr
 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
 /* currently only defined for gcc and icc */
 #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
     __pragma( pack(push, 1) )
-    typedef union { U16 u16; U32 u32; U64 u64; size_t st; } unalign;
+    typedef struct { U16 v; } unalign16;
+    typedef struct { U32 v; } unalign32;
+    typedef struct { U64 v; } unalign64;
+    typedef struct { size_t v; } unalignArch;
    __pragma( pack(pop) )
 #else
-    typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
+    typedef struct { U16 v; } __attribute__((packed)) unalign16;
+    typedef struct { U32 v; } __attribute__((packed)) unalign32;
+    typedef struct { U64 v; } __attribute__((packed)) unalign64;
+    typedef struct { size_t v; } __attribute__((packed)) unalignArch;
 #endif
 
-MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
-MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
-MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
-MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; }
+MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; }
+MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; }
 
-MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
-MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
-MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; }
 
 #else
 
@@ -182,7 +186,7 @@ MEM_STATIC U32 MEM_swap32(U32 in)
 {
 #if defined(_MSC_VER)     /* Visual Studio */
     return _byteswap_ulong(in);
-#elif defined (__GNUC__)
+#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
     return __builtin_bswap32(in);
 #else
     return  ((in << 24) & 0xff000000 ) |
@@ -196,7 +200,7 @@ MEM_STATIC U64 MEM_swap64(U64 in)
 {
 #if defined(_MSC_VER)     /* Visual Studio */
     return _byteswap_uint64(in);
-#elif defined (__GNUC__)
+#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
     return __builtin_bswap64(in);
 #else
     return  ((in << 56) & 0xff00000000000000ULL) |
@@ -351,20 +355,6 @@ MEM_STATIC void MEM_writeBEST(void* memP
 }
 
 
-/* function safe only for comparisons */
-MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length)
-{
-    switch (length)
-    {
-    default :
-    case 4 : return MEM_read32(memPtr);
-    case 3 : if (MEM_isLittleEndian())
-                return MEM_read32(memPtr)<<8;
-             else
-                return MEM_read32(memPtr)>>8;
-    }
-}
-
 #if defined (__cplusplus)
 }
 #endif
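For readers unfamiliar with the three access methods described in the mem.h comment above, the default (method 0) boils down to a memcpy-based read. The sketch below is not from the patch; it is a plain-C illustration of why method 0 is the safe default (no alignment assumption, and optimizing compilers typically lower it to a single load where the target allows it) and of the direct cast that method 2 performs at the cost of undefined behaviour on misaligned pointers.

#include <stdint.h>
#include <string.h>

/* Method 0 in miniature: an unaligned-safe 32-bit read built on memcpy(). */
static uint32_t read32_any_alignment(const void* ptr)
{
    uint32_t v;
    memcpy(&v, ptr, sizeof(v));   /* defined behaviour for any alignment of ptr */
    return v;
}

/* Method 2 in miniature: the direct cast the comment warns about.
 * Fast where it works, undefined behaviour if ptr is not 4-byte aligned. */
static uint32_t read32_direct(const void* ptr)
{
    return *(const uint32_t*)ptr;
}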
@@ -1,17 +1,18 @@
 /*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
 
 /* ======   Dependencies   ======= */
 #include <stddef.h>    /* size_t */
-#include <stdlib.h>    /* malloc, calloc, free */
 #include "pool.h"
+#include "zstd_internal.h"  /* ZSTD_malloc, ZSTD_free */
 
 /* ======   Compiler specifics   ====== */
 #if defined(_MSC_VER)
@@ -25,13 +26,14 @@
 
 /* A job is a function and an opaque argument */
 typedef struct POOL_job_s {
   POOL_function function;
   void *opaque;
 } POOL_job;
 
 struct POOL_ctx_s {
+    ZSTD_customMem customMem;
     /* Keep track of the threads */
-    pthread_t *threads;
+    ZSTD_pthread_t *threads;
     size_t numThreads;
 
     /* The queue is a circular buffer */
@@ -39,12 +41,18 @@ struct POOL_ctx_s {
     size_t queueHead;
     size_t queueTail;
     size_t queueSize;
+
+    /* The number of threads working on jobs */
+    size_t numThreadsBusy;
+    /* Indicates if the queue is empty */
+    int queueEmpty;
+
     /* The mutex protects the queue */
-    pthread_mutex_t queueMutex;
+    ZSTD_pthread_mutex_t queueMutex;
     /* Condition variable for pushers to wait on when the queue is full */
-    pthread_cond_t queuePushCond;
+    ZSTD_pthread_cond_t queuePushCond;
     /* Condition variables for poppers to wait on when the queue is empty */
-    pthread_cond_t queuePopCond;
+    ZSTD_pthread_cond_t queuePopCond;
     /* Indicates if the queue is shutting down */
     int shutdown;
 };
@@ -59,55 +67,73 @@ static void* POOL_thread(void* opaque) {
     if (!ctx) { return NULL; }
     for (;;) {
         /* Lock the mutex and wait for a non-empty queue or until shutdown */
-        pthread_mutex_lock(&ctx->queueMutex);
-        while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) {
-            pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
+        ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+
+        while (ctx->queueEmpty && !ctx->shutdown) {
+            ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
         }
         /* empty => shutting down: so stop */
-        if (ctx->queueHead == ctx->queueTail) {
-            pthread_mutex_unlock(&ctx->queueMutex);
+        if (ctx->queueEmpty) {
+            ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
            return opaque;
        }
        /* Pop a job off the queue */
        {   POOL_job const job = ctx->queue[ctx->queueHead];
            ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
+            ctx->numThreadsBusy++;
+            ctx->queueEmpty = ctx->queueHead == ctx->queueTail;
            /* Unlock the mutex, signal a pusher, and run the job */
-            pthread_mutex_unlock(&ctx->queueMutex);
-            pthread_cond_signal(&ctx->queuePushCond);
+            ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+            ZSTD_pthread_cond_signal(&ctx->queuePushCond);
+
            job.function(job.opaque);
-        }
-    }
+
+            /* If the intended queue size was 0, signal after finishing job */
+            if (ctx->queueSize == 1) {
+                ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+                ctx->numThreadsBusy--;
+                ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+                ZSTD_pthread_cond_signal(&ctx->queuePushCond);
+        }   }
+    }  /* for (;;) */
    /* Unreachable */
 }
 
-POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
-    POOL_ctx *ctx;
+POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
+    return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
+}
+
+POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
+    POOL_ctx* ctx;
    /* Check the parameters */
-    if (!numThreads || !queueSize) { return NULL; }
+    if (!numThreads) { return NULL; }
    /* Allocate the context and zero initialize */
-    ctx = (POOL_ctx *)calloc(1, sizeof(POOL_ctx));
+    ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem);
    if (!ctx) { return NULL; }
    /* Initialize the job queue.
     * It needs one extra space since one space is wasted to differentiate empty
     * and full queues.
     */
    ctx->queueSize = queueSize + 1;
-    ctx->queue = (POOL_job *)malloc(ctx->queueSize * sizeof(POOL_job));
+    ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem);
    ctx->queueHead = 0;
    ctx->queueTail = 0;
-    pthread_mutex_init(&ctx->queueMutex, NULL);
-    pthread_cond_init(&ctx->queuePushCond, NULL);
-    pthread_cond_init(&ctx->queuePopCond, NULL);
+    ctx->numThreadsBusy = 0;
+    ctx->queueEmpty = 1;
+    (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
+    (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
+    (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
    ctx->shutdown = 0;
    /* Allocate space for the thread handles */
-    ctx->threads = (pthread_t *)malloc(numThreads * sizeof(pthread_t));
+    ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
    ctx->numThreads = 0;
+    ctx->customMem = customMem;
    /* Check for errors */
    if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
    /* Initialize the threads */
    {   size_t i;
        for (i = 0; i < numThreads; ++i) {
-            if (pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
+            if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
                ctx->numThreads = i;
                POOL_free(ctx);
                return NULL;
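The queue initialized above is the classic ring buffer with one deliberately wasted slot: allocating queueSize + 1 entries is what lets "head == tail" mean empty while a full queue stops one slot short, and the new numThreadsBusy/queueEmpty fields exist because a pool created with an intended queue size of 0 collapses to a one-slot ring where that trick alone is not enough. A minimal illustration of the invariant (not part of the patch):

#include <stddef.h>

/* Ring-buffer bookkeeping in isolation: with capacity N+1 for N usable slots,
 * "empty" and "full" stay distinguishable without a separate counter. */
static int ring_is_empty(size_t head, size_t tail)
{
    return head == tail;
}

static int ring_is_full(size_t head, size_t tail, size_t size)
{
    return ((tail + 1) % size) == head;   /* the next push would collide with head */
}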
@@ -120,75 +146,138 @@ POOL_ctx *POOL_create(size_t numThreads,
 /*! POOL_join() :
     Shutdown the queue, wake any sleeping threads, and join all of the threads.
 */
-static void POOL_join(POOL_ctx *ctx) {
+static void POOL_join(POOL_ctx* ctx) {
     /* Shut down the queue */
-    pthread_mutex_lock(&ctx->queueMutex);
+    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
     ctx->shutdown = 1;
-    pthread_mutex_unlock(&ctx->queueMutex);
+    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
     /* Wake up sleeping threads */
-    pthread_cond_broadcast(&ctx->queuePushCond);
-    pthread_cond_broadcast(&ctx->queuePopCond);
+    ZSTD_pthread_cond_broadcast(&ctx->queuePushCond);
+    ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
     /* Join all of the threads */
     {   size_t i;
         for (i = 0; i < ctx->numThreads; ++i) {
-            pthread_join(ctx->threads[i], NULL);
+            ZSTD_pthread_join(ctx->threads[i], NULL);
     }   }
 }
 
 void POOL_free(POOL_ctx *ctx) {
     if (!ctx) { return; }
     POOL_join(ctx);
-    pthread_mutex_destroy(&ctx->queueMutex);
-    pthread_cond_destroy(&ctx->queuePushCond);
-    pthread_cond_destroy(&ctx->queuePopCond);
-    if (ctx->queue) free(ctx->queue);
-    if (ctx->threads) free(ctx->threads);
-    free(ctx);
+    ZSTD_pthread_mutex_destroy(&ctx->queueMutex);
+    ZSTD_pthread_cond_destroy(&ctx->queuePushCond);
+    ZSTD_pthread_cond_destroy(&ctx->queuePopCond);
+    ZSTD_free(ctx->queue, ctx->customMem);
+    ZSTD_free(ctx->threads, ctx->customMem);
+    ZSTD_free(ctx, ctx->customMem);
+}
+
+size_t POOL_sizeof(POOL_ctx *ctx) {
+    if (ctx==NULL) return 0;  /* supports sizeof NULL */
+    return sizeof(*ctx)
+        + ctx->queueSize * sizeof(POOL_job)
+        + ctx->numThreads * sizeof(ZSTD_pthread_t);
+}
+
+/**
+ * Returns 1 if the queue is full and 0 otherwise.
+ *
+ * If the queueSize is 1 (the pool was created with an intended queueSize of 0),
+ * then a queue is empty if there is a thread free and no job is waiting.
+ */
+static int isQueueFull(POOL_ctx const* ctx) {
+    if (ctx->queueSize > 1) {
+        return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize);
+    } else {
+        return ctx->numThreadsBusy == ctx->numThreads ||
+               !ctx->queueEmpty;
+    }
+}
+
+
+static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
+{
+    POOL_job const job = {function, opaque};
+    assert(ctx != NULL);
+    if (ctx->shutdown) return;
+
+    ctx->queueEmpty = 0;
+    ctx->queue[ctx->queueTail] = job;
+    ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize;
+    ZSTD_pthread_cond_signal(&ctx->queuePopCond);
+}
+
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque)
+{
+    assert(ctx != NULL);
+    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+    /* Wait until there is space in the queue for the new job */
+    while (isQueueFull(ctx) && (!ctx->shutdown)) {
+        ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
+    }
+    POOL_add_internal(ctx, function, opaque);
+    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
 }
 
-void POOL_add(void *ctxVoid, POOL_function function, void *opaque) {
-    POOL_ctx *ctx = (POOL_ctx *)ctxVoid;
-    if (!ctx) { return; }
 
-    pthread_mutex_lock(&ctx->queueMutex);
-    {   POOL_job const job = {function, opaque};
-
-        /* Wait until there is space in the queue for the new job */
-        size_t newTail = (ctx->queueTail + 1) % ctx->queueSize;
-        while (ctx->queueHead == newTail && !ctx->shutdown) {
-            pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
-            newTail = (ctx->queueTail + 1) % ctx->queueSize;
-        }
-        /* The queue is still going => there is space */
-        if (!ctx->shutdown) {
-            ctx->queue[ctx->queueTail] = job;
-            ctx->queueTail = newTail;
-        }
-    }
-    pthread_mutex_unlock(&ctx->queueMutex);
-    pthread_cond_signal(&ctx->queuePopCond);
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque)
+{
+    assert(ctx != NULL);
+    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+    if (isQueueFull(ctx)) {
+        ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+        return 0;
+    }
+    POOL_add_internal(ctx, function, opaque);
+    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+    return 1;
 }
 
+
 #else  /* ZSTD_MULTITHREAD not defined */
+
+/* ========================== */
 /* No multi-threading support */
+/* ========================== */
 
+
-/* We don't need any data, but if it is empty malloc() might return NULL. */
+/* We don't need any data, but if it is empty, malloc() might return NULL. */
 struct POOL_ctx_s {
-    int data;
+    int dummy;
 };
+static POOL_ctx g_ctx;
 
-POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
-    (void)numThreads;
-    (void)queueSize;
-    return (POOL_ctx *)malloc(sizeof(POOL_ctx));
+POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
+    return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
 }
 
-void POOL_free(POOL_ctx *ctx) {
-    if (ctx) free(ctx);
+POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
+    (void)numThreads;
+    (void)queueSize;
+    (void)customMem;
+    return &g_ctx;
+}
+
+void POOL_free(POOL_ctx* ctx) {
+    assert(!ctx || ctx == &g_ctx);
+    (void)ctx;
 }
 
-void POOL_add(void *ctx, POOL_function function, void *opaque) {
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) {
     (void)ctx;
     function(opaque);
+}
+
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) {
+    (void)ctx;
+    function(opaque);
+    return 1;
+}
+
+size_t POOL_sizeof(POOL_ctx* ctx) {
+    if (ctx==NULL) return 0;  /* supports sizeof NULL */
+    assert(ctx == &g_ctx);
+    return sizeof(*ctx);
 }
 
 #endif  /* ZSTD_MULTITHREAD */
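POOL_tryAdd(), added above, never blocks: it returns 1 if the job was queued and 0 if the queue is full (or, for a zero-size queue, if every worker is busy). One way a caller might use it is to fall back to running the job on the current thread; this pattern is an illustration under that reading of the declared semantics, not code from the patch. The demo_job type and demo_run worker below are hypothetical names introduced only for the sketch.

#include "pool.h"

/* Hypothetical job payload and worker, for illustration only. */
typedef struct { int id; } demo_job;

static void demo_run(void* opaque)
{
    demo_job* const job = (demo_job*)opaque;
    (void)job;   /* real work would go here */
}

/* Queue the job if a worker can take it; otherwise do it synchronously. */
static void run_now_or_in_pool(POOL_ctx* pool, demo_job* job)
{
    if (!POOL_tryAdd(pool, demo_run, job)) {
        demo_run(job);   /* no room in the pool: run on the calling thread */
    }
}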
@@ -1,11 +1,13 @@
 /*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
+
 #ifndef POOL_H
 #define POOL_H
 
@@ -15,38 +17,54 @@ extern "C" {
 
 
 #include <stddef.h>   /* size_t */
+#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_customMem */
+#include "zstd.h"
 
 typedef struct POOL_ctx_s POOL_ctx;
 
 /*! POOL_create() :
-    Create a thread pool with at most `numThreads` threads.
-    `numThreads` must be at least 1.
-    The maximum number of queued jobs before blocking is `queueSize`.
-    `queueSize` must be at least 1.
-    @return : The POOL_ctx pointer on success else NULL.
+ *  Create a thread pool with at most `numThreads` threads.
+ *  `numThreads` must be at least 1.
+ *  The maximum number of queued jobs before blocking is `queueSize`.
+ * @return : POOL_ctx pointer on success, else NULL.
 */
-POOL_ctx *POOL_create(size_t numThreads, size_t queueSize);
+POOL_ctx* POOL_create(size_t numThreads, size_t queueSize);
+
+POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem);
 
 /*! POOL_free() :
     Free a thread pool returned by POOL_create().
 */
-void POOL_free(POOL_ctx *ctx);
+void POOL_free(POOL_ctx* ctx);
+
+/*! POOL_sizeof() :
+    return memory usage of pool returned by POOL_create().
+*/
+size_t POOL_sizeof(POOL_ctx* ctx);
 
 /*! POOL_function :
     The function type that can be added to a thread pool.
 */
-typedef void (*POOL_function)(void *);
+typedef void (*POOL_function)(void*);
 /*! POOL_add_function :
     The function type for a generic thread pool add function.
 */
-typedef void (*POOL_add_function)(void *, POOL_function, void *);
+typedef void (*POOL_add_function)(void*, POOL_function, void*);
 
 /*! POOL_add() :
-    Add the job `function(opaque)` to the thread pool.
+    Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
     Possibly blocks until there is room in the queue.
     Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
 */
-void POOL_add(void *ctx, POOL_function function, void *opaque);
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
+
+
+/*! POOL_tryAdd() :
+    Add the job `function(opaque)` to the thread pool if a worker is available.
+    return immediately otherwise.
+   @return : 1 if successful, 0 if not.
+*/
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);
 
 
 #if defined (__cplusplus)
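Taken together, the public surface of pool.h after this change stays small: create a pool, add jobs, free it (POOL_free() joins the workers first, as pool.c above shows). A minimal usage sketch under those assumptions, not code from the patch:

#include <stdio.h>
#include "pool.h"

/* A trivial job: print the id it was handed via the opaque pointer. */
static void say_hello(void* opaque)
{
    printf("hello from job %d\n", *(int*)opaque);
}

int main(void)
{
    int ids[4] = {0, 1, 2, 3};
    POOL_ctx* const pool = POOL_create(2 /* threads */, 8 /* queued jobs */);
    if (pool == NULL) return 1;

    {   int i;
        for (i = 0; i < 4; ++i)
            POOL_add(pool, say_hello, &ids[i]);   /* may block if the queue is full */
    }

    POOL_free(pool);   /* shuts down the queue and joins the workers */
    return 0;
}

Note that `ids` outlives the pool, which satisfies the documented requirement that `opaque` stay alive until each job has completed; without ZSTD_MULTITHREAD the same code still works because POOL_add() simply runs the job synchronously.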
@@ -1,11 +1,10 @@
-
 /**
  * Copyright (c) 2016 Tino Reichardt
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  *
  * You can contact the author at:
  * - zstdmt source repository: https://github.com/mcmilk/zstdmt
@@ -15,11 +14,8 @@
  * This file will hold wrapper for systems, which do not support pthreads
  */
 
-/* ======   Compiler specifics   ====== */
-#if defined(_MSC_VER)
-#  pragma warning(disable : 4206)  /* disable: C4206: translation unit is empty (when ZSTD_MULTITHREAD is not defined) */
-#endif
-
+/* create fake symbol to avoid empty trnaslation unit warning */
+int g_ZSTD_threading_useles_symbol;
 
 #if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
 
@@ -39,12 +35,12 @@
 
 static unsigned __stdcall worker(void *arg)
 {
-    pthread_t* const thread = (pthread_t*) arg;
+    ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg;
     thread->arg = thread->start_routine(thread->arg);
     return 0;
 }
 
-int pthread_create(pthread_t* thread, const void* unused,
+int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
             void* (*start_routine) (void*), void* arg)
 {
     (void)unused;
@@ -58,16 +54,16 @@ int pthread_create(pthread_t* thread, co
     return 0;
 }
 
-int _pthread_join(pthread_t * thread, void **value_ptr)
+int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
 {
     DWORD result;
 
-    if (!thread->handle) return 0;
+    if (!thread.handle) return 0;
 
-    result = WaitForSingleObject(thread->handle, INFINITE);
+    result = WaitForSingleObject(thread.handle, INFINITE);
     switch (result) {
     case WAIT_OBJECT_0:
-        if (value_ptr) *value_ptr = thread->arg;
+        if (value_ptr) *value_ptr = thread.arg;
         return 0;
     case WAIT_ABANDONED:
         return EINVAL;
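The Win32 branch above now receives ZSTD_pthread_t by value in ZSTD_pthread_join(), which is why the `->` member accesses became `.` accesses. A hedged sketch of the calling convention, assuming the matching declarations live in threading.h from the same patch:

#include "threading.h"   /* ZSTD_pthread_t, ZSTD_pthread_create, ZSTD_pthread_join (assumed declared here) */

/* A deliberately boring worker so the sketch stays self-contained. */
static void* count_to_a_million(void* arg)
{
    volatile unsigned n = 0;
    while (n < 1000000u) n++;
    (void)arg;
    return NULL;
}

static int spawn_and_wait(void)
{
    ZSTD_pthread_t t;
    if (ZSTD_pthread_create(&t, NULL, count_to_a_million, NULL))
        return -1;                       /* non-zero return means creation failed */
    return ZSTD_pthread_join(t, NULL);   /* note: the handle is passed by value */
}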