##// END OF EJS Templates
freeze: merge default into stable for 4.2 code freeze
Augie Fackler -
r32054:616e7883 merge 4.2-rc stable
parent child Browse files
Show More

The requested changes are too big and content was truncated. Show full diff

This diff has been collapsed because it changes many lines (770 lines changed). Show them Hide them
@@ -0,0 +1,770 b''
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8
9 #include "python-zstandard.h"
10
11 extern PyObject* ZstdError;
12
13 PyDoc_STRVAR(BufferWithSegments__doc__,
14 "BufferWithSegments - A memory buffer holding known sub-segments.\n"
15 "\n"
16 "This type represents a contiguous chunk of memory containing N discrete\n"
17 "items within sub-segments of that memory.\n"
18 "\n"
19 "Segments within the buffer are stored as an array of\n"
20 "``(offset, length)`` pairs, where each element is an unsigned 64-bit\n"
21 "integer using the host/native bit order representation.\n"
22 "\n"
23 "The type exists to facilitate operations against N>1 items without the\n"
24 "overhead of Python object creation and management.\n"
25 );
26
27 static void BufferWithSegments_dealloc(ZstdBufferWithSegments* self) {
28 /* Backing memory is either canonically owned by a Py_buffer or by us. */
29 if (self->parent.buf) {
30 PyBuffer_Release(&self->parent);
31 }
32 else if (self->useFree) {
33 free(self->data);
34 }
35 else {
36 PyMem_Free(self->data);
37 }
38
39 self->data = NULL;
40
41 if (self->useFree) {
42 free(self->segments);
43 }
44 else {
45 PyMem_Free(self->segments);
46 }
47
48 self->segments = NULL;
49
50 PyObject_Del(self);
51 }
52
53 static int BufferWithSegments_init(ZstdBufferWithSegments* self, PyObject* args, PyObject* kwargs) {
54 static char* kwlist[] = {
55 "data",
56 "segments",
57 NULL
58 };
59
60 Py_buffer segments;
61 Py_ssize_t segmentCount;
62 Py_ssize_t i;
63
64 memset(&self->parent, 0, sizeof(self->parent));
65
66 #if PY_MAJOR_VERSION >= 3
67 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*y*:BufferWithSegments",
68 #else
69 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*s*:BufferWithSegments",
70 #endif
71 kwlist, &self->parent, &segments)) {
72 return -1;
73 }
74
75 if (!PyBuffer_IsContiguous(&self->parent, 'C') || self->parent.ndim > 1) {
76 PyErr_SetString(PyExc_ValueError, "data buffer should be contiguous and have a single dimension");
77 goto except;
78 }
79
80 if (!PyBuffer_IsContiguous(&segments, 'C') || segments.ndim > 1) {
81 PyErr_SetString(PyExc_ValueError, "segments buffer should be contiguous and have a single dimension");
82 goto except;
83 }
84
85 if (segments.len % sizeof(BufferSegment)) {
86 PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of %lu",
87 sizeof(BufferSegment));
88 goto except;
89 }
90
91 segmentCount = segments.len / sizeof(BufferSegment);
92
93 /* Validate segments data, as blindly trusting it could lead to arbitrary
94 memory access. */
95 for (i = 0; i < segmentCount; i++) {
96 BufferSegment* segment = &((BufferSegment*)(segments.buf))[i];
97
98 if (segment->offset + segment->length > (unsigned long long)self->parent.len) {
99 PyErr_SetString(PyExc_ValueError, "offset within segments array references memory outside buffer");
100 goto except;
101 return -1;
102 }
103 }
104
105 /* Make a copy of the segments data. It is cheap to do so and is a guard
106 against caller changing offsets, which has security implications. */
107 self->segments = PyMem_Malloc(segments.len);
108 if (!self->segments) {
109 PyErr_NoMemory();
110 goto except;
111 }
112
113 memcpy(self->segments, segments.buf, segments.len);
114 PyBuffer_Release(&segments);
115
116 self->data = self->parent.buf;
117 self->dataSize = self->parent.len;
118 self->segmentCount = segmentCount;
119
120 return 0;
121
122 except:
123 PyBuffer_Release(&self->parent);
124 PyBuffer_Release(&segments);
125 return -1;
126 };
127
128 /**
129 * Construct a BufferWithSegments from existing memory and offsets.
130 *
131 * Ownership of the backing memory and BufferSegments will be transferred to
132 * the created object and freed when the BufferWithSegments is destroyed.
133 */
134 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize,
135 BufferSegment* segments, Py_ssize_t segmentsSize) {
136 ZstdBufferWithSegments* result = NULL;
137 Py_ssize_t i;
138
139 if (NULL == data) {
140 PyErr_SetString(PyExc_ValueError, "data is NULL");
141 return NULL;
142 }
143
144 if (NULL == segments) {
145 PyErr_SetString(PyExc_ValueError, "segments is NULL");
146 return NULL;
147 }
148
149 for (i = 0; i < segmentsSize; i++) {
150 BufferSegment* segment = &segments[i];
151
152 if (segment->offset + segment->length > dataSize) {
153 PyErr_SetString(PyExc_ValueError, "offset in segments overflows buffer size");
154 return NULL;
155 }
156 }
157
158 result = PyObject_New(ZstdBufferWithSegments, &ZstdBufferWithSegmentsType);
159 if (NULL == result) {
160 return NULL;
161 }
162
163 result->useFree = 0;
164
165 memset(&result->parent, 0, sizeof(result->parent));
166 result->data = data;
167 result->dataSize = dataSize;
168 result->segments = segments;
169 result->segmentCount = segmentsSize;
170
171 return result;
172 }
173
174 static Py_ssize_t BufferWithSegments_length(ZstdBufferWithSegments* self) {
175 return self->segmentCount;
176 }
177
178 static ZstdBufferSegment* BufferWithSegments_item(ZstdBufferWithSegments* self, Py_ssize_t i) {
179 ZstdBufferSegment* result = NULL;
180
181 if (i < 0) {
182 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
183 return NULL;
184 }
185
186 if (i >= self->segmentCount) {
187 PyErr_Format(PyExc_IndexError, "offset must be less than %zd", self->segmentCount);
188 return NULL;
189 }
190
191 result = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL);
192 if (NULL == result) {
193 return NULL;
194 }
195
196 result->parent = (PyObject*)self;
197 Py_INCREF(self);
198
199 result->data = (char*)self->data + self->segments[i].offset;
200 result->dataSize = self->segments[i].length;
201 result->offset = self->segments[i].offset;
202
203 return result;
204 }
205
206 #if PY_MAJOR_VERSION >= 3
207 static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) {
208 return PyBuffer_FillInfo(view, (PyObject*)self, self->data, self->dataSize, 1, flags);
209 }
210 #else
211 static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) {
212 if (segment != 0) {
213 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
214 return -1;
215 }
216
217 *ptrptr = self->data;
218 return self->dataSize;
219 }
220
221 static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) {
222 if (len) {
223 *len = 1;
224 }
225
226 return 1;
227 }
228 #endif
229
230 PyDoc_STRVAR(BufferWithSegments_tobytes__doc__,
231 "Obtain a bytes instance for this buffer.\n"
232 );
233
234 static PyObject* BufferWithSegments_tobytes(ZstdBufferWithSegments* self) {
235 return PyBytes_FromStringAndSize(self->data, self->dataSize);
236 }
237
238 PyDoc_STRVAR(BufferWithSegments_segments__doc__,
239 "Obtain a BufferSegments describing segments in this sintance.\n"
240 );
241
242 static ZstdBufferSegments* BufferWithSegments_segments(ZstdBufferWithSegments* self) {
243 ZstdBufferSegments* result = (ZstdBufferSegments*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentsType, NULL);
244 if (NULL == result) {
245 return NULL;
246 }
247
248 result->parent = (PyObject*)self;
249 Py_INCREF(self);
250 result->segments = self->segments;
251 result->segmentCount = self->segmentCount;
252
253 return result;
254 }
255
256 static PySequenceMethods BufferWithSegments_sq = {
257 (lenfunc)BufferWithSegments_length, /* sq_length */
258 0, /* sq_concat */
259 0, /* sq_repeat */
260 (ssizeargfunc)BufferWithSegments_item, /* sq_item */
261 0, /* sq_ass_item */
262 0, /* sq_contains */
263 0, /* sq_inplace_concat */
264 0 /* sq_inplace_repeat */
265 };
266
267 static PyBufferProcs BufferWithSegments_as_buffer = {
268 #if PY_MAJOR_VERSION >= 3
269 (getbufferproc)BufferWithSegments_getbuffer, /* bf_getbuffer */
270 0 /* bf_releasebuffer */
271 #else
272 (readbufferproc)BufferWithSegments_getreadbuffer, /* bf_getreadbuffer */
273 0, /* bf_getwritebuffer */
274 (segcountproc)BufferWithSegments_getsegcount, /* bf_getsegcount */
275 0 /* bf_getcharbuffer */
276 #endif
277 };
278
279 static PyMethodDef BufferWithSegments_methods[] = {
280 { "segments", (PyCFunction)BufferWithSegments_segments,
281 METH_NOARGS, BufferWithSegments_segments__doc__ },
282 { "tobytes", (PyCFunction)BufferWithSegments_tobytes,
283 METH_NOARGS, BufferWithSegments_tobytes__doc__ },
284 { NULL, NULL }
285 };
286
287 static PyMemberDef BufferWithSegments_members[] = {
288 { "size", T_ULONGLONG, offsetof(ZstdBufferWithSegments, dataSize),
289 READONLY, "total size of the buffer in bytes" },
290 { NULL }
291 };
292
293 PyTypeObject ZstdBufferWithSegmentsType = {
294 PyVarObject_HEAD_INIT(NULL, 0)
295 "zstd.BufferWithSegments", /* tp_name */
296 sizeof(ZstdBufferWithSegments),/* tp_basicsize */
297 0, /* tp_itemsize */
298 (destructor)BufferWithSegments_dealloc, /* tp_dealloc */
299 0, /* tp_print */
300 0, /* tp_getattr */
301 0, /* tp_setattr */
302 0, /* tp_compare */
303 0, /* tp_repr */
304 0, /* tp_as_number */
305 &BufferWithSegments_sq, /* tp_as_sequence */
306 0, /* tp_as_mapping */
307 0, /* tp_hash */
308 0, /* tp_call */
309 0, /* tp_str */
310 0, /* tp_getattro */
311 0, /* tp_setattro */
312 &BufferWithSegments_as_buffer, /* tp_as_buffer */
313 Py_TPFLAGS_DEFAULT, /* tp_flags */
314 BufferWithSegments__doc__, /* tp_doc */
315 0, /* tp_traverse */
316 0, /* tp_clear */
317 0, /* tp_richcompare */
318 0, /* tp_weaklistoffset */
319 0, /* tp_iter */
320 0, /* tp_iternext */
321 BufferWithSegments_methods, /* tp_methods */
322 BufferWithSegments_members, /* tp_members */
323 0, /* tp_getset */
324 0, /* tp_base */
325 0, /* tp_dict */
326 0, /* tp_descr_get */
327 0, /* tp_descr_set */
328 0, /* tp_dictoffset */
329 (initproc)BufferWithSegments_init, /* tp_init */
330 0, /* tp_alloc */
331 PyType_GenericNew, /* tp_new */
332 };
333
334 PyDoc_STRVAR(BufferSegments__doc__,
335 "BufferSegments - Represents segments/offsets within a BufferWithSegments\n"
336 );
337
338 static void BufferSegments_dealloc(ZstdBufferSegments* self) {
339 Py_CLEAR(self->parent);
340 PyObject_Del(self);
341 }
342
343 #if PY_MAJOR_VERSION >= 3
344 static int BufferSegments_getbuffer(ZstdBufferSegments* self, Py_buffer* view, int flags) {
345 return PyBuffer_FillInfo(view, (PyObject*)self,
346 (void*)self->segments, self->segmentCount * sizeof(BufferSegment),
347 1, flags);
348 }
349 #else
350 static Py_ssize_t BufferSegments_getreadbuffer(ZstdBufferSegments* self, Py_ssize_t segment, void **ptrptr) {
351 if (segment != 0) {
352 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
353 return -1;
354 }
355
356 *ptrptr = (void*)self->segments;
357 return self->segmentCount * sizeof(BufferSegment);
358 }
359
360 static Py_ssize_t BufferSegments_getsegcount(ZstdBufferSegments* self, Py_ssize_t* len) {
361 if (len) {
362 *len = 1;
363 }
364
365 return 1;
366 }
367 #endif
368
369 static PyBufferProcs BufferSegments_as_buffer = {
370 #if PY_MAJOR_VERSION >= 3
371 (getbufferproc)BufferSegments_getbuffer,
372 0
373 #else
374 (readbufferproc)BufferSegments_getreadbuffer,
375 0,
376 (segcountproc)BufferSegments_getsegcount,
377 0
378 #endif
379 };
380
381 PyTypeObject ZstdBufferSegmentsType = {
382 PyVarObject_HEAD_INIT(NULL, 0)
383 "zstd.BufferSegments", /* tp_name */
384 sizeof(ZstdBufferSegments),/* tp_basicsize */
385 0, /* tp_itemsize */
386 (destructor)BufferSegments_dealloc, /* tp_dealloc */
387 0, /* tp_print */
388 0, /* tp_getattr */
389 0, /* tp_setattr */
390 0, /* tp_compare */
391 0, /* tp_repr */
392 0, /* tp_as_number */
393 0, /* tp_as_sequence */
394 0, /* tp_as_mapping */
395 0, /* tp_hash */
396 0, /* tp_call */
397 0, /* tp_str */
398 0, /* tp_getattro */
399 0, /* tp_setattro */
400 &BufferSegments_as_buffer, /* tp_as_buffer */
401 Py_TPFLAGS_DEFAULT, /* tp_flags */
402 BufferSegments__doc__, /* tp_doc */
403 0, /* tp_traverse */
404 0, /* tp_clear */
405 0, /* tp_richcompare */
406 0, /* tp_weaklistoffset */
407 0, /* tp_iter */
408 0, /* tp_iternext */
409 0, /* tp_methods */
410 0, /* tp_members */
411 0, /* tp_getset */
412 0, /* tp_base */
413 0, /* tp_dict */
414 0, /* tp_descr_get */
415 0, /* tp_descr_set */
416 0, /* tp_dictoffset */
417 0, /* tp_init */
418 0, /* tp_alloc */
419 PyType_GenericNew, /* tp_new */
420 };
421
422 PyDoc_STRVAR(BufferSegment__doc__,
423 "BufferSegment - Represents a segment within a BufferWithSegments\n"
424 );
425
426 static void BufferSegment_dealloc(ZstdBufferSegment* self) {
427 Py_CLEAR(self->parent);
428 PyObject_Del(self);
429 }
430
431 static Py_ssize_t BufferSegment_length(ZstdBufferSegment* self) {
432 return self->dataSize;
433 }
434
435 #if PY_MAJOR_VERSION >= 3
436 static int BufferSegment_getbuffer(ZstdBufferSegment* self, Py_buffer* view, int flags) {
437 return PyBuffer_FillInfo(view, (PyObject*)self,
438 self->data, self->dataSize, 1, flags);
439 }
440 #else
441 static Py_ssize_t BufferSegment_getreadbuffer(ZstdBufferSegment* self, Py_ssize_t segment, void **ptrptr) {
442 if (segment != 0) {
443 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
444 return -1;
445 }
446
447 *ptrptr = self->data;
448 return self->dataSize;
449 }
450
451 static Py_ssize_t BufferSegment_getsegcount(ZstdBufferSegment* self, Py_ssize_t* len) {
452 if (len) {
453 *len = 1;
454 }
455
456 return 1;
457 }
458 #endif
459
460 PyDoc_STRVAR(BufferSegment_tobytes__doc__,
461 "Obtain a bytes instance for this segment.\n"
462 );
463
464 static PyObject* BufferSegment_tobytes(ZstdBufferSegment* self) {
465 return PyBytes_FromStringAndSize(self->data, self->dataSize);
466 }
467
468 static PySequenceMethods BufferSegment_sq = {
469 (lenfunc)BufferSegment_length, /* sq_length */
470 0, /* sq_concat */
471 0, /* sq_repeat */
472 0, /* sq_item */
473 0, /* sq_ass_item */
474 0, /* sq_contains */
475 0, /* sq_inplace_concat */
476 0 /* sq_inplace_repeat */
477 };
478
479 static PyBufferProcs BufferSegment_as_buffer = {
480 #if PY_MAJOR_VERSION >= 3
481 (getbufferproc)BufferSegment_getbuffer,
482 0
483 #else
484 (readbufferproc)BufferSegment_getreadbuffer,
485 0,
486 (segcountproc)BufferSegment_getsegcount,
487 0
488 #endif
489 };
490
491 static PyMethodDef BufferSegment_methods[] = {
492 { "tobytes", (PyCFunction)BufferSegment_tobytes,
493 METH_NOARGS, BufferSegment_tobytes__doc__ },
494 { NULL, NULL }
495 };
496
497 static PyMemberDef BufferSegment_members[] = {
498 { "offset", T_ULONGLONG, offsetof(ZstdBufferSegment, offset), READONLY,
499 "offset of segment within parent buffer" },
500 { NULL }
501 };
502
503 PyTypeObject ZstdBufferSegmentType = {
504 PyVarObject_HEAD_INIT(NULL, 0)
505 "zstd.BufferSegment", /* tp_name */
506 sizeof(ZstdBufferSegment),/* tp_basicsize */
507 0, /* tp_itemsize */
508 (destructor)BufferSegment_dealloc, /* tp_dealloc */
509 0, /* tp_print */
510 0, /* tp_getattr */
511 0, /* tp_setattr */
512 0, /* tp_compare */
513 0, /* tp_repr */
514 0, /* tp_as_number */
515 &BufferSegment_sq, /* tp_as_sequence */
516 0, /* tp_as_mapping */
517 0, /* tp_hash */
518 0, /* tp_call */
519 0, /* tp_str */
520 0, /* tp_getattro */
521 0, /* tp_setattro */
522 &BufferSegment_as_buffer, /* tp_as_buffer */
523 Py_TPFLAGS_DEFAULT, /* tp_flags */
524 BufferSegment__doc__, /* tp_doc */
525 0, /* tp_traverse */
526 0, /* tp_clear */
527 0, /* tp_richcompare */
528 0, /* tp_weaklistoffset */
529 0, /* tp_iter */
530 0, /* tp_iternext */
531 BufferSegment_methods, /* tp_methods */
532 BufferSegment_members, /* tp_members */
533 0, /* tp_getset */
534 0, /* tp_base */
535 0, /* tp_dict */
536 0, /* tp_descr_get */
537 0, /* tp_descr_set */
538 0, /* tp_dictoffset */
539 0, /* tp_init */
540 0, /* tp_alloc */
541 PyType_GenericNew, /* tp_new */
542 };
543
544 PyDoc_STRVAR(BufferWithSegmentsCollection__doc__,
545 "Represents a collection of BufferWithSegments.\n"
546 );
547
548 static void BufferWithSegmentsCollection_dealloc(ZstdBufferWithSegmentsCollection* self) {
549 Py_ssize_t i;
550
551 if (self->firstElements) {
552 PyMem_Free(self->firstElements);
553 self->firstElements = NULL;
554 }
555
556 if (self->buffers) {
557 for (i = 0; i < self->bufferCount; i++) {
558 Py_CLEAR(self->buffers[i]);
559 }
560
561 PyMem_Free(self->buffers);
562 self->buffers = NULL;
563 }
564
565 PyObject_Del(self);
566 }
567
568 static int BufferWithSegmentsCollection_init(ZstdBufferWithSegmentsCollection* self, PyObject* args) {
569 Py_ssize_t size;
570 Py_ssize_t i;
571 Py_ssize_t offset = 0;
572
573 size = PyTuple_Size(args);
574 if (-1 == size) {
575 return -1;
576 }
577
578 if (0 == size) {
579 PyErr_SetString(PyExc_ValueError, "must pass at least 1 argument");
580 return -1;
581 }
582
583 for (i = 0; i < size; i++) {
584 PyObject* item = PyTuple_GET_ITEM(args, i);
585 if (!PyObject_TypeCheck(item, &ZstdBufferWithSegmentsType)) {
586 PyErr_SetString(PyExc_TypeError, "arguments must be BufferWithSegments instances");
587 return -1;
588 }
589
590 if (0 == ((ZstdBufferWithSegments*)item)->segmentCount ||
591 0 == ((ZstdBufferWithSegments*)item)->dataSize) {
592 PyErr_SetString(PyExc_ValueError, "ZstdBufferWithSegments cannot be empty");
593 return -1;
594 }
595 }
596
597 self->buffers = PyMem_Malloc(size * sizeof(ZstdBufferWithSegments*));
598 if (NULL == self->buffers) {
599 PyErr_NoMemory();
600 return -1;
601 }
602
603 self->firstElements = PyMem_Malloc(size * sizeof(Py_ssize_t));
604 if (NULL == self->firstElements) {
605 PyMem_Free(self->buffers);
606 self->buffers = NULL;
607 PyErr_NoMemory();
608 return -1;
609 }
610
611 self->bufferCount = size;
612
613 for (i = 0; i < size; i++) {
614 ZstdBufferWithSegments* item = (ZstdBufferWithSegments*)PyTuple_GET_ITEM(args, i);
615
616 self->buffers[i] = item;
617 Py_INCREF(item);
618
619 if (i > 0) {
620 self->firstElements[i - 1] = offset;
621 }
622
623 offset += item->segmentCount;
624 }
625
626 self->firstElements[size - 1] = offset;
627
628 return 0;
629 }
630
631 static PyObject* BufferWithSegmentsCollection_size(ZstdBufferWithSegmentsCollection* self) {
632 Py_ssize_t i;
633 Py_ssize_t j;
634 unsigned long long size = 0;
635
636 for (i = 0; i < self->bufferCount; i++) {
637 for (j = 0; j < self->buffers[i]->segmentCount; j++) {
638 size += self->buffers[i]->segments[j].length;
639 }
640 }
641
642 return PyLong_FromUnsignedLongLong(size);
643 }
644
645 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection* self) {
646 return self->firstElements[self->bufferCount - 1];
647 }
648
649 static ZstdBufferSegment* BufferWithSegmentsCollection_item(ZstdBufferWithSegmentsCollection* self, Py_ssize_t i) {
650 Py_ssize_t bufferOffset;
651
652 if (i < 0) {
653 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
654 return NULL;
655 }
656
657 if (i >= BufferWithSegmentsCollection_length(self)) {
658 PyErr_Format(PyExc_IndexError, "offset must be less than %zd",
659 BufferWithSegmentsCollection_length(self));
660 return NULL;
661 }
662
663 for (bufferOffset = 0; bufferOffset < self->bufferCount; bufferOffset++) {
664 Py_ssize_t offset = 0;
665
666 if (i < self->firstElements[bufferOffset]) {
667 if (bufferOffset > 0) {
668 offset = self->firstElements[bufferOffset - 1];
669 }
670
671 return BufferWithSegments_item(self->buffers[bufferOffset], i - offset);
672 }
673 }
674
675 PyErr_SetString(ZstdError, "error resolving segment; this should not happen");
676 return NULL;
677 }
678
679 static PySequenceMethods BufferWithSegmentsCollection_sq = {
680 (lenfunc)BufferWithSegmentsCollection_length, /* sq_length */
681 0, /* sq_concat */
682 0, /* sq_repeat */
683 (ssizeargfunc)BufferWithSegmentsCollection_item, /* sq_item */
684 0, /* sq_ass_item */
685 0, /* sq_contains */
686 0, /* sq_inplace_concat */
687 0 /* sq_inplace_repeat */
688 };
689
690 static PyMethodDef BufferWithSegmentsCollection_methods[] = {
691 { "size", (PyCFunction)BufferWithSegmentsCollection_size,
692 METH_NOARGS, PyDoc_STR("total size in bytes of all segments") },
693 { NULL, NULL }
694 };
695
696 PyTypeObject ZstdBufferWithSegmentsCollectionType = {
697 PyVarObject_HEAD_INIT(NULL, 0)
698 "zstd.BufferWithSegmentsCollection", /* tp_name */
699 sizeof(ZstdBufferWithSegmentsCollection),/* tp_basicsize */
700 0, /* tp_itemsize */
701 (destructor)BufferWithSegmentsCollection_dealloc, /* tp_dealloc */
702 0, /* tp_print */
703 0, /* tp_getattr */
704 0, /* tp_setattr */
705 0, /* tp_compare */
706 0, /* tp_repr */
707 0, /* tp_as_number */
708 &BufferWithSegmentsCollection_sq, /* tp_as_sequence */
709 0, /* tp_as_mapping */
710 0, /* tp_hash */
711 0, /* tp_call */
712 0, /* tp_str */
713 0, /* tp_getattro */
714 0, /* tp_setattro */
715 0, /* tp_as_buffer */
716 Py_TPFLAGS_DEFAULT, /* tp_flags */
717 BufferWithSegmentsCollection__doc__, /* tp_doc */
718 0, /* tp_traverse */
719 0, /* tp_clear */
720 0, /* tp_richcompare */
721 0, /* tp_weaklistoffset */
722 /* TODO implement iterator for performance. */
723 0, /* tp_iter */
724 0, /* tp_iternext */
725 BufferWithSegmentsCollection_methods, /* tp_methods */
726 0, /* tp_members */
727 0, /* tp_getset */
728 0, /* tp_base */
729 0, /* tp_dict */
730 0, /* tp_descr_get */
731 0, /* tp_descr_set */
732 0, /* tp_dictoffset */
733 (initproc)BufferWithSegmentsCollection_init, /* tp_init */
734 0, /* tp_alloc */
735 PyType_GenericNew, /* tp_new */
736 };
737
738 void bufferutil_module_init(PyObject* mod) {
739 Py_TYPE(&ZstdBufferWithSegmentsType) = &PyType_Type;
740 if (PyType_Ready(&ZstdBufferWithSegmentsType) < 0) {
741 return;
742 }
743
744 Py_INCREF(&ZstdBufferWithSegmentsType);
745 PyModule_AddObject(mod, "BufferWithSegments", (PyObject*)&ZstdBufferWithSegmentsType);
746
747 Py_TYPE(&ZstdBufferSegmentsType) = &PyType_Type;
748 if (PyType_Ready(&ZstdBufferSegmentsType) < 0) {
749 return;
750 }
751
752 Py_INCREF(&ZstdBufferSegmentsType);
753 PyModule_AddObject(mod, "BufferSegments", (PyObject*)&ZstdBufferSegmentsType);
754
755 Py_TYPE(&ZstdBufferSegmentType) = &PyType_Type;
756 if (PyType_Ready(&ZstdBufferSegmentType) < 0) {
757 return;
758 }
759
760 Py_INCREF(&ZstdBufferSegmentType);
761 PyModule_AddObject(mod, "BufferSegment", (PyObject*)&ZstdBufferSegmentType);
762
763 Py_TYPE(&ZstdBufferWithSegmentsCollectionType) = &PyType_Type;
764 if (PyType_Ready(&ZstdBufferWithSegmentsCollectionType) < 0) {
765 return;
766 }
767
768 Py_INCREF(&ZstdBufferWithSegmentsCollectionType);
769 PyModule_AddObject(mod, "BufferWithSegmentsCollection", (PyObject*)&ZstdBufferWithSegmentsCollectionType);
770 }
@@ -0,0 +1,132 b''
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8
9 #include "python-zstandard.h"
10
11 extern PyObject* ZstdError;
12
13 PyDoc_STRVAR(FrameParameters__doc__,
14 "FrameParameters: information about a zstd frame");
15
16 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args) {
17 const char* source;
18 Py_ssize_t sourceSize;
19 ZSTD_frameParams params;
20 FrameParametersObject* result = NULL;
21 size_t zresult;
22
23 #if PY_MAJOR_VERSION >= 3
24 if (!PyArg_ParseTuple(args, "y#:get_frame_parameters",
25 #else
26 if (!PyArg_ParseTuple(args, "s#:get_frame_parameters",
27 #endif
28 &source, &sourceSize)) {
29 return NULL;
30 }
31
32 /* Needed for Python 2 to reject unicode */
33 if (!PyBytes_Check(PyTuple_GET_ITEM(args, 0))) {
34 PyErr_SetString(PyExc_TypeError, "argument must be bytes");
35 return NULL;
36 }
37
38 zresult = ZSTD_getFrameParams(&params, (void*)source, sourceSize);
39
40 if (ZSTD_isError(zresult)) {
41 PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult));
42 return NULL;
43 }
44
45 if (zresult) {
46 PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult);
47 return NULL;
48 }
49
50 result = PyObject_New(FrameParametersObject, &FrameParametersType);
51 if (!result) {
52 return NULL;
53 }
54
55 result->frameContentSize = params.frameContentSize;
56 result->windowSize = params.windowSize;
57 result->dictID = params.dictID;
58 result->checksumFlag = params.checksumFlag ? 1 : 0;
59
60 return result;
61 }
62
63 static void FrameParameters_dealloc(PyObject* self) {
64 PyObject_Del(self);
65 }
66
67 static PyMemberDef FrameParameters_members[] = {
68 { "content_size", T_ULONGLONG,
69 offsetof(FrameParametersObject, frameContentSize), READONLY,
70 "frame content size" },
71 { "window_size", T_UINT,
72 offsetof(FrameParametersObject, windowSize), READONLY,
73 "window size" },
74 { "dict_id", T_UINT,
75 offsetof(FrameParametersObject, dictID), READONLY,
76 "dictionary ID" },
77 { "has_checksum", T_BOOL,
78 offsetof(FrameParametersObject, checksumFlag), READONLY,
79 "checksum flag" },
80 { NULL }
81 };
82
83 PyTypeObject FrameParametersType = {
84 PyVarObject_HEAD_INIT(NULL, 0)
85 "FrameParameters", /* tp_name */
86 sizeof(FrameParametersObject), /* tp_basicsize */
87 0, /* tp_itemsize */
88 (destructor)FrameParameters_dealloc, /* tp_dealloc */
89 0, /* tp_print */
90 0, /* tp_getattr */
91 0, /* tp_setattr */
92 0, /* tp_compare */
93 0, /* tp_repr */
94 0, /* tp_as_number */
95 0, /* tp_as_sequence */
96 0, /* tp_as_mapping */
97 0, /* tp_hash */
98 0, /* tp_call */
99 0, /* tp_str */
100 0, /* tp_getattro */
101 0, /* tp_setattro */
102 0, /* tp_as_buffer */
103 Py_TPFLAGS_DEFAULT, /* tp_flags */
104 FrameParameters__doc__, /* tp_doc */
105 0, /* tp_traverse */
106 0, /* tp_clear */
107 0, /* tp_richcompare */
108 0, /* tp_weaklistoffset */
109 0, /* tp_iter */
110 0, /* tp_iternext */
111 0, /* tp_methods */
112 FrameParameters_members, /* tp_members */
113 0, /* tp_getset */
114 0, /* tp_base */
115 0, /* tp_dict */
116 0, /* tp_descr_get */
117 0, /* tp_descr_set */
118 0, /* tp_dictoffset */
119 0, /* tp_init */
120 0, /* tp_alloc */
121 0, /* tp_new */
122 };
123
124 void frameparams_module_init(PyObject* mod) {
125 Py_TYPE(&FrameParametersType) = &PyType_Type;
126 if (PyType_Ready(&FrameParametersType) < 0) {
127 return;
128 }
129
130 Py_INCREF(&FrameParametersType);
131 PyModule_AddObject(mod, "FrameParameters", (PyObject*)&FrameParametersType);
132 }
@@ -0,0 +1,112 b''
1 import struct
2
3 try:
4 import unittest2 as unittest
5 except ImportError:
6 import unittest
7
8 import zstd
9
# Packs/unpacks a single (offset, length) segment record: two native-order,
# standard-size unsigned 64-bit integers (16 bytes total).
ss = struct.Struct('=QQ')
11
12
class TestBufferWithSegments(unittest.TestCase):
    def test_arguments(self):
        # Both constructor arguments are required.
        with self.assertRaises(TypeError):
            zstd.BufferWithSegments()

        with self.assertRaises(TypeError):
            zstd.BufferWithSegments(b'foo')

        # Segments data should be a multiple of 16.
        with self.assertRaisesRegexp(ValueError, 'segments array size is not a multiple of 16'):
            zstd.BufferWithSegments(b'foo', b'\x00\x00')

    def test_invalid_offset(self):
        # A segment extending past the end of the data must be rejected.
        with self.assertRaisesRegexp(ValueError, 'offset within segments array references memory'):
            zstd.BufferWithSegments(b'foo', ss.pack(0, 4))

    def test_invalid_getitem(self):
        b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))

        with self.assertRaisesRegexp(IndexError, 'offset must be non-negative'):
            test = b[-10]

        with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'):
            test = b[1]

        with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'):
            test = b[2]

    def test_single(self):
        b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))

        self.assertEqual(len(b), 1)
        self.assertEqual(b.size, 3)
        self.assertEqual(b.tobytes(), b'foo')

        # Index 0 exposes the lone segment.
        self.assertEqual(len(b[0]), 3)
        self.assertEqual(b[0].offset, 0)
        self.assertEqual(b[0].tobytes(), b'foo')

    def test_multiple(self):
        segments = b''.join([ss.pack(0, 3), ss.pack(3, 4), ss.pack(7, 5)])
        b = zstd.BufferWithSegments(b'foofooxfooxy', segments)

        self.assertEqual(len(b), 3)
        self.assertEqual(b.size, 12)
        self.assertEqual(b.tobytes(), b'foofooxfooxy')

        self.assertEqual(b[0].tobytes(), b'foo')
        self.assertEqual(b[1].tobytes(), b'foox')
        self.assertEqual(b[2].tobytes(), b'fooxy')
63
class TestBufferWithSegmentsCollection(unittest.TestCase):
    def test_empty_constructor(self):
        with self.assertRaisesRegexp(ValueError, 'must pass at least 1 argument'):
            zstd.BufferWithSegmentsCollection()

    def test_argument_validation(self):
        # Every positional argument must be a BufferWithSegments.
        with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
            zstd.BufferWithSegmentsCollection(None)

        with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
            zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'foo', ss.pack(0, 3)),
                                              None)

        # Empty buffers are rejected outright.
        with self.assertRaisesRegexp(ValueError, 'ZstdBufferWithSegments cannot be empty'):
            zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b''))

    def test_length(self):
        b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
        b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
                                                          ss.pack(3, 3)]))

        c = zstd.BufferWithSegmentsCollection(b1)
        self.assertEqual(len(c), 1)
        self.assertEqual(c.size(), 3)

        c = zstd.BufferWithSegmentsCollection(b2)
        self.assertEqual(len(c), 2)
        self.assertEqual(c.size(), 6)

        # len() counts segments across all buffers; size() sums their bytes.
        c = zstd.BufferWithSegmentsCollection(b1, b2)
        self.assertEqual(len(c), 3)
        self.assertEqual(c.size(), 9)

    def test_getitem(self):
        b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
        b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
                                                          ss.pack(3, 3)]))

        c = zstd.BufferWithSegmentsCollection(b1, b2)

        with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'):
            c[3]

        with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'):
            c[4]

        # Global indexes resolve across buffer boundaries.
        self.assertEqual(c[0].tobytes(), b'foo')
        self.assertEqual(c[1].tobytes(), b'bar')
        self.assertEqual(c[2].tobytes(), b'baz')
@@ -0,0 +1,143 b''
1 import io
2 import os
3
4 try:
5 import unittest2 as unittest
6 except ImportError:
7 import unittest
8
9 try:
10 import hypothesis
11 import hypothesis.strategies as strategies
12 except ImportError:
13 raise unittest.SkipTest('hypothesis not available')
14
15 import zstd
16
17 from . common import (
18 make_cffi,
19 random_input_data,
20 )
21
22
23 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
24 @make_cffi
25 class TestCompressor_write_to_fuzzing(unittest.TestCase):
26 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
27 level=strategies.integers(min_value=1, max_value=5),
28 write_size=strategies.integers(min_value=1, max_value=1048576))
29 def test_write_size_variance(self, original, level, write_size):
30 refctx = zstd.ZstdCompressor(level=level)
31 ref_frame = refctx.compress(original)
32
33 cctx = zstd.ZstdCompressor(level=level)
34 b = io.BytesIO()
35 with cctx.write_to(b, size=len(original), write_size=write_size) as compressor:
36 compressor.write(original)
37
38 self.assertEqual(b.getvalue(), ref_frame)
39
40
41 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
42 @make_cffi
43 class TestCompressor_copy_stream_fuzzing(unittest.TestCase):
44 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
45 level=strategies.integers(min_value=1, max_value=5),
46 read_size=strategies.integers(min_value=1, max_value=1048576),
47 write_size=strategies.integers(min_value=1, max_value=1048576))
48 def test_read_write_size_variance(self, original, level, read_size, write_size):
49 refctx = zstd.ZstdCompressor(level=level)
50 ref_frame = refctx.compress(original)
51
52 cctx = zstd.ZstdCompressor(level=level)
53 source = io.BytesIO(original)
54 dest = io.BytesIO()
55
56 cctx.copy_stream(source, dest, size=len(original), read_size=read_size,
57 write_size=write_size)
58
59 self.assertEqual(dest.getvalue(), ref_frame)
60
61
62 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
63 @make_cffi
64 class TestCompressor_compressobj_fuzzing(unittest.TestCase):
65 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
66 level=strategies.integers(min_value=1, max_value=5),
67 chunk_sizes=strategies.streaming(
68 strategies.integers(min_value=1, max_value=4096)))
69 def test_random_input_sizes(self, original, level, chunk_sizes):
70 chunk_sizes = iter(chunk_sizes)
71
72 refctx = zstd.ZstdCompressor(level=level)
73 ref_frame = refctx.compress(original)
74
75 cctx = zstd.ZstdCompressor(level=level)
76 cobj = cctx.compressobj(size=len(original))
77
78 chunks = []
79 i = 0
80 while True:
81 chunk_size = next(chunk_sizes)
82 source = original[i:i + chunk_size]
83 if not source:
84 break
85
86 chunks.append(cobj.compress(source))
87 i += chunk_size
88
89 chunks.append(cobj.flush())
90
91 self.assertEqual(b''.join(chunks), ref_frame)
92
93
94 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
95 @make_cffi
96 class TestCompressor_read_from_fuzzing(unittest.TestCase):
97 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
98 level=strategies.integers(min_value=1, max_value=5),
99 read_size=strategies.integers(min_value=1, max_value=4096),
100 write_size=strategies.integers(min_value=1, max_value=4096))
101 def test_read_write_size_variance(self, original, level, read_size, write_size):
102 refcctx = zstd.ZstdCompressor(level=level)
103 ref_frame = refcctx.compress(original)
104
105 source = io.BytesIO(original)
106
107 cctx = zstd.ZstdCompressor(level=level)
108 chunks = list(cctx.read_from(source, size=len(original), read_size=read_size,
109 write_size=write_size))
110
111 self.assertEqual(b''.join(chunks), ref_frame)
112
113
114 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
115 class TestCompressor_multi_compress_to_buffer_fuzzing(unittest.TestCase):
116 @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()),
117 min_size=1, max_size=1024),
118 threads=strategies.integers(min_value=1, max_value=8),
119 use_dict=strategies.booleans())
120 def test_data_equivalence(self, original, threads, use_dict):
121 kwargs = {}
122
123 # Use a content dictionary because it is cheap to create.
124 if use_dict:
125 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
126
127 cctx = zstd.ZstdCompressor(level=1,
128 write_content_size=True,
129 write_checksum=True,
130 **kwargs)
131
132 result = cctx.multi_compress_to_buffer(original, threads=-1)
133
134 self.assertEqual(len(result), len(original))
135
136 # The frame produced via the batch APIs may not be bit identical to that
137 # produced by compress() because compression parameters are adjusted
138 # from the first input in batch mode. So the only thing we can do is
139 # verify the decompressed data matches the input.
140 dctx = zstd.ZstdDecompressor(**kwargs)
141
142 for i, frame in enumerate(result):
143 self.assertEqual(dctx.decompress(frame), original[i])
@@ -0,0 +1,79 b''
1 import io
2 import os
3
4 try:
5 import unittest2 as unittest
6 except ImportError:
7 import unittest
8
9 try:
10 import hypothesis
11 import hypothesis.strategies as strategies
12 except ImportError:
13 raise unittest.SkipTest('hypothesis not available')
14
15 import zstd
16
17 from .common import (
18 make_cffi,
19 )
20
21
22 s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
23 max_value=zstd.WINDOWLOG_MAX)
24 s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN,
25 max_value=zstd.CHAINLOG_MAX)
26 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN,
27 max_value=zstd.HASHLOG_MAX)
28 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
29 max_value=zstd.SEARCHLOG_MAX)
30 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
31 max_value=zstd.SEARCHLENGTH_MAX)
32 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
33 max_value=zstd.TARGETLENGTH_MAX)
34 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
35 zstd.STRATEGY_DFAST,
36 zstd.STRATEGY_GREEDY,
37 zstd.STRATEGY_LAZY,
38 zstd.STRATEGY_LAZY2,
39 zstd.STRATEGY_BTLAZY2,
40 zstd.STRATEGY_BTOPT))
41
42
43 @make_cffi
44 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
45 class TestCompressionParametersHypothesis(unittest.TestCase):
46 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
47 s_searchlength, s_targetlength, s_strategy)
48 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
49 searchlength, targetlength, strategy):
50 # ZSTD_checkCParams moves the goal posts on us from what's advertised
51 # in the constants. So move along with them.
52 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
53 searchlength += 1
54 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
55 searchlength -= 1
56
57 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
58 searchlog, searchlength,
59 targetlength, strategy)
60
61 cctx = zstd.ZstdCompressor(compression_params=p)
62 with cctx.write_to(io.BytesIO()):
63 pass
64
65 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
66 s_searchlength, s_targetlength, s_strategy)
67 def test_estimate_compression_context_size(self, windowlog, chainlog,
68 hashlog, searchlog,
69 searchlength, targetlength,
70 strategy):
71 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
72 searchlength += 1
73 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
74 searchlength -= 1
75
76 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
77 searchlog, searchlength,
78 targetlength, strategy)
79 size = zstd.estimate_compression_context_size(p)
@@ -0,0 +1,151 b''
1 import io
2 import os
3
4 try:
5 import unittest2 as unittest
6 except ImportError:
7 import unittest
8
9 try:
10 import hypothesis
11 import hypothesis.strategies as strategies
12 except ImportError:
13 raise unittest.SkipTest('hypothesis not available')
14
15 import zstd
16
17 from . common import (
18 make_cffi,
19 random_input_data,
20 )
21
22
23 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
24 @make_cffi
25 class TestDecompressor_write_to_fuzzing(unittest.TestCase):
26 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
27 level=strategies.integers(min_value=1, max_value=5),
28 write_size=strategies.integers(min_value=1, max_value=8192),
29 input_sizes=strategies.streaming(
30 strategies.integers(min_value=1, max_value=4096)))
31 def test_write_size_variance(self, original, level, write_size, input_sizes):
32 input_sizes = iter(input_sizes)
33
34 cctx = zstd.ZstdCompressor(level=level)
35 frame = cctx.compress(original)
36
37 dctx = zstd.ZstdDecompressor()
38 source = io.BytesIO(frame)
39 dest = io.BytesIO()
40
41 with dctx.write_to(dest, write_size=write_size) as decompressor:
42 while True:
43 chunk = source.read(next(input_sizes))
44 if not chunk:
45 break
46
47 decompressor.write(chunk)
48
49 self.assertEqual(dest.getvalue(), original)
50
51
52 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
53 @make_cffi
54 class TestDecompressor_copy_stream_fuzzing(unittest.TestCase):
55 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
56 level=strategies.integers(min_value=1, max_value=5),
57 read_size=strategies.integers(min_value=1, max_value=8192),
58 write_size=strategies.integers(min_value=1, max_value=8192))
59 def test_read_write_size_variance(self, original, level, read_size, write_size):
60 cctx = zstd.ZstdCompressor(level=level)
61 frame = cctx.compress(original)
62
63 source = io.BytesIO(frame)
64 dest = io.BytesIO()
65
66 dctx = zstd.ZstdDecompressor()
67 dctx.copy_stream(source, dest, read_size=read_size, write_size=write_size)
68
69 self.assertEqual(dest.getvalue(), original)
70
71
72 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
73 @make_cffi
74 class TestDecompressor_decompressobj_fuzzing(unittest.TestCase):
75 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
76 level=strategies.integers(min_value=1, max_value=5),
77 chunk_sizes=strategies.streaming(
78 strategies.integers(min_value=1, max_value=4096)))
79 def test_random_input_sizes(self, original, level, chunk_sizes):
80 chunk_sizes = iter(chunk_sizes)
81
82 cctx = zstd.ZstdCompressor(level=level)
83 frame = cctx.compress(original)
84
85 source = io.BytesIO(frame)
86
87 dctx = zstd.ZstdDecompressor()
88 dobj = dctx.decompressobj()
89
90 chunks = []
91 while True:
92 chunk = source.read(next(chunk_sizes))
93 if not chunk:
94 break
95
96 chunks.append(dobj.decompress(chunk))
97
98 self.assertEqual(b''.join(chunks), original)
99
100
101 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
102 @make_cffi
103 class TestDecompressor_read_from_fuzzing(unittest.TestCase):
104 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
105 level=strategies.integers(min_value=1, max_value=5),
106 read_size=strategies.integers(min_value=1, max_value=4096),
107 write_size=strategies.integers(min_value=1, max_value=4096))
108 def test_read_write_size_variance(self, original, level, read_size, write_size):
109 cctx = zstd.ZstdCompressor(level=level)
110 frame = cctx.compress(original)
111
112 source = io.BytesIO(frame)
113
114 dctx = zstd.ZstdDecompressor()
115 chunks = list(dctx.read_from(source, read_size=read_size, write_size=write_size))
116
117 self.assertEqual(b''.join(chunks), original)
118
119
120 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
121 class TestDecompressor_multi_decompress_to_buffer_fuzzing(unittest.TestCase):
122 @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()),
123 min_size=1, max_size=1024),
124 threads=strategies.integers(min_value=1, max_value=8),
125 use_dict=strategies.booleans())
126 def test_data_equivalence(self, original, threads, use_dict):
127 kwargs = {}
128 if use_dict:
129 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
130
131 cctx = zstd.ZstdCompressor(level=1,
132 write_content_size=True,
133 write_checksum=True,
134 **kwargs)
135
136 frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1)
137
138 dctx = zstd.ZstdDecompressor(**kwargs)
139
140 result = dctx.multi_decompress_to_buffer(frames_buffer)
141
142 self.assertEqual(len(result), len(original))
143 for i, frame in enumerate(result):
144 self.assertEqual(frame.tobytes(), original[i])
145
146 frames_list = [f.tobytes() for f in frames_buffer]
147 result = dctx.multi_decompress_to_buffer(frames_list)
148
149 self.assertEqual(len(result), len(original))
150 for i, frame in enumerate(result):
151 self.assertEqual(frame.tobytes(), original[i])
@@ -0,0 +1,194 b''
1 /**
2 * Copyright (c) 2016-present, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10
11 /* ====== Dependencies ======= */
12 #include <stddef.h> /* size_t */
13 #include <stdlib.h> /* malloc, calloc, free */
14 #include "pool.h"
15
16 /* ====== Compiler specifics ====== */
17 #if defined(_MSC_VER)
18 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
19 #endif
20
21
22 #ifdef ZSTD_MULTITHREAD
23
24 #include "threading.h" /* pthread adaptation */
25
26 /* A job is a function and an opaque argument */
27 typedef struct POOL_job_s {
28 POOL_function function;
29 void *opaque;
30 } POOL_job;
31
32 struct POOL_ctx_s {
33 /* Keep track of the threads */
34 pthread_t *threads;
35 size_t numThreads;
36
37 /* The queue is a circular buffer */
38 POOL_job *queue;
39 size_t queueHead;
40 size_t queueTail;
41 size_t queueSize;
42 /* The mutex protects the queue */
43 pthread_mutex_t queueMutex;
44 /* Condition variable for pushers to wait on when the queue is full */
45 pthread_cond_t queuePushCond;
46 /* Condition variables for poppers to wait on when the queue is empty */
47 pthread_cond_t queuePopCond;
48 /* Indicates if the queue is shutting down */
49 int shutdown;
50 };
51
52 /* POOL_thread() :
53 Work thread for the thread pool.
54 Waits for jobs and executes them.
55 @returns : NULL on failure else non-null.
56 */
57 static void* POOL_thread(void* opaque) {
58 POOL_ctx* const ctx = (POOL_ctx*)opaque;
59 if (!ctx) { return NULL; }
60 for (;;) {
61 /* Lock the mutex and wait for a non-empty queue or until shutdown */
62 pthread_mutex_lock(&ctx->queueMutex);
63 while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) {
64 pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
65 }
66 /* empty => shutting down: so stop */
67 if (ctx->queueHead == ctx->queueTail) {
68 pthread_mutex_unlock(&ctx->queueMutex);
69 return opaque;
70 }
71 /* Pop a job off the queue */
72 { POOL_job const job = ctx->queue[ctx->queueHead];
73 ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
74 /* Unlock the mutex, signal a pusher, and run the job */
75 pthread_mutex_unlock(&ctx->queueMutex);
76 pthread_cond_signal(&ctx->queuePushCond);
77 job.function(job.opaque);
78 }
79 }
80 /* Unreachable */
81 }
82
83 POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
84 POOL_ctx *ctx;
85 /* Check the parameters */
86 if (!numThreads || !queueSize) { return NULL; }
87 /* Allocate the context and zero initialize */
88 ctx = (POOL_ctx *)calloc(1, sizeof(POOL_ctx));
89 if (!ctx) { return NULL; }
90 /* Initialize the job queue.
91 * It needs one extra space since one space is wasted to differentiate empty
92 * and full queues.
93 */
94 ctx->queueSize = queueSize + 1;
95 ctx->queue = (POOL_job *)malloc(ctx->queueSize * sizeof(POOL_job));
96 ctx->queueHead = 0;
97 ctx->queueTail = 0;
98 pthread_mutex_init(&ctx->queueMutex, NULL);
99 pthread_cond_init(&ctx->queuePushCond, NULL);
100 pthread_cond_init(&ctx->queuePopCond, NULL);
101 ctx->shutdown = 0;
102 /* Allocate space for the thread handles */
103 ctx->threads = (pthread_t *)malloc(numThreads * sizeof(pthread_t));
104 ctx->numThreads = 0;
105 /* Check for errors */
106 if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
107 /* Initialize the threads */
108 { size_t i;
109 for (i = 0; i < numThreads; ++i) {
110 if (pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
111 ctx->numThreads = i;
112 POOL_free(ctx);
113 return NULL;
114 } }
115 ctx->numThreads = numThreads;
116 }
117 return ctx;
118 }
119
120 /*! POOL_join() :
121 Shutdown the queue, wake any sleeping threads, and join all of the threads.
122 */
123 static void POOL_join(POOL_ctx *ctx) {
124 /* Shut down the queue */
125 pthread_mutex_lock(&ctx->queueMutex);
126 ctx->shutdown = 1;
127 pthread_mutex_unlock(&ctx->queueMutex);
128 /* Wake up sleeping threads */
129 pthread_cond_broadcast(&ctx->queuePushCond);
130 pthread_cond_broadcast(&ctx->queuePopCond);
131 /* Join all of the threads */
132 { size_t i;
133 for (i = 0; i < ctx->numThreads; ++i) {
134 pthread_join(ctx->threads[i], NULL);
135 } }
136 }
137
138 void POOL_free(POOL_ctx *ctx) {
139 if (!ctx) { return; }
140 POOL_join(ctx);
141 pthread_mutex_destroy(&ctx->queueMutex);
142 pthread_cond_destroy(&ctx->queuePushCond);
143 pthread_cond_destroy(&ctx->queuePopCond);
144 if (ctx->queue) free(ctx->queue);
145 if (ctx->threads) free(ctx->threads);
146 free(ctx);
147 }
148
149 void POOL_add(void *ctxVoid, POOL_function function, void *opaque) {
150 POOL_ctx *ctx = (POOL_ctx *)ctxVoid;
151 if (!ctx) { return; }
152
153 pthread_mutex_lock(&ctx->queueMutex);
154 { POOL_job const job = {function, opaque};
155 /* Wait until there is space in the queue for the new job */
156 size_t newTail = (ctx->queueTail + 1) % ctx->queueSize;
157 while (ctx->queueHead == newTail && !ctx->shutdown) {
158 pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
159 newTail = (ctx->queueTail + 1) % ctx->queueSize;
160 }
161 /* The queue is still going => there is space */
162 if (!ctx->shutdown) {
163 ctx->queue[ctx->queueTail] = job;
164 ctx->queueTail = newTail;
165 }
166 }
167 pthread_mutex_unlock(&ctx->queueMutex);
168 pthread_cond_signal(&ctx->queuePopCond);
169 }
170
171 #else /* ZSTD_MULTITHREAD not defined */
172 /* No multi-threading support */
173
174 /* We don't need any data, but if it is empty malloc() might return NULL. */
175 struct POOL_ctx_s {
176 int data;
177 };
178
179 POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
180 (void)numThreads;
181 (void)queueSize;
182 return (POOL_ctx *)malloc(sizeof(POOL_ctx));
183 }
184
185 void POOL_free(POOL_ctx *ctx) {
186 if (ctx) free(ctx);
187 }
188
189 void POOL_add(void *ctx, POOL_function function, void *opaque) {
190 (void)ctx;
191 function(opaque);
192 }
193
194 #endif /* ZSTD_MULTITHREAD */
@@ -0,0 +1,56 b''
1 /**
2 * Copyright (c) 2016-present, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9 #ifndef POOL_H
10 #define POOL_H
11
12 #if defined (__cplusplus)
13 extern "C" {
14 #endif
15
16
17 #include <stddef.h> /* size_t */
18
19 typedef struct POOL_ctx_s POOL_ctx;
20
21 /*! POOL_create() :
22 Create a thread pool with at most `numThreads` threads.
23 `numThreads` must be at least 1.
24 The maximum number of queued jobs before blocking is `queueSize`.
25 `queueSize` must be at least 1.
26 @return : The POOL_ctx pointer on success else NULL.
27 */
28 POOL_ctx *POOL_create(size_t numThreads, size_t queueSize);
29
30 /*! POOL_free() :
31 Free a thread pool returned by POOL_create().
32 */
33 void POOL_free(POOL_ctx *ctx);
34
35 /*! POOL_function :
36 The function type that can be added to a thread pool.
37 */
38 typedef void (*POOL_function)(void *);
39 /*! POOL_add_function :
40 The function type for a generic thread pool add function.
41 */
42 typedef void (*POOL_add_function)(void *, POOL_function, void *);
43
44 /*! POOL_add() :
45 Add the job `function(opaque)` to the thread pool.
46 Possibly blocks until there is room in the queue.
47 Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
48 */
49 void POOL_add(void *ctx, POOL_function function, void *opaque);
50
51
52 #if defined (__cplusplus)
53 }
54 #endif
55
56 #endif
@@ -0,0 +1,79 b''
1
2 /**
3 * Copyright (c) 2016 Tino Reichardt
4 * All rights reserved.
5 *
6 * This source code is licensed under the BSD-style license found in the
7 * LICENSE file in the root directory of this source tree. An additional grant
8 * of patent rights can be found in the PATENTS file in the same directory.
9 *
10 * You can contact the author at:
11 * - zstdmt source repository: https://github.com/mcmilk/zstdmt
12 */
13
14 /**
15 * This file will hold wrapper for systems, which do not support pthreads
16 */
17
18 /* ====== Compiler specifics ====== */
19 #if defined(_MSC_VER)
20 # pragma warning(disable : 4206) /* disable: C4206: translation unit is empty (when ZSTD_MULTITHREAD is not defined) */
21 #endif
22
23
24 #if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
25
26 /**
27 * Windows minimalist Pthread Wrapper, based on :
28 * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
29 */
30
31
32 /* === Dependencies === */
33 #include <process.h>
34 #include <errno.h>
35 #include "threading.h"
36
37
38 /* === Implementation === */
39
40 static unsigned __stdcall worker(void *arg)
41 {
42 pthread_t* const thread = (pthread_t*) arg;
43 thread->arg = thread->start_routine(thread->arg);
44 return 0;
45 }
46
47 int pthread_create(pthread_t* thread, const void* unused,
48 void* (*start_routine) (void*), void* arg)
49 {
50 (void)unused;
51 thread->arg = arg;
52 thread->start_routine = start_routine;
53 thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL);
54
55 if (!thread->handle)
56 return errno;
57 else
58 return 0;
59 }
60
61 int _pthread_join(pthread_t * thread, void **value_ptr)
62 {
63 DWORD result;
64
65 if (!thread->handle) return 0;
66
67 result = WaitForSingleObject(thread->handle, INFINITE);
68 switch (result) {
69 case WAIT_OBJECT_0:
70 if (value_ptr) *value_ptr = thread->arg;
71 return 0;
72 case WAIT_ABANDONED:
73 return EINVAL;
74 default:
75 return GetLastError();
76 }
77 }
78
79 #endif /* ZSTD_MULTITHREAD */
@@ -0,0 +1,104 b''
1
2 /**
3 * Copyright (c) 2016 Tino Reichardt
4 * All rights reserved.
5 *
6 * This source code is licensed under the BSD-style license found in the
7 * LICENSE file in the root directory of this source tree. An additional grant
8 * of patent rights can be found in the PATENTS file in the same directory.
9 *
10 * You can contact the author at:
11 * - zstdmt source repository: https://github.com/mcmilk/zstdmt
12 */
13
14 #ifndef THREADING_H_938743
15 #define THREADING_H_938743
16
17 #if defined (__cplusplus)
18 extern "C" {
19 #endif
20
21 #if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
22
23 /**
24 * Windows minimalist Pthread Wrapper, based on :
25 * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
26 */
27 #ifdef WINVER
28 # undef WINVER
29 #endif
30 #define WINVER 0x0600
31
32 #ifdef _WIN32_WINNT
33 # undef _WIN32_WINNT
34 #endif
35 #define _WIN32_WINNT 0x0600
36
37 #ifndef WIN32_LEAN_AND_MEAN
38 # define WIN32_LEAN_AND_MEAN
39 #endif
40
41 #include <windows.h>
42
43 /* mutex */
44 #define pthread_mutex_t CRITICAL_SECTION
45 #define pthread_mutex_init(a,b) InitializeCriticalSection((a))
46 #define pthread_mutex_destroy(a) DeleteCriticalSection((a))
47 #define pthread_mutex_lock(a) EnterCriticalSection((a))
48 #define pthread_mutex_unlock(a) LeaveCriticalSection((a))
49
50 /* condition variable */
51 #define pthread_cond_t CONDITION_VARIABLE
52 #define pthread_cond_init(a, b) InitializeConditionVariable((a))
53 #define pthread_cond_destroy(a) /* No delete */
54 #define pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE)
55 #define pthread_cond_signal(a) WakeConditionVariable((a))
56 #define pthread_cond_broadcast(a) WakeAllConditionVariable((a))
57
58 /* pthread_create() and pthread_join() */
59 typedef struct {
60 HANDLE handle;
61 void* (*start_routine)(void*);
62 void* arg;
63 } pthread_t;
64
65 int pthread_create(pthread_t* thread, const void* unused,
66 void* (*start_routine) (void*), void* arg);
67
68 #define pthread_join(a, b) _pthread_join(&(a), (b))
69 int _pthread_join(pthread_t* thread, void** value_ptr);
70
71 /**
72 * add here more wrappers as required
73 */
74
75
76 #elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection mathod */
77 /* === POSIX Systems === */
78 # include <pthread.h>
79
80 #else /* ZSTD_MULTITHREAD not defined */
81 /* No multithreading support */
82
83 #define pthread_mutex_t int /* #define rather than typedef, as sometimes pthread support is implicit, resulting in duplicated symbols */
84 #define pthread_mutex_init(a,b)
85 #define pthread_mutex_destroy(a)
86 #define pthread_mutex_lock(a)
87 #define pthread_mutex_unlock(a)
88
89 #define pthread_cond_t int
90 #define pthread_cond_init(a,b)
91 #define pthread_cond_destroy(a)
92 #define pthread_cond_wait(a,b)
93 #define pthread_cond_signal(a)
94 #define pthread_cond_broadcast(a)
95
96 /* do not use pthread_t */
97
98 #endif /* ZSTD_MULTITHREAD */
99
100 #if defined (__cplusplus)
101 }
102 #endif
103
104 #endif /* THREADING_H_938743 */
This diff has been collapsed as it changes many lines, (740 lines changed) Show them Hide them
@@ -0,0 +1,740 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10
11 /* ====== Tuning parameters ====== */
12 #define ZSTDMT_NBTHREADS_MAX 128
13
14
15 /* ====== Compiler specifics ====== */
16 #if defined(_MSC_VER)
17 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
18 #endif
19
20
21 /* ====== Dependencies ====== */
22 #include <stdlib.h> /* malloc */
23 #include <string.h> /* memcpy */
24 #include "pool.h" /* threadpool */
25 #include "threading.h" /* mutex */
26 #include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
27 #include "zstdmt_compress.h"
28 #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
29 #include "xxhash.h"
30
31
32 /* ====== Debug ====== */
33 #if 0
34
35 # include <stdio.h>
36 # include <unistd.h>
37 # include <sys/times.h>
38 static unsigned g_debugLevel = 3;
39 # define DEBUGLOGRAW(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); }
40 # define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); }
41
42 # define DEBUG_PRINTHEX(l,p,n) { \
43 unsigned debug_u; \
44 for (debug_u=0; debug_u<(n); debug_u++) \
45 DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
46 DEBUGLOGRAW(l, " \n"); \
47 }
48
49 static unsigned long long GetCurrentClockTimeMicroseconds()
50 {
51 static clock_t _ticksPerSecond = 0;
52 if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
53
54 struct tms junk; clock_t newTicks = (clock_t) times(&junk);
55 return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
56 }
57
58 #define MUTEX_WAIT_TIME_DLEVEL 5
59 #define PTHREAD_MUTEX_LOCK(mutex) \
60 if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \
61 unsigned long long beforeTime = GetCurrentClockTimeMicroseconds(); \
62 pthread_mutex_lock(mutex); \
63 unsigned long long afterTime = GetCurrentClockTimeMicroseconds(); \
64 unsigned long long elapsedTime = (afterTime-beforeTime); \
65 if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
66 DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
67 elapsedTime, #mutex); \
68 } \
69 } else pthread_mutex_lock(mutex);
70
71 #else
72
73 # define DEBUGLOG(l, ...) {} /* disabled */
74 # define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m)
75 # define DEBUG_PRINTHEX(l,p,n) {}
76
77 #endif
78
79
80 /* ===== Buffer Pool ===== */
81
82 typedef struct buffer_s {
83 void* start;
84 size_t size;
85 } buffer_t;
86
87 static const buffer_t g_nullBuffer = { NULL, 0 };
88
89 typedef struct ZSTDMT_bufferPool_s {
90 unsigned totalBuffers;
91 unsigned nbBuffers;
92 buffer_t bTable[1]; /* variable size */
93 } ZSTDMT_bufferPool;
94
95 static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads)
96 {
97 unsigned const maxNbBuffers = 2*nbThreads + 2;
98 ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)calloc(1, sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t));
99 if (bufPool==NULL) return NULL;
100 bufPool->totalBuffers = maxNbBuffers;
101 bufPool->nbBuffers = 0;
102 return bufPool;
103 }
104
105 static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
106 {
107 unsigned u;
108 if (!bufPool) return; /* compatibility with free on NULL */
109 for (u=0; u<bufPool->totalBuffers; u++)
110 free(bufPool->bTable[u].start);
111 free(bufPool);
112 }
113
114 /* assumption : invocation from main thread only ! */
115 static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize)
116 {
117 if (pool->nbBuffers) { /* try to use an existing buffer */
118 buffer_t const buf = pool->bTable[--(pool->nbBuffers)];
119 size_t const availBufferSize = buf.size;
120 if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) /* large enough, but not too much */
121 return buf;
122 free(buf.start); /* size conditions not respected : scratch this buffer and create a new one */
123 }
124 /* create new buffer */
125 { buffer_t buffer;
126 void* const start = malloc(bSize);
127 if (start==NULL) bSize = 0;
128 buffer.start = start; /* note : start can be NULL if malloc fails ! */
129 buffer.size = bSize;
130 return buffer;
131 }
132 }
133
134 /* store buffer for later re-use, up to pool capacity */
135 static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf)
136 {
137 if (buf.start == NULL) return; /* release on NULL */
138 if (pool->nbBuffers < pool->totalBuffers) {
139 pool->bTable[pool->nbBuffers++] = buf; /* store for later re-use */
140 return;
141 }
142 /* Reached bufferPool capacity (should not happen) */
143 free(buf.start);
144 }
145
146
147 /* ===== CCtx Pool ===== */
148
149 typedef struct {
150 unsigned totalCCtx;
151 unsigned availCCtx;
152 ZSTD_CCtx* cctx[1]; /* variable size */
153 } ZSTDMT_CCtxPool;
154
155 /* assumption : CCtxPool invocation only from main thread */
156
157 /* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
158 static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
159 {
160 unsigned u;
161 for (u=0; u<pool->totalCCtx; u++)
162 ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */
163 free(pool);
164 }
165
166 /* ZSTDMT_createCCtxPool() :
167 * implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */
168 static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads)
169 {
170 ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*));
171 if (!cctxPool) return NULL;
172 cctxPool->totalCCtx = nbThreads;
173 cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
174 cctxPool->cctx[0] = ZSTD_createCCtx();
175 if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
176 DEBUGLOG(1, "cctxPool created, with %u threads", nbThreads);
177 return cctxPool;
178 }
179
180 static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool)
181 {
182 if (pool->availCCtx) {
183 pool->availCCtx--;
184 return pool->cctx[pool->availCCtx];
185 }
186 return ZSTD_createCCtx(); /* note : can be NULL, when creation fails ! */
187 }
188
189 static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
190 {
191 if (cctx==NULL) return; /* compatibility with release on NULL */
192 if (pool->availCCtx < pool->totalCCtx)
193 pool->cctx[pool->availCCtx++] = cctx;
194 else
195 /* pool overflow : should not happen, since totalCCtx==nbThreads */
196 ZSTD_freeCCtx(cctx);
197 }
198
199
200 /* ===== Thread worker ===== */
201
/* Streaming-mode input accumulator :
 * `buffer` is the owned storage, `filled` the number of bytes currently loaded. */
typedef struct {
    buffer_t buffer;
    size_t filled;
} inBuff_t;
206
/* Description of one compression job, filled by the producer and consumed by
 * a worker thread running ZSTDMT_compressChunk().
 * Completion is signalled through the shared mutex/cond pair. */
typedef struct {
    ZSTD_CCtx* cctx;                       /* context borrowed from the CCtx pool */
    buffer_t src;                          /* owned input buffer (streaming mode) ; returned to pool after scan */
    const void* srcStart;                  /* start of (overlap-dict + payload) region */
    size_t srcSize;                        /* payload bytes to compress (after the dictSize prefix) */
    size_t dictSize;                       /* size of overlap prefix used as dictionary */
    buffer_t dstBuff;                      /* destination for the compressed section */
    size_t cSize;                          /* result : compressed size, or a ZSTD error code */
    size_t dstFlushed;                     /* bytes of dstBuff already copied out by the flush path */
    unsigned firstChunk;                   /* non-zero for the first section of a frame */
    unsigned lastChunk;                    /* non-zero for the last section (uses ZSTD_compressEnd()) */
    unsigned jobCompleted;                 /* set by worker, under mutex, when done */
    unsigned jobScanned;                   /* set once flush has examined the result */
    pthread_mutex_t* jobCompleted_mutex;   /* shared with the owning ZSTDMT_CCtx */
    pthread_cond_t* jobCompleted_cond;     /* shared with the owning ZSTDMT_CCtx */
    ZSTD_parameters params;
    ZSTD_CDict* cdict;                     /* optional digested dictionary (first job only) */
    unsigned long long fullFrameSize;      /* pledged total frame size, for the frame header */
} ZSTDMT_jobDescription;
226
227 /* ZSTDMT_compressChunk() : POOL_function type */
228 void ZSTDMT_compressChunk(void* jobDescription)
229 {
230 ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
231 const void* const src = (const char*)job->srcStart + job->dictSize;
232 buffer_t const dstBuff = job->dstBuff;
233 DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
234 if (job->cdict) {
235 size_t const initError = ZSTD_compressBegin_usingCDict(job->cctx, job->cdict, job->fullFrameSize);
236 if (job->cdict) DEBUGLOG(3, "using CDict ");
237 if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
238 } else {
239 size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
240 if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
241 ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
242 }
243 if (!job->firstChunk) { /* flush frame header */
244 size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
245 if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
246 ZSTD_invalidateRepCodes(job->cctx);
247 }
248
249 DEBUGLOG(4, "Compressing : ");
250 DEBUG_PRINTHEX(4, job->srcStart, 12);
251 job->cSize = (job->lastChunk) ? /* last chunk signal */
252 ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
253 ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
254 DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
255
256 _endJob:
257 PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
258 job->jobCompleted = 1;
259 job->jobScanned = 0;
260 pthread_cond_signal(job->jobCompleted_cond);
261 pthread_mutex_unlock(job->jobCompleted_mutex);
262 }
263
264
265 /* ------------------------------------------ */
266 /* ===== Multi-threaded compression ===== */
267 /* ------------------------------------------ */
268
/* Multi-threaded compression context.
 * Owns the thread pool, the buffer/CCtx pools, and a power-of-2-sized
 * circular table of job descriptions indexed through jobIDMask. */
struct ZSTDMT_CCtx_s {
    POOL_ctx* factory;                   /* worker thread pool */
    ZSTDMT_bufferPool* buffPool;
    ZSTDMT_CCtxPool* cctxPool;
    pthread_mutex_t jobCompleted_mutex;  /* guards jobs[].jobCompleted */
    pthread_cond_t jobCompleted_cond;    /* signalled by workers on completion */
    size_t targetSectionSize;            /* payload size of each job */
    size_t marginSize;                   /* extra input slack beyond a full section */
    size_t inBuffSize;                   /* = targetDictSize + targetSectionSize + marginSize */
    size_t dictSize;                     /* current overlap prefix length inside inBuff */
    size_t targetDictSize;               /* desired overlap, derived from overlapRLog */
    inBuff_t inBuff;
    ZSTD_parameters params;
    XXH64_state_t xxhState;              /* running checksum across sections */
    unsigned nbThreads;
    unsigned jobIDMask;                  /* nbJobs-1 ; nbJobs is a power of 2 */
    unsigned doneJobID;                  /* next job to flush (monotonic, masked for indexing) */
    unsigned nextJobID;                  /* next job to post (monotonic, masked for indexing) */
    unsigned frameEnded;                 /* last section posted ; only flush allowed */
    unsigned allJobsCompleted;
    unsigned overlapRLog;                /* 9 - overlapSectionLog ; >=9 means no overlap */
    unsigned long long frameContentSize; /* pledged source size, 0 == unknown */
    size_t sectionSize;                  /* user-set section size ; 0 == auto */
    ZSTD_CDict* cdict;
    ZSTD_CStream* cstream;               /* single-thread fallback (nbThreads==1 only) */
    ZSTDMT_jobDescription jobs[1];   /* variable size (must lies at the end) */
};
296
297 ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads)
298 {
299 ZSTDMT_CCtx* cctx;
300 U32 const minNbJobs = nbThreads + 2;
301 U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1;
302 U32 const nbJobs = 1 << nbJobsLog2;
303 DEBUGLOG(5, "nbThreads : %u ; minNbJobs : %u ; nbJobsLog2 : %u ; nbJobs : %u \n",
304 nbThreads, minNbJobs, nbJobsLog2, nbJobs);
305 if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL;
306 cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbJobs*sizeof(ZSTDMT_jobDescription));
307 if (!cctx) return NULL;
308 cctx->nbThreads = nbThreads;
309 cctx->jobIDMask = nbJobs - 1;
310 cctx->allJobsCompleted = 1;
311 cctx->sectionSize = 0;
312 cctx->overlapRLog = 3;
313 cctx->factory = POOL_create(nbThreads, 1);
314 cctx->buffPool = ZSTDMT_createBufferPool(nbThreads);
315 cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads);
316 if (!cctx->factory | !cctx->buffPool | !cctx->cctxPool) { /* one object was not created */
317 ZSTDMT_freeCCtx(cctx);
318 return NULL;
319 }
320 if (nbThreads==1) {
321 cctx->cstream = ZSTD_createCStream();
322 if (!cctx->cstream) {
323 ZSTDMT_freeCCtx(cctx); return NULL;
324 } }
325 pthread_mutex_init(&cctx->jobCompleted_mutex, NULL); /* Todo : check init function return */
326 pthread_cond_init(&cctx->jobCompleted_cond, NULL);
327 DEBUGLOG(4, "mt_cctx created, for %u threads \n", nbThreads);
328 return cctx;
329 }
330
331 /* ZSTDMT_releaseAllJobResources() :
332 * Ensure all workers are killed first. */
333 static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
334 {
335 unsigned jobID;
336 for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
337 ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff);
338 mtctx->jobs[jobID].dstBuff = g_nullBuffer;
339 ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].src);
340 mtctx->jobs[jobID].src = g_nullBuffer;
341 ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx);
342 mtctx->jobs[jobID].cctx = NULL;
343 }
344 memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
345 ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer);
346 mtctx->inBuff.buffer = g_nullBuffer;
347 mtctx->allJobsCompleted = 1;
348 }
349
/* ZSTDMT_freeCCtx() :
 * Release a multi-threaded context and everything it owns.
 * Order matters : workers are terminated first, then outstanding job
 * resources are parked back into the pools, then the pools themselves go.
 * @return : 0 (never fails) */
size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
{
    if (mtctx==NULL) return 0;   /* compatible with free on NULL */
    POOL_free(mtctx->factory);   /* waits for and terminates worker threads */
    if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx);   /* stop workers first */
    ZSTDMT_freeBufferPool(mtctx->buffPool);   /* release job resources into pools first */
    ZSTDMT_freeCCtxPool(mtctx->cctxPool);
    ZSTD_freeCDict(mtctx->cdict);
    ZSTD_freeCStream(mtctx->cstream);
    pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
    pthread_cond_destroy(&mtctx->jobCompleted_cond);
    free(mtctx);
    return 0;
}
364
365 size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value)
366 {
367 switch(parameter)
368 {
369 case ZSTDMT_p_sectionSize :
370 mtctx->sectionSize = value;
371 return 0;
372 case ZSTDMT_p_overlapSectionLog :
373 DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value);
374 mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value;
375 return 0;
376 default :
377 return ERROR(compressionParameter_unsupported);
378 }
379 }
380
381
382 /* ------------------------------------------ */
383 /* ===== Multi-threaded compression ===== */
384 /* ------------------------------------------ */
385
/* ZSTDMT_compressCCtx() :
 * One-pass multi-threaded compression : split `src` into up to nbThreads
 * chunks, compress them in parallel as sections of a single frame, then
 * concatenate the results in order into `dst`.
 * Falls back to single-thread ZSTD_compressCCtx() when one chunk suffices.
 * @return : compressed size written into dst, or an error code */
size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
                           void* dst, size_t dstCapacity,
                     const void* src, size_t srcSize,
                           int compressionLevel)
{
    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
    size_t const chunkTargetSize = (size_t)1 << (params.cParams.windowLog + 2);   /* 4x window per chunk */
    unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + (srcSize < chunkTargetSize) /* min 1 */;
    unsigned nbChunks = MIN(nbChunksMax, mtctx->nbThreads);
    size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
    size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0xFFFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize;   /* avoid too small last block */
    size_t remainingSrcSize = srcSize;
    const char* const srcStart = (const char*)src;
    size_t frameStartPos = 0;   /* offset of the next chunk within src */

    DEBUGLOG(3, "windowLog : %2u => chunkTargetSize : %u bytes ", params.cParams.windowLog, (U32)chunkTargetSize);
    DEBUGLOG(2, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize);
    params.fParams.contentSizeFlag = 1;

    if (nbChunks==1) {   /* fallback to single-thread mode */
        ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
        return ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
    }

    /* phase 1 : post one job per chunk.
     * Chunk 0 writes directly into dst ; later chunks get pool buffers. */
    { unsigned u;
        for (u=0; u<nbChunks; u++) {
            size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize);
            size_t const dstBufferCapacity = u ? ZSTD_compressBound(chunkSize) : dstCapacity;
            buffer_t const dstAsBuffer = { dst, dstCapacity };
            buffer_t const dstBuffer = u ? ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity) : dstAsBuffer;
            ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool);

            if ((cctx==NULL) || (dstBuffer.start==NULL)) {
                mtctx->jobs[u].cSize = ERROR(memory_allocation);   /* job result */
                mtctx->jobs[u].jobCompleted = 1;
                nbChunks = u+1;
                break;   /* let's wait for previous jobs to complete, but don't start new ones */
            }

            mtctx->jobs[u].srcStart = srcStart + frameStartPos;
            mtctx->jobs[u].srcSize = chunkSize;
            mtctx->jobs[u].fullFrameSize = srcSize;
            mtctx->jobs[u].params = params;
            mtctx->jobs[u].dstBuff = dstBuffer;
            mtctx->jobs[u].cctx = cctx;
            mtctx->jobs[u].firstChunk = (u==0);
            mtctx->jobs[u].lastChunk = (u==nbChunks-1);
            mtctx->jobs[u].jobCompleted = 0;
            mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
            mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;

            DEBUGLOG(3, "posting job %u (%u bytes)", u, (U32)chunkSize);
            DEBUG_PRINTHEX(3, mtctx->jobs[u].srcStart, 12);
            POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);

            frameStartPos += chunkSize;
            remainingSrcSize -= chunkSize;
    }   }
    /* note : since nbChunks <= nbThreads, all jobs should be running immediately in parallel */

    /* phase 2 : collect results in order, copying sections after chunk 0
     * into place behind it.  On error, keep draining so dstPos stays sane. */
    { unsigned chunkID;
        size_t error = 0, dstPos = 0;
        for (chunkID=0; chunkID<nbChunks; chunkID++) {
            DEBUGLOG(3, "waiting for chunk %u ", chunkID);
            PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
            while (mtctx->jobs[chunkID].jobCompleted==0) {
                DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", chunkID);
                pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
            }
            pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
            DEBUGLOG(3, "ready to write chunk %u ", chunkID);

            ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx);
            mtctx->jobs[chunkID].cctx = NULL;
            mtctx->jobs[chunkID].srcStart = NULL;
            { size_t const cSize = mtctx->jobs[chunkID].cSize;
                if (ZSTD_isError(cSize)) error = cSize;
                if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
                if (chunkID) {   /* note : chunk 0 is already written directly into dst */
                    if (!error) memcpy((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize);
                    ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff);
                    mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
                }
                dstPos += cSize ;
            }
        }
        if (!error) DEBUGLOG(3, "compressed size : %u  ", (U32)dstPos);
        return error ? error : dstPos;
    }

}
477
478
479 /* ====================================== */
480 /* ======= Streaming API ======= */
481 /* ====================================== */
482
/* ZSTDMT_waitForAllJobsCompleted() :
 * Block until every posted job (doneJobID .. nextJobID-1) has signalled
 * completion.  Advances doneJobID but does not release job resources. */
static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) {
    while (zcs->doneJobID < zcs->nextJobID) {
        unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
        PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
        while (zcs->jobs[jobID].jobCompleted==0) {
            DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID);   /* we want to block when waiting for data to flush */
            pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
        }
        pthread_mutex_unlock(&zcs->jobCompleted_mutex);
        zcs->doneJobID++;
    }
}
495
496
/* ZSTDMT_initCStream_internal() :
 * (Re)start a streaming compression session.
 * updateDict : when non-zero, rebuild the internal CDict from (dict,dictSize) ;
 *              when zero, reuse the previous one (reset path).
 * Computes the overlap ("dict") size, the per-job section size, and
 * allocates the rolling input buffer.
 * @return : 0, or an error code */
static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
                                    const void* dict, size_t dictSize, unsigned updateDict,
                                    ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
    ZSTD_customMem const cmem = { NULL, NULL, NULL };
    DEBUGLOG(3, "Started new compression, with windowLog : %u", params.cParams.windowLog);
    if (zcs->nbThreads==1) return ZSTD_initCStream_advanced(zcs->cstream, dict, dictSize, params, pledgedSrcSize);
    if (zcs->allJobsCompleted == 0) {   /* previous job not correctly finished */
        ZSTDMT_waitForAllJobsCompleted(zcs);
        ZSTDMT_releaseAllJobResources(zcs);
        zcs->allJobsCompleted = 1;
    }
    zcs->params = params;
    if (updateDict) {
        ZSTD_freeCDict(zcs->cdict); zcs->cdict = NULL;
        if (dict && dictSize) {
            zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, 0, params, cmem);
            if (zcs->cdict == NULL) return ERROR(memory_allocation);
    }   }
    zcs->frameContentSize = pledgedSrcSize;
    /* overlapRLog >= 9 disables overlap entirely */
    zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog);
    DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog);
    DEBUGLOG(3, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10));
    /* section size : user override, else 4x window ; clamped below by the
     * minimum job size and by the overlap size */
    zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2);
    zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize);
    zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize);
    DEBUGLOG(3, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10));
    zcs->marginSize = zcs->targetSectionSize >> 2;
    zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize;
    zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
    if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation);
    zcs->inBuff.filled = 0;
    zcs->dictSize = 0;
    zcs->doneJobID = 0;
    zcs->nextJobID = 0;
    zcs->frameEnded = 0;
    zcs->allJobsCompleted = 0;
    if (params.fParams.checksumFlag) XXH64_reset(&zcs->xxhState, 0);
    return 0;
}
537
/* ZSTDMT_initCStream_advanced() :
 * Start a session with explicit parameters and an optional dictionary ;
 * updateDict=1 : (re)build the internal CDict from (dict, dictSize). */
size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs,
                                const void* dict, size_t dictSize,
                                ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
    return ZSTDMT_initCStream_internal(zcs, dict, dictSize, 1, params, pledgedSrcSize);
}
544
/* ZSTDMT_resetCStream() :
 * Restart a session keeping the previous parameters and dictionary
 * (updateDict=0).
 * pledgedSrcSize is optional and can be zero == unknown */
size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
{
    if (zcs->nbThreads==1) return ZSTD_resetCStream(zcs->cstream, pledgedSrcSize);
    return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize);
}
552
/* ZSTDMT_initCStream() :
 * Start a session from a compression level only (no dictionary,
 * unknown source size). */
size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
    return ZSTDMT_initCStream_internal(zcs, NULL, 0, 1, params, 0);
}
557
558
/* ZSTDMT_createCompressionJob() :
 * Package the current input buffer (overlap prefix + srcSize payload) into
 * the next job slot and post it to the worker pool.
 * Ownership of zcs->inBuff.buffer transfers to the job ; unless endFrame,
 * a fresh input buffer is allocated and pre-filled with the new overlap
 * (tail of the previous input) plus any unconsumed bytes.
 * @return : 0, or an error code (memory_allocation) */
static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
{
    size_t const dstBufferCapacity = ZSTD_compressBound(srcSize);
    buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity);
    ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool);
    unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;

    if ((cctx==NULL) || (dstBuffer.start==NULL)) {
        /* mark the slot complete so the drain below terminates, then abort */
        zcs->jobs[jobID].jobCompleted = 1;
        zcs->nextJobID++;
        ZSTDMT_waitForAllJobsCompleted(zcs);
        ZSTDMT_releaseAllJobResources(zcs);
        return ERROR(memory_allocation);
    }

    DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ", zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
    zcs->jobs[jobID].src = zcs->inBuff.buffer;
    zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
    zcs->jobs[jobID].srcSize = srcSize;
    zcs->jobs[jobID].dictSize = zcs->dictSize;   /* note : zcs->inBuff.filled is presumed >= srcSize + dictSize */
    zcs->jobs[jobID].params = zcs->params;
    if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;   /* do not calculate checksum within sections, just keep it in header for first section */
    zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;   /* dictionary applies to the first section only */
    zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
    zcs->jobs[jobID].dstBuff = dstBuffer;
    zcs->jobs[jobID].cctx = cctx;
    zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
    zcs->jobs[jobID].lastChunk = endFrame;
    zcs->jobs[jobID].jobCompleted = 0;
    zcs->jobs[jobID].dstFlushed = 0;
    zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
    zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;

    /* get a new buffer for next input */
    if (!endFrame) {
        size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
        zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize);
        if (zcs->inBuff.buffer.start == NULL) {   /* not enough memory to allocate next input buffer */
            zcs->jobs[jobID].jobCompleted = 1;
            zcs->nextJobID++;
            ZSTDMT_waitForAllJobsCompleted(zcs);
            ZSTDMT_releaseAllJobResources(zcs);
            return ERROR(memory_allocation);
        }
        DEBUGLOG(5, "inBuff filled to %u", (U32)zcs->inBuff.filled);
        zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
        DEBUGLOG(5, "new job : filled to %u, with %u dict and %u src", (U32)zcs->inBuff.filled, (U32)newDictSize, (U32)(zcs->inBuff.filled - newDictSize));
        /* carry the overlap tail + leftover input into the new buffer ;
         * source region still belongs to the old (job-owned) buffer */
        memmove(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize, zcs->inBuff.filled);
        DEBUGLOG(5, "new inBuff pre-filled");
        zcs->dictSize = newDictSize;
    } else {   /* if (endFrame==1) */
        zcs->inBuff.buffer = g_nullBuffer;
        zcs->inBuff.filled = 0;
        zcs->dictSize = 0;
        zcs->frameEnded = 1;
        if (zcs->nextJobID == 0)
            zcs->params.fParams.checksumFlag = 0;   /* single chunk : checksum is calculated directly within worker thread */
    }

    DEBUGLOG(3, "posting job %u : %u bytes  (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask);
    POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]);   /* this call is blocking when thread worker pool is exhausted */
    zcs->nextJobID++;
    return 0;
}
623
624
/* ZSTDMT_flushNextJob() :
 * output : will be updated with amount of data flushed .
 * blockToFlush : if >0, the function will block and wait if there is no data available to flush .
 * On the first visit to a completed job ("scan"), checks for errors,
 * returns its cctx/src buffer to the pools, and appends the frame checksum
 * after the last section when enabled.
 * @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more, or an error code */
static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
{
    unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
    if (zcs->doneJobID == zcs->nextJobID) return 0;   /* all flushed ! */
    PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
    while (zcs->jobs[wJobID].jobCompleted==0) {
        DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID);
        if (!blockToFlush) { pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; }   /* nothing ready to be flushed => skip */
        pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);   /* block when nothing available to flush */
    }
    pthread_mutex_unlock(&zcs->jobCompleted_mutex);
    /* compression job completed : output can be flushed */
    { ZSTDMT_jobDescription job = zcs->jobs[wJobID];   /* local copy ; relevant fields written back below */
        if (!job.jobScanned) {   /* first visit : validate result and release input-side resources */
            if (ZSTD_isError(job.cSize)) {
                DEBUGLOG(5, "compression error detected ");
                ZSTDMT_waitForAllJobsCompleted(zcs);
                ZSTDMT_releaseAllJobResources(zcs);
                return job.cSize;
            }
            ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx);
            zcs->jobs[wJobID].cctx = NULL;
            DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag);
            if (zcs->params.fParams.checksumFlag) {
                XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize);
                if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) {   /* write checksum at end of last section */
                    U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
                    DEBUGLOG(4, "writing checksum : %08X \n", checksum);
                    MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
                    job.cSize += 4;
                    zcs->jobs[wJobID].cSize += 4;
            }   }
            ZSTDMT_releaseBuffer(zcs->buffPool, job.src);
            zcs->jobs[wJobID].srcStart = NULL;
            zcs->jobs[wJobID].src = g_nullBuffer;
            zcs->jobs[wJobID].jobScanned = 1;
        }
        { size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
            DEBUGLOG(4, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
            memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
            output->pos += toWrite;
            job.dstFlushed += toWrite;
        }
        if (job.dstFlushed == job.cSize) {   /* output buffer fully flushed => move to next one */
            ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff);
            zcs->jobs[wJobID].dstBuff = g_nullBuffer;
            zcs->jobs[wJobID].jobCompleted = 0;
            zcs->doneJobID++;
        } else {
            zcs->jobs[wJobID].dstFlushed = job.dstFlushed;   /* partial flush : remember progress */
        }
        /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */
        if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed);
        if (zcs->doneJobID < zcs->nextJobID) return 1;   /* still some buffer to flush */
        zcs->allJobsCompleted = zcs->frameEnded;   /* frame completed and entirely flushed */
        return 0;   /* everything flushed */
}   }
686
687
/* ZSTDMT_compressStream() :
 * Accumulate input into the internal buffer ; when a full section (plus
 * overlap and margin) is available and a job slot is free, post a new
 * compression job ; then opportunistically flush completed output.
 * @return : recommended next input size, or an error code */
size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
    size_t const newJobThreshold = zcs->dictSize + zcs->targetSectionSize + zcs->marginSize;
    if (zcs->frameEnded) return ERROR(stage_wrong);   /* current frame being ended. Only flush is allowed. Restart with init */
    if (zcs->nbThreads==1) return ZSTD_compressStream(zcs->cstream, output, input);

    /* fill input buffer */
    { size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.filled);
        memcpy((char*)zcs->inBuff.buffer.start + zcs->inBuff.filled, input->src, toLoad);
        input->pos += toLoad;
        zcs->inBuff.filled += toLoad;
    }

    if ( (zcs->inBuff.filled >= newJobThreshold)   /* filled enough : let's compress */
        && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {   /* avoid overwriting job round buffer */
        CHECK_F( ZSTDMT_createCompressionJob(zcs, zcs->targetSectionSize, 0) );
    }

    /* check for data to flush */
    CHECK_F( ZSTDMT_flushNextJob(zcs, output, (zcs->inBuff.filled == zcs->inBuffSize)) );   /* block if it wasn't possible to create new job due to saturation */

    /* recommended next input size : fill current input buffer */
    return zcs->inBuffSize - zcs->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
}
712
713
/* ZSTDMT_flushStream_internal() :
 * Post a final (possibly partial) job for any pending input — or an empty
 * last job when endFrame is requested and the frame is still open — then
 * block-flush the oldest completed job.
 * @return : bytes still buffered (0 == done), or an error code */
static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned endFrame)
{
    size_t const srcSize = zcs->inBuff.filled - zcs->dictSize;   /* payload beyond the overlap prefix */

    if (srcSize) DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize);
    if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded))
        && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) {
        CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) );
    }

    /* check if there is any data available to flush */
    DEBUGLOG(5, "zcs->doneJobID : %u  ; zcs->nextJobID : %u ", zcs->doneJobID, zcs->nextJobID);
    return ZSTDMT_flushNextJob(zcs, output, 1);
}
728
729
/* ZSTDMT_flushStream() :
 * Flush whatever is ready without ending the frame.
 * @return : bytes still buffered, or an error code */
size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
{
    if (zcs->nbThreads==1) return ZSTD_flushStream(zcs->cstream, output);
    return ZSTDMT_flushStream_internal(zcs, output, 0);
}
735
/* ZSTDMT_endStream() :
 * Flush remaining data and close the frame (epilogue + optional checksum).
 * @return : bytes still buffered (call again until 0), or an error code */
size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output)
{
    if (zcs->nbThreads==1) return ZSTD_endStream(zcs->cstream, output);
    return ZSTDMT_flushStream_internal(zcs, output, 1);
}
@@ -0,0 +1,78 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
#ifndef ZSTDMT_COMPRESS_H
#define ZSTDMT_COMPRESS_H

#if defined (__cplusplus)
extern "C" {
#endif


/* Note : All prototypes defined in this file shall be considered experimental.
 *        There is no guarantee of API continuity (yet) on any of these prototypes */

/* ===   Dependencies   === */
#include <stddef.h>   /* size_t */
#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters */
#include "zstd.h"   /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */


/* ===   Simple one-pass functions   === */

typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads);
ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* cctx);

/* One-shot parallel compression of src into dst. */
ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx,
                           void* dst, size_t dstCapacity,
                     const void* src, size_t srcSize,
                           int compressionLevel);


/* ===   Streaming functions   === */

ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be zero == unknown */

ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);

ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);   /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);   /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */


/* ===   Advanced functions and parameters  === */

#ifndef ZSTDMT_SECTION_SIZE_MIN
#  define ZSTDMT_SECTION_SIZE_MIN (1U << 20)   /* 1 MB - Minimum size of each compression job */
#endif

ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, const void* dict, size_t dictSize,   /**< dict can be released after init, a local copy is preserved within zcs */
                                          ZSTD_parameters params, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be zero == unknown */

/* ZSDTMT_parameter :
 * List of parameters that can be set using ZSTDMT_setMTCtxParameter()
 * NOTE(review) : the "ZSDTMT" spelling (D/T transposed) is a typo, but it is
 * part of this experimental public API ; renaming would break callers. */
typedef enum {
    ZSTDMT_p_sectionSize,        /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */
    ZSTDMT_p_overlapSectionLog   /* Log of overlapped section; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
} ZSDTMT_parameter;

/* ZSTDMT_setMTCtxParameter() :
 * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
 * The function must be called typically after ZSTD_createCCtx().
 * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value);


#if defined (__cplusplus)
}
#endif

#endif   /* ZSTDMT_COMPRESS_H */
This diff has been collapsed as it changes many lines, (1021 lines changed) Show them Hide them
@@ -0,0 +1,1021 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10 /*-*************************************
11 * Dependencies
12 ***************************************/
13 #include <stdio.h> /* fprintf */
14 #include <stdlib.h> /* malloc, free, qsort */
15 #include <string.h> /* memset */
16 #include <time.h> /* clock */
17
18 #include "mem.h" /* read */
19 #include "pool.h"
20 #include "threading.h"
21 #include "zstd_internal.h" /* includes zstd.h */
22 #ifndef ZDICT_STATIC_LINKING_ONLY
23 #define ZDICT_STATIC_LINKING_ONLY
24 #endif
25 #include "zdict.h"
26
/*-*************************************
*  Constants
***************************************/
/* Cap on the total size of all samples: 4 GB - 1 on 64-bit hosts, 1 GB on
 * 32-bit hosts (GB is defined by the zstd internal headers). */
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))

/*-*************************************
*  Console display
***************************************/
static int g_displayLevel = 2;
/* Print to stderr unconditionally and flush.
 * All display macros are wrapped in do { ... } while (0) so each expands to a
 * single statement and is safe inside unbraced if/else bodies. */
#define DISPLAY(...)                                                           \
  do {                                                                         \
    fprintf(stderr, __VA_ARGS__);                                              \
    fflush(stderr);                                                            \
  } while (0)
/* 0 : no display;   1: errors;   2: default;   3: details;   4: debug */
#define LOCALDISPLAYLEVEL(displayLevel, l, ...)                                \
  do {                                                                         \
    if (displayLevel >= l) {                                                   \
      DISPLAY(__VA_ARGS__);                                                    \
    }                                                                          \
  } while (0)
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)

/* Rate-limited progress output: refreshes at most once per refreshRate tick
 * unless the display level is debug (>= 4). */
#define LOCALDISPLAYUPDATE(displayLevel, l, ...)                               \
  do {                                                                         \
    if (displayLevel >= l) {                                                   \
      if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) {           \
        g_time = clock();                                                      \
        DISPLAY(__VA_ARGS__);                                                  \
        if (displayLevel >= 4)                                                 \
          fflush(stdout);                                                      \
      }                                                                        \
    }                                                                          \
  } while (0)
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
/* Minimum interval between progress refreshes (~150 ms). */
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
static clock_t g_time = 0;
59
60 /*-*************************************
61 * Hash table
62 ***************************************
63 * A small specialized hash map for storing activeDmers.
64 * The map does not resize, so if it becomes full it will loop forever.
65 * Thus, the map must be large enough to store every value.
66 * The map implements linear probing and keeps its load less than 0.5.
67 */
68
/* Sentinel stored in `value` marking an empty slot. It is all-one bits, so it
 * doubles as the memset fill byte used by COVER_map_clear(). */
#define MAP_EMPTY_VALUE ((U32)-1)
/* One open-addressing slot: a key and its associated counter value. */
typedef struct COVER_map_pair_t_s {
  U32 key;
  U32 value;
} COVER_map_pair_t;

/* Fixed-capacity linear-probing hash map (see the Hash table notes above).
 * It never resizes; COVER_map_init() over-allocates to keep load < 0.5. */
typedef struct COVER_map_s {
  COVER_map_pair_t *data; /* array of `size` slots */
  U32 sizeLog;            /* log2(size) */
  U32 size;               /* slot count; always a power of two */
  U32 sizeMask;           /* size - 1, for cheap wrap-around */
} COVER_map_t;
81
82 /**
83 * Clear the map.
84 */
85 static void COVER_map_clear(COVER_map_t *map) {
86 memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t));
87 }
88
89 /**
90 * Initializes a map of the given size.
91 * Returns 1 on success and 0 on failure.
92 * The map must be destroyed with COVER_map_destroy().
93 * The map is only guaranteed to be large enough to hold size elements.
94 */
95 static int COVER_map_init(COVER_map_t *map, U32 size) {
96 map->sizeLog = ZSTD_highbit32(size) + 2;
97 map->size = (U32)1 << map->sizeLog;
98 map->sizeMask = map->size - 1;
99 map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t));
100 if (!map->data) {
101 map->sizeLog = 0;
102 map->size = 0;
103 return 0;
104 }
105 COVER_map_clear(map);
106 return 1;
107 }
108
/**
 * Internal hash function (multiplicative hashing with a 32-bit prime).
 */
static const U32 prime4bytes = 2654435761U;
/* Maps a 32-bit key to a slot index in [0, map->size). */
static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
  return (key * prime4bytes) >> (32 - map->sizeLog);
}
116
117 /**
118 * Helper function that returns the index that a key should be placed into.
119 */
120 static U32 COVER_map_index(COVER_map_t *map, U32 key) {
121 const U32 hash = COVER_map_hash(map, key);
122 U32 i;
123 for (i = hash;; i = (i + 1) & map->sizeMask) {
124 COVER_map_pair_t *pos = &map->data[i];
125 if (pos->value == MAP_EMPTY_VALUE) {
126 return i;
127 }
128 if (pos->key == key) {
129 return i;
130 }
131 }
132 }
133
134 /**
135 * Returns the pointer to the value for key.
136 * If key is not in the map, it is inserted and the value is set to 0.
137 * The map must not be full.
138 */
139 static U32 *COVER_map_at(COVER_map_t *map, U32 key) {
140 COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)];
141 if (pos->value == MAP_EMPTY_VALUE) {
142 pos->key = key;
143 pos->value = 0;
144 }
145 return &pos->value;
146 }
147
/**
 * Deletes key from the map if present.
 * Uses backward-shift deletion: entries later in the probe chain are moved
 * into the vacated slot so that lookups never stop early at a false hole.
 */
static void COVER_map_remove(COVER_map_t *map, U32 key) {
  U32 i = COVER_map_index(map, key);
  COVER_map_pair_t *del = &map->data[i];
  U32 shift = 1; /* probe distance between `del` and the slot being examined */
  if (del->value == MAP_EMPTY_VALUE) {
    /* Key not present: nothing to do. */
    return;
  }
  for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) {
    COVER_map_pair_t *const pos = &map->data[i];
    /* If the position is empty we are done */
    if (pos->value == MAP_EMPTY_VALUE) {
      del->value = MAP_EMPTY_VALUE;
      return;
    }
    /* If pos can be moved to del do so */
    /* pos may fill the hole only if its home slot is at least `shift`
     * positions behind it, i.e. moving it does not break its probe chain. */
    if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) {
      del->key = pos->key;
      del->value = pos->value;
      del = pos;
      shift = 1;
    } else {
      ++shift;
    }
  }
}
176
177 /**
178 * Destroyes a map that is inited with COVER_map_init().
179 */
180 static void COVER_map_destroy(COVER_map_t *map) {
181 if (map->data) {
182 free(map->data);
183 }
184 map->data = NULL;
185 map->size = 0;
186 }
187
/*-*************************************
*  Context
***************************************/

/* Shared, read-mostly state for one value of `d`: the samples, a partial
 * suffix array over them, and per-dmer bookkeeping. Built by COVER_ctx_init()
 * and released by COVER_ctx_destroy(). */
typedef struct {
  const BYTE *samples;        /* concatenated sample bytes (caller-owned) */
  size_t *offsets;            /* nbSamples+1 cumulative sample end offsets */
  const size_t *samplesSizes; /* size of each sample (caller-owned) */
  size_t nbSamples;
  U32 *suffix;                /* partial suffix array; NULL once converted */
  size_t suffixSize;
  U32 *freqs;                 /* per-dmer frequencies (reuses suffix memory) */
  U32 *dmerAt;                /* position -> dmer id */
  unsigned d;                 /* dmer length this context was built for */
} COVER_ctx_t;

/* We need a global context for qsort... */
static COVER_ctx_t *g_ctx = NULL;
206
/*-*************************************
*  Helper functions
***************************************/

/**
 * Returns the sum of the sample sizes.
 */
static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
  size_t total = 0;
  const size_t *cur = samplesSizes;
  const size_t *const end = samplesSizes + nbSamples;
  while (cur != end) {
    total += *cur++;
  }
  return total;
}
222
223 /**
224 * Returns -1 if the dmer at lp is less than the dmer at rp.
225 * Return 0 if the dmers at lp and rp are equal.
226 * Returns 1 if the dmer at lp is greater than the dmer at rp.
227 */
228 static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
229 const U32 lhs = *(const U32 *)lp;
230 const U32 rhs = *(const U32 *)rp;
231 return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
232 }
233
234 /**
235 * Same as COVER_cmp() except ties are broken by pointer value
236 * NOTE: g_ctx must be set to call this function. A global is required because
237 * qsort doesn't take an opaque pointer.
238 */
239 static int COVER_strict_cmp(const void *lp, const void *rp) {
240 int result = COVER_cmp(g_ctx, lp, rp);
241 if (result == 0) {
242 result = lp < rp ? -1 : 1;
243 }
244 return result;
245 }
246
/**
 * Returns the first pointer in [first, last) whose element does not compare
 * less than value. If no such element exists it returns last.
 * The range must be sorted; this is a binary search.
 */
static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
                                       size_t value) {
  size_t remaining = (size_t)(last - first);
  while (remaining > 0) {
    const size_t half = remaining / 2;
    const size_t *const mid = first + half;
    if (*mid < value) {
      /* The answer lies strictly after mid. */
      first = mid + 1;
      remaining -= half + 1;
    } else {
      /* mid is a candidate; keep searching the left half. */
      remaining = half;
    }
  }
  return first;
}
267
/**
 * Generic groupBy function.
 * Groups an array sorted by cmp into groups with equivalent values.
 * Calls grp for each group, passing pointers to the group's first element
 * and one past its last element.
 *
 * `data` points at `count` elements of `size` bytes each.
 */
static void
COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx,
              int (*cmp)(COVER_ctx_t *, const void *, const void *),
              void (*grp)(COVER_ctx_t *, const void *, const void *)) {
  const BYTE *ptr = (const BYTE *)data;
  size_t num = 0; /* elements consumed so far */
  while (num < count) {
    /* Extend grpEnd while the next element compares equal to the first. */
    const BYTE *grpEnd = ptr + size;
    ++num;
    while (num < count && cmp(ctx, ptr, grpEnd) == 0) {
      grpEnd += size;
      ++num;
    }
    grp(ctx, ptr, grpEnd);
    ptr = grpEnd;
  }
}
290
/*-*************************************
*  Cover functions
***************************************/

/**
 * Called on each group of positions with the same dmer.
 * Counts the frequency of each dmer and saves it in the suffix array.
 * Fills `ctx->dmerAt`.
 *
 * `group`/`groupEnd` delimit a run of suffix-array entries whose positions
 * share the same first d bytes (produced by COVER_groupBy()).
 */
static void COVER_group(COVER_ctx_t *ctx, const void *group,
                        const void *groupEnd) {
  /* The group consists of all the positions with the same first d bytes. */
  const U32 *grpPtr = (const U32 *)group;
  const U32 *grpEnd = (const U32 *)groupEnd;
  /* The dmerId is how we will reference this dmer.
   * This allows us to map the whole dmer space to a much smaller space, the
   * size of the suffix array.
   */
  const U32 dmerId = (U32)(grpPtr - ctx->suffix);
  /* Count the number of samples this dmer shows up in */
  U32 freq = 0;
  /* Details */
  const size_t *curOffsetPtr = ctx->offsets;
  const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples;
  /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a
   * different sample than the last.
   */
  size_t curSampleEnd = ctx->offsets[0];
  for (; grpPtr != grpEnd; ++grpPtr) {
    /* Save the dmerId for this position so we can get back to it. */
    ctx->dmerAt[*grpPtr] = dmerId;
    /* Dictionaries only help for the first reference to the dmer.
     * After that zstd can reference the match from the previous reference.
     * So only count each dmer once for each sample it is in.
     */
    if (*grpPtr < curSampleEnd) {
      continue;
    }
    freq += 1;
    /* Binary search to find the end of the sample *grpPtr is in.
     * In the common case that grpPtr + 1 == grpEnd we can skip the binary
     * search because the loop is over.
     */
    if (grpPtr + 1 != grpEnd) {
      const size_t *sampleEndPtr =
          COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr);
      curSampleEnd = *sampleEndPtr;
      curOffsetPtr = sampleEndPtr + 1;
    }
  }
  /* At this point we are never going to look at this segment of the suffix
   * array again. We take advantage of this fact to save memory.
   * We store the frequency of the dmer in the first position of the group,
   * which is dmerId.
   */
  ctx->suffix[dmerId] = freq;
}
348
/**
 * A segment is a range in the source as well as the score of the segment.
 */
typedef struct {
  U32 begin;    /* first dmer position of the segment (inclusive) */
  U32 end;      /* one past the last dmer position (exclusive) */
  double score; /* sum of frequencies of the segment's distinct dmers */
} COVER_segment_t;
357
/**
 * Selects the best segment in an epoch.
 * Segments of are scored according to the function:
 *
 * Let F(d) be the frequency of dmer d.
 * Let S_i be the dmer at position i of segment S which has length k.
 *
 *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
 *
 * Once the dmer d is in the dictionary we set F(d) = 0.
 *
 * Slides a window of k-d+1 dmers over [begin, end), maintaining the set of
 * dmers inside the window in `activeDmers` so each distinct dmer is scored
 * once. Zeroes the frequencies covered by the winning segment (in `freqs`)
 * before returning it.
 */
static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
                                           COVER_map_t *activeDmers, U32 begin,
                                           U32 end, COVER_params_t parameters) {
  /* Constants */
  const U32 k = parameters.k;
  const U32 d = parameters.d;
  const U32 dmersInK = k - d + 1;
  /* Try each segment (activeSegment) and save the best (bestSegment) */
  COVER_segment_t bestSegment = {0, 0, 0};
  COVER_segment_t activeSegment;
  /* Reset the activeDmers in the segment */
  COVER_map_clear(activeDmers);
  /* The activeSegment starts at the beginning of the epoch. */
  activeSegment.begin = begin;
  activeSegment.end = begin;
  activeSegment.score = 0;
  /* Slide the activeSegment through the whole epoch.
   * Save the best segment in bestSegment.
   */
  while (activeSegment.end < end) {
    /* The dmerId for the dmer at the next position */
    U32 newDmer = ctx->dmerAt[activeSegment.end];
    /* The entry in activeDmers for this dmerId */
    U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer);
    /* If the dmer isn't already present in the segment add its score. */
    if (*newDmerOcc == 0) {
      /* The paper suggest using the L-0.5 norm, but experiments show that it
       * doesn't help.
       */
      activeSegment.score += freqs[newDmer];
    }
    /* Add the dmer to the segment */
    activeSegment.end += 1;
    *newDmerOcc += 1;

    /* If the window is now too large, drop the first position */
    if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
      U32 delDmer = ctx->dmerAt[activeSegment.begin];
      U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
      activeSegment.begin += 1;
      *delDmerOcc -= 1;
      /* If this is the last occurence of the dmer, subtract its score */
      if (*delDmerOcc == 0) {
        COVER_map_remove(activeDmers, delDmer);
        activeSegment.score -= freqs[delDmer];
      }
    }

    /* If this segment is the best so far save it */
    if (activeSegment.score > bestSegment.score) {
      bestSegment = activeSegment;
    }
  }
  {
    /* Trim off the zero frequency head and tail from the segment. */
    U32 newBegin = bestSegment.end;
    U32 newEnd = bestSegment.begin;
    U32 pos;
    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
      U32 freq = freqs[ctx->dmerAt[pos]];
      if (freq != 0) {
        newBegin = MIN(newBegin, pos);
        newEnd = pos + 1;
      }
    }
    bestSegment.begin = newBegin;
    bestSegment.end = newEnd;
  }
  {
    /* Zero out the frequency of each dmer covered by the chosen segment. */
    U32 pos;
    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
      freqs[ctx->dmerAt[pos]] = 0;
    }
  }
  return bestSegment;
}
446
447 /**
448 * Check the validity of the parameters.
449 * Returns non-zero if the parameters are valid and 0 otherwise.
450 */
451 static int COVER_checkParameters(COVER_params_t parameters) {
452 /* k and d are required parameters */
453 if (parameters.d == 0 || parameters.k == 0) {
454 return 0;
455 }
456 /* d <= k */
457 if (parameters.d > parameters.k) {
458 return 0;
459 }
460 return 1;
461 }
462
463 /**
464 * Clean up a context initialized with `COVER_ctx_init()`.
465 */
466 static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
467 if (!ctx) {
468 return;
469 }
470 if (ctx->suffix) {
471 free(ctx->suffix);
472 ctx->suffix = NULL;
473 }
474 if (ctx->freqs) {
475 free(ctx->freqs);
476 ctx->freqs = NULL;
477 }
478 if (ctx->dmerAt) {
479 free(ctx->dmerAt);
480 ctx->dmerAt = NULL;
481 }
482 if (ctx->offsets) {
483 free(ctx->offsets);
484 ctx->offsets = NULL;
485 }
486 }
487
/**
 * Prepare a context for dictionary building.
 * The context is only dependent on the parameter `d` and can used multiple
 * times.
 * Returns 1 on success or zero on error.
 * The context must be destroyed with `COVER_ctx_destroy()`.
 *
 * On success, `ctx->freqs` holds per-dmer frequencies and `ctx->suffix` is
 * NULL (the suffix buffer is reused for the frequencies).
 */
static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
                          const size_t *samplesSizes, unsigned nbSamples,
                          unsigned d) {
  const BYTE *const samples = (const BYTE *)samplesBuffer;
  const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
  /* Checks */
  /* Need at least one full dmer, and the total must fit the U32 position
   * space used by the suffix array. */
  if (totalSamplesSize < d ||
      totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
    DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
                 (COVER_MAX_SAMPLES_SIZE >> 20));
    return 0;
  }
  /* Zero the context */
  memset(ctx, 0, sizeof(*ctx));
  DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbSamples,
               (U32)totalSamplesSize);
  ctx->samples = samples;
  ctx->samplesSizes = samplesSizes;
  ctx->nbSamples = nbSamples;
  /* Partial suffix array */
  ctx->suffixSize = totalSamplesSize - d + 1;
  ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
  /* Maps index to the dmerID */
  ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
  /* The offsets of each file */
  ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
  if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
    DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
    COVER_ctx_destroy(ctx);
    return 0;
  }
  ctx->freqs = NULL;
  ctx->d = d;

  /* Fill offsets from the samplesSizes */
  {
    U32 i;
    ctx->offsets[0] = 0;
    for (i = 1; i <= nbSamples; ++i) {
      ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
    }
  }
  DISPLAYLEVEL(2, "Constructing partial suffix array\n");
  {
    /* suffix is a partial suffix array.
     * It only sorts suffixes by their first parameters.d bytes.
     * The sort is stable, so each dmer group is sorted by position in input.
     */
    U32 i;
    for (i = 0; i < ctx->suffixSize; ++i) {
      ctx->suffix[i] = i;
    }
    /* qsort doesn't take an opaque pointer, so pass as a global */
    g_ctx = ctx;
    qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), &COVER_strict_cmp);
  }
  DISPLAYLEVEL(2, "Computing frequencies\n");
  /* For each dmer group (group of positions with the same first d bytes):
   * 1. For each position we set dmerAt[position] = dmerID. The dmerID is
   *    (groupBeginPtr - suffix). This allows us to go from position to
   *    dmerID so we can look up values in freq.
   * 2. We calculate how many samples the dmer occurs in and save it in
   *    freqs[dmerId].
   */
  COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, &COVER_cmp,
                &COVER_group);
  ctx->freqs = ctx->suffix;
  ctx->suffix = NULL;
  return 1;
}
565
566 /**
567 * Given the prepared context build the dictionary.
568 */
569 static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
570 COVER_map_t *activeDmers, void *dictBuffer,
571 size_t dictBufferCapacity,
572 COVER_params_t parameters) {
573 BYTE *const dict = (BYTE *)dictBuffer;
574 size_t tail = dictBufferCapacity;
575 /* Divide the data up into epochs of equal size.
576 * We will select at least one segment from each epoch.
577 */
578 const U32 epochs = (U32)(dictBufferCapacity / parameters.k);
579 const U32 epochSize = (U32)(ctx->suffixSize / epochs);
580 size_t epoch;
581 DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
582 epochSize);
583 /* Loop through the epochs until there are no more segments or the dictionary
584 * is full.
585 */
586 for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
587 const U32 epochBegin = (U32)(epoch * epochSize);
588 const U32 epochEnd = epochBegin + epochSize;
589 size_t segmentSize;
590 /* Select a segment */
591 COVER_segment_t segment = COVER_selectSegment(
592 ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
593 /* Trim the segment if necessary and if it is empty then we are done */
594 segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
595 if (segmentSize == 0) {
596 break;
597 }
598 /* We fill the dictionary from the back to allow the best segments to be
599 * referenced with the smallest offsets.
600 */
601 tail -= segmentSize;
602 memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
603 DISPLAYUPDATE(
604 2, "\r%u%% ",
605 (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
606 }
607 DISPLAYLEVEL(2, "\r%79s\r", "");
608 return tail;
609 }
610
611 /**
612 * Translate from COVER_params_t to ZDICT_params_t required for finalizing the
613 * dictionary.
614 */
615 static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
616 ZDICT_params_t zdictParams;
617 memset(&zdictParams, 0, sizeof(zdictParams));
618 zdictParams.notificationLevel = 1;
619 zdictParams.dictID = parameters.dictID;
620 zdictParams.compressionLevel = parameters.compressionLevel;
621 return zdictParams;
622 }
623
/**
 * Constructs a dictionary using a heuristic based on the following paper:
 *
 * Liao, Petri, Moffat, Wirth
 * Effective Construction of Relative Lempel-Ziv Dictionaries
 * Published in WWW 2016.
 *
 * Returns the size of the dictionary written into `dictBuffer`, or a zstd
 * error code (test with ZSTD_isError()).
 */
ZDICTLIB_API size_t COVER_trainFromBuffer(
    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
    const size_t *samplesSizes, unsigned nbSamples, COVER_params_t parameters) {
  BYTE *const dict = (BYTE *)dictBuffer;
  COVER_ctx_t ctx;
  COVER_map_t activeDmers;
  /* Checks */
  if (!COVER_checkParameters(parameters)) {
    DISPLAYLEVEL(1, "Cover parameters incorrect\n");
    return ERROR(GENERIC);
  }
  if (nbSamples == 0) {
    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
    return ERROR(GENERIC);
  }
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
                 ZDICT_DICTSIZE_MIN);
    return ERROR(dstSize_tooSmall);
  }
  /* Initialize global data */
  g_displayLevel = parameters.notificationLevel;
  /* Initialize context and activeDmers */
  if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
                      parameters.d)) {
    return ERROR(GENERIC);
  }
  /* The map must hold one entry per dmer in a k-sized window (k - d + 1). */
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
    COVER_ctx_destroy(&ctx);
    return ERROR(GENERIC);
  }

  DISPLAYLEVEL(2, "Building dictionary\n");
  {
    /* Raw segment content is packed at the back of the buffer; finalize then
     * prepends the entropy tables and header in the remaining front space. */
    const size_t tail =
        COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
                              dictBufferCapacity, parameters);
    ZDICT_params_t zdictParams = COVER_translateParams(parameters);
    const size_t dictionarySize = ZDICT_finalizeDictionary(
        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
        samplesBuffer, samplesSizes, nbSamples, zdictParams);
    if (!ZSTD_isError(dictionarySize)) {
      DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
                   (U32)dictionarySize);
    }
    COVER_ctx_destroy(&ctx);
    COVER_map_destroy(&activeDmers);
    return dictionarySize;
  }
}
682
/**
 * COVER_best_t is used for two purposes:
 * 1. Synchronizing threads.
 * 2. Saving the best parameters and dictionary.
 *
 * All of the methods except COVER_best_init() are thread safe if zstd is
 * compiled with multithreaded support.
 */
typedef struct COVER_best_s {
  pthread_mutex_t mutex; /* guards all fields below */
  pthread_cond_t cond;   /* broadcast when liveJobs drops to 0 */
  size_t liveJobs;       /* number of parameter trials still running */
  void *dict;            /* best dictionary found so far (malloc'd) */
  size_t dictSize;
  COVER_params_t parameters; /* parameters that produced `dict` */
  size_t compressedSize;     /* total compressed size with `dict`; lower wins */
} COVER_best_t;
700
701 /**
702 * Initialize the `COVER_best_t`.
703 */
704 static void COVER_best_init(COVER_best_t *best) {
705 if (!best) {
706 return;
707 }
708 pthread_mutex_init(&best->mutex, NULL);
709 pthread_cond_init(&best->cond, NULL);
710 best->liveJobs = 0;
711 best->dict = NULL;
712 best->dictSize = 0;
713 best->compressedSize = (size_t)-1;
714 memset(&best->parameters, 0, sizeof(best->parameters));
715 }
716
/**
 * Wait until liveJobs == 0, i.e. every launched trial has finished.
 */
static void COVER_best_wait(COVER_best_t *best) {
  if (!best) {
    return;
  }
  pthread_mutex_lock(&best->mutex);
  while (best->liveJobs != 0) {
    /* Re-checked on every wakeup to tolerate spurious wakeups. */
    pthread_cond_wait(&best->cond, &best->mutex);
  }
  pthread_mutex_unlock(&best->mutex);
}
730
731 /**
732 * Call COVER_best_wait() and then destroy the COVER_best_t.
733 */
734 static void COVER_best_destroy(COVER_best_t *best) {
735 if (!best) {
736 return;
737 }
738 COVER_best_wait(best);
739 if (best->dict) {
740 free(best->dict);
741 }
742 pthread_mutex_destroy(&best->mutex);
743 pthread_cond_destroy(&best->cond);
744 }
745
/**
 * Called when a thread is about to be launched.
 * Increments liveJobs; every start must be paired with COVER_best_finish().
 */
static void COVER_best_start(COVER_best_t *best) {
  if (!best) {
    return;
  }
  pthread_mutex_lock(&best->mutex);
  ++best->liveJobs;
  pthread_mutex_unlock(&best->mutex);
}
758
759 /**
760 * Called when a thread finishes executing, both on error or success.
761 * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
762 * If this dictionary is the best so far save it and its parameters.
763 */
764 static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
765 COVER_params_t parameters, void *dict,
766 size_t dictSize) {
767 if (!best) {
768 return;
769 }
770 {
771 size_t liveJobs;
772 pthread_mutex_lock(&best->mutex);
773 --best->liveJobs;
774 liveJobs = best->liveJobs;
775 /* If the new dictionary is better */
776 if (compressedSize < best->compressedSize) {
777 /* Allocate space if necessary */
778 if (!best->dict || best->dictSize < dictSize) {
779 if (best->dict) {
780 free(best->dict);
781 }
782 best->dict = malloc(dictSize);
783 if (!best->dict) {
784 best->compressedSize = ERROR(GENERIC);
785 best->dictSize = 0;
786 return;
787 }
788 }
789 /* Save the dictionary, parameters, and size */
790 memcpy(best->dict, dict, dictSize);
791 best->dictSize = dictSize;
792 best->parameters = parameters;
793 best->compressedSize = compressedSize;
794 }
795 pthread_mutex_unlock(&best->mutex);
796 if (liveJobs == 0) {
797 pthread_cond_broadcast(&best->cond);
798 }
799 }
800 }
801
/**
 * Parameters for COVER_tryParameters().
 * Heap-allocated by the caller; ownership transfers to COVER_tryParameters(),
 * which frees it.
 */
typedef struct COVER_tryParameters_data_s {
  const COVER_ctx_t *ctx; /* shared, read-only context for this value of d */
  COVER_best_t *best;     /* shared accumulator of the best result */
  size_t dictBufferCapacity;
  COVER_params_t parameters;
} COVER_tryParameters_data_t;
811
/**
 * Tries a set of parameters and updates the COVER_best_t with the results.
 * This function is thread safe if zstd is compiled with multithreaded support.
 * It takes its parameters as an *OWNING* opaque pointer to support threading
 * (the POOL job signature is void(*)(void*)); `opaque` is freed here.
 */
static void COVER_tryParameters(void *opaque) {
  /* Save parameters as local variables */
  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
  const COVER_ctx_t *const ctx = data->ctx;
  const COVER_params_t parameters = data->parameters;
  size_t dictBufferCapacity = data->dictBufferCapacity;
  /* Start pessimistic: any early exit reports an error to COVER_best_finish */
  size_t totalCompressedSize = ERROR(GENERIC);
  /* Allocate space for hash table, dict, and freqs */
  COVER_map_t activeDmers;
  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
    goto _cleanup;
  }
  if (!dict || !freqs) {
    DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
    goto _cleanup;
  }
  /* Copy the frequencies because we need to modify them */
  memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32));
  /* Build the dictionary */
  {
    const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
                                              dictBufferCapacity, parameters);
    const ZDICT_params_t zdictParams = COVER_translateParams(parameters);
    dictBufferCapacity = ZDICT_finalizeDictionary(
        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples, zdictParams);
    if (ZDICT_isError(dictBufferCapacity)) {
      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
      goto _cleanup;
    }
  }
  /* Check total compressed size */
  {
    /* Pointers */
    ZSTD_CCtx *cctx;
    ZSTD_CDict *cdict;
    void *dst;
    /* Local variables */
    size_t dstCapacity;
    size_t i;
    /* Allocate dst with enough space to compress the maximum sized sample */
    {
      size_t maxSampleSize = 0;
      for (i = 0; i < ctx->nbSamples; ++i) {
        maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
      }
      dstCapacity = ZSTD_compressBound(maxSampleSize);
      dst = malloc(dstCapacity);
    }
    /* Create the cctx and cdict */
    cctx = ZSTD_createCCtx();
    cdict =
        ZSTD_createCDict(dict, dictBufferCapacity, parameters.compressionLevel);
    if (!dst || !cctx || !cdict) {
      goto _compressCleanup;
    }
    /* Compress each sample and sum their sizes (or error) */
    totalCompressedSize = 0;
    for (i = 0; i < ctx->nbSamples; ++i) {
      const size_t size = ZSTD_compress_usingCDict(
          cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
          ctx->samplesSizes[i], cdict);
      if (ZSTD_isError(size)) {
        totalCompressedSize = ERROR(GENERIC);
        goto _compressCleanup;
      }
      totalCompressedSize += size;
    }
  _compressCleanup:
    /* ZSTD_freeCCtx / ZSTD_freeCDict accept NULL */
    ZSTD_freeCCtx(cctx);
    ZSTD_freeCDict(cdict);
    if (dst) {
      free(dst);
    }
  }

_cleanup:
  /* Always report (even errors) so best->liveJobs is decremented exactly once
   * per COVER_best_start(). */
  COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
                    dictBufferCapacity);
  free(data);
  COVER_map_destroy(&activeDmers);
  if (dict) {
    free(dict);
  }
  if (freqs) {
    free(freqs);
  }
}
908
909 ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
910 size_t dictBufferCapacity,
911 const void *samplesBuffer,
912 const size_t *samplesSizes,
913 unsigned nbSamples,
914 COVER_params_t *parameters) {
915 /* constants */
916 const unsigned nbThreads = parameters->nbThreads;
917 const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
918 const unsigned kMaxD = parameters->d == 0 ? 16 : parameters->d;
919 const unsigned kMinK = parameters->k == 0 ? kMaxD : parameters->k;
920 const unsigned kMaxK = parameters->k == 0 ? 2048 : parameters->k;
921 const unsigned kSteps = parameters->steps == 0 ? 32 : parameters->steps;
922 const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
923 const unsigned kIterations =
924 (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
925 /* Local variables */
926 const int displayLevel = parameters->notificationLevel;
927 unsigned iteration = 1;
928 unsigned d;
929 unsigned k;
930 COVER_best_t best;
931 POOL_ctx *pool = NULL;
932 /* Checks */
933 if (kMinK < kMaxD || kMaxK < kMinK) {
934 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
935 return ERROR(GENERIC);
936 }
937 if (nbSamples == 0) {
938 DISPLAYLEVEL(1, "Cover must have at least one input file\n");
939 return ERROR(GENERIC);
940 }
941 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
942 DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
943 ZDICT_DICTSIZE_MIN);
944 return ERROR(dstSize_tooSmall);
945 }
946 if (nbThreads > 1) {
947 pool = POOL_create(nbThreads, 1);
948 if (!pool) {
949 return ERROR(memory_allocation);
950 }
951 }
952 /* Initialization */
953 COVER_best_init(&best);
954 /* Turn down global display level to clean up display at level 2 and below */
955 g_displayLevel = parameters->notificationLevel - 1;
956 /* Loop through d first because each new value needs a new context */
957 LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
958 kIterations);
959 for (d = kMinD; d <= kMaxD; d += 2) {
960 /* Initialize the context for this value of d */
961 COVER_ctx_t ctx;
962 LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
963 if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d)) {
964 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
965 COVER_best_destroy(&best);
966 return ERROR(GENERIC);
967 }
968 /* Loop through k reusing the same context */
969 for (k = kMinK; k <= kMaxK; k += kStepSize) {
970 /* Prepare the arguments */
971 COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc(
972 sizeof(COVER_tryParameters_data_t));
973 LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
974 if (!data) {
975 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
976 COVER_best_destroy(&best);
977 COVER_ctx_destroy(&ctx);
978 return ERROR(GENERIC);
979 }
980 data->ctx = &ctx;
981 data->best = &best;
982 data->dictBufferCapacity = dictBufferCapacity;
983 data->parameters = *parameters;
984 data->parameters.k = k;
985 data->parameters.d = d;
986 data->parameters.steps = kSteps;
987 /* Check the parameters */
988 if (!COVER_checkParameters(data->parameters)) {
989 DISPLAYLEVEL(1, "Cover parameters incorrect\n");
990 continue;
991 }
992 /* Call the function and pass ownership of data to it */
993 COVER_best_start(&best);
994 if (pool) {
995 POOL_add(pool, &COVER_tryParameters, data);
996 } else {
997 COVER_tryParameters(data);
998 }
999 /* Print status */
1000 LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
1001 (U32)((iteration * 100) / kIterations));
1002 ++iteration;
1003 }
1004 COVER_best_wait(&best);
1005 COVER_ctx_destroy(&ctx);
1006 }
1007 LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
1008 /* Fill the output buffer and parameters with output of the best parameters */
1009 {
1010 const size_t dictSize = best.dictSize;
1011 if (ZSTD_isError(best.compressedSize)) {
1012 COVER_best_destroy(&best);
1013 return best.compressedSize;
1014 }
1015 *parameters = best.parameters;
1016 memcpy(dictBuffer, best.dict, dictSize);
1017 COVER_best_destroy(&best);
1018 POOL_free(pool);
1019 return dictSize;
1020 }
1021 }
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100755
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
@@ -159,10 +159,21 b' i18n/hg.pot: $(PYFILES) $(DOCFILES) i18n'
159 159 # Packaging targets
160 160
161 161 osx:
162 rm -rf build/mercurial
162 163 /usr/bin/python2.7 setup.py install --optimize=1 \
163 164 --root=build/mercurial/ --prefix=/usr/local/ \
164 165 --install-lib=/Library/Python/2.7/site-packages/
165 166 make -C doc all install DESTDIR="$(PWD)/build/mercurial/"
167 # install zsh completions - this location appears to be
168 # searched by default as of macOS Sierra.
169 install -d build/mercurial/usr/local/share/zsh/site-functions/
170 install -m 0644 contrib/zsh_completion build/mercurial/usr/local/share/zsh/site-functions/hg
171 # install bash completions - there doesn't appear to be a
172 # place that's searched by default for bash, so we'll follow
173 # the lead of Apple's git install and just put it in a
174 # location of our own.
175 install -d build/mercurial/usr/local/hg/contrib/
176 install -m 0644 contrib/bash_completion build/mercurial/usr/local/hg/contrib/hg-completion.bash
166 177 mkdir -p $${OUTPUTDIR:-dist}
167 178 HGVER=$$((cat build/mercurial/Library/Python/2.7/site-packages/mercurial/__version__.py; echo 'print(version)') | python) && \
168 179 OSXVER=$$(sw_vers -productVersion | cut -d. -f1,2) && \
@@ -262,5 +273,9 b' docker-centos7:'
262 273 .PHONY: help all local build doc cleanbutpackages clean install install-bin \
263 274 install-doc install-home install-home-bin install-home-doc \
264 275 dist dist-notests check tests check-code update-pot \
265 osx fedora20 docker-fedora20 fedora21 docker-fedora21 \
276 osx deb ppa docker-debian-jessie \
277 docker-ubuntu-trusty docker-ubuntu-trusty-ppa \
278 docker-ubuntu-xenial docker-ubuntu-xenial-ppa \
279 docker-ubuntu-yakkety docker-ubuntu-yakkety-ppa \
280 fedora20 docker-fedora20 fedora21 docker-fedora21 \
266 281 centos5 docker-centos5 centos6 docker-centos6 centos7 docker-centos7
@@ -100,7 +100,7 b' def rephere(m):'
100 100
101 101 testpats = [
102 102 [
103 (r'pushd|popd', "don't use 'pushd' or 'popd', use 'cd'"),
103 (r'\b(push|pop)d\b', "don't use 'pushd' or 'popd', use 'cd'"),
104 104 (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
105 105 (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
106 106 (r'(?<!hg )grep.* -a', "don't use 'grep -a', use in-line python"),
@@ -190,8 +190,10 b' utestpats = ['
190 190 (r'^ .*: largefile \S+ not available from file:.*/.*[^)]$', winglobmsg),
191 191 (r'^ .*file://\$TESTTMP',
192 192 'write "file:/*/$TESTTMP" + (glob) to match on windows too'),
193 (r'^ [^$>].*27\.0\.0\.1.*[^)]$',
194 'use (glob) to match localhost IP on hosts without 127.0.0.1 too'),
193 (r'^ [^$>].*27\.0\.0\.1',
194 'use $LOCALIP not an explicit loopback address'),
195 (r'^ [^$>].*\$LOCALIP.*[^)]$',
196 'mark $LOCALIP output lines with (glob) to help tests in BSD jails'),
195 197 (r'^ (cat|find): .*: No such file or directory',
196 198 'use test -f to test for file existence'),
197 199 (r'^ diff -[^ -]*p',
@@ -210,8 +212,8 b' utestpats = ['
210 212 ],
211 213 # warnings
212 214 [
213 (r'^ (?!.*127\.0\.0\.1)[^*?/\n]* \(glob\)$',
214 "glob match with no glob string (?, *, /, and 127.0.0.1)"),
215 (r'^ (?!.*\$LOCALIP)[^*?/\n]* \(glob\)$',
216 "glob match with no glob string (?, *, /, and $LOCALIP)"),
215 217 ]
216 218 ]
217 219
@@ -237,7 +239,7 b' pypats = ['
237 239 (r'lambda\s*\(.*,.*\)',
238 240 "tuple parameter unpacking not available in Python 3+"),
239 241 (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
240 (r'\breduce\s*\(.*', "reduce is not available in Python 3+"),
242 (r'(?<!\.)\breduce\s*\(.*', "reduce is not available in Python 3+"),
241 243 (r'\bdict\(.*=', 'dict() is different in Py2 and 3 and is slower than {}',
242 244 'dict-from-generator'),
243 245 (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
@@ -318,7 +320,7 b' pypats = ['
318 320 'legacy exception syntax; use "as" instead of ","'),
319 321 (r':\n( )*( ){1,3}[^ ]', "must indent 4 spaces"),
320 322 (r'release\(.*wlock, .*lock\)', "wrong lock release order"),
321 (r'\b__bool__\b', "__bool__ should be __nonzero__ in Python 2"),
323 (r'\bdef\s+__bool__\b', "__bool__ should be __nonzero__ in Python 2"),
322 324 (r'os\.path\.join\(.*, *(""|\'\')\)',
323 325 "use pathutil.normasprefix(path) instead of os.path.join(path, '')"),
324 326 (r'\s0[0-7]+\b', 'legacy octal syntax; use "0o" prefix instead of "0"'),
@@ -330,13 +332,15 b' pypats = ['
330 332 (r'^import cStringIO', "don't use cStringIO.StringIO, use util.stringio"),
331 333 (r'^import urllib', "don't use urllib, use util.urlreq/util.urlerr"),
332 334 (r'^import SocketServer', "don't use SockerServer, use util.socketserver"),
333 (r'^import urlparse', "don't use urlparse, use util.urlparse"),
335 (r'^import urlparse', "don't use urlparse, use util.urlreq"),
334 336 (r'^import xmlrpclib', "don't use xmlrpclib, use util.xmlrpclib"),
335 337 (r'^import cPickle', "don't use cPickle, use util.pickle"),
336 338 (r'^import pickle', "don't use pickle, use util.pickle"),
337 339 (r'^import httplib', "don't use httplib, use util.httplib"),
338 340 (r'^import BaseHTTPServer', "use util.httpserver instead"),
339 341 (r'\.next\(\)', "don't use .next(), use next(...)"),
342 (r'([a-z]*).revision\(\1\.node\(',
343 "don't convert rev to node before passing to revision(nodeorrev)"),
340 344
341 345 # rules depending on implementation of repquote()
342 346 (r' x+[xpqo%APM][\'"]\n\s+[\'"]x',
@@ -371,6 +375,13 b' pyfilters = ['
371 375 (?P=quote))""", reppython),
372 376 ]
373 377
378 # extension non-filter patterns
379 pyextnfpats = [
380 [(r'^"""\n?[A-Z]', "don't capitalize docstring title")],
381 # warnings
382 [],
383 ]
384
374 385 txtfilters = []
375 386
376 387 txtpats = [
@@ -480,6 +491,7 b' py3pats = ['
480 491
481 492 checks = [
482 493 ('python', r'.*\.(py|cgi)$', r'^#!.*python', pyfilters, pypats),
494 ('python', r'.*hgext.*\.py$', '', [], pyextnfpats),
483 495 ('python 3', r'.*(hgext|mercurial).*(?<!pycompat)\.py', '',
484 496 pyfilters, py3pats),
485 497 ('test script', r'(.*/)?test-[^.~]*$', '', testfilters, testpats),
@@ -661,7 +673,7 b' def checkfile(f, logfunc=_defaultlogger.'
661 673 return result
662 674
663 675 def main():
664 parser = optparse.OptionParser("%prog [options] [files]")
676 parser = optparse.OptionParser("%prog [options] [files | -]")
665 677 parser.add_option("-w", "--warnings", action="store_true",
666 678 help="include warning-level checks")
667 679 parser.add_option("-p", "--per-file", type="int",
@@ -679,6 +691,9 b' def main():'
679 691
680 692 if len(args) == 0:
681 693 check = glob.glob("*")
694 elif args == ['-']:
695 # read file list from stdin
696 check = sys.stdin.read().splitlines()
682 697 else:
683 698 check = args
684 699
@@ -128,6 +128,24 b' static void preparesockdir(const char *s'
128 128 abortmsg("insecure sockdir %s", sockdir);
129 129 }
130 130
131 /*
132 * Check if a socket directory exists and is only owned by the current user.
133 * Return 1 if so, 0 if not. This is used to check if XDG_RUNTIME_DIR can be
134 * used or not. According to the specification [1], XDG_RUNTIME_DIR should be
135 * ignored if the directory is not owned by the user with mode 0700.
136 * [1]: https://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
137 */
138 static int checkruntimedir(const char *sockdir)
139 {
140 struct stat st;
141 int r = lstat(sockdir, &st);
142 if (r < 0) /* ex. does not exist */
143 return 0;
144 if (!S_ISDIR(st.st_mode)) /* ex. is a file, not a directory */
145 return 0;
146 return st.st_uid == geteuid() && (st.st_mode & 0777) == 0700;
147 }
148
131 149 static void getdefaultsockdir(char sockdir[], size_t size)
132 150 {
133 151 /* by default, put socket file in secure directory
@@ -135,7 +153,7 b' static void getdefaultsockdir(char sockd'
135 153 * (permission of socket file may be ignored on some Unices) */
136 154 const char *runtimedir = getenv("XDG_RUNTIME_DIR");
137 155 int r;
138 if (runtimedir) {
156 if (runtimedir && checkruntimedir(runtimedir)) {
139 157 r = snprintf(sockdir, size, "%s/chg", runtimedir);
140 158 } else {
141 159 const char *tmpdir = getenv("TMPDIR");
@@ -429,11 +447,11 b' int main(int argc, const char *argv[], c'
429 447 }
430 448
431 449 setupsignalhandler(hgc_peerpid(hgc), hgc_peerpgid(hgc));
450 atexit(waitpager);
432 451 int exitcode = hgc_runcommand(hgc, argv + 1, argc - 1);
433 452 restoresignalhandler();
434 453 hgc_close(hgc);
435 454 freecmdserveropts(&opts);
436 waitpager();
437 455
438 456 return exitcode;
439 457 }
@@ -252,7 +252,7 b' static void handlesystemrequest(hgclient'
252 252 ctx->datasize = sizeof(r_n);
253 253 writeblock(hgc);
254 254 } else if (strcmp(args[0], "pager") == 0) {
255 setuppager(args[1]);
255 setuppager(args[1], args + 3);
256 256 if (hgc->capflags & CAP_ATTACHIO)
257 257 attachio(hgc);
258 258 /* unblock the server */
@@ -91,11 +91,15 b' void setupsignalhandler(pid_t pid, pid_t'
91 91
92 92 struct sigaction sa;
93 93 memset(&sa, 0, sizeof(sa));
94
95 /* deadly signals meant to be sent to a process group:
96 * - SIGHUP: usually generated by the kernel, when termination of a
97 * process causes that process group to become orphaned
98 * - SIGINT: usually generated by the terminal */
94 99 sa.sa_handler = forwardsignaltogroup;
95 100 sa.sa_flags = SA_RESTART;
96 101 if (sigemptyset(&sa.sa_mask) < 0)
97 102 goto error;
98
99 103 if (sigaction(SIGHUP, &sa, NULL) < 0)
100 104 goto error;
101 105 if (sigaction(SIGINT, &sa, NULL) < 0)
@@ -111,6 +115,11 b' void setupsignalhandler(pid_t pid, pid_t'
111 115 sa.sa_flags = SA_RESTART;
112 116 if (sigaction(SIGWINCH, &sa, NULL) < 0)
113 117 goto error;
118 /* forward user-defined signals */
119 if (sigaction(SIGUSR1, &sa, NULL) < 0)
120 goto error;
121 if (sigaction(SIGUSR2, &sa, NULL) < 0)
122 goto error;
114 123 /* propagate job control requests to worker */
115 124 sa.sa_handler = forwardsignal;
116 125 sa.sa_flags = SA_RESTART;
@@ -168,7 +177,7 b' error:'
168 177
169 178 /* This implementation is based on hgext/pager.py (post 369741ef7253)
170 179 * Return 0 if pager is not started, or pid of the pager */
171 pid_t setuppager(const char *pagercmd)
180 pid_t setuppager(const char *pagercmd, const char *envp[])
172 181 {
173 182 assert(pagerpid == 0);
174 183 if (!pagercmd)
@@ -196,7 +205,8 b' pid_t setuppager(const char *pagercmd)'
196 205 close(pipefds[0]);
197 206 close(pipefds[1]);
198 207
199 int r = execlp("/bin/sh", "/bin/sh", "-c", pagercmd, NULL);
208 int r = execle("/bin/sh", "/bin/sh", "-c", pagercmd, NULL,
209 envp);
200 210 if (r < 0) {
201 211 abortmsgerrno("cannot start pager '%s'", pagercmd);
202 212 }
@@ -15,7 +15,7 b''
15 15 void restoresignalhandler(void);
16 16 void setupsignalhandler(pid_t pid, pid_t pgid);
17 17
18 pid_t setuppager(const char *pagercmd);
18 pid_t setuppager(const char *pagercmd, const char *envp[]);
19 19 void waitpager(void);
20 20
21 21 #endif /* PROCUTIL_H_ */
@@ -55,17 +55,15 b' except ImportError:'
55 55 import mercurial.util
56 56 import mercurial.dispatch
57 57
58 import time
59
60 58 def timer(func, title=None):
61 59 results = []
62 begin = time.time()
60 begin = mercurial.util.timer()
63 61 count = 0
64 62 while True:
65 63 ostart = os.times()
66 cstart = time.time()
64 cstart = mercurial.util.timer()
67 65 r = func()
68 cstop = time.time()
66 cstop = mercurial.util.timer()
69 67 ostop = os.times()
70 68 count += 1
71 69 a, b = ostart, ostop
@@ -12,7 +12,6 b' prints it to ``stderr`` on exit.'
12 12 '''
13 13
14 14 from __future__ import absolute_import
15 import atexit
16 15
17 16 def memusage(ui):
18 17 """Report memory usage of the current process."""
@@ -29,4 +28,4 b' def memusage(ui):'
29 28 for k, v in result.iteritems()]) + "\n")
30 29
31 30 def extsetup(ui):
32 atexit.register(memusage, ui)
31 ui.atexit(memusage, ui)
@@ -20,6 +20,7 b''
20 20
21 21 from __future__ import absolute_import
22 22 import functools
23 import gc
23 24 import os
24 25 import random
25 26 import sys
@@ -66,6 +67,16 b' def safehasattr(thing, attr):'
66 67 setattr(util, 'safehasattr', safehasattr)
67 68
68 69 # for "historical portability":
70 # define util.timer forcibly, because util.timer has been available
71 # since ae5d60bb70c9
72 if safehasattr(time, 'perf_counter'):
73 util.timer = time.perf_counter
74 elif os.name == 'nt':
75 util.timer = time.clock
76 else:
77 util.timer = time.time
78
79 # for "historical portability":
69 80 # use locally defined empty option list, if formatteropts isn't
70 81 # available, because commands.formatteropts has been available since
71 82 # 3.2 (or 7a7eed5176a4), even though formatting itself has been
@@ -164,6 +175,7 b' def gettimer(ui, opts=None):'
164 175 self.hexfunc = node.short
165 176 def __nonzero__(self):
166 177 return False
178 __bool__ = __nonzero__
167 179 def startitem(self):
168 180 pass
169 181 def data(self, **data):
@@ -189,14 +201,15 b' def stub_timer(fm, func, title=None):'
189 201 func()
190 202
191 203 def _timer(fm, func, title=None):
204 gc.collect()
192 205 results = []
193 begin = time.time()
206 begin = util.timer()
194 207 count = 0
195 208 while True:
196 209 ostart = os.times()
197 cstart = time.time()
210 cstart = util.timer()
198 211 r = func()
199 cstop = time.time()
212 cstop = util.timer()
200 213 ostop = os.times()
201 214 count += 1
202 215 a, b = ostart, ostop
@@ -993,6 +1006,26 b' def perfrevlogrevision(ui, repo, file_, '
993 1006 node = r.lookup(rev)
994 1007 rev = r.rev(node)
995 1008
1009 def getrawchunks(data, chain):
1010 start = r.start
1011 length = r.length
1012 inline = r._inline
1013 iosize = r._io.size
1014 buffer = util.buffer
1015 offset = start(chain[0])
1016
1017 chunks = []
1018 ladd = chunks.append
1019
1020 for rev in chain:
1021 chunkstart = start(rev)
1022 if inline:
1023 chunkstart += (rev + 1) * iosize
1024 chunklength = length(rev)
1025 ladd(buffer(data, chunkstart - offset, chunklength))
1026
1027 return chunks
1028
996 1029 def dodeltachain(rev):
997 1030 if not cache:
998 1031 r.clearcaches()
@@ -1003,24 +1036,15 b' def perfrevlogrevision(ui, repo, file_, '
1003 1036 r.clearcaches()
1004 1037 r._chunkraw(chain[0], chain[-1])
1005 1038
1006 def dodecompress(data, chain):
1039 def dorawchunks(data, chain):
1007 1040 if not cache:
1008 1041 r.clearcaches()
1009
1010 start = r.start
1011 length = r.length
1012 inline = r._inline
1013 iosize = r._io.size
1014 buffer = util.buffer
1015 offset = start(chain[0])
1042 getrawchunks(data, chain)
1016 1043
1017 for rev in chain:
1018 chunkstart = start(rev)
1019 if inline:
1020 chunkstart += (rev + 1) * iosize
1021 chunklength = length(rev)
1022 b = buffer(data, chunkstart - offset, chunklength)
1023 r.decompress(b)
1044 def dodecompress(chunks):
1045 decomp = r.decompress
1046 for chunk in chunks:
1047 decomp(chunk)
1024 1048
1025 1049 def dopatch(text, bins):
1026 1050 if not cache:
@@ -1039,6 +1063,7 b' def perfrevlogrevision(ui, repo, file_, '
1039 1063
1040 1064 chain = r._deltachain(rev)[0]
1041 1065 data = r._chunkraw(chain[0], chain[-1])[1]
1066 rawchunks = getrawchunks(data, chain)
1042 1067 bins = r._chunks(chain)
1043 1068 text = str(bins[0])
1044 1069 bins = bins[1:]
@@ -1048,7 +1073,8 b' def perfrevlogrevision(ui, repo, file_, '
1048 1073 (lambda: dorevision(), 'full'),
1049 1074 (lambda: dodeltachain(rev), 'deltachain'),
1050 1075 (lambda: doread(chain), 'read'),
1051 (lambda: dodecompress(data, chain), 'decompress'),
1076 (lambda: dorawchunks(data, chain), 'rawchunks'),
1077 (lambda: dodecompress(rawchunks), 'decompress'),
1052 1078 (lambda: dopatch(text, bins), 'patch'),
1053 1079 (lambda: dohash(text), 'hash'),
1054 1080 ]
@@ -1256,6 +1282,17 b' def perflrucache(ui, size=4, gets=10000,'
1256 1282 timer(fn, title=title)
1257 1283 fm.end()
1258 1284
1285 @command('perfwrite', formatteropts)
1286 def perfwrite(ui, repo, **opts):
1287 """microbenchmark ui.write
1288 """
1289 timer, fm = gettimer(ui, opts)
1290 def write():
1291 for i in range(100000):
1292 ui.write(('Testing write performance\n'))
1293 timer(write)
1294 fm.end()
1295
1259 1296 def uisetup(ui):
1260 1297 if (util.safehasattr(cmdutil, 'openrevlog') and
1261 1298 not util.safehasattr(commands, 'debugrevlogopts')):
@@ -1,6 +1,66 b''
1 1 Version History
2 2 ===============
3 3
4 0.8.1 (released 2017-04-08)
5 ---------------------------
6
7 * Add #includes so compilation on OS X and BSDs works (#20).
8
9 0.8.0 (released 2017-03-08)
10 ---------------------------
11
12 * CompressionParameters now has a estimated_compression_context_size() method.
13 zstd.estimate_compression_context_size() is now deprecated and slated for
14 removal.
15 * Implemented a lot of fuzzing tests.
16 * CompressionParameters instances now perform extra validation by calling
17 ZSTD_checkCParams() at construction time.
18 * multi_compress_to_buffer() API for compressing multiple inputs as a
19 single operation, as efficiently as possible.
20 * ZSTD_CStream instances are now used across multiple operations on
21 ZstdCompressor instances, resulting in much better performance for
22 APIs that do streaming.
23 * ZSTD_DStream instances are now used across multiple operations on
24 ZstdDecompressor instances, resulting in much better performance for
25 APIs that do streaming.
26 * train_dictionary() now releases the GIL.
27 * Support for training dictionaries using the COVER algorithm.
28 * multi_decompress_to_buffer() API for decompressing multiple frames as a
29 single operation, as efficiently as possible.
30 * Support for multi-threaded compression.
31 * Disable deprecation warnings when compiling CFFI module.
32 * Fixed memory leak in train_dictionary().
33 * Removed DictParameters type.
34 * train_dictionary() now accepts keyword arguments instead of a
35 DictParameters instance to control dictionary generation.
36
37 0.7.0 (released 2017-02-07)
38 ---------------------------
39
40 * Added zstd.get_frame_parameters() to obtain info about a zstd frame.
41 * Added ZstdDecompressor.decompress_content_dict_chain() for efficient
42 decompression of *content-only dictionary chains*.
43 * CFFI module fully implemented; all tests run against both C extension and
44 CFFI implementation.
45 * Vendored version of zstd updated to 1.1.3.
46 * Use ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference()
47 to avoid extra memory allocation of dict data.
48 * Add function names to error messages (by using ":name" in PyArg_Parse*
49 functions).
50 * Reuse decompression context across operations. Previously, we created a
51 new ZSTD_DCtx for each decompress(). This was measured to slow down
52 decompression by 40-200MB/s. The API guarantees say ZstdDecompressor
53 is not thread safe. So we reuse the ZSTD_DCtx across operations and make
54 things faster in the process.
55 * ZstdCompressor.write_to()'s compress() and flush() methods now return number
56 of bytes written.
57 * ZstdDecompressor.write_to()'s write() method now returns the number of bytes
58 written to the underlying output object.
59 * CompressionParameters instances now expose their values as attributes.
60 * CompressionParameters instances no longer are subscriptable nor behave
61 as tuples (backwards incompatible). Use attributes to obtain values.
62 * DictParameters instances now expose their values as attributes.
63
4 64 0.6.0 (released 2017-01-14)
5 65 ---------------------------
6 66
This diff has been collapsed as it changes many lines, (720 lines changed) Show them Hide them
@@ -4,10 +4,11 b' python-zstandard'
4 4
5 5 This project provides Python bindings for interfacing with the
6 6 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
7 and CFFI interface is provided.
7 and CFFI interface are provided.
8 8
9 The primary goal of the extension is to provide a Pythonic interface to
10 the underlying C API. This means exposing most of the features and flexibility
9 The primary goal of the project is to provide a rich interface to the
10 underlying C API through a Pythonic interface while not sacrificing
11 performance. This means exposing most of the features and flexibility
11 12 of the C API while not sacrificing usability or safety that Python provides.
12 13
13 14 The canonical home for this project is
@@ -19,15 +20,24 b' State of Project'
19 20 ================
20 21
21 22 The project is officially in beta state. The author is reasonably satisfied
22 with the current API and that functionality works as advertised. There
23 may be some backwards incompatible changes before 1.0. Though the author
24 does not intend to make any major changes to the Python API.
23 that functionality works as advertised. **There will be some backwards
24 incompatible changes before 1.0, probably in the 0.9 release.** This may
25 involve renaming the main module from *zstd* to *zstandard* and renaming
26 various types and methods. Pin the package version to prevent unwanted
27 breakage when this change occurs!
28
29 This project is vendored and distributed with Mercurial 4.1, where it is
30 used in a production capacity.
25 31
26 32 There is continuous integration for Python versions 2.6, 2.7, and 3.3+
27 33 on Linux x86_x64 and Windows x86 and x86_64. The author is reasonably
28 34 confident the extension is stable and works as advertised on these
29 35 platforms.
30 36
37 The CFFI bindings are mostly feature complete. Where a feature is implemented
38 in CFFI, unit tests run against both C extension and CFFI implementation to
39 ensure behavior parity.
40
31 41 Expected Changes
32 42 ----------------
33 43
@@ -43,19 +53,27 b" sizes using zstd's preferred defaults)."
43 53 There should be an API that accepts an object that conforms to the buffer
44 54 interface and returns an iterator over compressed or decompressed output.
45 55
56 There should be an API that exposes an ``io.RawIOBase`` interface to
57 compressor and decompressor streams, like how ``gzip.GzipFile`` from
58 the standard library works (issue 13).
59
46 60 The author is on the fence as to whether to support the extremely
47 61 low level compression and decompression APIs. It could be useful to
48 62 support compression without the framing headers. But the author doesn't
49 63 believe it a high priority at this time.
50 64
51 The CFFI bindings are half-baked and need to be finished.
65 There will likely be a refactoring of the module names. Currently,
66 ``zstd`` is a C extension and ``zstd_cffi`` is the CFFI interface.
67 This means that all code for the C extension must be implemented in
68 C. ``zstd`` may be converted to a Python module so code can be reused
69 between CFFI and C and so not all code in the C extension has to be C.
52 70
53 71 Requirements
54 72 ============
55 73
56 This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, and 3.5
57 on common platforms (Linux, Windows, and OS X). Only x86_64 is currently
58 well-tested as an architecture.
74 This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, 3.5, and
75 3.6 on common platforms (Linux, Windows, and OS X). Only x86_64 is
76 currently well-tested as an architecture.
59 77
60 78 Installing
61 79 ==========
@@ -106,15 +124,11 b' compressing at several hundred MB/s and '
106 124 Comparison to Other Python Bindings
107 125 ===================================
108 126
109 https://pypi.python.org/pypi/zstd is an alternative Python binding to
127 https://pypi.python.org/pypi/zstd is an alternate Python binding to
110 128 Zstandard. At the time this was written, the latest release of that
111 package (1.0.0.2) had the following significant differences from this package:
112
113 * It only exposes the simple API for compression and decompression operations.
114 This extension exposes the streaming API, dictionary training, and more.
115 * It adds a custom framing header to compressed data and there is no way to
116 disable it. This means that data produced with that module cannot be used by
117 other Zstandard implementations.
129 package (1.1.2) only exposed the simple APIs for compression and decompression.
130 This package exposes much more of the zstd API, including streaming and
131 dictionary compression. This package also has CFFI support.
118 132
119 133 Bundling of Zstandard Source Code
120 134 =================================
@@ -151,10 +165,13 b' A Tox configuration is present to test a'
151 165 $ tox
152 166
153 167 Tests use the ``hypothesis`` Python package to perform fuzzing. If you
154 don't have it, those tests won't run.
168 don't have it, those tests won't run. Since the fuzzing tests take longer
169 to execute than normal tests, you'll need to opt in to running them by
170 setting the ``ZSTD_SLOW_TESTS`` environment variable. This is set
171 automatically when using ``tox``.
155 172
156 There is also an experimental CFFI module. You need the ``cffi`` Python
157 package installed to build and test that.
173 The ``cffi`` Python package needs to be installed in order to build the CFFI
174 bindings. If it isn't present, the CFFI bindings won't be built.
158 175
159 176 To create a virtualenv with all development dependencies, do something
160 177 like the following::
@@ -171,8 +188,16 b' like the following::'
171 188 API
172 189 ===
173 190
174 The compiled C extension provides a ``zstd`` Python module. This module
175 exposes the following interfaces.
191 The compiled C extension provides a ``zstd`` Python module. The CFFI
192 bindings provide a ``zstd_cffi`` module. Both provide an identical API
193 interface. The types, functions, and attributes exposed by these modules
194 are documented in the sections below.
195
196 .. note::
197
198 The documentation in this section makes references to various zstd
199 concepts and functionality. The ``Concepts`` section below explains
200 these concepts in more detail.
176 201
177 202 ZstdCompressor
178 203 --------------
@@ -208,6 +233,14 b' write_dict_id'
208 233 Whether to write the dictionary ID into the compressed data.
209 234 Defaults to True. The dictionary ID is only written if a dictionary
210 235 is being used.
236 threads
237 Enables and sets the number of threads to use for multi-threaded compression
238 operations. Defaults to 0, which means to use single-threaded compression.
239 Negative values will resolve to the number of logical CPUs in the system.
240 Read below for more info on multi-threaded compression. This argument only
241 controls thread count for operations that operate on individual pieces of
242 data. APIs that spawn multiple threads for working on multiple pieces of
243 data have their own ``threads`` argument.
211 244
212 245 Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
213 246 instances can be called from multiple Python threads simultaneously. In other
@@ -221,6 +254,8 b' Simple API'
221 254 cctx = zstd.ZstdCompressor()
222 255 compressed = cctx.compress(b'data to compress')
223 256
257 The ``data`` argument can be any object that implements the *buffer protocol*.
258
224 259 Unless ``compression_params`` or ``dict_data`` are passed to the
225 260 ``ZstdCompressor``, each invocation of ``compress()`` will calculate the
226 261 optimal compression parameters for the configured compression ``level`` and
@@ -260,6 +295,10 b' A ``flush()`` method can be called to ev'
260 295 compressor's internal state into the output object. This may result in 0 or
261 296 more ``write()`` calls to the output object.
262 297
298 Both ``write()`` and ``flush()`` return the number of bytes written to the
299 object's ``write()``. In many cases, small inputs do not accumulate enough
300 data to cause a write and ``write()`` will return ``0``.
301
263 302 If the size of the data being fed to this streaming compressor is known,
264 303 you can declare it before compression begins::
265 304
@@ -406,6 +445,42 b' the compressor::'
406 445 data = cobj.compress(b'foobar')
407 446 data = cobj.flush()
408 447
448 Batch Compression API
449 ^^^^^^^^^^^^^^^^^^^^^
450
451 (Experimental. Not yet supported in CFFI bindings.)
452
453 ``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple
454 inputs as a single operation.
455
456 Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a
457 ``BufferWithSegments``, or a list containing byte like objects. Each element of
458 the container will be compressed individually using the configured parameters
459 on the ``ZstdCompressor`` instance.
460
461 The ``threads`` argument controls how many threads to use for compression. The
462 default is ``0`` which means to use a single thread. Negative values use the
463 number of logical CPUs in the machine.
464
465 The function returns a ``BufferWithSegmentsCollection``. This type represents
466 N discrete memory allocations, each holding 1 or more compressed frames.
467
468 Output data is written to shared memory buffers. This means that unlike
469 regular Python objects, a reference to *any* object within the collection
470 keeps the shared buffer and therefore memory backing it alive. This can have
471 undesirable effects on process memory usage.
472
473 The API and behavior of this function is experimental and will likely change.
474 Known deficiencies include:
475
476 * If asked to use multiple threads, it will always spawn that many threads,
477 even if the input is too small to use them. It should automatically lower
478 the thread count when the extra threads would just add overhead.
479 * The buffer allocation strategy is fixed. There is room to make it dynamic,
480 perhaps even to allow one output buffer per input, facilitating a variation
481 of the API to return a list without the adverse effects of shared memory
482 buffers.
483
409 484 ZstdDecompressor
410 485 ----------------
411 486
@@ -476,6 +551,10 b' This behaves similarly to ``zstd.ZstdCom'
476 551 the decompressor by calling ``write(data)`` and decompressed output is written
477 552 to the output object by calling its ``write(data)`` method.
478 553
554 Calls to ``write()`` will return the number of bytes written to the output
555 object. Not all inputs will result in bytes being written, so return values
556 of ``0`` are possible.
557
479 558 The size of chunks being ``write()`` to the destination can be specified::
480 559
481 560 dctx = zstd.ZstdDecompressor()
@@ -576,64 +655,155 b' Here is how this API should be used::'
576 655 data = dobj.decompress(compressed_chunk_0)
577 656 data = dobj.decompress(compressed_chunk_1)
578 657
579 Choosing an API
580 ---------------
658 Batch Decompression API
659 ^^^^^^^^^^^^^^^^^^^^^^^
660
661 (Experimental. Not yet supported in CFFI bindings.)
662
663 ``multi_decompress_to_buffer()`` performs decompression of multiple
664 frames as a single operation and returns a ``BufferWithSegmentsCollection``
665 containing decompressed data for all inputs.
581 666
582 Various forms of compression and decompression APIs are provided because each
583 are suitable for different use cases.
667 Compressed frames can be passed to the function as a ``BufferWithSegments``,
668 a ``BufferWithSegmentsCollection``, or as a list containing objects that
669 conform to the buffer protocol. For best performance, pass a
670 ``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as
671 minimal input validation will be done for that type. If calling from
672 Python (as opposed to C), constructing one of these instances may add
673 overhead cancelling out the performance overhead of validation for list
674 inputs.
675
676 The decompressed size of each frame must be discoverable. It can either be
677 embedded within the zstd frame (``write_content_size=True`` argument to
678 ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument.
679
680 The ``decompressed_sizes`` argument is an object conforming to the buffer
681 protocol which holds an array of 64-bit unsigned integers in the machine's
682 native format defining the decompressed sizes of each frame. If this argument
683 is passed, it avoids having to scan each frame for its decompressed size.
684 This frame scanning can add noticeable overhead in some scenarios.
584 685
585 The simple/one-shot APIs are useful for small data, when the decompressed
586 data size is known (either recorded in the zstd frame header via
587 ``write_content_size`` or known via an out-of-band mechanism, such as a file
588 size).
686 The ``threads`` argument controls the number of threads to use to perform
687 decompression operations. The default (``0``) or the value ``1`` means to
688 use a single thread. Negative values use the number of logical CPUs in the
689 machine.
690
691 .. note::
692
693 It is possible to pass a ``mmap.mmap()`` instance into this function by
694 wrapping it with a ``BufferWithSegments`` instance (which will define the
695 offsets of frames within the memory mapped region).
696
697 This function is logically equivalent to performing ``dctx.decompress()``
698 on each input frame and returning the result.
589 699
590 A limitation of the simple APIs is that input or output data must fit in memory.
591 And unless using advanced tricks with Python *buffer objects*, both input and
592 output must fit in memory simultaneously.
700 This function exists to perform decompression on multiple frames as fast
701 as possible by having as little overhead as possible. Since decompression is
702 performed as a single operation and since the decompressed output is stored in
703 a single buffer, extra memory allocations, Python objects, and Python function
704 calls are avoided. This is ideal for scenarios where callers need to access
705 decompressed data for multiple frames.
593 706
594 Another limitation is that compression or decompression is performed as a single
595 operation. So if you feed large input, it could take a long time for the
596 function to return.
707 Currently, the implementation always spawns multiple threads when requested,
708 even if the amount of work to do is small. In the future, it will be smarter
709 about avoiding threads and their associated overhead when the amount of
710 work to do is small.
711
712 Content-Only Dictionary Chain Decompression
713 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
714
715 ``decompress_content_dict_chain(frames)`` performs decompression of a list of
716 zstd frames produced using chained *content-only* dictionary compression. Such
717 a list of frames is produced by compressing discrete inputs where each
718 non-initial input is compressed with a *content-only* dictionary consisting
719 of the content of the previous input.
720
721 For example, say you have the following inputs::
597 722
598 The streaming APIs do not have the limitations of the simple API. The cost to
599 this is they are more complex to use than a single function call.
723 inputs = [b'input 1', b'input 2', b'input 3']
724
725 The zstd frame chain consists of:
726
727 1. ``b'input 1'`` compressed in standalone/discrete mode
728 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *content-only* dictionary
729 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *content-only* dictionary
730
731 Each zstd frame **must** have the content size written.
732
733 The following Python code can be used to produce a *content-only dictionary
734 chain*::
600 735
601 The streaming APIs put the caller in control of compression and decompression
602 behavior by allowing them to directly control either the input or output side
603 of the operation.
736 def make_chain(inputs):
737 frames = []
738
739 # First frame is compressed in standalone/discrete mode.
740 zctx = zstd.ZstdCompressor(write_content_size=True)
741 frames.append(zctx.compress(inputs[0]))
604 742
605 With the streaming input APIs, the caller feeds data into the compressor or
606 decompressor as they see fit. Output data will only be written after the caller
607 has explicitly written data.
743 # Subsequent frames use the previous fulltext as a content-only dictionary
744 for i, raw in enumerate(inputs[1:]):
745 dict_data = zstd.ZstdCompressionDict(inputs[i])
746 zctx = zstd.ZstdCompressor(write_content_size=True, dict_data=dict_data)
747 frames.append(zctx.compress(raw))
748
749 return frames
750
751 ``decompress_content_dict_chain()`` returns the uncompressed data of the last
752 element in the input chain.
608 753
609 With the streaming output APIs, the caller consumes output from the compressor
610 or decompressor as they see fit. The compressor or decompressor will only
611 consume data from the source when the caller is ready to receive it.
754 It is possible to implement *content-only dictionary chain* decompression
755 on top of other Python APIs. However, this function will likely be significantly
756 faster, especially for long input chains, as it avoids the overhead of
757 instantiating and passing around intermediate objects between C and Python.
758
759 Multi-Threaded Compression
760 --------------------------
761
762 ``ZstdCompressor`` accepts a ``threads`` argument that controls the number
763 of threads to use for compression. The way this works is that input is split
764 into segments and each segment is fed into a worker pool for compression. Once
765 a segment is compressed, it is flushed/appended to the output.
766
767 The segment size for multi-threaded compression is chosen from the window size
768 of the compressor. This is derived from the ``window_log`` attribute of a
769 ``CompressionParameters`` instance. By default, segment sizes are in the 1+MB
770 range.
612 771
613 One end of the streaming APIs involves a file-like object that must
614 ``write()`` output data or ``read()`` input data. Depending on what the
615 backing storage for these objects is, those operations may not complete quickly.
616 For example, when streaming compressed data to a file, the ``write()`` into
617 a streaming compressor could result in a ``write()`` to the filesystem, which
618 may take a long time to finish due to slow I/O on the filesystem. So, there
619 may be overhead in streaming APIs beyond the compression and decompression
620 operations.
772 If multi-threaded compression is requested and the input is smaller than the
773 configured segment size, only a single compression thread will be used. If the
774 input is smaller than the segment size multiplied by the thread pool size or
775 if data cannot be delivered to the compressor fast enough, not all requested
776 compressor threads may be active simultaneously.
777
778 Compared to non-multi-threaded compression, multi-threaded compression has
779 higher per-operation overhead. This includes extra memory operations,
780 thread creation, lock acquisition, etc.
781
782 Due to the nature of multi-threaded compression using *N* compression
783 *states*, the output from multi-threaded compression will likely be larger
784 than non-multi-threaded compression. The difference is usually small. But
785 there is a CPU/wall time versus size trade off that may warrant investigation.
786
787 Output from multi-threaded compression does not require any special handling
788 on the decompression side. In other words, any zstd decompressor should be able
789 to consume data produced with multi-threaded compression.
621 790
622 791 Dictionary Creation and Management
623 792 ----------------------------------
624 793
625 Zstandard allows *dictionaries* to be used when compressing and
626 decompressing data. The idea is that if you are compressing a lot of similar
627 data, you can precompute common properties of that data (such as recurring
628 byte sequences) to achieve better compression ratios.
629
630 In Python, compression dictionaries are represented as the
631 ``ZstdCompressionDict`` type.
794 Compression dictionaries are represented as the ``ZstdCompressionDict`` type.
632 795
633 796 Instances can be constructed from bytes::
634 797
635 798 dict_data = zstd.ZstdCompressionDict(data)
636 799
800 It is possible to construct a dictionary from *any* data. Unless the
801 data begins with a magic header, the dictionary will be treated as
802 *content-only*. *Content-only* dictionaries allow compression operations
803 that follow to reference raw data within the content. For one use of
804 *content-only* dictionaries, see
805 ``ZstdDecompressor.decompress_content_dict_chain()``.
806
637 807 More interestingly, instances can be created by *training* on sample data::
638 808
639 809 dict_data = zstd.train_dictionary(size, samples)
@@ -673,6 +843,88 b' a ``ZstdCompressionDict`` later) via ``a'
673 843 dict_data = zstd.train_dictionary(size, samples)
674 844 raw_data = dict_data.as_bytes()
675 845
846 The following named arguments to ``train_dictionary`` can also be used
847 to further control dictionary generation.
848
849 selectivity
850 Integer selectivity level. Default is 9. Larger values yield more data in
851 dictionary.
852 level
853 Integer compression level. Default is 6.
854 dict_id
855 Integer dictionary ID for the produced dictionary. Default is 0, which
856 means to use a random value.
857 notifications
858 Controls writing of informational messages to ``stderr``. ``0`` (the
859 default) means to write nothing. ``1`` writes errors. ``2`` writes
860 progression info. ``3`` writes more details. And ``4`` writes all info.
861
862 Cover Dictionaries
863 ^^^^^^^^^^^^^^^^^^
864
865 An alternate dictionary training mechanism named *cover* is also available.
866 More details about this training mechanism are available in the paper
867 *Effective Construction of Relative Lempel-Ziv Dictionaries* (authors:
868 Liao, Petri, Moffat, Wirth).
869
870 To use this mechanism, use ``zstd.train_cover_dictionary()`` instead of
871 ``zstd.train_dictionary()``. The function behaves nearly the same except
872 its arguments are different and the returned dictionary will contain ``k``
873 and ``d`` attributes reflecting the parameters to the cover algorithm.
874
875 .. note::
876
877 The ``k`` and ``d`` attributes are only populated on dictionary
878 instances created by this function. If a ``ZstdCompressionDict`` is
879 constructed from raw bytes data, the ``k`` and ``d`` attributes will
880 be ``0``.
881
882 The segment and dmer size parameters to the cover algorithm can either be
883 specified manually or you can ask ``train_cover_dictionary()`` to try
884 multiple values and pick the best one, where *best* means the smallest
885 compressed data size.
886
887 In manual mode, the ``k`` and ``d`` arguments must be specified or a
888 ``ZstdError`` will be raised.
889
890 In automatic mode (triggered by specifying ``optimize=True``), ``k``
891 and ``d`` are optional. If a value isn't specified, then default values for
892 both are tested. The ``steps`` argument can control the number of steps
893 through ``k`` values. The ``level`` argument defines the compression level
894 that will be used when testing the compressed size. And ``threads`` can
895 specify the number of threads to use for concurrent operation.
896
897 This function takes the following arguments:
898
899 dict_size
900 Target size in bytes of the dictionary to generate.
901 samples
902 A list of bytes holding samples the dictionary will be trained from.
903 k
904 Parameter to cover algorithm defining the segment size. A reasonable range
905 is [16, 2048+].
906 d
907 Parameter to cover algorithm defining the dmer size. A reasonable range is
908 [6, 16]. ``d`` must be less than or equal to ``k``.
909 dict_id
910 Integer dictionary ID for the produced dictionary. Default is 0, which uses
911 a random value.
912 optimize
913 When true, test dictionary generation with multiple parameters.
914 level
915 Integer target compression level when testing compression with
916 ``optimize=True``. Default is 1.
917 steps
918 Number of steps through ``k`` values to perform when ``optimize=True``.
919 Default is 32.
920 threads
921 Number of threads to use when ``optimize=True``. Default is 0, which means
922 to use a single thread. A negative value can be specified to use as many
923 threads as there are detected logical CPUs.
924 notifications
925 Controls writing of informational messages to ``stderr``. See the
926 documentation for ``train_dictionary()`` for more.
927
676 928 Explicit Compression Parameters
677 929 -------------------------------
678 930
@@ -700,19 +952,57 b' You can then configure a compressor to u'
700 952
701 953 cctx = zstd.ZstdCompressor(compression_params=params)
702 954
703 The members of the ``CompressionParameters`` tuple are as follows::
955 The members/attributes of ``CompressionParameters`` instances are as follows::
704 956
705 * 0 - Window log
706 * 1 - Chain log
707 * 2 - Hash log
708 * 3 - Search log
709 * 4 - Search length
710 * 5 - Target length
711 * 6 - Strategy (one of the ``zstd.STRATEGY_`` constants)
957 * window_log
958 * chain_log
959 * hash_log
960 * search_log
961 * search_length
962 * target_length
963 * strategy
964
965 This is the order the arguments are passed to the constructor if not using
966 named arguments.
712 967
713 968 You'll need to read the Zstandard documentation for what these parameters
714 969 do.
715 970
971 Frame Inspection
972 ----------------
973
974 Data emitted from zstd compression is encapsulated in a *frame*. This frame
975 begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
976 the frame in more detail. For more info, see
977 https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
978
979 ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
980 instance and returns a ``FrameParameters`` object describing the frame.
981
982 Depending on which fields are present in the frame and their values, the
983 length of the frame parameters varies. If insufficient bytes are passed
984 in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
985 frame parameters can be parsed, pass in at least 18 bytes.
986
987 ``FrameParameters`` instances have the following attributes:
988
989 content_size
990 Integer size of original, uncompressed content. This will be ``0`` if the
991 original content size isn't written to the frame (controlled with the
992 ``write_content_size`` argument to ``ZstdCompressor``) or if the input
993 content size was ``0``.
994
995 window_size
996 Integer size of maximum back-reference distance in compressed data.
997
998 dict_id
999 Integer of dictionary ID used for compression. ``0`` if no dictionary
1000 ID was used or if the dictionary ID was ``0``.
1001
1002 has_checksum
1003 Bool indicating whether a 4 byte content checksum is stored at the end
1004 of the frame.
1005
716 1006 Misc Functionality
717 1007 ------------------
718 1008
@@ -776,19 +1066,293 b' TARGETLENGTH_MIN'
776 1066 TARGETLENGTH_MAX
777 1067 Maximum value for compression parameter
778 1068 STRATEGY_FAST
779 Compression strategory
1069 Compression strategy
780 1070 STRATEGY_DFAST
781 Compression strategory
1071 Compression strategy
782 1072 STRATEGY_GREEDY
783 Compression strategory
1073 Compression strategy
784 1074 STRATEGY_LAZY
785 Compression strategory
1075 Compression strategy
786 1076 STRATEGY_LAZY2
787 Compression strategory
1077 Compression strategy
788 1078 STRATEGY_BTLAZY2
789 Compression strategory
1079 Compression strategy
790 1080 STRATEGY_BTOPT
791 Compression strategory
1081 Compression strategy
1082
1083 Performance Considerations
1084 --------------------------
1085
1086 The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
1087 persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
1088 or ``ZstdDecompressor`` instance for multiple operations is faster than
1089 instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
1090 operation. The differences are magnified as the size of data decreases. For
1091 example, the difference between *context* reuse and non-reuse for 100,000
1092 100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
1093 whereas 10 1,000,000 byte inputs will be more similar in speed (because the
1094 time spent doing compression dwarfs time spent creating new *contexts*).
1095
1096 Buffer Types
1097 ------------
1098
1099 The API exposes a handful of custom types for interfacing with memory buffers.
1100 The primary goal of these types is to facilitate efficient multi-object
1101 operations.
1102
1103 The essential idea is to have a single memory allocation provide backing
1104 storage for multiple logical objects. This has 2 main advantages: fewer
1105 allocations and optimal memory access patterns. This avoids having to allocate
1106 a Python object for each logical object and furthermore ensures that access of
1107 data for objects can be sequential (read: fast) in memory.
1108
1109 BufferWithSegments
1110 ^^^^^^^^^^^^^^^^^^
1111
1112 The ``BufferWithSegments`` type represents a memory buffer containing N
1113 discrete items of known lengths (segments). It is essentially a fixed size
1114 memory address and an array of 2-tuples of ``(offset, length)`` 64-bit
1115 unsigned native endian integers defining the byte offset and length of each
1116 segment within the buffer.
1117
1118 Instances behave like containers.
1119
1120 ``len()`` returns the number of segments within the instance.
1121
1122 ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an
1123 individual segment within the backing buffer. That returned object references
1124 (not copies) memory. This means that iterating all objects doesn't copy
1125 data within the buffer.
1126
1127 The ``.size`` attribute contains the total size in bytes of the backing
1128 buffer.
1129
1130 Instances conform to the buffer protocol. So a reference to the backing bytes
1131 can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also
1132 be obtained via ``.tobytes()``.
1133
1134 The ``.segments`` attribute exposes the array of ``(offset, length)`` for
1135 segments within the buffer. It is a ``BufferSegments`` type.
1136
1137 BufferSegment
1138 ^^^^^^^^^^^^^
1139
1140 The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``.
1141 It is essentially a reference to N bytes within a ``BufferWithSegments``.
1142
1143 ``len()`` returns the length of the segment in bytes.
1144
1145 ``.offset`` contains the byte offset of this segment within its parent
1146 ``BufferWithSegments`` instance.
1147
1148 The object conforms to the buffer protocol. ``.tobytes()`` can be called to
1149 obtain a ``bytes`` instance with a copy of the backing bytes.
1150
1151 BufferSegments
1152 ^^^^^^^^^^^^^^
1153
1154 This type represents an array of ``(offset, length)`` integers defining segments
1155 within a ``BufferWithSegments``.
1156
1157 The array members are 64-bit unsigned integers using host/native bit order.
1158
1159 Instances conform to the buffer protocol.
1160
1161 BufferWithSegmentsCollection
1162 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1163
1164 The ``BufferWithSegmentsCollection`` type represents a virtual spanning view
1165 of multiple ``BufferWithSegments`` instances.
1166
1167 Instances are constructed from 1 or more ``BufferWithSegments`` instances. The
1168 resulting object behaves like an ordered sequence whose members are the
1169 segments within each ``BufferWithSegments``.
1170
1171 ``len()`` returns the number of segments within all ``BufferWithSegments``
1172 instances.
1173
1174 ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at
1175 that offset as if all ``BufferWithSegments`` instances were a single
1176 entity.
1177
1178 If the object is composed of 2 ``BufferWithSegments`` instances with the
1179 first having 2 segments and the second have 3 segments, then ``b[0]``
1180 and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``,
1181 and ``b[4]`` access segments from the second.
1182
1183 Choosing an API
1184 ===============
1185
1186 There are multiple APIs for performing compression and decompression. This is
1187 because different applications have different needs and the library wants to
1188 facilitate optimal use in as many use cases as possible.
1189
1190 From a high-level, APIs are divided into *one-shot* and *streaming*. See
1191 the ``Concepts`` section for a description of how these are different at
1192 the C layer.
1193
1194 The *one-shot* APIs are useful for small data, where the input or output
1195 size is known. (The size can come from a buffer length, file size, or
1196 stored in the zstd frame header.) A limitation of the *one-shot* APIs is that
1197 input and output must fit in memory simultaneously. For say a 4 GB input,
1198 this is often not feasible.
1199
1200 The *one-shot* APIs also perform all work as a single operation. So, if you
1201 feed it large input, it could take a long time for the function to return.
1202
1203 The streaming APIs do not have the limitations of the simple API. But the
1204 price you pay for this flexibility is that they are more complex than a
1205 single function call.
1206
1207 The streaming APIs put the caller in control of compression and decompression
1208 behavior by allowing them to directly control either the input or output side
1209 of the operation.
1210
1211 With the *streaming input*, *compressor*, and *decompressor* APIs, the caller
1212 has full control over the input to the compression or decompression stream.
1213 They can directly choose when new data is operated on.
1214
1215 With the *streaming output* APIs, the caller has full control over the output
1216 of the compression or decompression stream. It can choose when to receive
1217 new data.
1218
1219 When using the *streaming* APIs that operate on file-like or stream objects,
1220 it is important to consider what happens in that object when I/O is requested.
1221 There is potential for long pauses as data is read or written from the
1222 underlying stream (say from interacting with a filesystem or network). This
1223 could add considerable overhead.
1224
1225 Concepts
1226 ========
1227
1228 It is important to have a basic understanding of how Zstandard works in order
1229 to optimally use this library. In addition, there are some low-level Python
1230 concepts that are worth explaining to aid understanding. This section aims to
1231 provide that knowledge.
1232
1233 Zstandard Frames and Compression Format
1234 ---------------------------------------
1235
1236 Compressed zstandard data almost always exists within a container called a
1237 *frame*. (For the technically curious, see the
1238 `specification <https://github.com/facebook/zstd/blob/3bee41a70eaf343fbcae3637b3f6edbe52f35ed8/doc/zstd_compression_format.md>`_.)
1239
1240 The frame contains a header and optional trailer. The header contains a
1241 magic number to self-identify as a zstd frame and a description of the
1242 compressed data that follows.
1243
1244 Among other things, the frame *optionally* contains the size of the
1245 decompressed data the frame represents, a 32-bit checksum of the
1246 decompressed data (to facilitate verification during decompression),
1247 and the ID of the dictionary used to compress the data.
1248
1249 Storing the original content size in the frame (``write_content_size=True``
1250 to ``ZstdCompressor``) is important for performance in some scenarios. Having
1251 the decompressed size stored there (or storing it elsewhere) allows
1252 decompression to perform a single memory allocation that is exactly sized to
1253 the output. This is faster than continuously growing a memory buffer to hold
1254 output.
1255
1256 Compression and Decompression Contexts
1257 --------------------------------------
1258
1259 In order to perform a compression or decompression operation with the zstd
1260 C API, you need what's called a *context*. A context essentially holds
1261 configuration and state for a compression or decompression operation. For
1262 example, a compression context holds the configured compression level.
1263
1264 Contexts can be reused for multiple operations. Since creating and
1265 destroying contexts is not free, there are performance advantages to
1266 reusing contexts.
1267
1268 The ``ZstdCompressor`` and ``ZstdDecompressor`` types are essentially
1269 wrappers around these contexts in the zstd C API.
1270
1271 One-shot And Streaming Operations
1272 ---------------------------------
1273
1274 A compression or decompression operation can either be performed as a
1275 single *one-shot* operation or as a continuous *streaming* operation.
1276
1277 In one-shot mode (the *simple* APIs provided by the Python interface),
1278 **all** input is handed to the compressor or decompressor as a single buffer
1279 and **all** output is returned as a single buffer.
1280
1281 In streaming mode, input is delivered to the compressor or decompressor as
1282 a series of chunks via multiple function calls. Likewise, output is
1283 obtained in chunks as well.
1284
1285 Streaming operations require an additional *stream* object to be created
1286 to track the operation. These are logical extensions of *context*
1287 instances.
1288
1289 There are advantages and disadvantages to each mode of operation. There
1290 are scenarios where certain modes can't be used. See the
1291 ``Choosing an API`` section for more.
1292
1293 Dictionaries
1294 ------------
1295
1296 A compression *dictionary* is essentially data used to seed the compressor
1297 state so it can achieve better compression. The idea is that if you are
1298 compressing a lot of similar pieces of data (e.g. JSON documents or anything
1299 sharing similar structure), then you can find common patterns across multiple
1300 objects and then leverage those common patterns during compression and
1301 decompression operations to achieve better compression ratios.
1302
1303 Dictionary compression is generally only useful for small inputs - data no
1304 larger than a few kilobytes. The upper bound on this range is highly dependent
1305 on the input data and the dictionary.
1306
1307 Python Buffer Protocol
1308 ----------------------
1309
1310 Many functions in the library operate on objects that implement Python's
1311 `buffer protocol <https://docs.python.org/3.6/c-api/buffer.html>`_.
1312
1313 The *buffer protocol* is an internal implementation detail of a Python
1314 type that allows instances of that type (objects) to be exposed as a raw
1315 pointer (or buffer) in the C API. In other words, it allows objects to be
1316 exposed as an array of bytes.
1317
1318 From the perspective of the C API, objects implementing the *buffer protocol*
1319 all look the same: they are just a pointer to a memory address of a defined
1320 length. This allows the C API to be largely type agnostic when accessing their
1321 data. This allows custom types to be passed in without first converting them
1322 to a specific type.
1323
1324 Many Python types implement the buffer protocol. These include ``bytes``
1325 (``str`` on Python 2), ``bytearray``, ``array.array``, ``io.BytesIO``,
1326 ``mmap.mmap``, and ``memoryview``.
1327
1328 ``python-zstandard`` APIs that accept objects conforming to the buffer
1329 protocol require that the buffer is *C contiguous* and has a single
1330 dimension (``ndim==1``). This is usually the case. An example of where it
1331 is not is a Numpy matrix type.
1332
1333 Requiring Output Sizes for Non-Streaming Decompression APIs
1334 -----------------------------------------------------------
1335
1336 Non-streaming decompression APIs require that either the output size is
1337 explicitly defined (either in the zstd frame header or passed into the
1338 function) or that a max output size is specified. This restriction is for
1339 your safety.
1340
1341 The *one-shot* decompression APIs store the decompressed result in a
1342 single buffer. This means that a buffer needs to be pre-allocated to hold
1343 the result. If the decompressed size is not known, then there is no universal
1344 good default size to use. Any default will fail or will be highly sub-optimal
1345 in some scenarios (it will either be too small or will put stress on the
1346 memory allocator to allocate a too large block).
1347
1348 A *helpful* API may retry decompression with buffers of increasing size.
1349 While useful, there are obvious performance disadvantages, namely redoing
1350 decompression N times until it works. In addition, there is a security
1351 concern. Say the input came from highly compressible data, like 1 GB of the
1352 same byte value. The output size could be several orders of magnitude larger
1353 input size. An input of <100KB could decompress to >1GB. Without a bounds
1354 restriction on the decompressed size, certain inputs could exhaust all system
1355 memory. That's not good and is why the maximum output size is limited.
792 1356
793 1357 Note on Zstandard's *Experimental* API
794 1358 ======================================
@@ -11,45 +11,48 b''
11 11 extern PyObject* ZstdError;
12 12
13 13 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
14 static char *kwlist[] = { "dict_size", "samples", "parameters", NULL };
14 static char* kwlist[] = {
15 "dict_size",
16 "samples",
17 "selectivity",
18 "level",
19 "notifications",
20 "dict_id",
21 NULL
22 };
15 23 size_t capacity;
16 24 PyObject* samples;
17 25 Py_ssize_t samplesLen;
18 PyObject* parameters = NULL;
26 unsigned selectivity = 0;
27 int level = 0;
28 unsigned notifications = 0;
29 unsigned dictID = 0;
19 30 ZDICT_params_t zparams;
20 31 Py_ssize_t sampleIndex;
21 32 Py_ssize_t sampleSize;
22 33 PyObject* sampleItem;
23 34 size_t zresult;
24 void* sampleBuffer;
35 void* sampleBuffer = NULL;
25 36 void* sampleOffset;
26 37 size_t samplesSize = 0;
27 size_t* sampleSizes;
28 void* dict;
29 ZstdCompressionDict* result;
38 size_t* sampleSizes = NULL;
39 void* dict = NULL;
40 ZstdCompressionDict* result = NULL;
30 41
31 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!", kwlist,
42 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IiII:train_dictionary",
43 kwlist,
32 44 &capacity,
33 45 &PyList_Type, &samples,
34 (PyObject*)&DictParametersType, &parameters)) {
46 &selectivity, &level, &notifications, &dictID)) {
35 47 return NULL;
36 48 }
37 49
38 /* Validate parameters first since it is easiest. */
39 zparams.selectivityLevel = 0;
40 zparams.compressionLevel = 0;
41 zparams.notificationLevel = 0;
42 zparams.dictID = 0;
43 zparams.reserved[0] = 0;
44 zparams.reserved[1] = 0;
50 memset(&zparams, 0, sizeof(zparams));
45 51
46 if (parameters) {
47 /* TODO validate data ranges */
48 zparams.selectivityLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 0));
49 zparams.compressionLevel = PyLong_AsLong(PyTuple_GetItem(parameters, 1));
50 zparams.notificationLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 2));
51 zparams.dictID = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 3));
52 }
52 zparams.selectivityLevel = selectivity;
53 zparams.compressionLevel = level;
54 zparams.notificationLevel = notifications;
55 zparams.dictID = dictID;
53 56
54 57 /* Figure out the size of the raw samples */
55 58 samplesLen = PyList_Size(samples);
@@ -57,7 +60,6 b' ZstdCompressionDict* train_dictionary(Py'
57 60 sampleItem = PyList_GetItem(samples, sampleIndex);
58 61 if (!PyBytes_Check(sampleItem)) {
59 62 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
60 /* TODO probably need to perform DECREF here */
61 63 return NULL;
62 64 }
63 65 samplesSize += PyBytes_GET_SIZE(sampleItem);
@@ -68,13 +70,12 b' ZstdCompressionDict* train_dictionary(Py'
68 70 sampleBuffer = PyMem_Malloc(samplesSize);
69 71 if (!sampleBuffer) {
70 72 PyErr_NoMemory();
71 return NULL;
73 goto finally;
72 74 }
73 75 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
74 76 if (!sampleSizes) {
75 PyMem_Free(sampleBuffer);
76 77 PyErr_NoMemory();
77 return NULL;
78 goto finally;
78 79 }
79 80
80 81 sampleOffset = sampleBuffer;
@@ -89,33 +90,168 b' ZstdCompressionDict* train_dictionary(Py'
89 90
90 91 dict = PyMem_Malloc(capacity);
91 92 if (!dict) {
92 PyMem_Free(sampleSizes);
93 PyMem_Free(sampleBuffer);
94 93 PyErr_NoMemory();
95 return NULL;
94 goto finally;
96 95 }
97 96
97 /* TODO consider using dup2() to redirect zstd's stderr writing to a buffer */
98 Py_BEGIN_ALLOW_THREADS
98 99 zresult = ZDICT_trainFromBuffer_advanced(dict, capacity,
99 100 sampleBuffer, sampleSizes, (unsigned int)samplesLen,
100 101 zparams);
102 Py_END_ALLOW_THREADS
101 103 if (ZDICT_isError(zresult)) {
102 104 PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult));
103 105 PyMem_Free(dict);
104 PyMem_Free(sampleSizes);
105 PyMem_Free(sampleBuffer);
106 return NULL;
106 goto finally;
107 107 }
108 108
109 109 result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
110 110 if (!result) {
111 return NULL;
111 goto finally;
112 112 }
113 113
114 114 result->dictData = dict;
115 115 result->dictSize = zresult;
116 result->d = 0;
117 result->k = 0;
118
119 finally:
120 PyMem_Free(sampleBuffer);
121 PyMem_Free(sampleSizes);
122
116 123 return result;
117 124 }
118 125
126 ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
127 static char* kwlist[] = {
128 "dict_size",
129 "samples",
130 "k",
131 "d",
132 "notifications",
133 "dict_id",
134 "level",
135 "optimize",
136 "steps",
137 "threads",
138 NULL
139 };
140
141 size_t capacity;
142 PyObject* samples;
143 unsigned k = 0;
144 unsigned d = 0;
145 unsigned notifications = 0;
146 unsigned dictID = 0;
147 int level = 0;
148 PyObject* optimize = NULL;
149 unsigned steps = 0;
150 int threads = 0;
151 COVER_params_t params;
152 Py_ssize_t samplesLen;
153 Py_ssize_t i;
154 size_t samplesSize = 0;
155 void* sampleBuffer = NULL;
156 size_t* sampleSizes = NULL;
157 void* sampleOffset;
158 Py_ssize_t sampleSize;
159 void* dict = NULL;
160 size_t zresult;
161 ZstdCompressionDict* result = NULL;
162
163 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIiOIi:train_cover_dictionary",
164 kwlist, &capacity, &PyList_Type, &samples,
165 &k, &d, &notifications, &dictID, &level, &optimize, &steps, &threads)) {
166 return NULL;
167 }
168
169 if (threads < 0) {
170 threads = cpu_count();
171 }
172
173 memset(&params, 0, sizeof(params));
174 params.k = k;
175 params.d = d;
176 params.steps = steps;
177 params.nbThreads = threads;
178 params.notificationLevel = notifications;
179 params.dictID = dictID;
180 params.compressionLevel = level;
181
182 /* Figure out total size of input samples. */
183 samplesLen = PyList_Size(samples);
184 for (i = 0; i < samplesLen; i++) {
185 PyObject* sampleItem = PyList_GET_ITEM(samples, i);
186
187 if (!PyBytes_Check(sampleItem)) {
188 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
189 return NULL;
190 }
191 samplesSize += PyBytes_GET_SIZE(sampleItem);
192 }
193
194 sampleBuffer = PyMem_Malloc(samplesSize);
195 if (!sampleBuffer) {
196 PyErr_NoMemory();
197 goto finally;
198 }
199
200 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
201 if (!sampleSizes) {
202 PyErr_NoMemory();
203 goto finally;
204 }
205
206 sampleOffset = sampleBuffer;
207 for (i = 0; i < samplesLen; i++) {
208 PyObject* sampleItem = PyList_GET_ITEM(samples, i);
209 sampleSize = PyBytes_GET_SIZE(sampleItem);
210 sampleSizes[i] = sampleSize;
211 memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
212 sampleOffset = (char*)sampleOffset + sampleSize;
213 }
214
215 dict = PyMem_Malloc(capacity);
216 if (!dict) {
217 PyErr_NoMemory();
218 goto finally;
219 }
220
221 Py_BEGIN_ALLOW_THREADS
222 if (optimize && PyObject_IsTrue(optimize)) {
223 zresult = COVER_optimizeTrainFromBuffer(dict, capacity,
224 sampleBuffer, sampleSizes, (unsigned)samplesLen, &params);
225 }
226 else {
227 zresult = COVER_trainFromBuffer(dict, capacity,
228 sampleBuffer, sampleSizes, (unsigned)samplesLen, params);
229 }
230 Py_END_ALLOW_THREADS
231
232 if (ZDICT_isError(zresult)) {
233 PyMem_Free(dict);
234 PyErr_Format(ZstdError, "cannot train dict: %s", ZDICT_getErrorName(zresult));
235 goto finally;
236 }
237
238 result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
239 if (!result) {
240 PyMem_Free(dict);
241 goto finally;
242 }
243
244 result->dictData = dict;
245 result->dictSize = zresult;
246 result->d = params.d;
247 result->k = params.k;
248
249 finally:
250 PyMem_Free(sampleBuffer);
251 PyMem_Free(sampleSizes);
252
253 return result;
254 }
119 255
120 256 PyDoc_STRVAR(ZstdCompressionDict__doc__,
121 257 "ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
@@ -133,10 +269,11 b' static int ZstdCompressionDict_init(Zstd'
133 269 self->dictSize = 0;
134 270
135 271 #if PY_MAJOR_VERSION >= 3
136 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
272 if (!PyArg_ParseTuple(args, "y#:ZstdCompressionDict",
137 273 #else
138 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
274 if (!PyArg_ParseTuple(args, "s#:ZstdCompressionDict",
139 275 #endif
276 &source, &sourceSize)) {
140 277 return -1;
141 278 }
142 279
@@ -179,6 +316,14 b' static PyMethodDef ZstdCompressionDict_m'
179 316 { NULL, NULL }
180 317 };
181 318
319 static PyMemberDef ZstdCompressionDict_members[] = {
320 { "k", T_UINT, offsetof(ZstdCompressionDict, k), READONLY,
321 "segment size" },
322 { "d", T_UINT, offsetof(ZstdCompressionDict, d), READONLY,
323 "dmer size" },
324 { NULL }
325 };
326
182 327 static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) {
183 328 return self->dictSize;
184 329 }
@@ -223,7 +368,7 b' PyTypeObject ZstdCompressionDictType = {'
223 368 0, /* tp_iter */
224 369 0, /* tp_iternext */
225 370 ZstdCompressionDict_methods, /* tp_methods */
226 0, /* tp_members */
371 ZstdCompressionDict_members, /* tp_members */
227 372 0, /* tp_getset */
228 373 0, /* tp_base */
229 374 0, /* tp_dict */
@@ -25,7 +25,8 b' CompressionParametersObject* get_compres'
25 25 ZSTD_compressionParameters params;
26 26 CompressionParametersObject* result;
27 27
28 if (!PyArg_ParseTuple(args, "i|Kn", &compressionLevel, &sourceSize, &dictSize)) {
28 if (!PyArg_ParseTuple(args, "i|Kn:get_compression_parameters",
29 &compressionLevel, &sourceSize, &dictSize)) {
29 30 return NULL;
30 31 }
31 32
@@ -47,12 +48,108 b' CompressionParametersObject* get_compres'
47 48 return result;
48 49 }
49 50
51 static int CompressionParameters_init(CompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
52 static char* kwlist[] = {
53 "window_log",
54 "chain_log",
55 "hash_log",
56 "search_log",
57 "search_length",
58 "target_length",
59 "strategy",
60 NULL
61 };
62
63 unsigned windowLog;
64 unsigned chainLog;
65 unsigned hashLog;
66 unsigned searchLog;
67 unsigned searchLength;
68 unsigned targetLength;
69 unsigned strategy;
70 ZSTD_compressionParameters params;
71 size_t zresult;
72
73 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "IIIIIII:CompressionParameters",
74 kwlist, &windowLog, &chainLog, &hashLog, &searchLog, &searchLength,
75 &targetLength, &strategy)) {
76 return -1;
77 }
78
79 if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) {
80 PyErr_SetString(PyExc_ValueError, "invalid window log value");
81 return -1;
82 }
83
84 if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) {
85 PyErr_SetString(PyExc_ValueError, "invalid chain log value");
86 return -1;
87 }
88
89 if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) {
90 PyErr_SetString(PyExc_ValueError, "invalid hash log value");
91 return -1;
92 }
93
94 if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) {
95 PyErr_SetString(PyExc_ValueError, "invalid search log value");
96 return -1;
97 }
98
99 if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) {
100 PyErr_SetString(PyExc_ValueError, "invalid search length value");
101 return -1;
102 }
103
104 if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) {
105 PyErr_SetString(PyExc_ValueError, "invalid target length value");
106 return -1;
107 }
108
109 if (strategy < ZSTD_fast || strategy > ZSTD_btopt) {
110 PyErr_SetString(PyExc_ValueError, "invalid strategy value");
111 return -1;
112 }
113
114 self->windowLog = windowLog;
115 self->chainLog = chainLog;
116 self->hashLog = hashLog;
117 self->searchLog = searchLog;
118 self->searchLength = searchLength;
119 self->targetLength = targetLength;
120 self->strategy = strategy;
121
122 ztopy_compression_parameters(self, &params);
123 zresult = ZSTD_checkCParams(params);
124
125 if (ZSTD_isError(zresult)) {
126 PyErr_Format(PyExc_ValueError, "invalid compression parameters: %s",
127 ZSTD_getErrorName(zresult));
128 return -1;
129 }
130
131 return 0;
132 }
133
134 PyDoc_STRVAR(CompressionParameters_estimated_compression_context_size__doc__,
135 "Estimate the size in bytes of a compression context for compression parameters\n"
136 );
137
138 PyObject* CompressionParameters_estimated_compression_context_size(CompressionParametersObject* self) {
139 ZSTD_compressionParameters params;
140
141 ztopy_compression_parameters(self, &params);
142
143 return PyLong_FromSize_t(ZSTD_estimateCCtxSize(params));
144 }
145
50 146 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) {
51 147 CompressionParametersObject* params;
52 148 ZSTD_compressionParameters zparams;
53 149 PyObject* result;
54 150
55 if (!PyArg_ParseTuple(args, "O!", &CompressionParametersType, &params)) {
151 if (!PyArg_ParseTuple(args, "O!:estimate_compression_context_size",
152 &CompressionParametersType, &params)) {
56 153 return NULL;
57 154 }
58 155
@@ -64,113 +161,43 b' PyObject* estimate_compression_context_s'
64 161 PyDoc_STRVAR(CompressionParameters__doc__,
65 162 "CompressionParameters: low-level control over zstd compression");
66 163
67 static PyObject* CompressionParameters_new(PyTypeObject* subtype, PyObject* args, PyObject* kwargs) {
68 CompressionParametersObject* self;
69 unsigned windowLog;
70 unsigned chainLog;
71 unsigned hashLog;
72 unsigned searchLog;
73 unsigned searchLength;
74 unsigned targetLength;
75 unsigned strategy;
76
77 if (!PyArg_ParseTuple(args, "IIIIIII", &windowLog, &chainLog, &hashLog, &searchLog,
78 &searchLength, &targetLength, &strategy)) {
79 return NULL;
80 }
81
82 if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) {
83 PyErr_SetString(PyExc_ValueError, "invalid window log value");
84 return NULL;
85 }
86
87 if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) {
88 PyErr_SetString(PyExc_ValueError, "invalid chain log value");
89 return NULL;
90 }
91
92 if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) {
93 PyErr_SetString(PyExc_ValueError, "invalid hash log value");
94 return NULL;
95 }
96
97 if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) {
98 PyErr_SetString(PyExc_ValueError, "invalid search log value");
99 return NULL;
100 }
101
102 if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) {
103 PyErr_SetString(PyExc_ValueError, "invalid search length value");
104 return NULL;
105 }
106
107 if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) {
108 PyErr_SetString(PyExc_ValueError, "invalid target length value");
109 return NULL;
110 }
111
112 if (strategy < ZSTD_fast || strategy > ZSTD_btopt) {
113 PyErr_SetString(PyExc_ValueError, "invalid strategy value");
114 return NULL;
115 }
116
117 self = (CompressionParametersObject*)subtype->tp_alloc(subtype, 1);
118 if (!self) {
119 return NULL;
120 }
121
122 self->windowLog = windowLog;
123 self->chainLog = chainLog;
124 self->hashLog = hashLog;
125 self->searchLog = searchLog;
126 self->searchLength = searchLength;
127 self->targetLength = targetLength;
128 self->strategy = strategy;
129
130 return (PyObject*)self;
131 }
132
133 164 static void CompressionParameters_dealloc(PyObject* self) {
134 165 PyObject_Del(self);
135 166 }
136 167
137 static Py_ssize_t CompressionParameters_length(PyObject* self) {
138 return 7;
139 }
140
141 static PyObject* CompressionParameters_item(PyObject* o, Py_ssize_t i) {
142 CompressionParametersObject* self = (CompressionParametersObject*)o;
168 static PyMethodDef CompressionParameters_methods[] = {
169 {
170 "estimated_compression_context_size",
171 (PyCFunction)CompressionParameters_estimated_compression_context_size,
172 METH_NOARGS,
173 CompressionParameters_estimated_compression_context_size__doc__
174 },
175 { NULL, NULL }
176 };
143 177
144 switch (i) {
145 case 0:
146 return PyLong_FromLong(self->windowLog);
147 case 1:
148 return PyLong_FromLong(self->chainLog);
149 case 2:
150 return PyLong_FromLong(self->hashLog);
151 case 3:
152 return PyLong_FromLong(self->searchLog);
153 case 4:
154 return PyLong_FromLong(self->searchLength);
155 case 5:
156 return PyLong_FromLong(self->targetLength);
157 case 6:
158 return PyLong_FromLong(self->strategy);
159 default:
160 PyErr_SetString(PyExc_IndexError, "index out of range");
161 return NULL;
162 }
163 }
164
165 static PySequenceMethods CompressionParameters_sq = {
166 CompressionParameters_length, /* sq_length */
167 0, /* sq_concat */
168 0, /* sq_repeat */
169 CompressionParameters_item, /* sq_item */
170 0, /* sq_ass_item */
171 0, /* sq_contains */
172 0, /* sq_inplace_concat */
173 0 /* sq_inplace_repeat */
178 static PyMemberDef CompressionParameters_members[] = {
179 { "window_log", T_UINT,
180 offsetof(CompressionParametersObject, windowLog), READONLY,
181 "window log" },
182 { "chain_log", T_UINT,
183 offsetof(CompressionParametersObject, chainLog), READONLY,
184 "chain log" },
185 { "hash_log", T_UINT,
186 offsetof(CompressionParametersObject, hashLog), READONLY,
187 "hash log" },
188 { "search_log", T_UINT,
189 offsetof(CompressionParametersObject, searchLog), READONLY,
190 "search log" },
191 { "search_length", T_UINT,
192 offsetof(CompressionParametersObject, searchLength), READONLY,
193 "search length" },
194 { "target_length", T_UINT,
195 offsetof(CompressionParametersObject, targetLength), READONLY,
196 "target length" },
197 { "strategy", T_INT,
198 offsetof(CompressionParametersObject, strategy), READONLY,
199 "strategy" },
200 { NULL }
174 201 };
175 202
176 203 PyTypeObject CompressionParametersType = {
@@ -185,7 +212,7 b' PyTypeObject CompressionParametersType ='
185 212 0, /* tp_compare */
186 213 0, /* tp_repr */
187 214 0, /* tp_as_number */
188 &CompressionParameters_sq, /* tp_as_sequence */
215 0, /* tp_as_sequence */
189 216 0, /* tp_as_mapping */
190 217 0, /* tp_hash */
191 218 0, /* tp_call */
@@ -193,7 +220,7 b' PyTypeObject CompressionParametersType ='
193 220 0, /* tp_getattro */
194 221 0, /* tp_setattro */
195 222 0, /* tp_as_buffer */
196 Py_TPFLAGS_DEFAULT, /* tp_flags */
223 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
197 224 CompressionParameters__doc__, /* tp_doc */
198 225 0, /* tp_traverse */
199 226 0, /* tp_clear */
@@ -201,17 +228,17 b' PyTypeObject CompressionParametersType ='
201 228 0, /* tp_weaklistoffset */
202 229 0, /* tp_iter */
203 230 0, /* tp_iternext */
204 0, /* tp_methods */
205 0, /* tp_members */
231 CompressionParameters_methods, /* tp_methods */
232 CompressionParameters_members, /* tp_members */
206 233 0, /* tp_getset */
207 234 0, /* tp_base */
208 235 0, /* tp_dict */
209 236 0, /* tp_descr_get */
210 237 0, /* tp_descr_set */
211 238 0, /* tp_dictoffset */
212 0, /* tp_init */
239 (initproc)CompressionParameters_init, /* tp_init */
213 240 0, /* tp_alloc */
214 CompressionParameters_new, /* tp_new */
241 PyType_GenericNew, /* tp_new */
215 242 };
216 243
217 244 void compressionparams_module_init(PyObject* mod) {
@@ -220,7 +247,7 b' void compressionparams_module_init(PyObj'
220 247 return;
221 248 }
222 249
223 Py_IncRef((PyObject*)&CompressionParametersType);
250 Py_INCREF(&CompressionParametersType);
224 251 PyModule_AddObject(mod, "CompressionParameters",
225 252 (PyObject*)&CompressionParametersType);
226 253 }
@@ -18,11 +18,6 b' static void ZstdCompressionWriter_deallo'
18 18 Py_XDECREF(self->compressor);
19 19 Py_XDECREF(self->writer);
20 20
21 if (self->cstream) {
22 ZSTD_freeCStream(self->cstream);
23 self->cstream = NULL;
24 }
25
26 21 PyObject_Del(self);
27 22 }
28 23
@@ -32,9 +27,15 b' static PyObject* ZstdCompressionWriter_e'
32 27 return NULL;
33 28 }
34 29
35 self->cstream = CStream_from_ZstdCompressor(self->compressor, self->sourceSize);
36 if (!self->cstream) {
37 return NULL;
30 if (self->compressor->mtcctx) {
31 if (init_mtcstream(self->compressor, self->sourceSize)) {
32 return NULL;
33 }
34 }
35 else {
36 if (0 != init_cstream(self->compressor, self->sourceSize)) {
37 return NULL;
38 }
38 39 }
39 40
40 41 self->entered = 1;
@@ -52,14 +53,14 b' static PyObject* ZstdCompressionWriter_e'
52 53 ZSTD_outBuffer output;
53 54 PyObject* res;
54 55
55 if (!PyArg_ParseTuple(args, "OOO", &exc_type, &exc_value, &exc_tb)) {
56 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
56 57 return NULL;
57 58 }
58 59
59 60 self->entered = 0;
60 61
61 if (self->cstream && exc_type == Py_None && exc_value == Py_None &&
62 exc_tb == Py_None) {
62 if ((self->compressor->cstream || self->compressor->mtcctx) && exc_type == Py_None
63 && exc_value == Py_None && exc_tb == Py_None) {
63 64
64 65 output.dst = PyMem_Malloc(self->outSize);
65 66 if (!output.dst) {
@@ -69,7 +70,12 b' static PyObject* ZstdCompressionWriter_e'
69 70 output.pos = 0;
70 71
71 72 while (1) {
72 zresult = ZSTD_endStream(self->cstream, &output);
73 if (self->compressor->mtcctx) {
74 zresult = ZSTDMT_endStream(self->compressor->mtcctx, &output);
75 }
76 else {
77 zresult = ZSTD_endStream(self->compressor->cstream, &output);
78 }
73 79 if (ZSTD_isError(zresult)) {
74 80 PyErr_Format(ZstdError, "error ending compression stream: %s",
75 81 ZSTD_getErrorName(zresult));
@@ -95,21 +101,19 b' static PyObject* ZstdCompressionWriter_e'
95 101 }
96 102
97 103 PyMem_Free(output.dst);
98 ZSTD_freeCStream(self->cstream);
99 self->cstream = NULL;
100 104 }
101 105
102 106 Py_RETURN_FALSE;
103 107 }
104 108
105 109 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
106 if (!self->cstream) {
110 if (!self->compressor->cstream) {
107 111 PyErr_SetString(ZstdError, "cannot determine size of an inactive compressor; "
108 112 "call when a context manager is active");
109 113 return NULL;
110 114 }
111 115
112 return PyLong_FromSize_t(ZSTD_sizeof_CStream(self->cstream));
116 return PyLong_FromSize_t(ZSTD_sizeof_CStream(self->compressor->cstream));
113 117 }
114 118
115 119 static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args) {
@@ -119,11 +123,12 b' static PyObject* ZstdCompressionWriter_w'
119 123 ZSTD_inBuffer input;
120 124 ZSTD_outBuffer output;
121 125 PyObject* res;
126 Py_ssize_t totalWrite = 0;
122 127
123 128 #if PY_MAJOR_VERSION >= 3
124 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
129 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
125 130 #else
126 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
131 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
127 132 #endif
128 133 return NULL;
129 134 }
@@ -146,7 +151,13 b' static PyObject* ZstdCompressionWriter_w'
146 151
147 152 while ((ssize_t)input.pos < sourceSize) {
148 153 Py_BEGIN_ALLOW_THREADS
149 zresult = ZSTD_compressStream(self->cstream, &output, &input);
154 if (self->compressor->mtcctx) {
155 zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
156 &output, &input);
157 }
158 else {
159 zresult = ZSTD_compressStream(self->compressor->cstream, &output, &input);
160 }
150 161 Py_END_ALLOW_THREADS
151 162
152 163 if (ZSTD_isError(zresult)) {
@@ -164,20 +175,21 b' static PyObject* ZstdCompressionWriter_w'
164 175 #endif
165 176 output.dst, output.pos);
166 177 Py_XDECREF(res);
178 totalWrite += output.pos;
167 179 }
168 180 output.pos = 0;
169 181 }
170 182
171 183 PyMem_Free(output.dst);
172 184
173 /* TODO return bytes written */
174 Py_RETURN_NONE;
185 return PyLong_FromSsize_t(totalWrite);
175 186 }
176 187
177 188 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
178 189 size_t zresult;
179 190 ZSTD_outBuffer output;
180 191 PyObject* res;
192 Py_ssize_t totalWrite = 0;
181 193
182 194 if (!self->entered) {
183 195 PyErr_SetString(ZstdError, "flush must be called from an active context manager");
@@ -193,7 +205,12 b' static PyObject* ZstdCompressionWriter_f'
193 205
194 206 while (1) {
195 207 Py_BEGIN_ALLOW_THREADS
196 zresult = ZSTD_flushStream(self->cstream, &output);
208 if (self->compressor->mtcctx) {
209 zresult = ZSTDMT_flushStream(self->compressor->mtcctx, &output);
210 }
211 else {
212 zresult = ZSTD_flushStream(self->compressor->cstream, &output);
213 }
197 214 Py_END_ALLOW_THREADS
198 215
199 216 if (ZSTD_isError(zresult)) {
@@ -215,14 +232,14 b' static PyObject* ZstdCompressionWriter_f'
215 232 #endif
216 233 output.dst, output.pos);
217 234 Py_XDECREF(res);
235 totalWrite += output.pos;
218 236 }
219 237 output.pos = 0;
220 238 }
221 239
222 240 PyMem_Free(output.dst);
223 241
224 /* TODO return bytes written */
225 Py_RETURN_NONE;
242 return PyLong_FromSsize_t(totalWrite);
226 243 }
227 244
228 245 static PyMethodDef ZstdCompressionWriter_methods[] = {
@@ -18,11 +18,6 b' static void ZstdCompressionObj_dealloc(Z'
18 18 PyMem_Free(self->output.dst);
19 19 self->output.dst = NULL;
20 20
21 if (self->cstream) {
22 ZSTD_freeCStream(self->cstream);
23 self->cstream = NULL;
24 }
25
26 21 Py_XDECREF(self->compressor);
27 22
28 23 PyObject_Del(self);
@@ -42,9 +37,9 b' static PyObject* ZstdCompressionObj_comp'
42 37 }
43 38
44 39 #if PY_MAJOR_VERSION >= 3
45 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
40 if (!PyArg_ParseTuple(args, "y#:compress", &source, &sourceSize)) {
46 41 #else
47 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
42 if (!PyArg_ParseTuple(args, "s#:compress", &source, &sourceSize)) {
48 43 #endif
49 44 return NULL;
50 45 }
@@ -55,7 +50,13 b' static PyObject* ZstdCompressionObj_comp'
55 50
56 51 while ((ssize_t)input.pos < sourceSize) {
57 52 Py_BEGIN_ALLOW_THREADS
58 zresult = ZSTD_compressStream(self->cstream, &self->output, &input);
53 if (self->compressor->mtcctx) {
54 zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
55 &self->output, &input);
56 }
57 else {
58 zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, &input);
59 }
59 60 Py_END_ALLOW_THREADS
60 61
61 62 if (ZSTD_isError(zresult)) {
@@ -98,7 +99,7 b' static PyObject* ZstdCompressionObj_flus'
98 99 PyObject* result = NULL;
99 100 Py_ssize_t resultSize = 0;
100 101
101 if (!PyArg_ParseTuple(args, "|i", &flushMode)) {
102 if (!PyArg_ParseTuple(args, "|i:flush", &flushMode)) {
102 103 return NULL;
103 104 }
104 105
@@ -118,7 +119,12 b' static PyObject* ZstdCompressionObj_flus'
118 119 /* The output buffer is of size ZSTD_CStreamOutSize(), which is
119 120 guaranteed to hold a full block. */
120 121 Py_BEGIN_ALLOW_THREADS
121 zresult = ZSTD_flushStream(self->cstream, &self->output);
122 if (self->compressor->mtcctx) {
123 zresult = ZSTDMT_flushStream(self->compressor->mtcctx, &self->output);
124 }
125 else {
126 zresult = ZSTD_flushStream(self->compressor->cstream, &self->output);
127 }
122 128 Py_END_ALLOW_THREADS
123 129
124 130 if (ZSTD_isError(zresult)) {
@@ -150,7 +156,12 b' static PyObject* ZstdCompressionObj_flus'
150 156 self->finished = 1;
151 157
152 158 while (1) {
153 zresult = ZSTD_endStream(self->cstream, &self->output);
159 if (self->compressor->mtcctx) {
160 zresult = ZSTDMT_endStream(self->compressor->mtcctx, &self->output);
161 }
162 else {
163 zresult = ZSTD_endStream(self->compressor->cstream, &self->output);
164 }
154 165 if (ZSTD_isError(zresult)) {
155 166 PyErr_Format(ZstdError, "error ending compression stream: %s",
156 167 ZSTD_getErrorName(zresult));
@@ -182,9 +193,6 b' static PyObject* ZstdCompressionObj_flus'
182 193 }
183 194 }
184 195
185 ZSTD_freeCStream(self->cstream);
186 self->cstream = NULL;
187
188 196 if (result) {
189 197 return result;
190 198 }
This diff has been collapsed as it changes many lines, (979 lines changed) Show them Hide them
@@ -7,16 +7,21 b''
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 #include "pool.h"
10 11
11 12 extern PyObject* ZstdError;
12 13
13 int populate_cdict(ZstdCompressor* compressor, void* dictData, size_t dictSize, ZSTD_parameters* zparams) {
14 int populate_cdict(ZstdCompressor* compressor, ZSTD_parameters* zparams) {
14 15 ZSTD_customMem zmem;
15 assert(!compressor->cdict);
16
17 if (compressor->cdict || !compressor->dict || !compressor->dict->dictData) {
18 return 0;
19 }
20
16 21 Py_BEGIN_ALLOW_THREADS
17 22 memset(&zmem, 0, sizeof(zmem));
18 23 compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData,
19 compressor->dict->dictSize, *zparams, zmem);
24 compressor->dict->dictSize, 1, *zparams, zmem);
20 25 Py_END_ALLOW_THREADS
21 26
22 27 if (!compressor->cdict) {
@@ -28,22 +33,32 b' int populate_cdict(ZstdCompressor* compr'
28 33 }
29 34
30 35 /**
31 * Initialize a zstd CStream from a ZstdCompressor instance.
32 *
33 * Returns a ZSTD_CStream on success or NULL on failure. If NULL, a Python
34 * exception will be set.
35 */
36 ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize) {
37 ZSTD_CStream* cstream;
36 * Ensure the ZSTD_CStream on a ZstdCompressor instance is initialized.
37 *
38 * Returns 0 on success. Other value on failure. Will set a Python exception
39 * on failure.
40 */
41 int init_cstream(ZstdCompressor* compressor, unsigned long long sourceSize) {
38 42 ZSTD_parameters zparams;
39 43 void* dictData = NULL;
40 44 size_t dictSize = 0;
41 45 size_t zresult;
42 46
43 cstream = ZSTD_createCStream();
44 if (!cstream) {
45 PyErr_SetString(ZstdError, "cannot create CStream");
46 return NULL;
47 if (compressor->cstream) {
48 zresult = ZSTD_resetCStream(compressor->cstream, sourceSize);
49 if (ZSTD_isError(zresult)) {
50 PyErr_Format(ZstdError, "could not reset CStream: %s",
51 ZSTD_getErrorName(zresult));
52 return -1;
53 }
54
55 return 0;
56 }
57
58 compressor->cstream = ZSTD_createCStream();
59 if (!compressor->cstream) {
60 PyErr_SetString(ZstdError, "could not create CStream");
61 return -1;
47 62 }
48 63
49 64 if (compressor->dict) {
@@ -63,15 +78,51 b' ZSTD_CStream* CStream_from_ZstdCompresso'
63 78
64 79 zparams.fParams = compressor->fparams;
65 80
66 zresult = ZSTD_initCStream_advanced(cstream, dictData, dictSize, zparams, sourceSize);
81 zresult = ZSTD_initCStream_advanced(compressor->cstream, dictData, dictSize,
82 zparams, sourceSize);
67 83
68 84 if (ZSTD_isError(zresult)) {
69 ZSTD_freeCStream(cstream);
85 ZSTD_freeCStream(compressor->cstream);
86 compressor->cstream = NULL;
70 87 PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
71 return NULL;
88 return -1;
72 89 }
73 90
74 return cstream;
91 return 0;;
92 }
93
94 int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize) {
95 size_t zresult;
96 void* dictData = NULL;
97 size_t dictSize = 0;
98 ZSTD_parameters zparams;
99
100 assert(compressor->mtcctx);
101
102 if (compressor->dict) {
103 dictData = compressor->dict->dictData;
104 dictSize = compressor->dict->dictSize;
105 }
106
107 memset(&zparams, 0, sizeof(zparams));
108 if (compressor->cparams) {
109 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
110 }
111 else {
112 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize);
113 }
114
115 zparams.fParams = compressor->fparams;
116
117 zresult = ZSTDMT_initCStream_advanced(compressor->mtcctx, dictData, dictSize,
118 zparams, sourceSize);
119
120 if (ZSTD_isError(zresult)) {
121 PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult));
122 return -1;
123 }
124
125 return 0;
75 126 }
76 127
77 128 PyDoc_STRVAR(ZstdCompressor__doc__,
@@ -103,6 +154,11 b' PyDoc_STRVAR(ZstdCompressor__doc__,'
103 154 " Determines whether the dictionary ID will be written into the compressed\n"
104 155 " data. Defaults to True. Only adds content to the compressed data if\n"
105 156 " a dictionary is being used.\n"
157 "threads\n"
158 " Number of threads to use to compress data concurrently. When set,\n"
159 " compression operations are performed on multiple threads. The default\n"
160 " value (0) disables multi-threaded compression. A value of ``-1`` means to\n"
161 " set the number of threads to the number of detected logical CPUs.\n"
106 162 );
107 163
108 164 static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
@@ -113,6 +169,7 b' static int ZstdCompressor_init(ZstdCompr'
113 169 "write_checksum",
114 170 "write_content_size",
115 171 "write_dict_id",
172 "threads",
116 173 NULL
117 174 };
118 175
@@ -122,16 +179,12 b' static int ZstdCompressor_init(ZstdCompr'
122 179 PyObject* writeChecksum = NULL;
123 180 PyObject* writeContentSize = NULL;
124 181 PyObject* writeDictID = NULL;
182 int threads = 0;
125 183
126 self->cctx = NULL;
127 self->dict = NULL;
128 self->cparams = NULL;
129 self->cdict = NULL;
130
131 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO", kwlist,
132 &level, &ZstdCompressionDictType, &dict,
184 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor",
185 kwlist, &level, &ZstdCompressionDictType, &dict,
133 186 &CompressionParametersType, &params,
134 &writeChecksum, &writeContentSize, &writeDictID)) {
187 &writeChecksum, &writeContentSize, &writeDictID, &threads)) {
135 188 return -1;
136 189 }
137 190
@@ -146,12 +199,27 b' static int ZstdCompressor_init(ZstdCompr'
146 199 return -1;
147 200 }
148 201
202 if (threads < 0) {
203 threads = cpu_count();
204 }
205
206 self->threads = threads;
207
149 208 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
150 209 overhead of each compression operation. */
151 self->cctx = ZSTD_createCCtx();
152 if (!self->cctx) {
153 PyErr_NoMemory();
154 return -1;
210 if (threads) {
211 self->mtcctx = ZSTDMT_createCCtx(threads);
212 if (!self->mtcctx) {
213 PyErr_NoMemory();
214 return -1;
215 }
216 }
217 else {
218 self->cctx = ZSTD_createCCtx();
219 if (!self->cctx) {
220 PyErr_NoMemory();
221 return -1;
222 }
155 223 }
156 224
157 225 self->compressionLevel = level;
@@ -182,6 +250,11 b' static int ZstdCompressor_init(ZstdCompr'
182 250 }
183 251
184 252 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
253 if (self->cstream) {
254 ZSTD_freeCStream(self->cstream);
255 self->cstream = NULL;
256 }
257
185 258 Py_XDECREF(self->cparams);
186 259 Py_XDECREF(self->dict);
187 260
@@ -195,6 +268,11 b' static void ZstdCompressor_dealloc(ZstdC'
195 268 self->cctx = NULL;
196 269 }
197 270
271 if (self->mtcctx) {
272 ZSTDMT_freeCCtx(self->mtcctx);
273 self->mtcctx = NULL;
274 }
275
198 276 PyObject_Del(self);
199 277 }
200 278
@@ -229,7 +307,6 b' static PyObject* ZstdCompressor_copy_str'
229 307 Py_ssize_t sourceSize = 0;
230 308 size_t inSize = ZSTD_CStreamInSize();
231 309 size_t outSize = ZSTD_CStreamOutSize();
232 ZSTD_CStream* cstream;
233 310 ZSTD_inBuffer input;
234 311 ZSTD_outBuffer output;
235 312 Py_ssize_t totalRead = 0;
@@ -243,8 +320,8 b' static PyObject* ZstdCompressor_copy_str'
243 320 PyObject* totalReadPy;
244 321 PyObject* totalWritePy;
245 322
246 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk", kwlist, &source, &dest, &sourceSize,
247 &inSize, &outSize)) {
323 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk:copy_stream", kwlist,
324 &source, &dest, &sourceSize, &inSize, &outSize)) {
248 325 return NULL;
249 326 }
250 327
@@ -261,10 +338,17 b' static PyObject* ZstdCompressor_copy_str'
261 338 /* Prevent free on uninitialized memory in finally. */
262 339 output.dst = NULL;
263 340
264 cstream = CStream_from_ZstdCompressor(self, sourceSize);
265 if (!cstream) {
266 res = NULL;
267 goto finally;
341 if (self->mtcctx) {
342 if (init_mtcstream(self, sourceSize)) {
343 res = NULL;
344 goto finally;
345 }
346 }
347 else {
348 if (0 != init_cstream(self, sourceSize)) {
349 res = NULL;
350 goto finally;
351 }
268 352 }
269 353
270 354 output.dst = PyMem_Malloc(outSize);
@@ -300,7 +384,12 b' static PyObject* ZstdCompressor_copy_str'
300 384
301 385 while (input.pos < input.size) {
302 386 Py_BEGIN_ALLOW_THREADS
303 zresult = ZSTD_compressStream(cstream, &output, &input);
387 if (self->mtcctx) {
388 zresult = ZSTDMT_compressStream(self->mtcctx, &output, &input);
389 }
390 else {
391 zresult = ZSTD_compressStream(self->cstream, &output, &input);
392 }
304 393 Py_END_ALLOW_THREADS
305 394
306 395 if (ZSTD_isError(zresult)) {
@@ -325,7 +414,12 b' static PyObject* ZstdCompressor_copy_str'
325 414
326 415 /* We've finished reading. Now flush the compressor stream. */
327 416 while (1) {
328 zresult = ZSTD_endStream(cstream, &output);
417 if (self->mtcctx) {
418 zresult = ZSTDMT_endStream(self->mtcctx, &output);
419 }
420 else {
421 zresult = ZSTD_endStream(self->cstream, &output);
422 }
329 423 if (ZSTD_isError(zresult)) {
330 424 PyErr_Format(ZstdError, "error ending compression stream: %s",
331 425 ZSTD_getErrorName(zresult));
@@ -350,24 +444,17 b' static PyObject* ZstdCompressor_copy_str'
350 444 }
351 445 }
352 446
353 ZSTD_freeCStream(cstream);
354 cstream = NULL;
355
356 447 totalReadPy = PyLong_FromSsize_t(totalRead);
357 448 totalWritePy = PyLong_FromSsize_t(totalWrite);
358 449 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
359 Py_DecRef(totalReadPy);
360 Py_DecRef(totalWritePy);
450 Py_DECREF(totalReadPy);
451 Py_DECREF(totalWritePy);
361 452
362 453 finally:
363 454 if (output.dst) {
364 455 PyMem_Free(output.dst);
365 456 }
366 457
367 if (cstream) {
368 ZSTD_freeCStream(cstream);
369 }
370
371 458 return res;
372 459 }
373 460
@@ -402,14 +489,26 b' static PyObject* ZstdCompressor_compress'
402 489 ZSTD_parameters zparams;
403 490
404 491 #if PY_MAJOR_VERSION >= 3
405 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O",
492 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O:compress",
406 493 #else
407 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O",
494 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O:compress",
408 495 #endif
409 496 kwlist, &source, &sourceSize, &allowEmpty)) {
410 497 return NULL;
411 498 }
412 499
500 if (self->threads && self->dict) {
501 PyErr_SetString(ZstdError,
502 "compress() cannot be used with both dictionaries and multi-threaded compression");
503 return NULL;
504 }
505
506 if (self->threads && self->cparams) {
507 PyErr_SetString(ZstdError,
508 "compress() cannot be used with both compression parameters and multi-threaded compression");
509 return NULL;
510 }
511
413 512 /* Limitation in zstd C API doesn't let decompression side distinguish
414 513 between content size of 0 and unknown content size. This can make round
415 514 tripping via Python difficult. Until this is fixed, require a flag
@@ -456,24 +555,28 b' static PyObject* ZstdCompressor_compress'
456 555 https://github.com/facebook/zstd/issues/358 contains more info. We could
457 556 potentially add an argument somewhere to control this behavior.
458 557 */
459 if (dictData && !self->cdict) {
460 if (populate_cdict(self, dictData, dictSize, &zparams)) {
461 Py_DECREF(output);
462 return NULL;
463 }
558 if (0 != populate_cdict(self, &zparams)) {
559 Py_DECREF(output);
560 return NULL;
464 561 }
465 562
466 563 Py_BEGIN_ALLOW_THREADS
467 /* By avoiding ZSTD_compress(), we don't necessarily write out content
468 size. This means the argument to ZstdCompressor to control frame
469 parameters is honored. */
470 if (self->cdict) {
471 zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize,
472 source, sourceSize, self->cdict);
564 if (self->mtcctx) {
565 zresult = ZSTDMT_compressCCtx(self->mtcctx, dest, destSize,
566 source, sourceSize, self->compressionLevel);
473 567 }
474 568 else {
475 zresult = ZSTD_compress_advanced(self->cctx, dest, destSize,
476 source, sourceSize, dictData, dictSize, zparams);
569 /* By avoiding ZSTD_compress(), we don't necessarily write out content
570 size. This means the argument to ZstdCompressor to control frame
571 parameters is honored. */
572 if (self->cdict) {
573 zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize,
574 source, sourceSize, self->cdict);
575 }
576 else {
577 zresult = ZSTD_compress_advanced(self->cctx, dest, destSize,
578 source, sourceSize, dictData, dictSize, zparams);
579 }
477 580 }
478 581 Py_END_ALLOW_THREADS
479 582
@@ -507,19 +610,28 b' static ZstdCompressionObj* ZstdCompresso'
507 610
508 611 Py_ssize_t inSize = 0;
509 612 size_t outSize = ZSTD_CStreamOutSize();
510 ZstdCompressionObj* result = PyObject_New(ZstdCompressionObj, &ZstdCompressionObjType);
613 ZstdCompressionObj* result = NULL;
614
615 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:compressobj", kwlist, &inSize)) {
616 return NULL;
617 }
618
619 result = (ZstdCompressionObj*)PyObject_CallObject((PyObject*)&ZstdCompressionObjType, NULL);
511 620 if (!result) {
512 621 return NULL;
513 622 }
514 623
515 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &inSize)) {
516 return NULL;
624 if (self->mtcctx) {
625 if (init_mtcstream(self, inSize)) {
626 Py_DECREF(result);
627 return NULL;
628 }
517 629 }
518
519 result->cstream = CStream_from_ZstdCompressor(self, inSize);
520 if (!result->cstream) {
521 Py_DECREF(result);
522 return NULL;
630 else {
631 if (0 != init_cstream(self, inSize)) {
632 Py_DECREF(result);
633 return NULL;
634 }
523 635 }
524 636
525 637 result->output.dst = PyMem_Malloc(outSize);
@@ -529,13 +641,9 b' static ZstdCompressionObj* ZstdCompresso'
529 641 return NULL;
530 642 }
531 643 result->output.size = outSize;
532 result->output.pos = 0;
533
534 644 result->compressor = self;
535 645 Py_INCREF(result->compressor);
536 646
537 result->finished = 0;
538
539 647 return result;
540 648 }
541 649
@@ -574,24 +682,15 b' static ZstdCompressorIterator* ZstdCompr'
574 682 size_t outSize = ZSTD_CStreamOutSize();
575 683 ZstdCompressorIterator* result;
576 684
577 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk", kwlist, &reader, &sourceSize,
578 &inSize, &outSize)) {
685 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk:read_from", kwlist,
686 &reader, &sourceSize, &inSize, &outSize)) {
579 687 return NULL;
580 688 }
581 689
582 result = PyObject_New(ZstdCompressorIterator, &ZstdCompressorIteratorType);
690 result = (ZstdCompressorIterator*)PyObject_CallObject((PyObject*)&ZstdCompressorIteratorType, NULL);
583 691 if (!result) {
584 692 return NULL;
585 693 }
586
587 result->compressor = NULL;
588 result->reader = NULL;
589 result->buffer = NULL;
590 result->cstream = NULL;
591 result->input.src = NULL;
592 result->output.dst = NULL;
593 result->readResult = NULL;
594
595 694 if (PyObject_HasAttrString(reader, "read")) {
596 695 result->reader = reader;
597 696 Py_INCREF(result->reader);
@@ -608,7 +707,6 b' static ZstdCompressorIterator* ZstdCompr'
608 707 goto except;
609 708 }
610 709
611 result->bufferOffset = 0;
612 710 sourceSize = result->buffer->len;
613 711 }
614 712 else {
@@ -621,9 +719,16 b' static ZstdCompressorIterator* ZstdCompr'
621 719 Py_INCREF(result->compressor);
622 720
623 721 result->sourceSize = sourceSize;
624 result->cstream = CStream_from_ZstdCompressor(self, sourceSize);
625 if (!result->cstream) {
626 goto except;
722
723 if (self->mtcctx) {
724 if (init_mtcstream(self, sourceSize)) {
725 goto except;
726 }
727 }
728 else {
729 if (0 != init_cstream(self, sourceSize)) {
730 goto except;
731 }
627 732 }
628 733
629 734 result->inSize = inSize;
@@ -635,26 +740,12 b' static ZstdCompressorIterator* ZstdCompr'
635 740 goto except;
636 741 }
637 742 result->output.size = outSize;
638 result->output.pos = 0;
639
640 result->input.src = NULL;
641 result->input.size = 0;
642 result->input.pos = 0;
643
644 result->finishedInput = 0;
645 result->finishedOutput = 0;
646 743
647 744 goto finally;
648 745
649 746 except:
650 if (result->cstream) {
651 ZSTD_freeCStream(result->cstream);
652 result->cstream = NULL;
653 }
654
655 Py_DecRef((PyObject*)result->compressor);
656 Py_DecRef(result->reader);
657
747 Py_XDECREF(result->compressor);
748 Py_XDECREF(result->reader);
658 749 Py_DECREF(result);
659 750 result = NULL;
660 751
@@ -693,8 +784,8 b' static ZstdCompressionWriter* ZstdCompre'
693 784 Py_ssize_t sourceSize = 0;
694 785 size_t outSize = ZSTD_CStreamOutSize();
695 786
696 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk", kwlist, &writer, &sourceSize,
697 &outSize)) {
787 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk:write_to", kwlist,
788 &writer, &sourceSize, &outSize)) {
698 789 return NULL;
699 790 }
700 791
@@ -703,7 +794,7 b' static ZstdCompressionWriter* ZstdCompre'
703 794 return NULL;
704 795 }
705 796
706 result = PyObject_New(ZstdCompressionWriter, &ZstdCompressionWriterType);
797 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
707 798 if (!result) {
708 799 return NULL;
709 800 }
@@ -715,11 +806,671 b' static ZstdCompressionWriter* ZstdCompre'
715 806 Py_INCREF(result->writer);
716 807
717 808 result->sourceSize = sourceSize;
718
719 809 result->outSize = outSize;
720 810
721 result->entered = 0;
722 result->cstream = NULL;
811 return result;
812 }
813
814 typedef struct {
815 void* sourceData;
816 size_t sourceSize;
817 } DataSource;
818
819 typedef struct {
820 DataSource* sources;
821 Py_ssize_t sourcesSize;
822 unsigned long long totalSourceSize;
823 } DataSources;
824
825 typedef struct {
826 void* dest;
827 Py_ssize_t destSize;
828 BufferSegment* segments;
829 Py_ssize_t segmentsSize;
830 } DestBuffer;
831
832 typedef enum {
833 WorkerError_none = 0,
834 WorkerError_zstd = 1,
835 WorkerError_no_memory = 2,
836 } WorkerError;
837
838 /**
839 * Holds state for an individual worker performing multi_compress_to_buffer work.
840 */
841 typedef struct {
842 /* Used for compression. */
843 ZSTD_CCtx* cctx;
844 ZSTD_CDict* cdict;
845 int cLevel;
846 CompressionParametersObject* cParams;
847 ZSTD_frameParameters fParams;
848
849 /* What to compress. */
850 DataSource* sources;
851 Py_ssize_t sourcesSize;
852 Py_ssize_t startOffset;
853 Py_ssize_t endOffset;
854 unsigned long long totalSourceSize;
855
856 /* Result storage. */
857 DestBuffer* destBuffers;
858 Py_ssize_t destCount;
859
860 /* Error tracking. */
861 WorkerError error;
862 size_t zresult;
863 Py_ssize_t errorOffset;
864 } WorkerState;
865
866 static void compress_worker(WorkerState* state) {
867 Py_ssize_t inputOffset = state->startOffset;
868 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
869 Py_ssize_t currentBufferStartOffset = state->startOffset;
870 size_t zresult;
871 ZSTD_parameters zparams;
872 void* newDest;
873 size_t allocationSize;
874 size_t boundSize;
875 Py_ssize_t destOffset = 0;
876 DataSource* sources = state->sources;
877 DestBuffer* destBuffer;
878
879 assert(!state->destBuffers);
880 assert(0 == state->destCount);
881
882 if (state->cParams) {
883 ztopy_compression_parameters(state->cParams, &zparams.cParams);
884 }
885
886 zparams.fParams = state->fParams;
887
888 /*
889 * The total size of the compressed data is unknown until we actually
890 * compress data. That means we can't pre-allocate the exact size we need.
891 *
892 * There is a cost to every allocation and reallocation. So, it is in our
893 * interest to minimize the number of allocations.
894 *
895 * There is also a cost to too few allocations. If allocations are too
896 * large they may fail. If buffers are shared and all inputs become
897 * irrelevant at different lifetimes, then a reference to one segment
898 * in the buffer will keep the entire buffer alive. This leads to excessive
899 * memory usage.
900 *
901 * Our current strategy is to assume a compression ratio of 16:1 and
902 * allocate buffers of that size, rounded up to the nearest power of 2
903 * (because computers like round numbers). That ratio is greater than what
904 * most inputs achieve. This is by design: we don't want to over-allocate.
905 * But we don't want to under-allocate and lead to too many buffers either.
906 */
907
908 state->destCount = 1;
909
910 state->destBuffers = calloc(1, sizeof(DestBuffer));
911 if (NULL == state->destBuffers) {
912 state->error = WorkerError_no_memory;
913 return;
914 }
915
916 destBuffer = &state->destBuffers[state->destCount - 1];
917
918 /*
919 * Rather than track bounds and grow the segments buffer, allocate space
920 * to hold remaining items then truncate when we're done with it.
921 */
922 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
923 if (NULL == destBuffer->segments) {
924 state->error = WorkerError_no_memory;
925 return;
926 }
927
928 destBuffer->segmentsSize = remainingItems;
929
930 allocationSize = roundpow2(state->totalSourceSize >> 4);
931
932 /* If the maximum size of the output is larger than that, round up. */
933 boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize);
934
935 if (boundSize > allocationSize) {
936 allocationSize = roundpow2(boundSize);
937 }
938
939 destBuffer->dest = malloc(allocationSize);
940 if (NULL == destBuffer->dest) {
941 state->error = WorkerError_no_memory;
942 return;
943 }
944
945 destBuffer->destSize = allocationSize;
946
947 for (inputOffset = state->startOffset; inputOffset <= state->endOffset; inputOffset++) {
948 void* source = sources[inputOffset].sourceData;
949 size_t sourceSize = sources[inputOffset].sourceSize;
950 size_t destAvailable;
951 void* dest;
952
953 destAvailable = destBuffer->destSize - destOffset;
954 boundSize = ZSTD_compressBound(sourceSize);
955
956 /*
957 * Not enough space in current buffer to hold largest compressed output.
958 * So allocate and switch to a new output buffer.
959 */
960 if (boundSize > destAvailable) {
961 /*
962 * The downsizing of the existing buffer is optional. It should be cheap
963 * (unlike growing). So we just do it.
964 */
965 if (destAvailable) {
966 newDest = realloc(destBuffer->dest, destOffset);
967 if (NULL == newDest) {
968 state->error = WorkerError_no_memory;
969 return;
970 }
971
972 destBuffer->dest = newDest;
973 destBuffer->destSize = destOffset;
974 }
975
976 /* Truncate segments buffer. */
977 newDest = realloc(destBuffer->segments,
978 (inputOffset - currentBufferStartOffset + 1) * sizeof(BufferSegment));
979 if (NULL == newDest) {
980 state->error = WorkerError_no_memory;
981 return;
982 }
983
984 destBuffer->segments = newDest;
985 destBuffer->segmentsSize = inputOffset - currentBufferStartOffset;
986
987 /* Grow space for new struct. */
988 /* TODO consider over-allocating so we don't do this every time. */
989 newDest = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
990 if (NULL == newDest) {
991 state->error = WorkerError_no_memory;
992 return;
993 }
994
995 state->destBuffers = newDest;
996 state->destCount++;
997
998 destBuffer = &state->destBuffers[state->destCount - 1];
999
1000 /* Don't take any chances with non-NULL pointers. */
1001 memset(destBuffer, 0, sizeof(DestBuffer));
1002
1003 /**
1004 * We could dynamically update allocation size based on work done so far.
1005 * For now, keep is simple.
1006 */
1007 allocationSize = roundpow2(state->totalSourceSize >> 4);
1008
1009 if (boundSize > allocationSize) {
1010 allocationSize = roundpow2(boundSize);
1011 }
1012
1013 destBuffer->dest = malloc(allocationSize);
1014 if (NULL == destBuffer->dest) {
1015 state->error = WorkerError_no_memory;
1016 return;
1017 }
1018
1019 destBuffer->destSize = allocationSize;
1020 destAvailable = allocationSize;
1021 destOffset = 0;
1022
1023 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1024 if (NULL == destBuffer->segments) {
1025 state->error = WorkerError_no_memory;
1026 return;
1027 }
1028
1029 destBuffer->segmentsSize = remainingItems;
1030 currentBufferStartOffset = inputOffset;
1031 }
1032
1033 dest = (char*)destBuffer->dest + destOffset;
1034
1035 if (state->cdict) {
1036 zresult = ZSTD_compress_usingCDict(state->cctx, dest, destAvailable,
1037 source, sourceSize, state->cdict);
1038 }
1039 else {
1040 if (!state->cParams) {
1041 zparams.cParams = ZSTD_getCParams(state->cLevel, sourceSize, 0);
1042 }
1043
1044 zresult = ZSTD_compress_advanced(state->cctx, dest, destAvailable,
1045 source, sourceSize, NULL, 0, zparams);
1046 }
1047
1048 if (ZSTD_isError(zresult)) {
1049 state->error = WorkerError_zstd;
1050 state->zresult = zresult;
1051 state->errorOffset = inputOffset;
1052 break;
1053 }
1054
1055 destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset;
1056 destBuffer->segments[inputOffset - currentBufferStartOffset].length = zresult;
1057
1058 destOffset += zresult;
1059 remainingItems--;
1060 }
1061
1062 if (destBuffer->destSize > destOffset) {
1063 newDest = realloc(destBuffer->dest, destOffset);
1064 if (NULL == newDest) {
1065 state->error = WorkerError_no_memory;
1066 return;
1067 }
1068
1069 destBuffer->dest = newDest;
1070 destBuffer->destSize = destOffset;
1071 }
1072 }
1073
1074 ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor,
1075 DataSources* sources, unsigned int threadCount) {
1076 ZSTD_parameters zparams;
1077 unsigned long long bytesPerWorker;
1078 POOL_ctx* pool = NULL;
1079 WorkerState* workerStates = NULL;
1080 Py_ssize_t i;
1081 unsigned long long workerBytes = 0;
1082 Py_ssize_t workerStartOffset = 0;
1083 size_t currentThread = 0;
1084 int errored = 0;
1085 Py_ssize_t segmentsCount = 0;
1086 Py_ssize_t segmentIndex;
1087 PyObject* segmentsArg = NULL;
1088 ZstdBufferWithSegments* buffer;
1089 ZstdBufferWithSegmentsCollection* result = NULL;
1090
1091 assert(sources->sourcesSize > 0);
1092 assert(sources->totalSourceSize > 0);
1093 assert(threadCount >= 1);
1094
1095 /* More threads than inputs makes no sense. */
1096 threadCount = sources->sourcesSize < threadCount ? (unsigned int)sources->sourcesSize
1097 : threadCount;
1098
1099 /* TODO lower thread count when input size is too small and threads would add
1100 overhead. */
1101
1102 /*
1103 * When dictionaries are used, parameters are derived from the size of the
1104 * first element.
1105 *
1106 * TODO come up with a better mechanism.
1107 */
1108 memset(&zparams, 0, sizeof(zparams));
1109 if (compressor->cparams) {
1110 ztopy_compression_parameters(compressor->cparams, &zparams.cParams);
1111 }
1112 else {
1113 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel,
1114 sources->sources[0].sourceSize,
1115 compressor->dict ? compressor->dict->dictSize : 0);
1116 }
1117
1118 zparams.fParams = compressor->fparams;
1119
1120 if (0 != populate_cdict(compressor, &zparams)) {
1121 return NULL;
1122 }
1123
1124 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1125 if (NULL == workerStates) {
1126 PyErr_NoMemory();
1127 goto finally;
1128 }
1129
1130 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1131
1132 if (threadCount > 1) {
1133 pool = POOL_create(threadCount, 1);
1134 if (NULL == pool) {
1135 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1136 goto finally;
1137 }
1138 }
1139
1140 bytesPerWorker = sources->totalSourceSize / threadCount;
1141
1142 for (i = 0; i < threadCount; i++) {
1143 workerStates[i].cctx = ZSTD_createCCtx();
1144 if (!workerStates[i].cctx) {
1145 PyErr_NoMemory();
1146 goto finally;
1147 }
1148
1149 workerStates[i].cdict = compressor->cdict;
1150 workerStates[i].cLevel = compressor->compressionLevel;
1151 workerStates[i].cParams = compressor->cparams;
1152 workerStates[i].fParams = compressor->fparams;
1153
1154 workerStates[i].sources = sources->sources;
1155 workerStates[i].sourcesSize = sources->sourcesSize;
1156 }
1157
1158 Py_BEGIN_ALLOW_THREADS
1159 for (i = 0; i < sources->sourcesSize; i++) {
1160 workerBytes += sources->sources[i].sourceSize;
1161
1162 /*
1163 * The last worker/thread needs to handle all remaining work. Don't
1164 * trigger it prematurely. Defer to the block outside of the loop
1165 * to run the last worker/thread. But do still process this loop
1166 * so workerBytes is correct.
1167 */
1168 if (currentThread == threadCount - 1) {
1169 continue;
1170 }
1171
1172 if (workerBytes >= bytesPerWorker) {
1173 assert(currentThread < threadCount);
1174 workerStates[currentThread].totalSourceSize = workerBytes;
1175 workerStates[currentThread].startOffset = workerStartOffset;
1176 workerStates[currentThread].endOffset = i;
1177
1178 if (threadCount > 1) {
1179 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1180 }
1181 else {
1182 compress_worker(&workerStates[currentThread]);
1183 }
1184
1185 currentThread++;
1186 workerStartOffset = i + 1;
1187 workerBytes = 0;
1188 }
1189 }
1190
1191 if (workerBytes) {
1192 assert(currentThread < threadCount);
1193 workerStates[currentThread].totalSourceSize = workerBytes;
1194 workerStates[currentThread].startOffset = workerStartOffset;
1195 workerStates[currentThread].endOffset = sources->sourcesSize - 1;
1196
1197 if (threadCount > 1) {
1198 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1199 }
1200 else {
1201 compress_worker(&workerStates[currentThread]);
1202 }
1203 }
1204
1205 if (threadCount > 1) {
1206 POOL_free(pool);
1207 pool = NULL;
1208 }
1209
1210 Py_END_ALLOW_THREADS
1211
1212 for (i = 0; i < threadCount; i++) {
1213 switch (workerStates[i].error) {
1214 case WorkerError_no_memory:
1215 PyErr_NoMemory();
1216 errored = 1;
1217 break;
1218
1219 case WorkerError_zstd:
1220 PyErr_Format(ZstdError, "error compressing item %zd: %s",
1221 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1222 errored = 1;
1223 break;
1224 default:
1225 ;
1226 }
1227
1228 if (errored) {
1229 break;
1230 }
1231
1232 }
1233
1234 if (errored) {
1235 goto finally;
1236 }
1237
1238 segmentsCount = 0;
1239 for (i = 0; i < threadCount; i++) {
1240 WorkerState* state = &workerStates[i];
1241 segmentsCount += state->destCount;
1242 }
1243
1244 segmentsArg = PyTuple_New(segmentsCount);
1245 if (NULL == segmentsArg) {
1246 goto finally;
1247 }
1248
1249 segmentIndex = 0;
1250
1251 for (i = 0; i < threadCount; i++) {
1252 Py_ssize_t j;
1253 WorkerState* state = &workerStates[i];
1254
1255 for (j = 0; j < state->destCount; j++) {
1256 DestBuffer* destBuffer = &state->destBuffers[j];
1257 buffer = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1258 destBuffer->segments, destBuffer->segmentsSize);
1259
1260 if (NULL == buffer) {
1261 goto finally;
1262 }
1263
1264 /* Tell instance to use free() instsead of PyMem_Free(). */
1265 buffer->useFree = 1;
1266
1267 /*
1268 * BufferWithSegments_FromMemory takes ownership of the backing memory.
1269 * Unset it here so it doesn't get freed below.
1270 */
1271 destBuffer->dest = NULL;
1272 destBuffer->segments = NULL;
1273
1274 PyTuple_SET_ITEM(segmentsArg, segmentIndex++, (PyObject*)buffer);
1275 }
1276 }
1277
1278 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1279 (PyObject*)&ZstdBufferWithSegmentsCollectionType, segmentsArg);
1280
1281 finally:
1282 Py_CLEAR(segmentsArg);
1283
1284 if (pool) {
1285 POOL_free(pool);
1286 }
1287
1288 if (workerStates) {
1289 Py_ssize_t j;
1290
1291 for (i = 0; i < threadCount; i++) {
1292 WorkerState state = workerStates[i];
1293
1294 if (state.cctx) {
1295 ZSTD_freeCCtx(state.cctx);
1296 }
1297
1298 /* malloc() is used in worker thread. */
1299
1300 for (j = 0; j < state.destCount; j++) {
1301 if (state.destBuffers) {
1302 free(state.destBuffers[j].dest);
1303 free(state.destBuffers[j].segments);
1304 }
1305 }
1306
1307
1308 free(state.destBuffers);
1309 }
1310
1311 PyMem_Free(workerStates);
1312 }
1313
1314 return result;
1315 }
1316
1317 PyDoc_STRVAR(ZstdCompressor_multi_compress_to_buffer__doc__,
1318 "Compress multiple pieces of data as a single operation\n"
1319 "\n"
1320 "Receives a ``BufferWithSegmentsCollection``, a ``BufferWithSegments``, or\n"
1321 "a list of bytes like objects holding data to compress.\n"
1322 "\n"
1323 "Returns a ``BufferWithSegmentsCollection`` holding compressed data.\n"
1324 "\n"
1325 "This function is optimized to perform multiple compression operations as\n"
1326 "quickly as possible with as little overhead as possible.\n"
1327 );
1328
1329 static ZstdBufferWithSegmentsCollection* ZstdCompressor_multi_compress_to_buffer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
1330 static char* kwlist[] = {
1331 "data",
1332 "threads",
1333 NULL
1334 };
1335
1336 PyObject* data;
1337 int threads = 0;
1338 Py_buffer* dataBuffers = NULL;
1339 DataSources sources;
1340 Py_ssize_t i;
1341 Py_ssize_t sourceCount = 0;
1342 ZstdBufferWithSegmentsCollection* result = NULL;
1343
1344 if (self->mtcctx) {
1345 PyErr_SetString(ZstdError,
1346 "function cannot be called on ZstdCompressor configured for multi-threaded compression");
1347 return NULL;
1348 }
1349
1350 memset(&sources, 0, sizeof(sources));
1351
1352 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist,
1353 &data, &threads)) {
1354 return NULL;
1355 }
1356
1357 if (threads < 0) {
1358 threads = cpu_count();
1359 }
1360
1361 if (threads < 2) {
1362 threads = 1;
1363 }
1364
1365 if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsType)) {
1366 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)data;
1367
1368 sources.sources = PyMem_Malloc(buffer->segmentCount * sizeof(DataSource));
1369 if (NULL == sources.sources) {
1370 PyErr_NoMemory();
1371 goto finally;
1372 }
1373
1374 for (i = 0; i < buffer->segmentCount; i++) {
1375 sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset;
1376 sources.sources[i].sourceSize = buffer->segments[i].length;
1377 sources.totalSourceSize += buffer->segments[i].length;
1378 }
1379
1380 sources.sourcesSize = buffer->segmentCount;
1381 }
1382 else if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsCollectionType)) {
1383 Py_ssize_t j;
1384 Py_ssize_t offset = 0;
1385 ZstdBufferWithSegments* buffer;
1386 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)data;
1387
1388 sourceCount = BufferWithSegmentsCollection_length(collection);
1389
1390 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1391 if (NULL == sources.sources) {
1392 PyErr_NoMemory();
1393 goto finally;
1394 }
1395
1396 for (i = 0; i < collection->bufferCount; i++) {
1397 buffer = collection->buffers[i];
1398
1399 for (j = 0; j < buffer->segmentCount; j++) {
1400 sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset;
1401 sources.sources[offset].sourceSize = buffer->segments[j].length;
1402 sources.totalSourceSize += buffer->segments[j].length;
1403
1404 offset++;
1405 }
1406 }
1407
1408 sources.sourcesSize = sourceCount;
1409 }
1410 else if (PyList_Check(data)) {
1411 sourceCount = PyList_GET_SIZE(data);
1412
1413 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1414 if (NULL == sources.sources) {
1415 PyErr_NoMemory();
1416 goto finally;
1417 }
1418
1419 /*
1420 * It isn't clear whether the address referred to by Py_buffer.buf
1421 * is still valid after PyBuffer_Release. So we hold a reference to all
1422 * Py_buffer instances for the duration of the operation.
1423 */
1424 dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer));
1425 if (NULL == dataBuffers) {
1426 PyErr_NoMemory();
1427 goto finally;
1428 }
1429
1430 memset(dataBuffers, 0, sourceCount * sizeof(Py_buffer));
1431
1432 for (i = 0; i < sourceCount; i++) {
1433 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(data, i),
1434 &dataBuffers[i], PyBUF_CONTIG_RO)) {
1435 PyErr_Clear();
1436 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1437 goto finally;
1438 }
1439
1440 sources.sources[i].sourceData = dataBuffers[i].buf;
1441 sources.sources[i].sourceSize = dataBuffers[i].len;
1442 sources.totalSourceSize += dataBuffers[i].len;
1443 }
1444
1445 sources.sourcesSize = sourceCount;
1446 }
1447 else {
1448 PyErr_SetString(PyExc_TypeError, "argument must be list of BufferWithSegments");
1449 goto finally;
1450 }
1451
1452 if (0 == sources.sourcesSize) {
1453 PyErr_SetString(PyExc_ValueError, "no source elements found");
1454 goto finally;
1455 }
1456
1457 if (0 == sources.totalSourceSize) {
1458 PyErr_SetString(PyExc_ValueError, "source elements are empty");
1459 goto finally;
1460 }
1461
1462 result = compress_from_datasources(self, &sources, threads);
1463
1464 finally:
1465 PyMem_Free(sources.sources);
1466
1467 if (dataBuffers) {
1468 for (i = 0; i < sourceCount; i++) {
1469 PyBuffer_Release(&dataBuffers[i]);
1470 }
1471
1472 PyMem_Free(dataBuffers);
1473 }
723 1474
724 1475 return result;
725 1476 }
@@ -735,6 +1486,8 b' static PyMethodDef ZstdCompressor_method'
735 1486 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_from__doc__ },
736 1487 { "write_to", (PyCFunction)ZstdCompressor_write_to,
737 1488 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_write_to___doc__ },
1489 { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer,
1490 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ },
738 1491 { NULL, NULL }
739 1492 };
740 1493
@@ -27,11 +27,6 b' static void ZstdCompressorIterator_deall'
27 27 self->buffer = NULL;
28 28 }
29 29
30 if (self->cstream) {
31 ZSTD_freeCStream(self->cstream);
32 self->cstream = NULL;
33 }
34
35 30 if (self->output.dst) {
36 31 PyMem_Free(self->output.dst);
37 32 self->output.dst = NULL;
@@ -63,7 +58,14 b' feedcompressor:'
63 58 /* If we have data left in the input, consume it. */
64 59 if (self->input.pos < self->input.size) {
65 60 Py_BEGIN_ALLOW_THREADS
66 zresult = ZSTD_compressStream(self->cstream, &self->output, &self->input);
61 if (self->compressor->mtcctx) {
62 zresult = ZSTDMT_compressStream(self->compressor->mtcctx,
63 &self->output, &self->input);
64 }
65 else {
66 zresult = ZSTD_compressStream(self->compressor->cstream, &self->output,
67 &self->input);
68 }
67 69 Py_END_ALLOW_THREADS
68 70
69 71 /* Release the Python object holding the input buffer. */
@@ -128,7 +130,12 b' feedcompressor:'
128 130
129 131 /* EOF */
130 132 if (0 == readSize) {
131 zresult = ZSTD_endStream(self->cstream, &self->output);
133 if (self->compressor->mtcctx) {
134 zresult = ZSTDMT_endStream(self->compressor->mtcctx, &self->output);
135 }
136 else {
137 zresult = ZSTD_endStream(self->compressor->cstream, &self->output);
138 }
132 139 if (ZSTD_isError(zresult)) {
133 140 PyErr_Format(ZstdError, "error ending compression stream: %s",
134 141 ZSTD_getErrorName(zresult));
@@ -152,7 +159,13 b' feedcompressor:'
152 159 self->input.pos = 0;
153 160
154 161 Py_BEGIN_ALLOW_THREADS
155 zresult = ZSTD_compressStream(self->cstream, &self->output, &self->input);
162 if (self->compressor->mtcctx) {
163 zresult = ZSTDMT_compressStream(self->compressor->mtcctx, &self->output,
164 &self->input);
165 }
166 else {
167 zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, &self->input);
168 }
156 169 Py_END_ALLOW_THREADS
157 170
158 171 /* The input buffer currently points to memory managed by Python
@@ -41,7 +41,7 b' void constants_module_init(PyObject* mod'
41 41 PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR));
42 42 PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR));
43 43 PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE));
44 Py_IncRef(zstdVersion);
44 Py_INCREF(zstdVersion);
45 45 PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion);
46 46
47 47 frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header));
@@ -18,11 +18,6 b' static void ZstdDecompressionWriter_deal'
18 18 Py_XDECREF(self->decompressor);
19 19 Py_XDECREF(self->writer);
20 20
21 if (self->dstream) {
22 ZSTD_freeDStream(self->dstream);
23 self->dstream = NULL;
24 }
25
26 21 PyObject_Del(self);
27 22 }
28 23
@@ -32,8 +27,7 b' static PyObject* ZstdDecompressionWriter'
32 27 return NULL;
33 28 }
34 29
35 self->dstream = DStream_from_ZstdDecompressor(self->decompressor);
36 if (!self->dstream) {
30 if (0 != init_dstream(self->decompressor)) {
37 31 return NULL;
38 32 }
39 33
@@ -46,22 +40,17 b' static PyObject* ZstdDecompressionWriter'
46 40 static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) {
47 41 self->entered = 0;
48 42
49 if (self->dstream) {
50 ZSTD_freeDStream(self->dstream);
51 self->dstream = NULL;
52 }
53
54 43 Py_RETURN_FALSE;
55 44 }
56 45
57 46 static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) {
58 if (!self->dstream) {
47 if (!self->decompressor->dstream) {
59 48 PyErr_SetString(ZstdError, "cannot determine size of inactive decompressor; "
60 49 "call when context manager is active");
61 50 return NULL;
62 51 }
63 52
64 return PyLong_FromSize_t(ZSTD_sizeof_DStream(self->dstream));
53 return PyLong_FromSize_t(ZSTD_sizeof_DStream(self->decompressor->dstream));
65 54 }
66 55
67 56 static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args) {
@@ -71,11 +60,12 b' static PyObject* ZstdDecompressionWriter'
71 60 ZSTD_inBuffer input;
72 61 ZSTD_outBuffer output;
73 62 PyObject* res;
63 Py_ssize_t totalWrite = 0;
74 64
75 65 #if PY_MAJOR_VERSION >= 3
76 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
66 if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) {
77 67 #else
78 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
68 if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) {
79 69 #endif
80 70 return NULL;
81 71 }
@@ -85,6 +75,8 b' static PyObject* ZstdDecompressionWriter'
85 75 return NULL;
86 76 }
87 77
78 assert(self->decompressor->dstream);
79
88 80 output.dst = PyMem_Malloc(self->outSize);
89 81 if (!output.dst) {
90 82 return PyErr_NoMemory();
@@ -98,7 +90,7 b' static PyObject* ZstdDecompressionWriter'
98 90
99 91 while ((ssize_t)input.pos < sourceSize) {
100 92 Py_BEGIN_ALLOW_THREADS
101 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
93 zresult = ZSTD_decompressStream(self->decompressor->dstream, &output, &input);
102 94 Py_END_ALLOW_THREADS
103 95
104 96 if (ZSTD_isError(zresult)) {
@@ -116,15 +108,15 b' static PyObject* ZstdDecompressionWriter'
116 108 #endif
117 109 output.dst, output.pos);
118 110 Py_XDECREF(res);
111 totalWrite += output.pos;
119 112 output.pos = 0;
120 113 }
121 114 }
122 115
123 116 PyMem_Free(output.dst);
124 117
125 /* TODO return bytes written */
126 Py_RETURN_NONE;
127 }
118 return PyLong_FromSsize_t(totalWrite);
119 }
128 120
129 121 static PyMethodDef ZstdDecompressionWriter_methods[] = {
130 122 { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
@@ -15,11 +15,6 b' PyDoc_STRVAR(DecompressionObj__doc__,'
15 15 );
16 16
17 17 static void DecompressionObj_dealloc(ZstdDecompressionObj* self) {
18 if (self->dstream) {
19 ZSTD_freeDStream(self->dstream);
20 self->dstream = NULL;
21 }
22
23 18 Py_XDECREF(self->decompressor);
24 19
25 20 PyObject_Del(self);
@@ -35,15 +30,18 b' static PyObject* DecompressionObj_decomp'
35 30 PyObject* result = NULL;
36 31 Py_ssize_t resultSize = 0;
37 32
33 /* Constructor should ensure stream is populated. */
34 assert(self->decompressor->dstream);
35
38 36 if (self->finished) {
39 37 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
40 38 return NULL;
41 39 }
42 40
43 41 #if PY_MAJOR_VERSION >= 3
44 if (!PyArg_ParseTuple(args, "y#",
42 if (!PyArg_ParseTuple(args, "y#:decompress",
45 43 #else
46 if (!PyArg_ParseTuple(args, "s#",
44 if (!PyArg_ParseTuple(args, "s#:decompress",
47 45 #endif
48 46 &source, &sourceSize)) {
49 47 return NULL;
@@ -64,7 +62,7 b' static PyObject* DecompressionObj_decomp'
64 62 /* Read input until exhausted. */
65 63 while (input.pos < input.size) {
66 64 Py_BEGIN_ALLOW_THREADS
67 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
65 zresult = ZSTD_decompressStream(self->decompressor->dstream, &output, &input);
68 66 Py_END_ALLOW_THREADS
69 67
70 68 if (ZSTD_isError(zresult)) {
@@ -106,8 +104,7 b' static PyObject* DecompressionObj_decomp'
106 104 goto finally;
107 105
108 106 except:
109 Py_DecRef(result);
110 result = NULL;
107 Py_CLEAR(result);
111 108
112 109 finally:
113 110 PyMem_Free(output.dst);
This diff has been collapsed as it changes many lines, (1138 lines changed) Show them Hide them
@@ -7,19 +7,37 b''
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 #include "pool.h"
10 11
11 12 extern PyObject* ZstdError;
12 13
13 ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor) {
14 ZSTD_DStream* dstream;
14 /**
15 * Ensure the ZSTD_DStream on a ZstdDecompressor is initialized and reset.
16 *
17 * This should be called before starting a decompression operation with a
18 * ZSTD_DStream on a ZstdDecompressor.
19 */
20 int init_dstream(ZstdDecompressor* decompressor) {
15 21 void* dictData = NULL;
16 22 size_t dictSize = 0;
17 23 size_t zresult;
18 24
19 dstream = ZSTD_createDStream();
20 if (!dstream) {
25 /* Simple case of dstream already exists. Just reset it. */
26 if (decompressor->dstream) {
27 zresult = ZSTD_resetDStream(decompressor->dstream);
28 if (ZSTD_isError(zresult)) {
29 PyErr_Format(ZstdError, "could not reset DStream: %s",
30 ZSTD_getErrorName(zresult));
31 return -1;
32 }
33
34 return 0;
35 }
36
37 decompressor->dstream = ZSTD_createDStream();
38 if (!decompressor->dstream) {
21 39 PyErr_SetString(ZstdError, "could not create DStream");
22 return NULL;
40 return -1;
23 41 }
24 42
25 43 if (decompressor->dict) {
@@ -28,19 +46,23 b' ZSTD_DStream* DStream_from_ZstdDecompres'
28 46 }
29 47
30 48 if (dictData) {
31 zresult = ZSTD_initDStream_usingDict(dstream, dictData, dictSize);
49 zresult = ZSTD_initDStream_usingDict(decompressor->dstream, dictData, dictSize);
32 50 }
33 51 else {
34 zresult = ZSTD_initDStream(dstream);
52 zresult = ZSTD_initDStream(decompressor->dstream);
35 53 }
36 54
37 55 if (ZSTD_isError(zresult)) {
56 /* Don't leave a reference to an invalid object. */
57 ZSTD_freeDStream(decompressor->dstream);
58 decompressor->dstream = NULL;
59
38 60 PyErr_Format(ZstdError, "could not initialize DStream: %s",
39 61 ZSTD_getErrorName(zresult));
40 return NULL;
62 return -1;
41 63 }
42 64
43 return dstream;
65 return 0;
44 66 }
45 67
46 68 PyDoc_STRVAR(Decompressor__doc__,
@@ -59,23 +81,19 b' static int Decompressor_init(ZstdDecompr'
59 81
60 82 ZstdCompressionDict* dict = NULL;
61 83
62 self->refdctx = NULL;
84 self->dctx = NULL;
63 85 self->dict = NULL;
64 86 self->ddict = NULL;
65 87
66 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!", kwlist,
88 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist,
67 89 &ZstdCompressionDictType, &dict)) {
68 90 return -1;
69 91 }
70 92
71 /* Instead of creating a ZSTD_DCtx for every decompression operation,
72 we create an instance at object creation time and recycle it via
73 ZSTD_copyDCTx() on each use. This means each use is a malloc+memcpy
74 instead of a malloc+init. */
75 93 /* TODO lazily initialize the reference ZSTD_DCtx on first use since
76 94 not instances of ZstdDecompressor will use a ZSTD_DCtx. */
77 self->refdctx = ZSTD_createDCtx();
78 if (!self->refdctx) {
95 self->dctx = ZSTD_createDCtx();
96 if (!self->dctx) {
79 97 PyErr_NoMemory();
80 98 goto except;
81 99 }
@@ -88,26 +106,32 b' static int Decompressor_init(ZstdDecompr'
88 106 return 0;
89 107
90 108 except:
91 if (self->refdctx) {
92 ZSTD_freeDCtx(self->refdctx);
93 self->refdctx = NULL;
109 if (self->dctx) {
110 ZSTD_freeDCtx(self->dctx);
111 self->dctx = NULL;
94 112 }
95 113
96 114 return -1;
97 115 }
98 116
99 117 static void Decompressor_dealloc(ZstdDecompressor* self) {
100 if (self->refdctx) {
101 ZSTD_freeDCtx(self->refdctx);
102 }
103
104 Py_XDECREF(self->dict);
118 Py_CLEAR(self->dict);
105 119
106 120 if (self->ddict) {
107 121 ZSTD_freeDDict(self->ddict);
108 122 self->ddict = NULL;
109 123 }
110 124
125 if (self->dstream) {
126 ZSTD_freeDStream(self->dstream);
127 self->dstream = NULL;
128 }
129
130 if (self->dctx) {
131 ZSTD_freeDCtx(self->dctx);
132 self->dctx = NULL;
133 }
134
111 135 PyObject_Del(self);
112 136 }
113 137
@@ -136,7 +160,6 b' static PyObject* Decompressor_copy_strea'
136 160 PyObject* dest;
137 161 size_t inSize = ZSTD_DStreamInSize();
138 162 size_t outSize = ZSTD_DStreamOutSize();
139 ZSTD_DStream* dstream;
140 163 ZSTD_inBuffer input;
141 164 ZSTD_outBuffer output;
142 165 Py_ssize_t totalRead = 0;
@@ -150,8 +173,8 b' static PyObject* Decompressor_copy_strea'
150 173 PyObject* totalReadPy;
151 174 PyObject* totalWritePy;
152 175
153 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk", kwlist, &source,
154 &dest, &inSize, &outSize)) {
176 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
177 &source, &dest, &inSize, &outSize)) {
155 178 return NULL;
156 179 }
157 180
@@ -168,8 +191,7 b' static PyObject* Decompressor_copy_strea'
168 191 /* Prevent free on uninitialized memory in finally. */
169 192 output.dst = NULL;
170 193
171 dstream = DStream_from_ZstdDecompressor(self);
172 if (!dstream) {
194 if (0 != init_dstream(self)) {
173 195 res = NULL;
174 196 goto finally;
175 197 }
@@ -207,7 +229,7 b' static PyObject* Decompressor_copy_strea'
207 229
208 230 while (input.pos < input.size) {
209 231 Py_BEGIN_ALLOW_THREADS
210 zresult = ZSTD_decompressStream(dstream, &output, &input);
232 zresult = ZSTD_decompressStream(self->dstream, &output, &input);
211 233 Py_END_ALLOW_THREADS
212 234
213 235 if (ZSTD_isError(zresult)) {
@@ -234,24 +256,17 b' static PyObject* Decompressor_copy_strea'
234 256
235 257 /* Source stream is exhausted. Finish up. */
236 258
237 ZSTD_freeDStream(dstream);
238 dstream = NULL;
239
240 259 totalReadPy = PyLong_FromSsize_t(totalRead);
241 260 totalWritePy = PyLong_FromSsize_t(totalWrite);
242 261 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
243 Py_DecRef(totalReadPy);
244 Py_DecRef(totalWritePy);
262 Py_DECREF(totalReadPy);
263 Py_DECREF(totalWritePy);
245 264
246 finally:
265 finally:
247 266 if (output.dst) {
248 267 PyMem_Free(output.dst);
249 268 }
250 269
251 if (dstream) {
252 ZSTD_freeDStream(dstream);
253 }
254
255 270 return res;
256 271 }
257 272
@@ -291,28 +306,19 b' PyObject* Decompressor_decompress(ZstdDe'
291 306 unsigned long long decompressedSize;
292 307 size_t destCapacity;
293 308 PyObject* result = NULL;
294 ZSTD_DCtx* dctx = NULL;
295 309 void* dictData = NULL;
296 310 size_t dictSize = 0;
297 311 size_t zresult;
298 312
299 313 #if PY_MAJOR_VERSION >= 3
300 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n", kwlist,
314 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n:decompress",
301 315 #else
302 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n", kwlist,
316 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n:decompress",
303 317 #endif
304 &source, &sourceSize, &maxOutputSize)) {
318 kwlist, &source, &sourceSize, &maxOutputSize)) {
305 319 return NULL;
306 320 }
307 321
308 dctx = PyMem_Malloc(ZSTD_sizeof_DCtx(self->refdctx));
309 if (!dctx) {
310 PyErr_NoMemory();
311 return NULL;
312 }
313
314 ZSTD_copyDCtx(dctx, self->refdctx);
315
316 322 if (self->dict) {
317 323 dictData = self->dict->dictData;
318 324 dictSize = self->dict->dictSize;
@@ -320,12 +326,12 b' PyObject* Decompressor_decompress(ZstdDe'
320 326
321 327 if (dictData && !self->ddict) {
322 328 Py_BEGIN_ALLOW_THREADS
323 self->ddict = ZSTD_createDDict(dictData, dictSize);
329 self->ddict = ZSTD_createDDict_byReference(dictData, dictSize);
324 330 Py_END_ALLOW_THREADS
325 331
326 332 if (!self->ddict) {
327 333 PyErr_SetString(ZstdError, "could not create decompression dict");
328 goto except;
334 return NULL;
329 335 }
330 336 }
331 337
@@ -335,7 +341,7 b' PyObject* Decompressor_decompress(ZstdDe'
335 341 if (0 == maxOutputSize) {
336 342 PyErr_SetString(ZstdError, "input data invalid or missing content size "
337 343 "in frame header");
338 goto except;
344 return NULL;
339 345 }
340 346 else {
341 347 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
@@ -348,45 +354,39 b' PyObject* Decompressor_decompress(ZstdDe'
348 354 }
349 355
350 356 if (!result) {
351 goto except;
357 return NULL;
352 358 }
353 359
354 360 Py_BEGIN_ALLOW_THREADS
355 361 if (self->ddict) {
356 zresult = ZSTD_decompress_usingDDict(dctx, PyBytes_AsString(result), destCapacity,
362 zresult = ZSTD_decompress_usingDDict(self->dctx,
363 PyBytes_AsString(result), destCapacity,
357 364 source, sourceSize, self->ddict);
358 365 }
359 366 else {
360 zresult = ZSTD_decompressDCtx(dctx, PyBytes_AsString(result), destCapacity, source, sourceSize);
367 zresult = ZSTD_decompressDCtx(self->dctx,
368 PyBytes_AsString(result), destCapacity, source, sourceSize);
361 369 }
362 370 Py_END_ALLOW_THREADS
363 371
364 372 if (ZSTD_isError(zresult)) {
365 373 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
366 goto except;
374 Py_DECREF(result);
375 return NULL;
367 376 }
368 377 else if (decompressedSize && zresult != decompressedSize) {
369 378 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
370 379 zresult, decompressedSize);
371 goto except;
380 Py_DECREF(result);
381 return NULL;
372 382 }
373 383 else if (zresult < destCapacity) {
374 384 if (_PyBytes_Resize(&result, zresult)) {
375 goto except;
385 Py_DECREF(result);
386 return NULL;
376 387 }
377 388 }
378 389
379 goto finally;
380
381 except:
382 Py_DecRef(result);
383 result = NULL;
384
385 finally:
386 if (dctx) {
387 PyMem_FREE(dctx);
388 }
389
390 390 return result;
391 391 }
392 392
@@ -401,22 +401,19 b' PyDoc_STRVAR(Decompressor_decompressobj_'
401 401 );
402 402
403 403 static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self) {
404 ZstdDecompressionObj* result = PyObject_New(ZstdDecompressionObj, &ZstdDecompressionObjType);
404 ZstdDecompressionObj* result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL);
405 405 if (!result) {
406 406 return NULL;
407 407 }
408 408
409 result->dstream = DStream_from_ZstdDecompressor(self);
410 if (!result->dstream) {
411 Py_DecRef((PyObject*)result);
409 if (0 != init_dstream(self)) {
410 Py_DECREF(result);
412 411 return NULL;
413 412 }
414 413
415 414 result->decompressor = self;
416 415 Py_INCREF(result->decompressor);
417 416
418 result->finished = 0;
419
420 417 return result;
421 418 }
422 419
@@ -455,8 +452,8 b' static ZstdDecompressorIterator* Decompr'
455 452 ZstdDecompressorIterator* result;
456 453 size_t skipBytes = 0;
457 454
458 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk", kwlist, &reader,
459 &inSize, &outSize, &skipBytes)) {
455 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_from", kwlist,
456 &reader, &inSize, &outSize, &skipBytes)) {
460 457 return NULL;
461 458 }
462 459
@@ -466,18 +463,11 b' static ZstdDecompressorIterator* Decompr'
466 463 return NULL;
467 464 }
468 465
469 result = PyObject_New(ZstdDecompressorIterator, &ZstdDecompressorIteratorType);
466 result = (ZstdDecompressorIterator*)PyObject_CallObject((PyObject*)&ZstdDecompressorIteratorType, NULL);
470 467 if (!result) {
471 468 return NULL;
472 469 }
473 470
474 result->decompressor = NULL;
475 result->reader = NULL;
476 result->buffer = NULL;
477 result->dstream = NULL;
478 result->input.src = NULL;
479 result->output.dst = NULL;
480
481 471 if (PyObject_HasAttrString(reader, "read")) {
482 472 result->reader = reader;
483 473 Py_INCREF(result->reader);
@@ -494,8 +484,6 b' static ZstdDecompressorIterator* Decompr'
494 484 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) {
495 485 goto except;
496 486 }
497
498 result->bufferOffset = 0;
499 487 }
500 488 else {
501 489 PyErr_SetString(PyExc_ValueError,
@@ -510,8 +498,7 b' static ZstdDecompressorIterator* Decompr'
510 498 result->outSize = outSize;
511 499 result->skipBytes = skipBytes;
512 500
513 result->dstream = DStream_from_ZstdDecompressor(self);
514 if (!result->dstream) {
501 if (0 != init_dstream(self)) {
515 502 goto except;
516 503 }
517 504
@@ -520,33 +507,18 b' static ZstdDecompressorIterator* Decompr'
520 507 PyErr_NoMemory();
521 508 goto except;
522 509 }
523 result->input.size = 0;
524 result->input.pos = 0;
525
526 result->output.dst = NULL;
527 result->output.size = 0;
528 result->output.pos = 0;
529
530 result->readCount = 0;
531 result->finishedInput = 0;
532 result->finishedOutput = 0;
533 510
534 511 goto finally;
535 512
536 513 except:
537 if (result->reader) {
538 Py_DECREF(result->reader);
539 result->reader = NULL;
540 }
514 Py_CLEAR(result->reader);
541 515
542 516 if (result->buffer) {
543 517 PyBuffer_Release(result->buffer);
544 Py_DECREF(result->buffer);
545 result->buffer = NULL;
518 Py_CLEAR(result->buffer);
546 519 }
547 520
548 Py_DECREF(result);
549 result = NULL;
521 Py_CLEAR(result);
550 522
551 523 finally:
552 524
@@ -577,7 +549,8 b' static ZstdDecompressionWriter* Decompre'
577 549 size_t outSize = ZSTD_DStreamOutSize();
578 550 ZstdDecompressionWriter* result;
579 551
580 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k", kwlist, &writer, &outSize)) {
552 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:write_to", kwlist,
553 &writer, &outSize)) {
581 554 return NULL;
582 555 }
583 556
@@ -586,7 +559,7 b' static ZstdDecompressionWriter* Decompre'
586 559 return NULL;
587 560 }
588 561
589 result = PyObject_New(ZstdDecompressionWriter, &ZstdDecompressionWriterType);
562 result = (ZstdDecompressionWriter*)PyObject_CallObject((PyObject*)&ZstdDecompressionWriterType, NULL);
590 563 if (!result) {
591 564 return NULL;
592 565 }
@@ -599,8 +572,939 b' static ZstdDecompressionWriter* Decompre'
599 572
600 573 result->outSize = outSize;
601 574
602 result->entered = 0;
603 result->dstream = NULL;
575 return result;
576 }
577
578 PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
579 "Decompress a series of chunks using the content dictionary chaining technique\n"
580 );
581
582 static PyObject* Decompressor_decompress_content_dict_chain(PyObject* self, PyObject* args, PyObject* kwargs) {
583 static char* kwlist[] = {
584 "frames",
585 NULL
586 };
587
588 PyObject* chunks;
589 Py_ssize_t chunksLen;
590 Py_ssize_t chunkIndex;
591 char parity = 0;
592 PyObject* chunk;
593 char* chunkData;
594 Py_ssize_t chunkSize;
595 ZSTD_DCtx* dctx = NULL;
596 size_t zresult;
597 ZSTD_frameParams frameParams;
598 void* buffer1 = NULL;
599 size_t buffer1Size = 0;
600 size_t buffer1ContentSize = 0;
601 void* buffer2 = NULL;
602 size_t buffer2Size = 0;
603 size_t buffer2ContentSize = 0;
604 void* destBuffer = NULL;
605 PyObject* result = NULL;
606
607 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
608 kwlist, &PyList_Type, &chunks)) {
609 return NULL;
610 }
611
612 chunksLen = PyList_Size(chunks);
613 if (!chunksLen) {
614 PyErr_SetString(PyExc_ValueError, "empty input chain");
615 return NULL;
616 }
617
618 /* The first chunk should not be using a dictionary. We handle it specially. */
619 chunk = PyList_GetItem(chunks, 0);
620 if (!PyBytes_Check(chunk)) {
621 PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes");
622 return NULL;
623 }
624
625 /* We require that all chunks be zstd frames and that they have content size set. */
626 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
627 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
628 if (ZSTD_isError(zresult)) {
629 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
630 return NULL;
631 }
632 else if (zresult) {
633 PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame");
634 return NULL;
635 }
636
637 if (0 == frameParams.frameContentSize) {
638 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
639 return NULL;
640 }
641
642 dctx = ZSTD_createDCtx();
643 if (!dctx) {
644 PyErr_NoMemory();
645 goto finally;
646 }
647
648 buffer1Size = frameParams.frameContentSize;
649 buffer1 = PyMem_Malloc(buffer1Size);
650 if (!buffer1) {
651 goto finally;
652 }
653
654 Py_BEGIN_ALLOW_THREADS
655 zresult = ZSTD_decompressDCtx(dctx, buffer1, buffer1Size, chunkData, chunkSize);
656 Py_END_ALLOW_THREADS
657 if (ZSTD_isError(zresult)) {
658 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
659 goto finally;
660 }
661
662 buffer1ContentSize = zresult;
663
664 /* Special case of a simple chain. */
665 if (1 == chunksLen) {
666 result = PyBytes_FromStringAndSize(buffer1, buffer1Size);
667 goto finally;
668 }
669
670 /* This should ideally look at next chunk. But this is slightly simpler. */
671 buffer2Size = frameParams.frameContentSize;
672 buffer2 = PyMem_Malloc(buffer2Size);
673 if (!buffer2) {
674 goto finally;
675 }
676
677 /* For each subsequent chunk, use the previous fulltext as a content dictionary.
678 Our strategy is to have 2 buffers. One holds the previous fulltext (to be
679 used as a content dictionary) and the other holds the new fulltext. The
680 buffers grow when needed but never decrease in size. This limits the
681 memory allocator overhead.
682 */
683 for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
684 chunk = PyList_GetItem(chunks, chunkIndex);
685 if (!PyBytes_Check(chunk)) {
686 PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
687 goto finally;
688 }
689
690 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
691 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize);
692 if (ZSTD_isError(zresult)) {
693 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
694 goto finally;
695 }
696 else if (zresult) {
697 PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
698 goto finally;
699 }
700
701 if (0 == frameParams.frameContentSize) {
702 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
703 goto finally;
704 }
705
706 parity = chunkIndex % 2;
707
708 /* This could definitely be abstracted to reduce code duplication. */
709 if (parity) {
710 /* Resize destination buffer to hold larger content. */
711 if (buffer2Size < frameParams.frameContentSize) {
712 buffer2Size = frameParams.frameContentSize;
713 destBuffer = PyMem_Realloc(buffer2, buffer2Size);
714 if (!destBuffer) {
715 goto finally;
716 }
717 buffer2 = destBuffer;
718 }
719
720 Py_BEGIN_ALLOW_THREADS
721 zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size,
722 chunkData, chunkSize, buffer1, buffer1ContentSize);
723 Py_END_ALLOW_THREADS
724 if (ZSTD_isError(zresult)) {
725 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
726 chunkIndex, ZSTD_getErrorName(zresult));
727 goto finally;
728 }
729 buffer2ContentSize = zresult;
730 }
731 else {
732 if (buffer1Size < frameParams.frameContentSize) {
733 buffer1Size = frameParams.frameContentSize;
734 destBuffer = PyMem_Realloc(buffer1, buffer1Size);
735 if (!destBuffer) {
736 goto finally;
737 }
738 buffer1 = destBuffer;
739 }
740
741 Py_BEGIN_ALLOW_THREADS
742 zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size,
743 chunkData, chunkSize, buffer2, buffer2ContentSize);
744 Py_END_ALLOW_THREADS
745 if (ZSTD_isError(zresult)) {
746 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
747 chunkIndex, ZSTD_getErrorName(zresult));
748 goto finally;
749 }
750 buffer1ContentSize = zresult;
751 }
752 }
753
754 result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
755 parity ? buffer2ContentSize : buffer1ContentSize);
756
757 finally:
758 if (buffer2) {
759 PyMem_Free(buffer2);
760 }
761 if (buffer1) {
762 PyMem_Free(buffer1);
763 }
764
765 if (dctx) {
766 ZSTD_freeDCtx(dctx);
767 }
768
769 return result;
770 }
771
/* Describes one compressed zstd frame to decompress and its expected output size. */
typedef struct {
	/* Pointer to compressed frame data. Not owned by this struct. */
	void* sourceData;
	/* Length of compressed frame data in bytes. */
	size_t sourceSize;
	/* Expected decompressed size. 0 means unknown (resolved later from the
	   frame header by the worker). */
	unsigned long long destSize;
} FramePointer;

/* A collection of frames to decompress plus their aggregate compressed size. */
typedef struct {
	/* Array of frame descriptors. */
	FramePointer* frames;
	/* Number of entries in ``frames``. */
	Py_ssize_t framesSize;
	/* Sum of all frames' sourceSize; used to split work among threads. */
	unsigned long long compressedSize;
} FrameSources;

/* One output memory region produced by a worker, with per-frame segments. */
typedef struct {
	/* Backing memory for decompressed output. malloc()-allocated. */
	void* dest;
	/* Allocated (later trimmed to used) size of ``dest``. */
	Py_ssize_t destSize;
	/* (offset, length) records locating each frame's output within ``dest``. */
	BufferSegment* segments;
	/* Number of entries in ``segments``. */
	Py_ssize_t segmentsSize;
} DestBuffer;

/* Error codes a decompression worker can record in WorkerState.error. */
typedef enum {
	WorkerError_none = 0,
	WorkerError_zstd = 1,
	WorkerError_memory = 2,
	WorkerError_sizeMismatch = 3,
	WorkerError_unknownSize = 4,
} WorkerError;

/* Per-thread state for decompress_worker(). One instance per worker thread. */
typedef struct {
	/* Source records and length */
	FramePointer* framePointers;
	/* Which records to process. */
	Py_ssize_t startOffset;
	Py_ssize_t endOffset;
	unsigned long long totalSourceSize;

	/* Compression state and settings. */
	ZSTD_DCtx* dctx;
	ZSTD_DDict* ddict;
	int requireOutputSizes;

	/* Output storage. */
	DestBuffer* destBuffers;
	Py_ssize_t destCount;

	/* Item that error occurred on. */
	Py_ssize_t errorOffset;
	/* If an error occurred. */
	WorkerError error;
	/* result from zstd decompression operation */
	size_t zresult;
} WorkerState;
823
/*
 * Decompress frames [state->startOffset, state->endOffset] into one or more
 * DestBuffers recorded on ``state``.
 *
 * Runs without the GIL (may execute on a zstd pool thread), so it must not
 * touch any Python APIs. Errors are reported by setting state->error (and
 * errorOffset/zresult); the caller translates them into Python exceptions.
 *
 * Output memory is malloc()/calloc() allocated; ownership is later
 * transferred to BufferWithSegments instances (which then free() it) or
 * freed by the caller's cleanup on error.
 */
static void decompress_worker(WorkerState* state) {
	size_t allocationSize;
	DestBuffer* destBuffer;
	Py_ssize_t frameIndex;
	/* Index of the next segment slot within the current DestBuffer. */
	Py_ssize_t localOffset = 0;
	/* First frame index written into the current DestBuffer. */
	Py_ssize_t currentBufferStartIndex = state->startOffset;
	Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
	void* tmpBuf;
	/* Write position within the current DestBuffer's memory. */
	Py_ssize_t destOffset = 0;
	FramePointer* framePointers = state->framePointers;
	size_t zresult;
	/* NOTE(review): accumulated below but never read afterwards. */
	unsigned long long totalOutputSize = 0;

	assert(NULL == state->destBuffers);
	assert(0 == state->destCount);
	assert(state->endOffset - state->startOffset >= 0);

	/*
	 * We need to allocate a buffer to hold decompressed data. How we do this
	 * depends on what we know about the output. The following scenarios are
	 * possible:
	 *
	 * 1. All structs defining frames declare the output size.
	 * 2. The decompressed size is embedded within the zstd frame.
	 * 3. The decompressed size is not stored anywhere.
	 *
	 * For now, we only support #1 and #2.
	 */

	/* Resolve output segments. */
	for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
		FramePointer* fp = &framePointers[frameIndex];

		if (0 == fp->destSize) {
			/* Fall back to the size recorded in the frame header. */
			fp->destSize = ZSTD_getDecompressedSize(fp->sourceData, fp->sourceSize);
			if (0 == fp->destSize && state->requireOutputSizes) {
				state->error = WorkerError_unknownSize;
				state->errorOffset = frameIndex;
				return;
			}
		}

		totalOutputSize += fp->destSize;
	}

	state->destBuffers = calloc(1, sizeof(DestBuffer));
	if (NULL == state->destBuffers) {
		state->error = WorkerError_memory;
		return;
	}

	state->destCount = 1;

	destBuffer = &state->destBuffers[state->destCount - 1];

	assert(framePointers[state->startOffset].destSize > 0); /* For now. */

	/* Size the first buffer off the compressed input size (rounded up to a
	   power of 2), growing it if the first frame alone needs more. */
	allocationSize = roundpow2(state->totalSourceSize);

	if (framePointers[state->startOffset].destSize > allocationSize) {
		allocationSize = roundpow2(framePointers[state->startOffset].destSize);
	}

	destBuffer->dest = malloc(allocationSize);
	if (NULL == destBuffer->dest) {
		state->error = WorkerError_memory;
		return;
	}

	destBuffer->destSize = allocationSize;

	destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
	if (NULL == destBuffer->segments) {
		/* Caller will free state->dest as part of cleanup. */
		state->error = WorkerError_memory;
		return;
	}

	destBuffer->segmentsSize = remainingItems;

	for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
		const void* source = framePointers[frameIndex].sourceData;
		const size_t sourceSize = framePointers[frameIndex].sourceSize;
		void* dest;
		const size_t decompressedSize = framePointers[frameIndex].destSize;
		size_t destAvailable = destBuffer->destSize - destOffset;

		assert(decompressedSize > 0); /* For now. */

		/*
		 * Not enough space in current buffer. Finish current before and allocate and
		 * switch to a new one.
		 */
		if (decompressedSize > destAvailable) {
			/*
			 * Shrinking the destination buffer is optional. But it should be cheap,
			 * so we just do it.
			 */
			if (destAvailable) {
				tmpBuf = realloc(destBuffer->dest, destOffset);
				if (NULL == tmpBuf) {
					state->error = WorkerError_memory;
					return;
				}

				destBuffer->dest = tmpBuf;
				destBuffer->destSize = destOffset;
			}

			/* Truncate segments buffer. */
			tmpBuf = realloc(destBuffer->segments,
				(frameIndex - currentBufferStartIndex) * sizeof(BufferSegment));
			if (NULL == tmpBuf) {
				state->error = WorkerError_memory;
				return;
			}

			destBuffer->segments = tmpBuf;
			destBuffer->segmentsSize = frameIndex - currentBufferStartIndex;

			/* Grow space for new DestBuffer. */
			tmpBuf = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
			if (NULL == tmpBuf) {
				state->error = WorkerError_memory;
				return;
			}

			state->destBuffers = tmpBuf;
			state->destCount++;

			destBuffer = &state->destBuffers[state->destCount - 1];

			/* Don't take any chances with non-NULL pointers. */
			memset(destBuffer, 0, sizeof(DestBuffer));

			allocationSize = roundpow2(state->totalSourceSize);

			if (decompressedSize > allocationSize) {
				allocationSize = roundpow2(decompressedSize);
			}

			destBuffer->dest = malloc(allocationSize);
			if (NULL == destBuffer->dest) {
				state->error = WorkerError_memory;
				return;
			}

			destBuffer->destSize = allocationSize;
			destAvailable = allocationSize;
			destOffset = 0;
			localOffset = 0;

			/* Over-allocates segments (remainingItems may exceed what this
			   buffer will hold); trimmed on the next rollover or left as-is. */
			destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
			if (NULL == destBuffer->segments) {
				state->error = WorkerError_memory;
				return;
			}

			destBuffer->segmentsSize = remainingItems;
			currentBufferStartIndex = frameIndex;
		}

		dest = (char*)destBuffer->dest + destOffset;

		if (state->ddict) {
			zresult = ZSTD_decompress_usingDDict(state->dctx, dest, decompressedSize,
				source, sourceSize, state->ddict);
		}
		else {
			zresult = ZSTD_decompressDCtx(state->dctx, dest, decompressedSize,
				source, sourceSize);
		}

		if (ZSTD_isError(zresult)) {
			state->error = WorkerError_zstd;
			state->zresult = zresult;
			state->errorOffset = frameIndex;
			return;
		}
		else if (zresult != decompressedSize) {
			/* Declared/expected size did not match what zstd produced. */
			state->error = WorkerError_sizeMismatch;
			state->zresult = zresult;
			state->errorOffset = frameIndex;
			return;
		}

		destBuffer->segments[localOffset].offset = destOffset;
		destBuffer->segments[localOffset].length = decompressedSize;
		destOffset += zresult;
		localOffset++;
		remainingItems--;
	}

	/* Trim the final buffer down to the bytes actually written. */
	if (destBuffer->destSize > destOffset) {
		tmpBuf = realloc(destBuffer->dest, destOffset);
		if (NULL == tmpBuf) {
			state->error = WorkerError_memory;
			return;
		}

		destBuffer->dest = tmpBuf;
		destBuffer->destSize = destOffset;
	}
}
1028
/*
 * Decompress a set of frames, optionally in parallel, returning a
 * ZstdBufferWithSegmentsCollection holding all output.
 *
 * Splits frames among up to ``threadCount`` workers by roughly equal
 * compressed byte counts, runs decompress_worker() for each (via a zstd
 * thread pool when threadCount > 1), then converts each worker's malloc'd
 * DestBuffers into BufferWithSegments instances, transferring memory
 * ownership to them.
 *
 * Returns NULL with a Python exception set on any failure.
 */
ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames,
	unsigned int threadCount) {
	void* dictData = NULL;
	size_t dictSize = 0;
	Py_ssize_t i = 0;
	int errored = 0;
	Py_ssize_t segmentsCount;
	ZstdBufferWithSegments* bws = NULL;
	PyObject* resultArg = NULL;
	Py_ssize_t resultIndex;
	ZstdBufferWithSegmentsCollection* result = NULL;
	FramePointer* framePointers = frames->frames;
	/* Compressed bytes accumulated for the worker currently being filled. */
	unsigned long long workerBytes = 0;
	int currentThread = 0;
	Py_ssize_t workerStartOffset = 0;
	POOL_ctx* pool = NULL;
	WorkerState* workerStates = NULL;
	unsigned long long bytesPerWorker;

	/* Caller should normalize 0 and negative values to 1 or larger. */
	assert(threadCount >= 1);

	/* More threads than inputs makes no sense under any conditions. */
	threadCount = frames->framesSize < threadCount ? (unsigned int)frames->framesSize
												   : threadCount;

	/* TODO lower thread count if input size is too small and threads would just
	   add overhead. */

	if (decompressor->dict) {
		dictData = decompressor->dict->dictData;
		dictSize = decompressor->dict->dictSize;
	}

	/* Lazily create (and cache on the decompressor) a digested dictionary
	   shared by all workers. */
	if (dictData && !decompressor->ddict) {
		Py_BEGIN_ALLOW_THREADS
		decompressor->ddict = ZSTD_createDDict_byReference(dictData, dictSize);
		Py_END_ALLOW_THREADS

		if (!decompressor->ddict) {
			PyErr_SetString(ZstdError, "could not create decompression dict");
			return NULL;
		}
	}

	/* If threadCount==1, we don't start a thread pool. But we do leverage the
	   same API for dispatching work. */
	workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
	if (NULL == workerStates) {
		PyErr_NoMemory();
		goto finally;
	}

	memset(workerStates, 0, threadCount * sizeof(WorkerState));

	if (threadCount > 1) {
		pool = POOL_create(threadCount, 1);
		if (NULL == pool) {
			PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
			goto finally;
		}
	}

	bytesPerWorker = frames->compressedSize / threadCount;

	for (i = 0; i < threadCount; i++) {
		workerStates[i].dctx = ZSTD_createDCtx();
		if (NULL == workerStates[i].dctx) {
			PyErr_NoMemory();
			goto finally;
		}

		/* Seed each worker context from the decompressor's context. */
		ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx);

		workerStates[i].ddict = decompressor->ddict;
		workerStates[i].framePointers = framePointers;
		workerStates[i].requireOutputSizes = 1;
	}

	Py_BEGIN_ALLOW_THREADS
	/* There are many ways to split work among workers.

	   For now, we take a simple approach of splitting work so each worker
	   gets roughly the same number of input bytes. This will result in more
	   starvation than running N>threadCount jobs. But it avoids complications
	   around state tracking, which could involve extra locking.
	*/
	for (i = 0; i < frames->framesSize; i++) {
		workerBytes += frames->frames[i].sourceSize;

		/*
		 * The last worker/thread needs to handle all remaining work. Don't
		 * trigger it prematurely. Defer to the block outside of the loop.
		 * (But still process this loop so workerBytes is correct.)
		 */
		if (currentThread == threadCount - 1) {
			continue;
		}

		if (workerBytes >= bytesPerWorker) {
			workerStates[currentThread].startOffset = workerStartOffset;
			workerStates[currentThread].endOffset = i;
			workerStates[currentThread].totalSourceSize = workerBytes;

			if (threadCount > 1) {
				POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
			}
			else {
				decompress_worker(&workerStates[currentThread]);
			}
			currentThread++;
			workerStartOffset = i + 1;
			workerBytes = 0;
		}
	}

	/* Dispatch whatever is left over to the final worker. */
	if (workerBytes) {
		workerStates[currentThread].startOffset = workerStartOffset;
		workerStates[currentThread].endOffset = frames->framesSize - 1;
		workerStates[currentThread].totalSourceSize = workerBytes;

		if (threadCount > 1) {
			POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
		}
		else {
			decompress_worker(&workerStates[currentThread]);
		}
	}

	/* POOL_free() waits for queued work to complete. */
	if (threadCount > 1) {
		POOL_free(pool);
		pool = NULL;
	}
	Py_END_ALLOW_THREADS

	/* Translate the first worker error (if any) into a Python exception. */
	for (i = 0; i < threadCount; i++) {
		switch (workerStates[i].error) {
		case WorkerError_none:
			break;

		case WorkerError_zstd:
			PyErr_Format(ZstdError, "error decompressing item %zd: %s",
				workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
			errored = 1;
			break;

		case WorkerError_memory:
			PyErr_NoMemory();
			errored = 1;
			break;

		case WorkerError_sizeMismatch:
			PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %llu",
				workerStates[i].errorOffset, workerStates[i].zresult,
				framePointers[workerStates[i].errorOffset].destSize);
			errored = 1;
			break;

		case WorkerError_unknownSize:
			PyErr_Format(PyExc_ValueError, "could not determine decompressed size of item %zd",
				workerStates[i].errorOffset);
			errored = 1;
			break;

		default:
			PyErr_Format(ZstdError, "unhandled error type: %d; this is a bug",
				workerStates[i].error);
			errored = 1;
			break;
		}

		if (errored) {
			break;
		}
	}

	if (errored) {
		goto finally;
	}

	segmentsCount = 0;
	for (i = 0; i < threadCount; i++) {
		segmentsCount += workerStates[i].destCount;
	}

	resultArg = PyTuple_New(segmentsCount);
	if (NULL == resultArg) {
		goto finally;
	}

	resultIndex = 0;

	for (i = 0; i < threadCount; i++) {
		Py_ssize_t bufferIndex;
		WorkerState* state = &workerStates[i];

		for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
			DestBuffer* destBuffer = &state->destBuffers[bufferIndex];

			bws = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
				destBuffer->segments, destBuffer->segmentsSize);
			if (NULL == bws) {
				goto finally;
			}

			/*
			 * Memory for buffer and segments was allocated using malloc() in worker
			 * and the memory is transferred to the BufferWithSegments instance. So
			 * tell instance to use free() and NULL the reference in the state struct
			 * so it isn't freed below.
			 */
			bws->useFree = 1;
			destBuffer->dest = NULL;
			destBuffer->segments = NULL;

			/* PyTuple_SET_ITEM steals the reference to bws. */
			PyTuple_SET_ITEM(resultArg, resultIndex++, (PyObject*)bws);
		}
	}

	result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
		(PyObject*)&ZstdBufferWithSegmentsCollectionType, resultArg);

finally:
	Py_CLEAR(resultArg);

	if (workerStates) {
		for (i = 0; i < threadCount; i++) {
			Py_ssize_t bufferIndex;
			WorkerState* state = &workerStates[i];

			if (state->dctx) {
				ZSTD_freeDCtx(state->dctx);
			}

			for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
				if (state->destBuffers) {
					/*
					 * Will be NULL if memory transferred to a BufferWithSegments.
					 * Otherwise it is left over after an error occurred.
					 */
					free(state->destBuffers[bufferIndex].dest);
					free(state->destBuffers[bufferIndex].segments);
				}
			}

			free(state->destBuffers);
		}

		PyMem_Free(workerStates);
	}

	/* NOTE(review): pool is NULL here on success paths; presumably POOL_free
	   tolerates NULL — confirm against zstd's pool.c. */
	POOL_free(pool);

	return result;
}
1284
1285 PyDoc_STRVAR(Decompressor_multi_decompress_to_buffer__doc__,
1286 "Decompress multiple frames to output buffers\n"
1287 "\n"
1288 "Receives a ``BufferWithSegments``, a ``BufferWithSegmentsCollection`` or a\n"
1289 "list of bytes-like objects. Each item in the passed collection should be a\n"
1290 "compressed zstd frame.\n"
1291 "\n"
1292 "Unless ``decompressed_sizes`` is specified, the content size *must* be\n"
1293 "written into the zstd frame header. If ``decompressed_sizes`` is specified,\n"
1294 "it is an object conforming to the buffer protocol that represents an array\n"
1295 "of 64-bit unsigned integers in the machine's native format. Specifying\n"
1296 "``decompressed_sizes`` avoids a pre-scan of each frame to determine its\n"
1297 "output size.\n"
1298 "\n"
1299 "Returns a ``BufferWithSegmentsCollection`` containing the decompressed\n"
1300 "data. All decompressed data is allocated in a single memory buffer. The\n"
1301 "``BufferWithSegments`` instance tracks which objects are at which offsets\n"
1302 "and their respective lengths.\n"
1303 "\n"
1304 "The ``threads`` argument controls how many threads to use for operations.\n"
1305 "Negative values will use the same number of threads as logical CPUs on the\n"
1306 "machine.\n"
1307 );
1308
1309 static ZstdBufferWithSegmentsCollection* Decompressor_multi_decompress_to_buffer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
1310 static char* kwlist[] = {
1311 "frames",
1312 "decompressed_sizes",
1313 "threads",
1314 NULL
1315 };
1316
1317 PyObject* frames;
1318 Py_buffer frameSizes;
1319 int threads = 0;
1320 Py_ssize_t frameCount;
1321 Py_buffer* frameBuffers = NULL;
1322 FramePointer* framePointers = NULL;
1323 unsigned long long* frameSizesP = NULL;
1324 unsigned long long totalInputSize = 0;
1325 FrameSources frameSources;
1326 ZstdBufferWithSegmentsCollection* result = NULL;
1327 Py_ssize_t i;
1328
1329 memset(&frameSizes, 0, sizeof(frameSizes));
1330
1331 #if PY_MAJOR_VERSION >= 3
1332 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|y*i:multi_decompress_to_buffer",
1333 #else
1334 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s*i:multi_decompress_to_buffer",
1335 #endif
1336 kwlist, &frames, &frameSizes, &threads)) {
1337 return NULL;
1338 }
1339
1340 if (frameSizes.buf) {
1341 if (!PyBuffer_IsContiguous(&frameSizes, 'C') || frameSizes.ndim > 1) {
1342 PyErr_SetString(PyExc_ValueError, "decompressed_sizes buffer should be contiguous and have a single dimension");
1343 goto finally;
1344 }
1345
1346 frameSizesP = (unsigned long long*)frameSizes.buf;
1347 }
1348
1349 if (threads < 0) {
1350 threads = cpu_count();
1351 }
1352
1353 if (threads < 2) {
1354 threads = 1;
1355 }
1356
1357 if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsType)) {
1358 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)frames;
1359 frameCount = buffer->segmentCount;
1360
1361 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1362 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1363 frameCount * sizeof(unsigned long long), frameSizes.len);
1364 goto finally;
1365 }
1366
1367 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1368 if (!framePointers) {
1369 PyErr_NoMemory();
1370 goto finally;
1371 }
1372
1373 for (i = 0; i < frameCount; i++) {
1374 void* sourceData;
1375 unsigned long long sourceSize;
1376 unsigned long long decompressedSize = 0;
1377
1378 if (buffer->segments[i].offset + buffer->segments[i].length > buffer->dataSize) {
1379 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", i);
1380 goto finally;
1381 }
1382
1383 sourceData = (char*)buffer->data + buffer->segments[i].offset;
1384 sourceSize = buffer->segments[i].length;
1385 totalInputSize += sourceSize;
1386
1387 if (frameSizesP) {
1388 decompressedSize = frameSizesP[i];
1389 }
1390
1391 framePointers[i].sourceData = sourceData;
1392 framePointers[i].sourceSize = sourceSize;
1393 framePointers[i].destSize = decompressedSize;
1394 }
1395 }
1396 else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) {
1397 Py_ssize_t offset = 0;
1398 ZstdBufferWithSegments* buffer;
1399 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)frames;
1400
1401 frameCount = BufferWithSegmentsCollection_length(collection);
1402
1403 if (frameSizes.buf && frameSizes.len != frameCount) {
1404 PyErr_Format(PyExc_ValueError,
1405 "decompressed_sizes size mismatch; expected %zd; got %zd",
1406 frameCount * sizeof(unsigned long long), frameSizes.len);
1407 goto finally;
1408 }
1409
1410 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1411 if (NULL == framePointers) {
1412 PyErr_NoMemory();
1413 goto finally;
1414 }
1415
1416 /* Iterate the data structure directly because it is faster. */
1417 for (i = 0; i < collection->bufferCount; i++) {
1418 Py_ssize_t segmentIndex;
1419 buffer = collection->buffers[i];
1420
1421 for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) {
1422 if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) {
1423 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area",
1424 offset);
1425 goto finally;
1426 }
1427
1428 totalInputSize += buffer->segments[segmentIndex].length;
1429
1430 framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset;
1431 framePointers[offset].sourceSize = buffer->segments[segmentIndex].length;
1432 framePointers[offset].destSize = frameSizesP ? frameSizesP[offset] : 0;
1433
1434 offset++;
1435 }
1436 }
1437 }
1438 else if (PyList_Check(frames)) {
1439 frameCount = PyList_GET_SIZE(frames);
1440
1441 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1442 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1443 frameCount * sizeof(unsigned long long), frameSizes.len);
1444 goto finally;
1445 }
1446
1447 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1448 if (!framePointers) {
1449 PyErr_NoMemory();
1450 goto finally;
1451 }
1452
1453 /*
1454 * It is not clear whether Py_buffer.buf is still valid after
1455 * PyBuffer_Release. So, we hold a reference to all Py_buffer instances
1456 * for the duration of the operation.
1457 */
1458 frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer));
1459 if (NULL == frameBuffers) {
1460 PyErr_NoMemory();
1461 goto finally;
1462 }
1463
1464 memset(frameBuffers, 0, frameCount * sizeof(Py_buffer));
1465
1466 /* Do a pass to assemble info about our input buffers and output sizes. */
1467 for (i = 0; i < frameCount; i++) {
1468 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i),
1469 &frameBuffers[i], PyBUF_CONTIG_RO)) {
1470 PyErr_Clear();
1471 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1472 goto finally;
1473 }
1474
1475 totalInputSize += frameBuffers[i].len;
1476
1477 framePointers[i].sourceData = frameBuffers[i].buf;
1478 framePointers[i].sourceSize = frameBuffers[i].len;
1479 framePointers[i].destSize = frameSizesP ? frameSizesP[i] : 0;
1480 }
1481 }
1482 else {
1483 PyErr_SetString(PyExc_TypeError, "argument must be list or BufferWithSegments");
1484 goto finally;
1485 }
1486
1487 /* We now have an array with info about our inputs and outputs. Feed it into
1488 our generic decompression function. */
1489 frameSources.frames = framePointers;
1490 frameSources.framesSize = frameCount;
1491 frameSources.compressedSize = totalInputSize;
1492
1493 result = decompress_from_framesources(self, &frameSources, threads);
1494
1495 finally:
1496 if (frameSizes.buf) {
1497 PyBuffer_Release(&frameSizes);
1498 }
1499 PyMem_Free(framePointers);
1500
1501 if (frameBuffers) {
1502 for (i = 0; i < frameCount; i++) {
1503 PyBuffer_Release(&frameBuffers[i]);
1504 }
1505
1506 PyMem_Free(frameBuffers);
1507 }
604 1508
605 1509 return result;
606 1510 }
@@ -616,6 +1520,10 b' static PyMethodDef Decompressor_methods['
616 1520 Decompressor_read_from__doc__ },
617 1521 { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS,
618 1522 Decompressor_write_to__doc__ },
1523 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
1524 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
1525 { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer,
1526 METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ },
619 1527 { NULL, NULL }
620 1528 };
621 1529
@@ -26,11 +26,6 b' static void ZstdDecompressorIterator_dea'
26 26 self->buffer = NULL;
27 27 }
28 28
29 if (self->dstream) {
30 ZSTD_freeDStream(self->dstream);
31 self->dstream = NULL;
32 }
33
34 29 if (self->input.src) {
35 30 PyMem_Free((void*)self->input.src);
36 31 self->input.src = NULL;
@@ -50,6 +45,8 b' static DecompressorIteratorResult read_d'
50 45 DecompressorIteratorResult result;
51 46 size_t oldInputPos = self->input.pos;
52 47
48 assert(self->decompressor->dstream);
49
53 50 result.chunk = NULL;
54 51
55 52 chunk = PyBytes_FromStringAndSize(NULL, self->outSize);
@@ -63,7 +60,7 b' static DecompressorIteratorResult read_d'
63 60 self->output.pos = 0;
64 61
65 62 Py_BEGIN_ALLOW_THREADS
66 zresult = ZSTD_decompressStream(self->dstream, &self->output, &self->input);
63 zresult = ZSTD_decompressStream(self->decompressor->dstream, &self->output, &self->input);
67 64 Py_END_ALLOW_THREADS
68 65
69 66 /* We're done with the pointer. Nullify to prevent anyone from getting a
@@ -160,7 +157,7 b' read_from_source:'
160 157 PyErr_SetString(PyExc_ValueError,
161 158 "skip_bytes larger than first input chunk; "
162 159 "this scenario is currently unsupported");
163 Py_DecRef(readResult);
160 Py_XDECREF(readResult);
164 161 return NULL;
165 162 }
166 163
@@ -179,7 +176,7 b' read_from_source:'
179 176 else if (!self->readCount) {
180 177 self->finishedInput = 1;
181 178 self->finishedOutput = 1;
182 Py_DecRef(readResult);
179 Py_XDECREF(readResult);
183 180 PyErr_SetString(PyExc_StopIteration, "empty input");
184 181 return NULL;
185 182 }
@@ -188,7 +185,7 b' read_from_source:'
188 185 }
189 186
190 187 /* We've copied the data managed by memory. Discard the Python object. */
191 Py_DecRef(readResult);
188 Py_XDECREF(readResult);
192 189 }
193 190
194 191 result = read_decompressor_iterator(self);
@@ -8,20 +8,27 b''
8 8
9 9 #define PY_SSIZE_T_CLEAN
10 10 #include <Python.h>
11 #include "structmember.h"
11 12
12 13 #define ZSTD_STATIC_LINKING_ONLY
13 14 #define ZDICT_STATIC_LINKING_ONLY
14 15 #include "mem.h"
15 16 #include "zstd.h"
16 17 #include "zdict.h"
18 #include "zstdmt_compress.h"
17 19
18 #define PYTHON_ZSTANDARD_VERSION "0.6.0"
20 #define PYTHON_ZSTANDARD_VERSION "0.8.1"
19 21
20 22 typedef enum {
21 23 compressorobj_flush_finish,
22 24 compressorobj_flush_block,
23 25 } CompressorObj_Flush;
24 26
27 /*
28 Represents a CompressionParameters type.
29
30 This type is basically a wrapper around ZSTD_compressionParameters.
31 */
25 32 typedef struct {
26 33 PyObject_HEAD
27 34 unsigned windowLog;
@@ -35,34 +42,70 b' typedef struct {'
35 42
36 43 extern PyTypeObject CompressionParametersType;
37 44
45 /*
46 Represents a FrameParameters type.
47
48 This type is basically a wrapper around ZSTD_frameParams.
49 */
38 50 typedef struct {
39 51 PyObject_HEAD
40 unsigned selectivityLevel;
41 int compressionLevel;
42 unsigned notificationLevel;
52 unsigned long long frameContentSize;
53 unsigned windowSize;
43 54 unsigned dictID;
44 } DictParametersObject;
55 char checksumFlag;
56 } FrameParametersObject;
57
58 extern PyTypeObject FrameParametersType;
45 59
46 extern PyTypeObject DictParametersType;
60 /*
61 Represents a ZstdCompressionDict type.
47 62
63 Instances hold data used for a zstd compression dictionary.
64 */
48 65 typedef struct {
49 66 PyObject_HEAD
50 67
68 /* Pointer to dictionary data. Owned by self. */
51 69 void* dictData;
70 /* Size of dictionary data. */
52 71 size_t dictSize;
72 /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */
73 unsigned k;
74 /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */
75 unsigned d;
53 76 } ZstdCompressionDict;
54 77
55 78 extern PyTypeObject ZstdCompressionDictType;
56 79
80 /*
81 Represents a ZstdCompressor type.
82 */
57 83 typedef struct {
58 84 PyObject_HEAD
59 85
86 /* Configured compression level. Should be always set. */
60 87 int compressionLevel;
88 /* Number of threads to use for operations. */
89 unsigned int threads;
90 /* Pointer to compression dictionary to use. NULL if not using dictionary
91 compression. */
61 92 ZstdCompressionDict* dict;
93 /* Compression context to use. Populated during object construction. NULL
94 if using multi-threaded compression. */
62 95 ZSTD_CCtx* cctx;
96 /* Multi-threaded compression context to use. Populated during object
97 construction. NULL if not using multi-threaded compression. */
98 ZSTDMT_CCtx* mtcctx;
99 /* Digest compression dictionary. NULL initially. Populated on first use. */
63 100 ZSTD_CDict* cdict;
101 /* Low-level compression parameter control. NULL unless passed to
102 constructor. Takes precedence over `compressionLevel` if defined. */
64 103 CompressionParametersObject* cparams;
104 /* Controls zstd frame options. */
65 105 ZSTD_frameParameters fparams;
106 /* Holds state for streaming compression. Shared across all invocation.
107 Populated on first use. */
108 ZSTD_CStream* cstream;
66 109 } ZstdCompressor;
67 110
68 111 extern PyTypeObject ZstdCompressorType;
@@ -71,7 +114,6 b' typedef struct {'
71 114 PyObject_HEAD
72 115
73 116 ZstdCompressor* compressor;
74 ZSTD_CStream* cstream;
75 117 ZSTD_outBuffer output;
76 118 int finished;
77 119 } ZstdCompressionObj;
@@ -85,7 +127,6 b' typedef struct {'
85 127 PyObject* writer;
86 128 Py_ssize_t sourceSize;
87 129 size_t outSize;
88 ZSTD_CStream* cstream;
89 130 int entered;
90 131 } ZstdCompressionWriter;
91 132
@@ -102,7 +143,6 b' typedef struct {'
102 143 size_t inSize;
103 144 size_t outSize;
104 145
105 ZSTD_CStream* cstream;
106 146 ZSTD_inBuffer input;
107 147 ZSTD_outBuffer output;
108 148 int finishedOutput;
@@ -115,10 +155,11 b' extern PyTypeObject ZstdCompressorIterat'
115 155 typedef struct {
116 156 PyObject_HEAD
117 157
118 ZSTD_DCtx* refdctx;
158 ZSTD_DCtx* dctx;
119 159
120 160 ZstdCompressionDict* dict;
121 161 ZSTD_DDict* ddict;
162 ZSTD_DStream* dstream;
122 163 } ZstdDecompressor;
123 164
124 165 extern PyTypeObject ZstdDecompressorType;
@@ -127,7 +168,6 b' typedef struct {'
127 168 PyObject_HEAD
128 169
129 170 ZstdDecompressor* decompressor;
130 ZSTD_DStream* dstream;
131 171 int finished;
132 172 } ZstdDecompressionObj;
133 173
@@ -139,7 +179,6 b' typedef struct {'
139 179 ZstdDecompressor* decompressor;
140 180 PyObject* writer;
141 181 size_t outSize;
142 ZSTD_DStream* dstream;
143 182 int entered;
144 183 } ZstdDecompressionWriter;
145 184
@@ -155,7 +194,6 b' typedef struct {'
155 194 size_t inSize;
156 195 size_t outSize;
157 196 size_t skipBytes;
158 ZSTD_DStream* dstream;
159 197 ZSTD_inBuffer input;
160 198 ZSTD_outBuffer output;
161 199 Py_ssize_t readCount;
@@ -170,9 +208,78 b' typedef struct {'
170 208 PyObject* chunk;
171 209 } DecompressorIteratorResult;
172 210
211 typedef struct {
212 unsigned long long offset;
213 unsigned long long length;
214 } BufferSegment;
215
216 typedef struct {
217 PyObject_HEAD
218
219 PyObject* parent;
220 BufferSegment* segments;
221 Py_ssize_t segmentCount;
222 } ZstdBufferSegments;
223
224 extern PyTypeObject ZstdBufferSegmentsType;
225
226 typedef struct {
227 PyObject_HEAD
228
229 PyObject* parent;
230 void* data;
231 Py_ssize_t dataSize;
232 unsigned long long offset;
233 } ZstdBufferSegment;
234
235 extern PyTypeObject ZstdBufferSegmentType;
236
237 typedef struct {
238 PyObject_HEAD
239
240 Py_buffer parent;
241 void* data;
242 unsigned long long dataSize;
243 BufferSegment* segments;
244 Py_ssize_t segmentCount;
245 int useFree;
246 } ZstdBufferWithSegments;
247
248 extern PyTypeObject ZstdBufferWithSegmentsType;
249
250 /**
251 * An ordered collection of BufferWithSegments exposed as a squashed collection.
252 *
253 * This type provides a virtual view spanning multiple BufferWithSegments
254 * instances. It allows multiple instances to be "chained" together and
255 * exposed as a single collection. e.g. if there are 2 buffers holding
256 * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer.
257 */
258 typedef struct {
259 PyObject_HEAD
260
261 /* An array of buffers that should be exposed through this instance. */
262 ZstdBufferWithSegments** buffers;
263 /* Number of elements in buffers array. */
264 Py_ssize_t bufferCount;
265 /* Array of first offset in each buffer instance. 0th entry corresponds
266 to number of elements in the 0th buffer. 1st entry corresponds to the
267 sum of elements in 0th and 1st buffers. */
268 Py_ssize_t* firstElements;
269 } ZstdBufferWithSegmentsCollection;
270
271 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
272
173 273 void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams);
174 274 CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args);
275 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args);
175 276 PyObject* estimate_compression_context_size(PyObject* self, PyObject* args);
176 ZSTD_CStream* CStream_from_ZstdCompressor(ZstdCompressor* compressor, Py_ssize_t sourceSize);
177 ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor);
277 int init_cstream(ZstdCompressor* compressor, unsigned long long sourceSize);
278 int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize);
279 int init_dstream(ZstdDecompressor* decompressor);
178 280 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
281 ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
282 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize);
283 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*);
284 int cpu_count(void);
285 size_t roundpow2(size_t);
@@ -9,6 +9,7 b' from __future__ import absolute_import'
9 9 import cffi
10 10 import distutils.ccompiler
11 11 import os
12 import re
12 13 import subprocess
13 14 import tempfile
14 15
@@ -19,17 +20,28 b" SOURCES = ['zstd/%s' % p for p in ("
19 20 'common/entropy_common.c',
20 21 'common/error_private.c',
21 22 'common/fse_decompress.c',
23 'common/pool.c',
24 'common/threading.c',
22 25 'common/xxhash.c',
23 26 'common/zstd_common.c',
24 27 'compress/fse_compress.c',
25 28 'compress/huf_compress.c',
26 29 'compress/zstd_compress.c',
30 'compress/zstdmt_compress.c',
27 31 'decompress/huf_decompress.c',
28 32 'decompress/zstd_decompress.c',
33 'dictBuilder/cover.c',
29 34 'dictBuilder/divsufsort.c',
30 35 'dictBuilder/zdict.c',
31 36 )]
32 37
38 # Headers whose preprocessed output will be fed into cdef().
39 HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
40 ('zstd.h',),
41 ('compress', 'zstdmt_compress.h'),
42 ('dictBuilder', 'zdict.h'),
43 )]
44
33 45 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
34 46 'zstd',
35 47 'zstd/common',
@@ -53,56 +65,123 b" if compiler.compiler_type == 'unix':"
53 65 args.extend([
54 66 '-E',
55 67 '-DZSTD_STATIC_LINKING_ONLY',
68 '-DZDICT_STATIC_LINKING_ONLY',
56 69 ])
57 70 elif compiler.compiler_type == 'msvc':
58 71 args = [compiler.cc]
59 72 args.extend([
60 73 '/EP',
61 74 '/DZSTD_STATIC_LINKING_ONLY',
75 '/DZDICT_STATIC_LINKING_ONLY',
62 76 ])
63 77 else:
64 78 raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
65 79
66 # zstd.h includes <stddef.h>, which is also included by cffi's boilerplate.
67 # This can lead to duplicate declarations. So we strip this include from the
68 # preprocessor invocation.
69
70 with open(os.path.join(HERE, 'zstd', 'zstd.h'), 'rb') as fh:
71 lines = [l for l in fh if not l.startswith(b'#include <stddef.h>')]
72
73 fd, input_file = tempfile.mkstemp(suffix='.h')
74 os.write(fd, b''.join(lines))
75 os.close(fd)
80 def preprocess(path):
81 with open(path, 'rb') as fh:
82 lines = []
83 for l in fh:
84 # zstd.h includes <stddef.h>, which is also included by cffi's
85 # boilerplate. This can lead to duplicate declarations. So we strip
86 # this include from the preprocessor invocation.
87 #
88 # The same things happens for including zstd.h, so give it the same
89 # treatment.
90 #
91 # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline
92 # #define in zstdmt_compress.h and results in a compiler warning. So drop
93 # the inline #define.
94 if l.startswith((b'#include <stddef.h>',
95 b'#include "zstd.h"',
96 b'#define ZSTD_STATIC_LINKING_ONLY')):
97 continue
76 98
77 args.append(input_file)
99 # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't
100 # important so just filter it out.
101 if l.startswith(b'ZSTDLIB_API'):
102 l = l[len(b'ZSTDLIB_API '):]
103
104 lines.append(l)
105
106 fd, input_file = tempfile.mkstemp(suffix='.h')
107 os.write(fd, b''.join(lines))
108 os.close(fd)
78 109
79 try:
80 process = subprocess.Popen(args, stdout=subprocess.PIPE)
81 output = process.communicate()[0]
82 ret = process.poll()
83 if ret:
84 raise Exception('preprocessor exited with error')
85 finally:
86 os.unlink(input_file)
110 try:
111 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE)
112 output = process.communicate()[0]
113 ret = process.poll()
114 if ret:
115 raise Exception('preprocessor exited with error')
87 116
88 def normalize_output():
117 return output
118 finally:
119 os.unlink(input_file)
120
121
122 def normalize_output(output):
89 123 lines = []
90 124 for line in output.splitlines():
91 125 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
92 126 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
93 127 line = line[len(b'__attribute__ ((visibility ("default"))) '):]
94 128
129 if line.startswith(b'__attribute__((deprecated('):
130 continue
131 elif b'__declspec(deprecated(' in line:
132 continue
133
95 134 lines.append(line)
96 135
97 136 return b'\n'.join(lines)
98 137
138
99 139 ffi = cffi.FFI()
140 # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning
141 # when cffi uses the function. Since we statically link against zstd, even
142 # if we use the deprecated functions it shouldn't be a huge problem.
100 143 ffi.set_source('_zstd_cffi', '''
144 #include "mem.h"
101 145 #define ZSTD_STATIC_LINKING_ONLY
102 146 #include "zstd.h"
147 #define ZDICT_STATIC_LINKING_ONLY
148 #define ZDICT_DISABLE_DEPRECATE_WARNINGS
149 #include "zdict.h"
150 #include "zstdmt_compress.h"
103 151 ''', sources=SOURCES, include_dirs=INCLUDE_DIRS)
104 152
105 ffi.cdef(normalize_output().decode('latin1'))
153 DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
154
155 sources = []
156
157 # Feed normalized preprocessor output for headers into the cdef parser.
158 for header in HEADERS:
159 preprocessed = preprocess(header)
160 sources.append(normalize_output(preprocessed))
161
162 # #define's are effectively erased as part of going through preprocessor.
163 # So perform a manual pass to re-add those to the cdef source.
164 with open(header, 'rb') as fh:
165 for line in fh:
166 line = line.strip()
167 m = DEFINE.match(line)
168 if not m:
169 continue
170
171 if m.group(1) == b'ZSTD_STATIC_LINKING_ONLY':
172 continue
173
174 # The parser doesn't like some constants with complex values.
175 if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'):
176 continue
177
178 # The ... is magic syntax by the cdef parser to resolve the
179 # value at compile time.
180 sources.append(m.group(0) + b' ...')
181
182 cdeflines = b'\n'.join(sources).splitlines()
183 cdeflines = [l for l in cdeflines if l.strip()]
184 ffi.cdef(b'\n'.join(cdeflines).decode('latin1'))
106 185
107 186 if __name__ == '__main__':
108 187 ffi.compile()
@@ -25,10 +25,15 b' if "--legacy" in sys.argv:'
25 25 # facilitate reuse in other projects.
26 26 extensions = [setup_zstd.get_c_extension(SUPPORT_LEGACY, 'zstd')]
27 27
28 install_requires = []
29
28 30 if cffi:
29 31 import make_cffi
30 32 extensions.append(make_cffi.ffi.distutils_extension())
31 33
34 # Need change in 1.8 for ffi.from_buffer() behavior.
35 install_requires.append('cffi>=1.8')
36
32 37 version = None
33 38
34 39 with open('c-ext/python-zstandard.h', 'r') as fh:
@@ -62,8 +67,10 b' setup('
62 67 'Programming Language :: Python :: 3.3',
63 68 'Programming Language :: Python :: 3.4',
64 69 'Programming Language :: Python :: 3.5',
70 'Programming Language :: Python :: 3.6',
65 71 ],
66 72 keywords='zstandard zstd compression',
67 73 ext_modules=extensions,
68 74 test_suite='tests',
75 install_requires=install_requires,
69 76 )
@@ -12,18 +12,23 b" zstd_sources = ['zstd/%s' % p for p in ("
12 12 'common/entropy_common.c',
13 13 'common/error_private.c',
14 14 'common/fse_decompress.c',
15 'common/pool.c',
16 'common/threading.c',
15 17 'common/xxhash.c',
16 18 'common/zstd_common.c',
17 19 'compress/fse_compress.c',
18 20 'compress/huf_compress.c',
19 21 'compress/zstd_compress.c',
22 'compress/zstdmt_compress.c',
20 23 'decompress/huf_decompress.c',
21 24 'decompress/zstd_decompress.c',
25 'dictBuilder/cover.c',
22 26 'dictBuilder/divsufsort.c',
23 27 'dictBuilder/zdict.c',
24 28 )]
25 29
26 30 zstd_sources_legacy = ['zstd/%s' % p for p in (
31 'deprecated/zbuff_common.c',
27 32 'deprecated/zbuff_compress.c',
28 33 'deprecated/zbuff_decompress.c',
29 34 'legacy/zstd_v01.c',
@@ -51,6 +56,7 b' zstd_includes_legacy = ['
51 56
52 57 ext_sources = [
53 58 'zstd.c',
59 'c-ext/bufferutil.c',
54 60 'c-ext/compressiondict.c',
55 61 'c-ext/compressobj.c',
56 62 'c-ext/compressor.c',
@@ -62,7 +68,7 b' ext_sources = ['
62 68 'c-ext/decompressor.c',
63 69 'c-ext/decompressoriterator.c',
64 70 'c-ext/decompressionwriter.c',
65 'c-ext/dictparams.c',
71 'c-ext/frameparams.c',
66 72 ]
67 73
68 74 zstd_depends = [
@@ -84,8 +90,13 b' def get_c_extension(support_legacy=False'
84 90
85 91 depends = [os.path.join(root, p) for p in zstd_depends]
86 92
93 extra_args = ['-DZSTD_MULTITHREAD']
94
95 if support_legacy:
96 extra_args.append('-DZSTD_LEGACY_SUPPORT=1')
97
87 98 # TODO compile with optimizations.
88 99 return Extension(name, sources,
89 100 include_dirs=include_dirs,
90 101 depends=depends,
91 extra_compile_args=["-DZSTD_LEGACY_SUPPORT=1"] if support_legacy else [])
102 extra_compile_args=extra_args)
@@ -1,4 +1,51 b''
1 import inspect
1 2 import io
3 import os
4 import types
5
6
7 def make_cffi(cls):
8 """Decorator to add CFFI versions of each test method."""
9
10 try:
11 import zstd_cffi
12 except ImportError:
13 return cls
14
15 # If CFFI version is available, dynamically construct test methods
16 # that use it.
17
18 for attr in dir(cls):
19 fn = getattr(cls, attr)
20 if not inspect.ismethod(fn) and not inspect.isfunction(fn):
21 continue
22
23 if not fn.__name__.startswith('test_'):
24 continue
25
26 name = '%s_cffi' % fn.__name__
27
28 # Replace the "zstd" symbol with the CFFI module instance. Then copy
29 # the function object and install it in a new attribute.
30 if isinstance(fn, types.FunctionType):
31 globs = dict(fn.__globals__)
32 globs['zstd'] = zstd_cffi
33 new_fn = types.FunctionType(fn.__code__, globs, name,
34 fn.__defaults__, fn.__closure__)
35 new_method = new_fn
36 else:
37 globs = dict(fn.__func__.func_globals)
38 globs['zstd'] = zstd_cffi
39 new_fn = types.FunctionType(fn.__func__.func_code, globs, name,
40 fn.__func__.func_defaults,
41 fn.__func__.func_closure)
42 new_method = types.UnboundMethodType(new_fn, fn.im_self,
43 fn.im_class)
44
45 setattr(cls, name, new_method)
46
47 return cls
48
2 49
3 50 class OpCountingBytesIO(io.BytesIO):
4 51 def __init__(self, *args, **kwargs):
@@ -13,3 +60,29 b' class OpCountingBytesIO(io.BytesIO):'
13 60 def write(self, data):
14 61 self._write_count += 1
15 62 return super(OpCountingBytesIO, self).write(data)
63
64
65 _source_files = []
66
67
68 def random_input_data():
69 """Obtain the raw content of source files.
70
71 This is used for generating "random" data to feed into fuzzing, since it is
72 faster than random content generation.
73 """
74 if _source_files:
75 return _source_files
76
77 for root, dirs, files in os.walk(os.path.dirname(__file__)):
78 dirs[:] = list(sorted(dirs))
79 for f in sorted(files):
80 try:
81 with open(os.path.join(root, f), 'rb') as fh:
82 data = fh.read()
83 if data:
84 _source_files.append(data)
85 except OSError:
86 pass
87
88 return _source_files
@@ -10,7 +10,10 b' except ImportError:'
10 10
11 11 import zstd
12 12
13 from .common import OpCountingBytesIO
13 from .common import (
14 make_cffi,
15 OpCountingBytesIO,
16 )
14 17
15 18
16 19 if sys.version_info[0] >= 3:
@@ -19,6 +22,13 b' else:'
19 22 next = lambda it: it.next()
20 23
21 24
25 def multithreaded_chunk_size(level, source_size=0):
26 params = zstd.get_compression_parameters(level, source_size)
27
28 return 1 << (params.window_log + 2)
29
30
31 @make_cffi
22 32 class TestCompressor(unittest.TestCase):
23 33 def test_level_bounds(self):
24 34 with self.assertRaises(ValueError):
@@ -28,18 +38,35 b' class TestCompressor(unittest.TestCase):'
28 38 zstd.ZstdCompressor(level=23)
29 39
30 40
41 @make_cffi
31 42 class TestCompressor_compress(unittest.TestCase):
32 def test_compress_empty(self):
33 cctx = zstd.ZstdCompressor(level=1)
34 cctx.compress(b'')
43 def test_multithreaded_unsupported(self):
44 samples = []
45 for i in range(128):
46 samples.append(b'foo' * 64)
47 samples.append(b'bar' * 64)
48
49 d = zstd.train_dictionary(8192, samples)
35 50
36 cctx = zstd.ZstdCompressor(level=22)
37 cctx.compress(b'')
51 cctx = zstd.ZstdCompressor(dict_data=d, threads=2)
52
53 with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both dictionaries and multi-threaded compression'):
54 cctx.compress(b'foo')
55
56 params = zstd.get_compression_parameters(3)
57 cctx = zstd.ZstdCompressor(compression_params=params, threads=2)
58 with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both compression parameters and multi-threaded compression'):
59 cctx.compress(b'foo')
38 60
39 61 def test_compress_empty(self):
40 62 cctx = zstd.ZstdCompressor(level=1)
41 self.assertEqual(cctx.compress(b''),
42 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
63 result = cctx.compress(b'')
64 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
65 params = zstd.get_frame_parameters(result)
66 self.assertEqual(params.content_size, 0)
67 self.assertEqual(params.window_size, 524288)
68 self.assertEqual(params.dict_id, 0)
69 self.assertFalse(params.has_checksum, 0)
43 70
44 71 # TODO should be temporary until https://github.com/facebook/zstd/issues/506
45 72 # is fixed.
@@ -59,6 +86,13 b' class TestCompressor_compress(unittest.T'
59 86 self.assertEqual(len(result), 999)
60 87 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
61 88
89 # This matches the test for read_from() below.
90 cctx = zstd.ZstdCompressor(level=1)
91 result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
92 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
93 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
94 b'\x02\x09\x00\x00\x6f')
95
62 96 def test_write_checksum(self):
63 97 cctx = zstd.ZstdCompressor(level=1)
64 98 no_checksum = cctx.compress(b'foobar')
@@ -67,6 +101,12 b' class TestCompressor_compress(unittest.T'
67 101
68 102 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
69 103
104 no_params = zstd.get_frame_parameters(no_checksum)
105 with_params = zstd.get_frame_parameters(with_checksum)
106
107 self.assertFalse(no_params.has_checksum)
108 self.assertTrue(with_params.has_checksum)
109
70 110 def test_write_content_size(self):
71 111 cctx = zstd.ZstdCompressor(level=1)
72 112 no_size = cctx.compress(b'foobar' * 256)
@@ -75,6 +115,11 b' class TestCompressor_compress(unittest.T'
75 115
76 116 self.assertEqual(len(with_size), len(no_size) + 1)
77 117
118 no_params = zstd.get_frame_parameters(no_size)
119 with_params = zstd.get_frame_parameters(with_size)
120 self.assertEqual(no_params.content_size, 0)
121 self.assertEqual(with_params.content_size, 1536)
122
78 123 def test_no_dict_id(self):
79 124 samples = []
80 125 for i in range(128):
@@ -92,6 +137,11 b' class TestCompressor_compress(unittest.T'
92 137
93 138 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
94 139
140 no_params = zstd.get_frame_parameters(no_dict_id)
141 with_params = zstd.get_frame_parameters(with_dict_id)
142 self.assertEqual(no_params.dict_id, 0)
143 self.assertEqual(with_params.dict_id, 1584102229)
144
95 145 def test_compress_dict_multiple(self):
96 146 samples = []
97 147 for i in range(128):
@@ -106,7 +156,23 b' class TestCompressor_compress(unittest.T'
106 156 for i in range(32):
107 157 cctx.compress(b'foo bar foobar foo bar foobar')
108 158
159 def test_multithreaded(self):
160 chunk_size = multithreaded_chunk_size(1)
161 source = b''.join([b'x' * chunk_size, b'y' * chunk_size])
109 162
163 cctx = zstd.ZstdCompressor(level=1, threads=2)
164 compressed = cctx.compress(source)
165
166 params = zstd.get_frame_parameters(compressed)
167 self.assertEqual(params.content_size, chunk_size * 2)
168 self.assertEqual(params.dict_id, 0)
169 self.assertFalse(params.has_checksum)
170
171 dctx = zstd.ZstdDecompressor()
172 self.assertEqual(dctx.decompress(compressed), source)
173
174
175 @make_cffi
110 176 class TestCompressor_compressobj(unittest.TestCase):
111 177 def test_compressobj_empty(self):
112 178 cctx = zstd.ZstdCompressor(level=1)
@@ -127,6 +193,12 b' class TestCompressor_compressobj(unittes'
127 193 self.assertEqual(len(result), 999)
128 194 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
129 195
196 params = zstd.get_frame_parameters(result)
197 self.assertEqual(params.content_size, 0)
198 self.assertEqual(params.window_size, 1048576)
199 self.assertEqual(params.dict_id, 0)
200 self.assertFalse(params.has_checksum)
201
130 202 def test_write_checksum(self):
131 203 cctx = zstd.ZstdCompressor(level=1)
132 204 cobj = cctx.compressobj()
@@ -135,6 +207,15 b' class TestCompressor_compressobj(unittes'
135 207 cobj = cctx.compressobj()
136 208 with_checksum = cobj.compress(b'foobar') + cobj.flush()
137 209
210 no_params = zstd.get_frame_parameters(no_checksum)
211 with_params = zstd.get_frame_parameters(with_checksum)
212 self.assertEqual(no_params.content_size, 0)
213 self.assertEqual(with_params.content_size, 0)
214 self.assertEqual(no_params.dict_id, 0)
215 self.assertEqual(with_params.dict_id, 0)
216 self.assertFalse(no_params.has_checksum)
217 self.assertTrue(with_params.has_checksum)
218
138 219 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
139 220
140 221 def test_write_content_size(self):
@@ -145,6 +226,15 b' class TestCompressor_compressobj(unittes'
145 226 cobj = cctx.compressobj(size=len(b'foobar' * 256))
146 227 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
147 228
229 no_params = zstd.get_frame_parameters(no_size)
230 with_params = zstd.get_frame_parameters(with_size)
231 self.assertEqual(no_params.content_size, 0)
232 self.assertEqual(with_params.content_size, 1536)
233 self.assertEqual(no_params.dict_id, 0)
234 self.assertEqual(with_params.dict_id, 0)
235 self.assertFalse(no_params.has_checksum)
236 self.assertFalse(with_params.has_checksum)
237
148 238 self.assertEqual(len(with_size), len(no_size) + 1)
149 239
150 240 def test_compress_after_finished(self):
@@ -186,7 +276,32 b' class TestCompressor_compressobj(unittes'
186 276 header = trailing[0:3]
187 277 self.assertEqual(header, b'\x01\x00\x00')
188 278
279 def test_multithreaded(self):
280 source = io.BytesIO()
281 source.write(b'a' * 1048576)
282 source.write(b'b' * 1048576)
283 source.write(b'c' * 1048576)
284 source.seek(0)
189 285
286 cctx = zstd.ZstdCompressor(level=1, threads=2)
287 cobj = cctx.compressobj()
288
289 chunks = []
290 while True:
291 d = source.read(8192)
292 if not d:
293 break
294
295 chunks.append(cobj.compress(d))
296
297 chunks.append(cobj.flush())
298
299 compressed = b''.join(chunks)
300
301 self.assertEqual(len(compressed), 295)
302
303
304 @make_cffi
190 305 class TestCompressor_copy_stream(unittest.TestCase):
191 306 def test_no_read(self):
192 307 source = object()
@@ -229,6 +344,12 b' class TestCompressor_copy_stream(unittes'
229 344 self.assertEqual(r, 255 * 16384)
230 345 self.assertEqual(w, 999)
231 346
347 params = zstd.get_frame_parameters(dest.getvalue())
348 self.assertEqual(params.content_size, 0)
349 self.assertEqual(params.window_size, 1048576)
350 self.assertEqual(params.dict_id, 0)
351 self.assertFalse(params.has_checksum)
352
232 353 def test_write_checksum(self):
233 354 source = io.BytesIO(b'foobar')
234 355 no_checksum = io.BytesIO()
@@ -244,6 +365,15 b' class TestCompressor_copy_stream(unittes'
244 365 self.assertEqual(len(with_checksum.getvalue()),
245 366 len(no_checksum.getvalue()) + 4)
246 367
368 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
369 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
370 self.assertEqual(no_params.content_size, 0)
371 self.assertEqual(with_params.content_size, 0)
372 self.assertEqual(no_params.dict_id, 0)
373 self.assertEqual(with_params.dict_id, 0)
374 self.assertFalse(no_params.has_checksum)
375 self.assertTrue(with_params.has_checksum)
376
247 377 def test_write_content_size(self):
248 378 source = io.BytesIO(b'foobar' * 256)
249 379 no_size = io.BytesIO()
@@ -268,6 +398,15 b' class TestCompressor_copy_stream(unittes'
268 398 self.assertEqual(len(with_size.getvalue()),
269 399 len(no_size.getvalue()) + 1)
270 400
401 no_params = zstd.get_frame_parameters(no_size.getvalue())
402 with_params = zstd.get_frame_parameters(with_size.getvalue())
403 self.assertEqual(no_params.content_size, 0)
404 self.assertEqual(with_params.content_size, 1536)
405 self.assertEqual(no_params.dict_id, 0)
406 self.assertEqual(with_params.dict_id, 0)
407 self.assertFalse(no_params.has_checksum)
408 self.assertFalse(with_params.has_checksum)
409
271 410 def test_read_write_size(self):
272 411 source = OpCountingBytesIO(b'foobarfoobar')
273 412 dest = OpCountingBytesIO()
@@ -279,6 +418,36 b' class TestCompressor_copy_stream(unittes'
279 418 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
280 419 self.assertEqual(dest._write_count, len(dest.getvalue()))
281 420
421 def test_multithreaded(self):
422 source = io.BytesIO()
423 source.write(b'a' * 1048576)
424 source.write(b'b' * 1048576)
425 source.write(b'c' * 1048576)
426 source.seek(0)
427
428 dest = io.BytesIO()
429 cctx = zstd.ZstdCompressor(threads=2)
430 r, w = cctx.copy_stream(source, dest)
431 self.assertEqual(r, 3145728)
432 self.assertEqual(w, 295)
433
434 params = zstd.get_frame_parameters(dest.getvalue())
435 self.assertEqual(params.content_size, 0)
436 self.assertEqual(params.dict_id, 0)
437 self.assertFalse(params.has_checksum)
438
439 # Writing content size and checksum works.
440 cctx = zstd.ZstdCompressor(threads=2, write_content_size=True,
441 write_checksum=True)
442 dest = io.BytesIO()
443 source.seek(0)
444 cctx.copy_stream(source, dest, size=len(source.getvalue()))
445
446 params = zstd.get_frame_parameters(dest.getvalue())
447 self.assertEqual(params.content_size, 3145728)
448 self.assertEqual(params.dict_id, 0)
449 self.assertTrue(params.has_checksum)
450
282 451
283 452 def compress(data, level):
284 453 buffer = io.BytesIO()
@@ -288,18 +457,25 b' def compress(data, level):'
288 457 return buffer.getvalue()
289 458
290 459
460 @make_cffi
291 461 class TestCompressor_write_to(unittest.TestCase):
292 462 def test_empty(self):
293 self.assertEqual(compress(b'', 1),
294 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
463 result = compress(b'', 1)
464 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
465
466 params = zstd.get_frame_parameters(result)
467 self.assertEqual(params.content_size, 0)
468 self.assertEqual(params.window_size, 524288)
469 self.assertEqual(params.dict_id, 0)
470 self.assertFalse(params.has_checksum)
295 471
296 472 def test_multiple_compress(self):
297 473 buffer = io.BytesIO()
298 474 cctx = zstd.ZstdCompressor(level=5)
299 475 with cctx.write_to(buffer) as compressor:
300 compressor.write(b'foo')
301 compressor.write(b'bar')
302 compressor.write(b'x' * 8192)
476 self.assertEqual(compressor.write(b'foo'), 0)
477 self.assertEqual(compressor.write(b'bar'), 0)
478 self.assertEqual(compressor.write(b'x' * 8192), 0)
303 479
304 480 result = buffer.getvalue()
305 481 self.assertEqual(result,
@@ -318,11 +494,23 b' class TestCompressor_write_to(unittest.T'
318 494 buffer = io.BytesIO()
319 495 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
320 496 with cctx.write_to(buffer) as compressor:
321 compressor.write(b'foo')
322 compressor.write(b'bar')
323 compressor.write(b'foo' * 16384)
497 self.assertEqual(compressor.write(b'foo'), 0)
498 self.assertEqual(compressor.write(b'bar'), 0)
499 self.assertEqual(compressor.write(b'foo' * 16384), 634)
324 500
325 501 compressed = buffer.getvalue()
502
503 params = zstd.get_frame_parameters(compressed)
504 self.assertEqual(params.content_size, 0)
505 self.assertEqual(params.window_size, 1024)
506 self.assertEqual(params.dict_id, d.dict_id())
507 self.assertFalse(params.has_checksum)
508
509 self.assertEqual(compressed[0:32],
510 b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00'
511 b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54'
512 b'\x00\x00\x18\x6f\x6f\x66\x01\x00')
513
326 514 h = hashlib.sha1(compressed).hexdigest()
327 515 self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92')
328 516
@@ -332,11 +520,18 b' class TestCompressor_write_to(unittest.T'
332 520 buffer = io.BytesIO()
333 521 cctx = zstd.ZstdCompressor(compression_params=params)
334 522 with cctx.write_to(buffer) as compressor:
335 compressor.write(b'foo')
336 compressor.write(b'bar')
337 compressor.write(b'foobar' * 16384)
523 self.assertEqual(compressor.write(b'foo'), 0)
524 self.assertEqual(compressor.write(b'bar'), 0)
525 self.assertEqual(compressor.write(b'foobar' * 16384), 0)
338 526
339 527 compressed = buffer.getvalue()
528
529 params = zstd.get_frame_parameters(compressed)
530 self.assertEqual(params.content_size, 0)
531 self.assertEqual(params.window_size, 1048576)
532 self.assertEqual(params.dict_id, 0)
533 self.assertFalse(params.has_checksum)
534
340 535 h = hashlib.sha1(compressed).hexdigest()
341 536 self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99')
342 537
@@ -344,12 +539,21 b' class TestCompressor_write_to(unittest.T'
344 539 no_checksum = io.BytesIO()
345 540 cctx = zstd.ZstdCompressor(level=1)
346 541 with cctx.write_to(no_checksum) as compressor:
347 compressor.write(b'foobar')
542 self.assertEqual(compressor.write(b'foobar'), 0)
348 543
349 544 with_checksum = io.BytesIO()
350 545 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
351 546 with cctx.write_to(with_checksum) as compressor:
352 compressor.write(b'foobar')
547 self.assertEqual(compressor.write(b'foobar'), 0)
548
549 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
550 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
551 self.assertEqual(no_params.content_size, 0)
552 self.assertEqual(with_params.content_size, 0)
553 self.assertEqual(no_params.dict_id, 0)
554 self.assertEqual(with_params.dict_id, 0)
555 self.assertFalse(no_params.has_checksum)
556 self.assertTrue(with_params.has_checksum)
353 557
354 558 self.assertEqual(len(with_checksum.getvalue()),
355 559 len(no_checksum.getvalue()) + 4)
@@ -358,12 +562,12 b' class TestCompressor_write_to(unittest.T'
358 562 no_size = io.BytesIO()
359 563 cctx = zstd.ZstdCompressor(level=1)
360 564 with cctx.write_to(no_size) as compressor:
361 compressor.write(b'foobar' * 256)
565 self.assertEqual(compressor.write(b'foobar' * 256), 0)
362 566
363 567 with_size = io.BytesIO()
364 568 cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
365 569 with cctx.write_to(with_size) as compressor:
366 compressor.write(b'foobar' * 256)
570 self.assertEqual(compressor.write(b'foobar' * 256), 0)
367 571
368 572 # Source size is not known in streaming mode, so header not
369 573 # written.
@@ -373,7 +577,16 b' class TestCompressor_write_to(unittest.T'
373 577 # Declaring size will write the header.
374 578 with_size = io.BytesIO()
375 579 with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor:
376 compressor.write(b'foobar' * 256)
580 self.assertEqual(compressor.write(b'foobar' * 256), 0)
581
582 no_params = zstd.get_frame_parameters(no_size.getvalue())
583 with_params = zstd.get_frame_parameters(with_size.getvalue())
584 self.assertEqual(no_params.content_size, 0)
585 self.assertEqual(with_params.content_size, 1536)
586 self.assertEqual(no_params.dict_id, 0)
587 self.assertEqual(with_params.dict_id, 0)
588 self.assertFalse(no_params.has_checksum)
589 self.assertFalse(with_params.has_checksum)
377 590
378 591 self.assertEqual(len(with_size.getvalue()),
379 592 len(no_size.getvalue()) + 1)
@@ -390,12 +603,21 b' class TestCompressor_write_to(unittest.T'
390 603 with_dict_id = io.BytesIO()
391 604 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
392 605 with cctx.write_to(with_dict_id) as compressor:
393 compressor.write(b'foobarfoobar')
606 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
394 607
395 608 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
396 609 no_dict_id = io.BytesIO()
397 610 with cctx.write_to(no_dict_id) as compressor:
398 compressor.write(b'foobarfoobar')
611 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
612
613 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
614 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
615 self.assertEqual(no_params.content_size, 0)
616 self.assertEqual(with_params.content_size, 0)
617 self.assertEqual(no_params.dict_id, 0)
618 self.assertEqual(with_params.dict_id, d.dict_id())
619 self.assertFalse(no_params.has_checksum)
620 self.assertFalse(with_params.has_checksum)
399 621
400 622 self.assertEqual(len(with_dict_id.getvalue()),
401 623 len(no_dict_id.getvalue()) + 4)
@@ -412,9 +634,9 b' class TestCompressor_write_to(unittest.T'
412 634 cctx = zstd.ZstdCompressor(level=3)
413 635 dest = OpCountingBytesIO()
414 636 with cctx.write_to(dest, write_size=1) as compressor:
415 compressor.write(b'foo')
416 compressor.write(b'bar')
417 compressor.write(b'foobar')
637 self.assertEqual(compressor.write(b'foo'), 0)
638 self.assertEqual(compressor.write(b'bar'), 0)
639 self.assertEqual(compressor.write(b'foobar'), 0)
418 640
419 641 self.assertEqual(len(dest.getvalue()), dest._write_count)
420 642
@@ -422,15 +644,15 b' class TestCompressor_write_to(unittest.T'
422 644 cctx = zstd.ZstdCompressor(level=3)
423 645 dest = OpCountingBytesIO()
424 646 with cctx.write_to(dest) as compressor:
425 compressor.write(b'foo')
647 self.assertEqual(compressor.write(b'foo'), 0)
426 648 self.assertEqual(dest._write_count, 0)
427 compressor.flush()
649 self.assertEqual(compressor.flush(), 12)
428 650 self.assertEqual(dest._write_count, 1)
429 compressor.write(b'bar')
651 self.assertEqual(compressor.write(b'bar'), 0)
430 652 self.assertEqual(dest._write_count, 1)
431 compressor.flush()
653 self.assertEqual(compressor.flush(), 6)
432 654 self.assertEqual(dest._write_count, 2)
433 compressor.write(b'baz')
655 self.assertEqual(compressor.write(b'baz'), 0)
434 656
435 657 self.assertEqual(dest._write_count, 3)
436 658
@@ -438,10 +660,10 b' class TestCompressor_write_to(unittest.T'
438 660 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
439 661 dest = OpCountingBytesIO()
440 662 with cctx.write_to(dest) as compressor:
441 compressor.write(b'foobar' * 8192)
663 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
442 664 count = dest._write_count
443 665 offset = dest.tell()
444 compressor.flush()
666 self.assertEqual(compressor.flush(), 23)
445 667 self.assertGreater(dest._write_count, count)
446 668 self.assertGreater(dest.tell(), offset)
447 669 offset = dest.tell()
@@ -455,19 +677,33 b' class TestCompressor_write_to(unittest.T'
455 677 header = trailing[0:3]
456 678 self.assertEqual(header, b'\x01\x00\x00')
457 679
680 def test_multithreaded(self):
681 dest = io.BytesIO()
682 cctx = zstd.ZstdCompressor(threads=2)
683 with cctx.write_to(dest) as compressor:
684 compressor.write(b'a' * 1048576)
685 compressor.write(b'b' * 1048576)
686 compressor.write(b'c' * 1048576)
458 687
688 self.assertEqual(len(dest.getvalue()), 295)
689
690
691 @make_cffi
459 692 class TestCompressor_read_from(unittest.TestCase):
460 693 def test_type_validation(self):
461 694 cctx = zstd.ZstdCompressor()
462 695
463 696 # Object with read() works.
464 cctx.read_from(io.BytesIO())
697 for chunk in cctx.read_from(io.BytesIO()):
698 pass
465 699
466 700 # Buffer protocol works.
467 cctx.read_from(b'foobar')
701 for chunk in cctx.read_from(b'foobar'):
702 pass
468 703
469 704 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
470 cctx.read_from(True)
705 for chunk in cctx.read_from(True):
706 pass
471 707
472 708 def test_read_empty(self):
473 709 cctx = zstd.ZstdCompressor(level=1)
@@ -521,6 +757,12 b' class TestCompressor_read_from(unittest.'
521 757 # We should get the same output as the one-shot compression mechanism.
522 758 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
523 759
760 params = zstd.get_frame_parameters(b''.join(chunks))
761 self.assertEqual(params.content_size, 0)
762 self.assertEqual(params.window_size, 262144)
763 self.assertEqual(params.dict_id, 0)
764 self.assertFalse(params.has_checksum)
765
524 766 # Now check the buffer protocol.
525 767 it = cctx.read_from(source.getvalue())
526 768 chunks = list(it)
@@ -534,3 +776,130 b' class TestCompressor_read_from(unittest.'
534 776 self.assertEqual(len(chunk), 1)
535 777
536 778 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
779
780 def test_multithreaded(self):
781 source = io.BytesIO()
782 source.write(b'a' * 1048576)
783 source.write(b'b' * 1048576)
784 source.write(b'c' * 1048576)
785 source.seek(0)
786
787 cctx = zstd.ZstdCompressor(threads=2)
788
789 compressed = b''.join(cctx.read_from(source))
790 self.assertEqual(len(compressed), 295)
791
792
793 class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
794 def test_multithreaded_unsupported(self):
795 cctx = zstd.ZstdCompressor(threads=2)
796
797 with self.assertRaisesRegexp(zstd.ZstdError, 'function cannot be called on ZstdCompressor configured for multi-threaded compression'):
798 cctx.multi_compress_to_buffer([b'foo'])
799
800 def test_invalid_inputs(self):
801 cctx = zstd.ZstdCompressor()
802
803 with self.assertRaises(TypeError):
804 cctx.multi_compress_to_buffer(True)
805
806 with self.assertRaises(TypeError):
807 cctx.multi_compress_to_buffer((1, 2))
808
809 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
810 cctx.multi_compress_to_buffer([u'foo'])
811
812 def test_empty_input(self):
813 cctx = zstd.ZstdCompressor()
814
815 with self.assertRaisesRegexp(ValueError, 'no source elements found'):
816 cctx.multi_compress_to_buffer([])
817
818 with self.assertRaisesRegexp(ValueError, 'source elements are empty'):
819 cctx.multi_compress_to_buffer([b'', b'', b''])
820
821 def test_list_input(self):
822 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
823
824 original = [b'foo' * 12, b'bar' * 6]
825 frames = [cctx.compress(c) for c in original]
826 b = cctx.multi_compress_to_buffer(original)
827
828 self.assertIsInstance(b, zstd.BufferWithSegmentsCollection)
829
830 self.assertEqual(len(b), 2)
831 self.assertEqual(b.size(), 44)
832
833 self.assertEqual(b[0].tobytes(), frames[0])
834 self.assertEqual(b[1].tobytes(), frames[1])
835
836 def test_buffer_with_segments_input(self):
837 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
838
839 original = [b'foo' * 4, b'bar' * 6]
840 frames = [cctx.compress(c) for c in original]
841
842 offsets = struct.pack('=QQQQ', 0, len(original[0]),
843 len(original[0]), len(original[1]))
844 segments = zstd.BufferWithSegments(b''.join(original), offsets)
845
846 result = cctx.multi_compress_to_buffer(segments)
847
848 self.assertEqual(len(result), 2)
849 self.assertEqual(result.size(), 47)
850
851 self.assertEqual(result[0].tobytes(), frames[0])
852 self.assertEqual(result[1].tobytes(), frames[1])
853
854 def test_buffer_with_segments_collection_input(self):
855 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
856
857 original = [
858 b'foo1',
859 b'foo2' * 2,
860 b'foo3' * 3,
861 b'foo4' * 4,
862 b'foo5' * 5,
863 ]
864
865 frames = [cctx.compress(c) for c in original]
866
867 b = b''.join([original[0], original[1]])
868 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
869 0, len(original[0]),
870 len(original[0]), len(original[1])))
871 b = b''.join([original[2], original[3], original[4]])
872 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
873 0, len(original[2]),
874 len(original[2]), len(original[3]),
875 len(original[2]) + len(original[3]), len(original[4])))
876
877 c = zstd.BufferWithSegmentsCollection(b1, b2)
878
879 result = cctx.multi_compress_to_buffer(c)
880
881 self.assertEqual(len(result), len(frames))
882
883 for i, frame in enumerate(frames):
884 self.assertEqual(result[i].tobytes(), frame)
885
886 def test_multiple_threads(self):
887 # threads argument will cause multi-threaded ZSTD APIs to be used, which will
888 # make output different.
889 refcctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
890 reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)]
891
892 cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
893
894 frames = []
895 frames.extend(b'x' * 64 for i in range(256))
896 frames.extend(b'y' * 64 for i in range(256))
897
898 result = cctx.multi_compress_to_buffer(frames, threads=-1)
899
900 self.assertEqual(len(result), 512)
901 for i in range(512):
902 if i < 256:
903 self.assertEqual(result[i].tobytes(), reference[0])
904 else:
905 self.assertEqual(result[i].tobytes(), reference[1])
@@ -1,18 +1,16 b''
1 import io
2
3 1 try:
4 2 import unittest2 as unittest
5 3 except ImportError:
6 4 import unittest
7 5
8 try:
9 import hypothesis
10 import hypothesis.strategies as strategies
11 except ImportError:
12 hypothesis = None
13
14 6 import zstd
15 7
8 from . common import (
9 make_cffi,
10 )
11
12
13 @make_cffi
16 14 class TestCompressionParameters(unittest.TestCase):
17 15 def test_init_bad_arg_type(self):
18 16 with self.assertRaises(TypeError):
@@ -26,7 +24,7 b' class TestCompressionParameters(unittest'
26 24 zstd.CHAINLOG_MIN,
27 25 zstd.HASHLOG_MIN,
28 26 zstd.SEARCHLOG_MIN,
29 zstd.SEARCHLENGTH_MIN,
27 zstd.SEARCHLENGTH_MIN + 1,
30 28 zstd.TARGETLENGTH_MIN,
31 29 zstd.STRATEGY_FAST)
32 30
@@ -34,7 +32,7 b' class TestCompressionParameters(unittest'
34 32 zstd.CHAINLOG_MAX,
35 33 zstd.HASHLOG_MAX,
36 34 zstd.SEARCHLOG_MAX,
37 zstd.SEARCHLENGTH_MAX,
35 zstd.SEARCHLENGTH_MAX - 1,
38 36 zstd.TARGETLENGTH_MAX,
39 37 zstd.STRATEGY_BTOPT)
40 38
@@ -42,66 +40,84 b' class TestCompressionParameters(unittest'
42 40 p = zstd.get_compression_parameters(1)
43 41 self.assertIsInstance(p, zstd.CompressionParameters)
44 42
45 self.assertEqual(p[0], 19)
43 self.assertEqual(p.window_log, 19)
44
45 def test_members(self):
46 p = zstd.CompressionParameters(10, 6, 7, 4, 5, 8, 1)
47 self.assertEqual(p.window_log, 10)
48 self.assertEqual(p.chain_log, 6)
49 self.assertEqual(p.hash_log, 7)
50 self.assertEqual(p.search_log, 4)
51 self.assertEqual(p.search_length, 5)
52 self.assertEqual(p.target_length, 8)
53 self.assertEqual(p.strategy, 1)
54
55 def test_estimated_compression_context_size(self):
56 p = zstd.CompressionParameters(20, 16, 17, 1, 5, 16, zstd.STRATEGY_DFAST)
57
58 # 32-bit has slightly different values from 64-bit.
59 self.assertAlmostEqual(p.estimated_compression_context_size(), 1287076,
60 delta=110)
61
46 62
47 if hypothesis:
48 s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
49 max_value=zstd.WINDOWLOG_MAX)
50 s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN,
51 max_value=zstd.CHAINLOG_MAX)
52 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN,
53 max_value=zstd.HASHLOG_MAX)
54 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
55 max_value=zstd.SEARCHLOG_MAX)
56 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
57 max_value=zstd.SEARCHLENGTH_MAX)
58 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
59 max_value=zstd.TARGETLENGTH_MAX)
60 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
61 zstd.STRATEGY_DFAST,
62 zstd.STRATEGY_GREEDY,
63 zstd.STRATEGY_LAZY,
64 zstd.STRATEGY_LAZY2,
65 zstd.STRATEGY_BTLAZY2,
66 zstd.STRATEGY_BTOPT))
63 @make_cffi
64 class TestFrameParameters(unittest.TestCase):
65 def test_invalid_type(self):
66 with self.assertRaises(TypeError):
67 zstd.get_frame_parameters(None)
68
69 with self.assertRaises(TypeError):
70 zstd.get_frame_parameters(u'foobarbaz')
71
72 def test_invalid_input_sizes(self):
73 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
74 zstd.get_frame_parameters(b'')
75
76 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
77 zstd.get_frame_parameters(zstd.FRAME_HEADER)
78
79 def test_invalid_frame(self):
80 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
81 zstd.get_frame_parameters(b'foobarbaz')
67 82
68 class TestCompressionParametersHypothesis(unittest.TestCase):
69 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
70 s_searchlength, s_targetlength, s_strategy)
71 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
72 searchlength, targetlength, strategy):
73 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
74 searchlog, searchlength,
75 targetlength, strategy)
76 self.assertEqual(tuple(p),
77 (windowlog, chainlog, hashlog, searchlog,
78 searchlength, targetlength, strategy))
83 def test_attributes(self):
84 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
85 self.assertEqual(params.content_size, 0)
86 self.assertEqual(params.window_size, 1024)
87 self.assertEqual(params.dict_id, 0)
88 self.assertFalse(params.has_checksum)
89
90 # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte.
91 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
92 self.assertEqual(params.content_size, 0)
93 self.assertEqual(params.window_size, 1024)
94 self.assertEqual(params.dict_id, 255)
95 self.assertFalse(params.has_checksum)
96
97 # Lowest 3rd bit indicates if checksum is present.
98 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
99 self.assertEqual(params.content_size, 0)
100 self.assertEqual(params.window_size, 1024)
101 self.assertEqual(params.dict_id, 0)
102 self.assertTrue(params.has_checksum)
79 103
80 # Verify we can instantiate a compressor with the supplied values.
81 # ZSTD_checkCParams moves the goal posts on us from what's advertised
82 # in the constants. So move along with them.
83 if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
84 searchlength += 1
85 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
86 searchlog, searchlength,
87 targetlength, strategy)
88 elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
89 searchlength -= 1
90 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
91 searchlog, searchlength,
92 targetlength, strategy)
104 # Upper 2 bits indicate content size.
105 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00')
106 self.assertEqual(params.content_size, 511)
107 self.assertEqual(params.window_size, 1024)
108 self.assertEqual(params.dict_id, 0)
109 self.assertFalse(params.has_checksum)
93 110
94 cctx = zstd.ZstdCompressor(compression_params=p)
95 with cctx.write_to(io.BytesIO()):
96 pass
111 # Window descriptor is 2nd byte after frame header.
112 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
113 self.assertEqual(params.content_size, 0)
114 self.assertEqual(params.window_size, 262144)
115 self.assertEqual(params.dict_id, 0)
116 self.assertFalse(params.has_checksum)
97 117
98 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
99 s_searchlength, s_targetlength, s_strategy)
100 def test_estimate_compression_context_size(self, windowlog, chainlog,
101 hashlog, searchlog,
102 searchlength, targetlength,
103 strategy):
104 p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
105 searchlog, searchlength,
106 targetlength, strategy)
107 size = zstd.estimate_compression_context_size(p)
118 # Set multiple things.
119 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00')
120 self.assertEqual(params.content_size, 272)
121 self.assertEqual(params.window_size, 262144)
122 self.assertEqual(params.dict_id, 15)
123 self.assertTrue(params.has_checksum)
@@ -10,7 +10,10 b' except ImportError:'
10 10
11 11 import zstd
12 12
13 from .common import OpCountingBytesIO
13 from .common import (
14 make_cffi,
15 OpCountingBytesIO,
16 )
14 17
15 18
16 19 if sys.version_info[0] >= 3:
@@ -19,6 +22,7 b' else:'
19 22 next = lambda it: it.next()
20 23
21 24
25 @make_cffi
22 26 class TestDecompressor_decompress(unittest.TestCase):
23 27 def test_empty_input(self):
24 28 dctx = zstd.ZstdDecompressor()
@@ -45,7 +49,7 b' class TestDecompressor_decompress(unitte'
45 49 compressed = cctx.compress(b'foobar')
46 50
47 51 dctx = zstd.ZstdDecompressor()
48 decompressed = dctx.decompress(compressed)
52 decompressed = dctx.decompress(compressed)
49 53 self.assertEqual(decompressed, b'foobar')
50 54
51 55 def test_max_output_size(self):
@@ -119,6 +123,7 b' class TestDecompressor_decompress(unitte'
119 123 self.assertEqual(decompressed, sources[i])
120 124
121 125
126 @make_cffi
122 127 class TestDecompressor_copy_stream(unittest.TestCase):
123 128 def test_no_read(self):
124 129 source = object()
@@ -180,6 +185,7 b' class TestDecompressor_copy_stream(unitt'
180 185 self.assertEqual(dest._write_count, len(dest.getvalue()))
181 186
182 187
188 @make_cffi
183 189 class TestDecompressor_decompressobj(unittest.TestCase):
184 190 def test_simple(self):
185 191 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
@@ -207,6 +213,7 b' def decompress_via_writer(data):'
207 213 return buffer.getvalue()
208 214
209 215
216 @make_cffi
210 217 class TestDecompressor_write_to(unittest.TestCase):
211 218 def test_empty_roundtrip(self):
212 219 cctx = zstd.ZstdCompressor()
@@ -256,14 +263,14 b' class TestDecompressor_write_to(unittest'
256 263 buffer = io.BytesIO()
257 264 cctx = zstd.ZstdCompressor(dict_data=d)
258 265 with cctx.write_to(buffer) as compressor:
259 compressor.write(orig)
266 self.assertEqual(compressor.write(orig), 1544)
260 267
261 268 compressed = buffer.getvalue()
262 269 buffer = io.BytesIO()
263 270
264 271 dctx = zstd.ZstdDecompressor(dict_data=d)
265 272 with dctx.write_to(buffer) as decompressor:
266 decompressor.write(compressed)
273 self.assertEqual(decompressor.write(compressed), len(orig))
267 274
268 275 self.assertEqual(buffer.getvalue(), orig)
269 276
@@ -286,11 +293,11 b' class TestDecompressor_write_to(unittest'
286 293 c = s.pack(c)
287 294 decompressor.write(c)
288 295
289
290 296 self.assertEqual(dest.getvalue(), b'foobarfoobar')
291 297 self.assertEqual(dest._write_count, len(dest.getvalue()))
292 298
293 299
300 @make_cffi
294 301 class TestDecompressor_read_from(unittest.TestCase):
295 302 def test_type_validation(self):
296 303 dctx = zstd.ZstdDecompressor()
@@ -302,7 +309,7 b' class TestDecompressor_read_from(unittes'
302 309 dctx.read_from(b'foobar')
303 310
304 311 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
305 dctx.read_from(True)
312 b''.join(dctx.read_from(True))
306 313
307 314 def test_empty_input(self):
308 315 dctx = zstd.ZstdDecompressor()
@@ -351,7 +358,7 b' class TestDecompressor_read_from(unittes'
351 358 dctx = zstd.ZstdDecompressor()
352 359
353 360 with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
354 dctx.read_from(b'', skip_bytes=1, read_size=1)
361 b''.join(dctx.read_from(b'', skip_bytes=1, read_size=1))
355 362
356 363 with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
357 364 b''.join(dctx.read_from(b'foobar', skip_bytes=10))
@@ -476,3 +483,259 b' class TestDecompressor_read_from(unittes'
476 483 self.assertEqual(len(chunk), 1)
477 484
478 485 self.assertEqual(source._read_count, len(source.getvalue()))
486
487
488 @make_cffi
489 class TestDecompressor_content_dict_chain(unittest.TestCase):
490 def test_bad_inputs_simple(self):
491 dctx = zstd.ZstdDecompressor()
492
493 with self.assertRaises(TypeError):
494 dctx.decompress_content_dict_chain(b'foo')
495
496 with self.assertRaises(TypeError):
497 dctx.decompress_content_dict_chain((b'foo', b'bar'))
498
499 with self.assertRaisesRegexp(ValueError, 'empty input chain'):
500 dctx.decompress_content_dict_chain([])
501
502 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
503 dctx.decompress_content_dict_chain([u'foo'])
504
505 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
506 dctx.decompress_content_dict_chain([True])
507
508 with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'):
509 dctx.decompress_content_dict_chain([zstd.FRAME_HEADER])
510
511 with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'):
512 dctx.decompress_content_dict_chain([b'foo' * 8])
513
514 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
515
516 with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'):
517 dctx.decompress_content_dict_chain([no_size])
518
519 # Corrupt first frame.
520 frame = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
521 frame = frame[0:12] + frame[15:]
522 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 0'):
523 dctx.decompress_content_dict_chain([frame])
524
525 def test_bad_subsequent_input(self):
526 initial = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64)
527
528 dctx = zstd.ZstdDecompressor()
529
530 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
531 dctx.decompress_content_dict_chain([initial, u'foo'])
532
533 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
534 dctx.decompress_content_dict_chain([initial, None])
535
536 with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'):
537 dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER])
538
539 with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'):
540 dctx.decompress_content_dict_chain([initial, b'foo' * 8])
541
542 no_size = zstd.ZstdCompressor().compress(b'foo' * 64)
543
544 with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'):
545 dctx.decompress_content_dict_chain([initial, no_size])
546
547 # Corrupt second frame.
548 cctx = zstd.ZstdCompressor(write_content_size=True, dict_data=zstd.ZstdCompressionDict(b'foo' * 64))
549 frame = cctx.compress(b'bar' * 64)
550 frame = frame[0:12] + frame[15:]
551
552 with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 1'):
553 dctx.decompress_content_dict_chain([initial, frame])
554
555 def test_simple(self):
556 original = [
557 b'foo' * 64,
558 b'foobar' * 64,
559 b'baz' * 64,
560 b'foobaz' * 64,
561 b'foobarbaz' * 64,
562 ]
563
564 chunks = []
565 chunks.append(zstd.ZstdCompressor(write_content_size=True).compress(original[0]))
566 for i, chunk in enumerate(original[1:]):
567 d = zstd.ZstdCompressionDict(original[i])
568 cctx = zstd.ZstdCompressor(dict_data=d, write_content_size=True)
569 chunks.append(cctx.compress(chunk))
570
571 for i in range(1, len(original)):
572 chain = chunks[0:i]
573 expected = original[i - 1]
574 dctx = zstd.ZstdDecompressor()
575 decompressed = dctx.decompress_content_dict_chain(chain)
576 self.assertEqual(decompressed, expected)
577
578
579 # TODO enable for CFFI
580 class TestDecompressor_multi_decompress_to_buffer(unittest.TestCase):
581 def test_invalid_inputs(self):
582 dctx = zstd.ZstdDecompressor()
583
584 with self.assertRaises(TypeError):
585 dctx.multi_decompress_to_buffer(True)
586
587 with self.assertRaises(TypeError):
588 dctx.multi_decompress_to_buffer((1, 2))
589
590 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
591 dctx.multi_decompress_to_buffer([u'foo'])
592
593 with self.assertRaisesRegexp(ValueError, 'could not determine decompressed size of item 0'):
594 dctx.multi_decompress_to_buffer([b'foobarbaz'])
595
596 def test_list_input(self):
597 cctx = zstd.ZstdCompressor(write_content_size=True)
598
599 original = [b'foo' * 4, b'bar' * 6]
600 frames = [cctx.compress(d) for d in original]
601
602 dctx = zstd.ZstdDecompressor()
603 result = dctx.multi_decompress_to_buffer(frames)
604
605 self.assertEqual(len(result), len(frames))
606 self.assertEqual(result.size(), sum(map(len, original)))
607
608 for i, data in enumerate(original):
609 self.assertEqual(result[i].tobytes(), data)
610
611 self.assertEqual(result[0].offset, 0)
612 self.assertEqual(len(result[0]), 12)
613 self.assertEqual(result[1].offset, 12)
614 self.assertEqual(len(result[1]), 18)
615
616 def test_list_input_frame_sizes(self):
617 cctx = zstd.ZstdCompressor(write_content_size=False)
618
619 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
620 frames = [cctx.compress(d) for d in original]
621 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
622
623 dctx = zstd.ZstdDecompressor()
624 result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
625
626 self.assertEqual(len(result), len(frames))
627 self.assertEqual(result.size(), sum(map(len, original)))
628
629 for i, data in enumerate(original):
630 self.assertEqual(result[i].tobytes(), data)
631
632 def test_buffer_with_segments_input(self):
633 cctx = zstd.ZstdCompressor(write_content_size=True)
634
635 original = [b'foo' * 4, b'bar' * 6]
636 frames = [cctx.compress(d) for d in original]
637
638 dctx = zstd.ZstdDecompressor()
639
640 segments = struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]), len(frames[1]))
641 b = zstd.BufferWithSegments(b''.join(frames), segments)
642
643 result = dctx.multi_decompress_to_buffer(b)
644
645 self.assertEqual(len(result), len(frames))
646 self.assertEqual(result[0].offset, 0)
647 self.assertEqual(len(result[0]), 12)
648 self.assertEqual(result[1].offset, 12)
649 self.assertEqual(len(result[1]), 18)
650
651 def test_buffer_with_segments_sizes(self):
652 cctx = zstd.ZstdCompressor(write_content_size=False)
653 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
654 frames = [cctx.compress(d) for d in original]
655 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
656
657 segments = struct.pack('=QQQQQQ', 0, len(frames[0]),
658 len(frames[0]), len(frames[1]),
659 len(frames[0]) + len(frames[1]), len(frames[2]))
660 b = zstd.BufferWithSegments(b''.join(frames), segments)
661
662 dctx = zstd.ZstdDecompressor()
663 result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes)
664
665 self.assertEqual(len(result), len(frames))
666 self.assertEqual(result.size(), sum(map(len, original)))
667
668 for i, data in enumerate(original):
669 self.assertEqual(result[i].tobytes(), data)
670
671 def test_buffer_with_segments_collection_input(self):
672 cctx = zstd.ZstdCompressor(write_content_size=True)
673
674 original = [
675 b'foo0' * 2,
676 b'foo1' * 3,
677 b'foo2' * 4,
678 b'foo3' * 5,
679 b'foo4' * 6,
680 ]
681
682 frames = cctx.multi_compress_to_buffer(original)
683
684 # Check round trip.
685 dctx = zstd.ZstdDecompressor()
686 decompressed = dctx.multi_decompress_to_buffer(frames, threads=3)
687
688 self.assertEqual(len(decompressed), len(original))
689
690 for i, data in enumerate(original):
691 self.assertEqual(data, decompressed[i].tobytes())
692
693 # And a manual mode.
694 b = b''.join([frames[0].tobytes(), frames[1].tobytes()])
695 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
696 0, len(frames[0]),
697 len(frames[0]), len(frames[1])))
698
699 b = b''.join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()])
700 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
701 0, len(frames[2]),
702 len(frames[2]), len(frames[3]),
703 len(frames[2]) + len(frames[3]), len(frames[4])))
704
705 c = zstd.BufferWithSegmentsCollection(b1, b2)
706
707 dctx = zstd.ZstdDecompressor()
708 decompressed = dctx.multi_decompress_to_buffer(c)
709
710 self.assertEqual(len(decompressed), 5)
711 for i in range(5):
712 self.assertEqual(decompressed[i].tobytes(), original[i])
713
714 def test_multiple_threads(self):
715 cctx = zstd.ZstdCompressor(write_content_size=True)
716
717 frames = []
718 frames.extend(cctx.compress(b'x' * 64) for i in range(256))
719 frames.extend(cctx.compress(b'y' * 64) for i in range(256))
720
721 dctx = zstd.ZstdDecompressor()
722 result = dctx.multi_decompress_to_buffer(frames, threads=-1)
723
724 self.assertEqual(len(result), len(frames))
725 self.assertEqual(result.size(), 2 * 64 * 256)
726 self.assertEqual(result[0].tobytes(), b'x' * 64)
727 self.assertEqual(result[256].tobytes(), b'y' * 64)
728
729 def test_item_failure(self):
730 cctx = zstd.ZstdCompressor(write_content_size=True)
731 frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)]
732
733 frames[1] = frames[1] + b'extra'
734
735 dctx = zstd.ZstdDecompressor()
736
737 with self.assertRaisesRegexp(zstd.ZstdError, 'error decompressing item 1: Src size incorrect'):
738 dctx.multi_decompress_to_buffer(frames)
739
740 with self.assertRaisesRegexp(zstd.ZstdError, 'error decompressing item 1: Src size incorrect'):
741 dctx.multi_decompress_to_buffer(frames, threads=2)
@@ -5,7 +5,12 b' except ImportError:'
5 5
6 6 import zstd
7 7
8 from . common import (
9 make_cffi,
10 )
8 11
12
13 @make_cffi
9 14 class TestSizes(unittest.TestCase):
10 15 def test_decompression_size(self):
11 16 size = zstd.estimate_decompression_context_size()
@@ -7,9 +7,15 b' except ImportError:'
7 7
8 8 import zstd
9 9
10 from . common import (
11 make_cffi,
12 )
13
14
15 @make_cffi
10 16 class TestModuleAttributes(unittest.TestCase):
11 17 def test_version(self):
12 self.assertEqual(zstd.ZSTD_VERSION, (1, 1, 2))
18 self.assertEqual(zstd.ZSTD_VERSION, (1, 1, 3))
13 19
14 20 def test_constants(self):
15 21 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
@@ -45,4 +51,4 b' class TestModuleAttributes(unittest.Test'
45 51 )
46 52
47 53 for a in attrs:
48 self.assertTrue(hasattr(zstd, a))
54 self.assertTrue(hasattr(zstd, a), a)
@@ -7,6 +7,9 b' except ImportError:'
7 7
8 8 import zstd
9 9
10 from . common import (
11 make_cffi,
12 )
10 13
11 14 if sys.version_info[0] >= 3:
12 15 int_type = int
@@ -14,6 +17,7 b' else:'
14 17 int_type = long
15 18
16 19
20 @make_cffi
17 21 class TestTrainDictionary(unittest.TestCase):
18 22 def test_no_args(self):
19 23 with self.assertRaises(TypeError):
@@ -44,3 +48,63 b' class TestTrainDictionary(unittest.TestC'
44 48
45 49 data = d.as_bytes()
46 50 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
51
52 def test_set_dict_id(self):
53 samples = []
54 for i in range(128):
55 samples.append(b'foo' * 64)
56 samples.append(b'foobar' * 64)
57
58 d = zstd.train_dictionary(8192, samples, dict_id=42)
59 self.assertEqual(d.dict_id(), 42)
60
61
62 @make_cffi
63 class TestTrainCoverDictionary(unittest.TestCase):
64 def test_no_args(self):
65 with self.assertRaises(TypeError):
66 zstd.train_cover_dictionary()
67
68 def test_bad_args(self):
69 with self.assertRaises(TypeError):
70 zstd.train_cover_dictionary(8192, u'foo')
71
72 with self.assertRaises(ValueError):
73 zstd.train_cover_dictionary(8192, [u'foo'])
74
75 def test_basic(self):
76 samples = []
77 for i in range(128):
78 samples.append(b'foo' * 64)
79 samples.append(b'foobar' * 64)
80
81 d = zstd.train_cover_dictionary(8192, samples, k=64, d=16)
82 self.assertIsInstance(d.dict_id(), int_type)
83
84 data = d.as_bytes()
85 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
86
87 self.assertEqual(d.k, 64)
88 self.assertEqual(d.d, 16)
89
90 def test_set_dict_id(self):
91 samples = []
92 for i in range(128):
93 samples.append(b'foo' * 64)
94 samples.append(b'foobar' * 64)
95
96 d = zstd.train_cover_dictionary(8192, samples, k=64, d=16,
97 dict_id=42)
98 self.assertEqual(d.dict_id(), 42)
99
100 def test_optimize(self):
101 samples = []
102 for i in range(128):
103 samples.append(b'foo' * 64)
104 samples.append(b'foobar' * 64)
105
106 d = zstd.train_cover_dictionary(8192, samples, optimize=True,
107 threads=-1, steps=1, d=16)
108
109 self.assertEqual(d.k, 16)
110 self.assertEqual(d.d, 16)
@@ -8,6 +8,14 b''
8 8
9 9 /* A Python C extension for Zstandard. */
10 10
11 #if defined(_WIN32)
12 #define WIN32_LEAN_AND_MEAN
13 #include <Windows.h>
14 #elif defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
15 #include <sys/types.h>
16 #include <sys/sysctl.h>
17 #endif
18
11 19 #include "python-zstandard.h"
12 20
13 21 PyObject *ZstdError;
@@ -34,6 +42,11 b' PyDoc_STRVAR(get_compression_parameters_'
34 42 "Obtains a ``CompressionParameters`` instance from a compression level and\n"
35 43 "optional input size and dictionary size");
36 44
45 PyDoc_STRVAR(get_frame_parameters__doc__,
46 "get_frame_parameters(data)\n"
47 "\n"
48 "Obtains a ``FrameParameters`` instance by parsing data.\n");
49
37 50 PyDoc_STRVAR(train_dictionary__doc__,
38 51 "train_dictionary(dict_size, samples)\n"
39 52 "\n"
@@ -44,25 +57,42 b' PyDoc_STRVAR(train_dictionary__doc__,'
44 57 "\n"
45 58 "The raw dictionary content will be returned\n");
46 59
60 PyDoc_STRVAR(train_cover_dictionary__doc__,
61 "train_cover_dictionary(dict_size, samples, k=None, d=None, notifications=0, dict_id=0, level=0)\n"
62 "\n"
63 "Train a dictionary from sample data using the COVER algorithm.\n"
64 "\n"
65 "This behaves like ``train_dictionary()`` except a different algorithm is\n"
66 "used to create the dictionary. The algorithm has 2 parameters: ``k`` and\n"
67 "``d``. These control the *segment size* and *dmer size*. A reasonable range\n"
68 "for ``k`` is ``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n"
69 "``d`` must be less than or equal to ``k``.\n"
70 );
71
47 72 static char zstd_doc[] = "Interface to zstandard";
48 73
49 74 static PyMethodDef zstd_methods[] = {
75 /* TODO remove since it is a method on CompressionParameters. */
50 76 { "estimate_compression_context_size", (PyCFunction)estimate_compression_context_size,
51 77 METH_VARARGS, estimate_compression_context_size__doc__ },
52 78 { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
53 79 METH_NOARGS, estimate_decompression_context_size__doc__ },
54 80 { "get_compression_parameters", (PyCFunction)get_compression_parameters,
55 81 METH_VARARGS, get_compression_parameters__doc__ },
82 { "get_frame_parameters", (PyCFunction)get_frame_parameters,
83 METH_VARARGS, get_frame_parameters__doc__ },
56 84 { "train_dictionary", (PyCFunction)train_dictionary,
57 85 METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
86 { "train_cover_dictionary", (PyCFunction)train_cover_dictionary,
87 METH_VARARGS | METH_KEYWORDS, train_cover_dictionary__doc__ },
58 88 { NULL, NULL }
59 89 };
60 90
91 void bufferutil_module_init(PyObject* mod);
61 92 void compressobj_module_init(PyObject* mod);
62 93 void compressor_module_init(PyObject* mod);
63 94 void compressionparams_module_init(PyObject* mod);
64 95 void constants_module_init(PyObject* mod);
65 void dictparams_module_init(PyObject* mod);
66 96 void compressiondict_module_init(PyObject* mod);
67 97 void compressionwriter_module_init(PyObject* mod);
68 98 void compressoriterator_module_init(PyObject* mod);
@@ -70,6 +100,7 b' void decompressor_module_init(PyObject* '
70 100 void decompressobj_module_init(PyObject* mod);
71 101 void decompressionwriter_module_init(PyObject* mod);
72 102 void decompressoriterator_module_init(PyObject* mod);
103 void frameparams_module_init(PyObject* mod);
73 104
74 105 void zstd_module_init(PyObject* m) {
75 106 /* python-zstandard relies on unstable zstd C API features. This means
@@ -87,13 +118,13 b' void zstd_module_init(PyObject* m) {'
87 118 We detect this mismatch here and refuse to load the module if this
88 119 scenario is detected.
89 120 */
90 if (ZSTD_VERSION_NUMBER != 10102 || ZSTD_versionNumber() != 10102) {
121 if (ZSTD_VERSION_NUMBER != 10103 || ZSTD_versionNumber() != 10103) {
91 122 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
92 123 return;
93 124 }
94 125
126 bufferutil_module_init(m);
95 127 compressionparams_module_init(m);
96 dictparams_module_init(m);
97 128 compressiondict_module_init(m);
98 129 compressobj_module_init(m);
99 130 compressor_module_init(m);
@@ -104,6 +135,7 b' void zstd_module_init(PyObject* m) {'
104 135 decompressobj_module_init(m);
105 136 decompressionwriter_module_init(m);
106 137 decompressoriterator_module_init(m);
138 frameparams_module_init(m);
107 139 }
108 140
109 141 #if PY_MAJOR_VERSION >= 3
@@ -134,3 +166,48 b' PyMODINIT_FUNC initzstd(void) {'
134 166 }
135 167 }
136 168 #endif
169
170 /* Attempt to resolve the number of CPUs in the system. */
171 int cpu_count() {
172 int count = 0;
173
174 #if defined(_WIN32)
175 SYSTEM_INFO si;
176 si.dwNumberOfProcessors = 0;
177 GetSystemInfo(&si);
178 count = si.dwNumberOfProcessors;
179 #elif defined(__APPLE__)
180 int num;
181 size_t size = sizeof(int);
182
183 if (0 == sysctlbyname("hw.logicalcpu", &num, &size, NULL, 0)) {
184 count = num;
185 }
186 #elif defined(__linux__)
187 count = sysconf(_SC_NPROCESSORS_ONLN);
188 #elif defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
189 int mib[2];
190 size_t len = sizeof(count);
191 mib[0] = CTL_HW;
192 mib[1] = HW_NCPU;
193 if (0 != sysctl(mib, 2, &count, &len, NULL, 0)) {
194 count = 0;
195 }
196 #elif defined(__hpux)
197 count = mpctl(MPC_GETNUMSPUS, NULL, NULL);
198 #endif
199
200 return count;
201 }
202
203 size_t roundpow2(size_t i) {
204 i--;
205 i |= i >> 1;
206 i |= i >> 2;
207 i |= i >> 4;
208 i |= i >> 8;
209 i |= i >> 16;
210 i++;
211
212 return i;
213 }
@@ -39,7 +39,7 b' extern "C" {'
39 39 #endif
40 40
41 41 /* code only tested on 32 and 64 bits systems */
42 #define MEM_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; }
42 #define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
43 43 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
44 44
45 45
@@ -43,10 +43,6 b' ZSTD_ErrorCode ZSTD_getErrorCode(size_t '
43 43 * provides error code string from enum */
44 44 const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorName(code); }
45 45
46 /* --- ZBUFF Error Management (deprecated) --- */
47 unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
48 const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
49
50 46
51 47 /*=**************************************************************
52 48 * Custom allocator
@@ -18,6 +18,20 b' extern "C" {'
18 18 #include <stddef.h> /* size_t */
19 19
20 20
21 /* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
22 #if defined(__GNUC__) && (__GNUC__ >= 4)
23 # define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
24 #else
25 # define ZSTDERRORLIB_VISIBILITY
26 #endif
27 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
28 # define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
29 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
30 # define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
31 #else
32 # define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
33 #endif
34
21 35 /*-****************************************
22 36 * error codes list
23 37 ******************************************/
@@ -49,8 +63,8 b' typedef enum {'
49 63 /*! ZSTD_getErrorCode() :
50 64 convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
51 65 which can be used to compare directly with enum list published into "error_public.h" */
52 ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
53 const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
66 ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
67 ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
54 68
55 69
56 70 #if defined (__cplusplus)
@@ -267,4 +267,13 b' MEM_STATIC U32 ZSTD_highbit32(U32 val)'
267 267 }
268 268
269 269
270 /* hidden functions */
271
272 /* ZSTD_invalidateRepCodes() :
273 * ensures next compression will not use repcodes from previous block.
274 * Note : only works with regular variant;
275 * do not use with extDict variant ! */
276 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);
277
278
270 279 #endif /* ZSTD_CCOMMON_H_MODULE */
@@ -51,8 +51,7 b' static void ZSTD_resetSeqStore(seqStore_'
51 51 /*-*************************************
52 52 * Context memory management
53 53 ***************************************/
54 struct ZSTD_CCtx_s
55 {
54 struct ZSTD_CCtx_s {
56 55 const BYTE* nextSrc; /* next block here to continue on current prefix */
57 56 const BYTE* base; /* All regular indexes relative to this position */
58 57 const BYTE* dictBase; /* extDict indexes relative to this position */
@@ -61,10 +60,11 b' struct ZSTD_CCtx_s'
61 60 U32 nextToUpdate; /* index from which to continue dictionary update */
62 61 U32 nextToUpdate3; /* index from which to continue dictionary update */
63 62 U32 hashLog3; /* dispatch table : larger == faster, more memory */
64 U32 loadedDictEnd;
63 U32 loadedDictEnd; /* index of end of dictionary */
64 U32 forceWindow; /* force back-references to respect limit of 1<<wLog, even for dictionary */
65 65 ZSTD_compressionStage_e stage;
66 66 U32 rep[ZSTD_REP_NUM];
67 U32 savedRep[ZSTD_REP_NUM];
67 U32 repToConfirm[ZSTD_REP_NUM];
68 68 U32 dictID;
69 69 ZSTD_parameters params;
70 70 void* workSpace;
@@ -101,7 +101,7 b' ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD'
101 101 cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
102 102 if (!cctx) return NULL;
103 103 memset(cctx, 0, sizeof(ZSTD_CCtx));
104 memcpy(&(cctx->customMem), &customMem, sizeof(customMem));
104 cctx->customMem = customMem;
105 105 return cctx;
106 106 }
107 107
@@ -119,6 +119,15 b' size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx*'
119 119 return sizeof(*cctx) + cctx->workSpaceSize;
120 120 }
121 121
122 size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value)
123 {
124 switch(param)
125 {
126 case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
127 default: return ERROR(parameter_unknown);
128 }
129 }
130
122 131 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */
123 132 {
124 133 return &(ctx->seqStore);
@@ -318,6 +327,14 b' static size_t ZSTD_resetCCtx_advanced (Z'
318 327 }
319 328 }
320 329
330 /* ZSTD_invalidateRepCodes() :
331 * ensures next compression will not use repcodes from previous block.
332 * Note : only works with regular variant;
333 * do not use with extDict variant ! */
334 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
335 int i;
336 for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = 0;
337 }
321 338
322 339 /*! ZSTD_copyCCtx() :
323 340 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
@@ -735,12 +752,19 b' size_t ZSTD_compressSequences(ZSTD_CCtx*'
735 752 if ((size_t)(op-ostart) >= maxCSize) return 0; }
736 753
737 754 /* confirm repcodes */
738 { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->savedRep[i]; }
755 { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
739 756
740 757 return op - ostart;
741 758 }
742 759
743 760
761 #if 0 /* for debug */
762 # define STORESEQ_DEBUG
763 #include <stdio.h> /* fprintf */
764 U32 g_startDebug = 0;
765 const BYTE* g_start = NULL;
766 #endif
767
744 768 /*! ZSTD_storeSeq() :
745 769 Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
746 770 `offsetCode` : distance to match, or 0 == repCode.
@@ -748,13 +772,14 b' size_t ZSTD_compressSequences(ZSTD_CCtx*'
748 772 */
749 773 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
750 774 {
751 #if 0 /* for debug */
752 static const BYTE* g_start = NULL;
753 const U32 pos = (U32)((const BYTE*)literals - g_start);
754 if (g_start==NULL) g_start = (const BYTE*)literals;
755 //if ((pos > 1) && (pos < 50000))
756 printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
757 pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
775 #ifdef STORESEQ_DEBUG
776 if (g_startDebug) {
777 const U32 pos = (U32)((const BYTE*)literals - g_start);
778 if (g_start==NULL) g_start = (const BYTE*)literals;
779 if ((pos > 1895000) && (pos < 1895300))
780 fprintf(stderr, "Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
781 pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
782 }
758 783 #endif
759 784 /* copy Literals */
760 785 ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
@@ -1004,8 +1029,8 b' void ZSTD_compressBlock_fast_generic(ZST'
1004 1029 } } }
1005 1030
1006 1031 /* save reps for next block */
1007 cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
1008 cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
1032 cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
1033 cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
1009 1034
1010 1035 /* Last Literals */
1011 1036 { size_t const lastLLSize = iend - anchor;
@@ -1119,7 +1144,7 b' static void ZSTD_compressBlock_fast_extD'
1119 1144 } } }
1120 1145
1121 1146 /* save reps for next block */
1122 ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
1147 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
1123 1148
1124 1149 /* Last Literals */
1125 1150 { size_t const lastLLSize = iend - anchor;
@@ -1273,8 +1298,8 b' void ZSTD_compressBlock_doubleFast_gener'
1273 1298 } } }
1274 1299
1275 1300 /* save reps for next block */
1276 cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
1277 cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
1301 cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
1302 cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
1278 1303
1279 1304 /* Last Literals */
1280 1305 { size_t const lastLLSize = iend - anchor;
@@ -1423,7 +1448,7 b' static void ZSTD_compressBlock_doubleFas'
1423 1448 } } }
1424 1449
1425 1450 /* save reps for next block */
1426 ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
1451 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
1427 1452
1428 1453 /* Last Literals */
1429 1454 { size_t const lastLLSize = iend - anchor;
@@ -1955,8 +1980,8 b' void ZSTD_compressBlock_lazy_generic(ZST'
1955 1980 } }
1956 1981
1957 1982 /* Save reps for next block */
1958 ctx->savedRep[0] = offset_1 ? offset_1 : savedOffset;
1959 ctx->savedRep[1] = offset_2 ? offset_2 : savedOffset;
1983 ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
1984 ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
1960 1985
1961 1986 /* Last Literals */
1962 1987 { size_t const lastLLSize = iend - anchor;
@@ -2150,7 +2175,7 b' void ZSTD_compressBlock_lazy_extDict_gen'
2150 2175 } }
2151 2176
2152 2177 /* Save reps for next block */
2153 ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
2178 ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
2154 2179
2155 2180 /* Last Literals */
2156 2181 { size_t const lastLLSize = iend - anchor;
@@ -2409,12 +2434,14 b' static size_t ZSTD_compressContinue_inte'
2409 2434
2410 2435 cctx->nextSrc = ip + srcSize;
2411 2436
2412 { size_t const cSize = frame ?
2437 if (srcSize) {
2438 size_t const cSize = frame ?
2413 2439 ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
2414 2440 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
2415 2441 if (ZSTD_isError(cSize)) return cSize;
2416 2442 return cSize + fhSize;
2417 }
2443 } else
2444 return fhSize;
2418 2445 }
2419 2446
2420 2447
@@ -2450,7 +2477,7 b' static size_t ZSTD_loadDictionaryContent'
2450 2477 zc->dictBase = zc->base;
2451 2478 zc->base += ip - zc->nextSrc;
2452 2479 zc->nextToUpdate = zc->dictLimit;
2453 zc->loadedDictEnd = (U32)(iend - zc->base);
2480 zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base);
2454 2481
2455 2482 zc->nextSrc = iend;
2456 2483 if (srcSize <= HASH_READ_SIZE) return 0;
@@ -2557,9 +2584,9 b' static size_t ZSTD_loadDictEntropyStats('
2557 2584 }
2558 2585
2559 2586 if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
2560 cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
2561 cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
2562 cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
2587 cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] == 0 || cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
2588 cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] == 0 || cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
2589 cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] == 0 || cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
2563 2590 dictPtr += 12;
2564 2591
2565 2592 { U32 offcodeMax = MaxOff;
@@ -2594,7 +2621,6 b' static size_t ZSTD_compress_insertDictio'
2594 2621 }
2595 2622 }
2596 2623
2597
2598 2624 /*! ZSTD_compressBegin_internal() :
2599 2625 * @return : 0, or an error code */
2600 2626 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
@@ -2626,9 +2652,9 b' size_t ZSTD_compressBegin_usingDict(ZSTD'
2626 2652 }
2627 2653
2628 2654
2629 size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
2655 size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
2630 2656 {
2631 return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel);
2657 return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
2632 2658 }
2633 2659
2634 2660
@@ -2733,7 +2759,8 b' size_t ZSTD_compress(void* dst, size_t d'
2733 2759 /* ===== Dictionary API ===== */
2734 2760
2735 2761 struct ZSTD_CDict_s {
2736 void* dictContent;
2762 void* dictBuffer;
2763 const void* dictContent;
2737 2764 size_t dictContentSize;
2738 2765 ZSTD_CCtx* refContext;
2739 2766 }; /* typedef'd tp ZSTD_CDict within "zstd.h" */
@@ -2741,39 +2768,45 b' struct ZSTD_CDict_s {'
2741 2768 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
2742 2769 {
2743 2770 if (cdict==NULL) return 0; /* support sizeof on NULL */
2744 return ZSTD_sizeof_CCtx(cdict->refContext) + cdict->dictContentSize;
2771 return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
2745 2772 }
2746 2773
2747 ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_parameters params, ZSTD_customMem customMem)
2774 ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, unsigned byReference,
2775 ZSTD_parameters params, ZSTD_customMem customMem)
2748 2776 {
2749 2777 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
2750 2778 if (!customMem.customAlloc || !customMem.customFree) return NULL;
2751 2779
2752 2780 { ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
2753 void* const dictContent = ZSTD_malloc(dictSize, customMem);
2754 2781 ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
2755 2782
2756 if (!dictContent || !cdict || !cctx) {
2757 ZSTD_free(dictContent, customMem);
2783 if (!cdict || !cctx) {
2758 2784 ZSTD_free(cdict, customMem);
2759 2785 ZSTD_free(cctx, customMem);
2760 2786 return NULL;
2761 2787 }
2762 2788
2763 if (dictSize) {
2764 memcpy(dictContent, dict, dictSize);
2789 if ((byReference) || (!dictBuffer) || (!dictSize)) {
2790 cdict->dictBuffer = NULL;
2791 cdict->dictContent = dictBuffer;
2792 } else {
2793 void* const internalBuffer = ZSTD_malloc(dictSize, customMem);
2794 if (!internalBuffer) { ZSTD_free(cctx, customMem); ZSTD_free(cdict, customMem); return NULL; }
2795 memcpy(internalBuffer, dictBuffer, dictSize);
2796 cdict->dictBuffer = internalBuffer;
2797 cdict->dictContent = internalBuffer;
2765 2798 }
2766 { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, dictContent, dictSize, params, 0);
2799
2800 { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
2767 2801 if (ZSTD_isError(errorCode)) {
2768 ZSTD_free(dictContent, customMem);
2802 ZSTD_free(cdict->dictBuffer, customMem);
2803 ZSTD_free(cctx, customMem);
2769 2804 ZSTD_free(cdict, customMem);
2770 ZSTD_free(cctx, customMem);
2771 2805 return NULL;
2772 2806 } }
2773 2807
2774 cdict->dictContent = dictContent;
2808 cdict->refContext = cctx;
2775 2809 cdict->dictContentSize = dictSize;
2776 cdict->refContext = cctx;
2777 2810 return cdict;
2778 2811 }
2779 2812 }
@@ -2783,7 +2816,15 b' ZSTD_CDict* ZSTD_createCDict(const void*'
2783 2816 ZSTD_customMem const allocator = { NULL, NULL, NULL };
2784 2817 ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
2785 2818 params.fParams.contentSizeFlag = 1;
2786 return ZSTD_createCDict_advanced(dict, dictSize, params, allocator);
2819 return ZSTD_createCDict_advanced(dict, dictSize, 0, params, allocator);
2820 }
2821
2822 ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
2823 {
2824 ZSTD_customMem const allocator = { NULL, NULL, NULL };
2825 ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
2826 params.fParams.contentSizeFlag = 1;
2827 return ZSTD_createCDict_advanced(dict, dictSize, 1, params, allocator);
2787 2828 }
2788 2829
2789 2830 size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
@@ -2791,7 +2832,7 b' size_t ZSTD_freeCDict(ZSTD_CDict* cdict)'
2791 2832 if (cdict==NULL) return 0; /* support free on NULL */
2792 2833 { ZSTD_customMem const cMem = cdict->refContext->customMem;
2793 2834 ZSTD_freeCCtx(cdict->refContext);
2794 ZSTD_free(cdict->dictContent, cMem);
2835 ZSTD_free(cdict->dictBuffer, cMem);
2795 2836 ZSTD_free(cdict, cMem);
2796 2837 return 0;
2797 2838 }
@@ -2801,7 +2842,7 b' static ZSTD_parameters ZSTD_getParamsFro'
2801 2842 return ZSTD_getParamsFromCCtx(cdict->refContext);
2802 2843 }
2803 2844
2804 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, U64 pledgedSrcSize)
2845 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize)
2805 2846 {
2806 2847 if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
2807 2848 else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize));
@@ -2900,7 +2941,7 b' size_t ZSTD_CStreamOutSize(void) { retur'
2900 2941
2901 2942 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2902 2943 {
2903 if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once */
2944 if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */
2904 2945
2905 2946 if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize))
2906 2947 else CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
@@ -2937,9 +2978,9 b' size_t ZSTD_initCStream_advanced(ZSTD_CS'
2937 2978 if (zcs->outBuff == NULL) return ERROR(memory_allocation);
2938 2979 }
2939 2980
2940 if (dict) {
2981 if (dict && dictSize >= 8) {
2941 2982 ZSTD_freeCDict(zcs->cdictLocal);
2942 zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, params, zcs->customMem);
2983 zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0, params, zcs->customMem);
2943 2984 if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
2944 2985 zcs->cdict = zcs->cdictLocal;
2945 2986 } else zcs->cdict = NULL;
@@ -2956,6 +2997,7 b' size_t ZSTD_initCStream_usingCDict(ZSTD_'
2956 2997 ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
2957 2998 size_t const initError = ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0);
2958 2999 zcs->cdict = cdict;
3000 zcs->cctx->dictID = params.fParams.noDictIDFlag ? 0 : cdict->refContext->dictID;
2959 3001 return initError;
2960 3002 }
2961 3003
@@ -2967,7 +3009,8 b' size_t ZSTD_initCStream_usingDict(ZSTD_C'
2967 3009
2968 3010 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize)
2969 3011 {
2970 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
3012 ZSTD_parameters params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
3013 if (pledgedSrcSize) params.fParams.contentSizeFlag = 1;
2971 3014 return ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize);
2972 3015 }
2973 3016
@@ -38,7 +38,7 b' MEM_STATIC void ZSTD_rescaleFreqs(seqSto'
38 38
39 39 ssPtr->cachedLiterals = NULL;
40 40 ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
41 ssPtr->staticPrices = 0;
41 ssPtr->staticPrices = 0;
42 42
43 43 if (ssPtr->litLengthSum == 0) {
44 44 if (srcSize <= 1024) ssPtr->staticPrices = 1;
@@ -56,7 +56,7 b' MEM_STATIC void ZSTD_rescaleFreqs(seqSto'
56 56
57 57 for (u=0; u<=MaxLit; u++) {
58 58 ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
59 ssPtr->litSum += ssPtr->litFreq[u];
59 ssPtr->litSum += ssPtr->litFreq[u];
60 60 }
61 61 for (u=0; u<=MaxLL; u++)
62 62 ssPtr->litLengthFreq[u] = 1;
@@ -634,7 +634,7 b' void ZSTD_compressBlock_opt_generic(ZSTD'
634 634 } } /* for (cur=0; cur < last_pos; ) */
635 635
636 636 /* Save reps for next block */
637 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
637 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
638 638
639 639 /* Last Literals */
640 640 { size_t const lastLLSize = iend - anchor;
@@ -825,7 +825,7 b' void ZSTD_compressBlock_opt_extDict_gene'
825 825
826 826 match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
827 827
828 if (match_num > 0 && matches[match_num-1].len > sufficient_len) {
828 if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
829 829 best_mlen = matches[match_num-1].len;
830 830 best_off = matches[match_num-1].off;
831 831 last_pos = cur + 1;
@@ -835,7 +835,7 b' void ZSTD_compressBlock_opt_extDict_gene'
835 835 /* set prices using matches at position = cur */
836 836 for (u = 0; u < match_num; u++) {
837 837 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
838 best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur;
838 best_mlen = matches[u].len;
839 839
840 840 while (mlen <= best_mlen) {
841 841 if (opt[cur].mlen == 1) {
@@ -907,7 +907,7 b' void ZSTD_compressBlock_opt_extDict_gene'
907 907 } } /* for (cur=0; cur < last_pos; ) */
908 908
909 909 /* Save reps for next block */
910 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
910 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
911 911
912 912 /* Last Literals */
913 913 { size_t lastLLSize = iend - anchor;
@@ -1444,7 +1444,7 b' size_t ZSTD_decompress_usingDict(ZSTD_DC'
1444 1444 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
1445 1445 if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize);
1446 1446 #endif
1447 ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
1447 CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
1448 1448 ZSTD_checkContinuity(dctx, dst);
1449 1449 return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
1450 1450 }
@@ -1671,9 +1671,9 b' static size_t ZSTD_loadEntropy(ZSTD_DCtx'
1671 1671 }
1672 1672
1673 1673 if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
1674 dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
1675 dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
1676 dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
1674 dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] == 0 || dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
1675 dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] == 0 || dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
1676 dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] == 0 || dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
1677 1677 dictPtr += 12;
1678 1678
1679 1679 dctx->litEntropy = dctx->fseEntropy = 1;
@@ -1713,39 +1713,44 b' size_t ZSTD_decompressBegin_usingDict(ZS'
1713 1713 /* ====== ZSTD_DDict ====== */
1714 1714
1715 1715 struct ZSTD_DDict_s {
1716 void* dict;
1716 void* dictBuffer;
1717 const void* dictContent;
1717 1718 size_t dictSize;
1718 1719 ZSTD_DCtx* refContext;
1719 1720 }; /* typedef'd to ZSTD_DDict within "zstd.h" */
1720 1721
1721 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_customMem customMem)
1722 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem)
1722 1723 {
1723 1724 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
1724 1725 if (!customMem.customAlloc || !customMem.customFree) return NULL;
1725 1726
1726 1727 { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
1727 void* const dictContent = ZSTD_malloc(dictSize, customMem);
1728 1728 ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(customMem);
1729 1729
1730 if (!dictContent || !ddict || !dctx) {
1731 ZSTD_free(dictContent, customMem);
1730 if (!ddict || !dctx) {
1732 1731 ZSTD_free(ddict, customMem);
1733 1732 ZSTD_free(dctx, customMem);
1734 1733 return NULL;
1735 1734 }
1736 1735
1737 if (dictSize) {
1738 memcpy(dictContent, dict, dictSize);
1736 if ((byReference) || (!dict) || (!dictSize)) {
1737 ddict->dictBuffer = NULL;
1738 ddict->dictContent = dict;
1739 } else {
1740 void* const internalBuffer = ZSTD_malloc(dictSize, customMem);
1741 if (!internalBuffer) { ZSTD_free(dctx, customMem); ZSTD_free(ddict, customMem); return NULL; }
1742 memcpy(internalBuffer, dict, dictSize);
1743 ddict->dictBuffer = internalBuffer;
1744 ddict->dictContent = internalBuffer;
1739 1745 }
1740 { size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, dictContent, dictSize);
1746 { size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, ddict->dictContent, dictSize);
1741 1747 if (ZSTD_isError(errorCode)) {
1742 ZSTD_free(dictContent, customMem);
1748 ZSTD_free(ddict->dictBuffer, customMem);
1743 1749 ZSTD_free(ddict, customMem);
1744 1750 ZSTD_free(dctx, customMem);
1745 1751 return NULL;
1746 1752 } }
1747 1753
1748 ddict->dict = dictContent;
1749 1754 ddict->dictSize = dictSize;
1750 1755 ddict->refContext = dctx;
1751 1756 return ddict;
@@ -1758,15 +1763,27 b' ZSTD_DDict* ZSTD_createDDict_advanced(co'
1758 1763 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
1759 1764 {
1760 1765 ZSTD_customMem const allocator = { NULL, NULL, NULL };
1761 return ZSTD_createDDict_advanced(dict, dictSize, allocator);
1766 return ZSTD_createDDict_advanced(dict, dictSize, 0, allocator);
1762 1767 }
1763 1768
1769
1770 /*! ZSTD_createDDict_byReference() :
1771 * Create a digested dictionary, ready to start decompression operation without startup delay.
1772 * Dictionary content is simply referenced, and therefore stays in dictBuffer.
1773 * It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */
1774 ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
1775 {
1776 ZSTD_customMem const allocator = { NULL, NULL, NULL };
1777 return ZSTD_createDDict_advanced(dictBuffer, dictSize, 1, allocator);
1778 }
1779
1780
1764 1781 size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
1765 1782 {
1766 1783 if (ddict==NULL) return 0; /* support free on NULL */
1767 1784 { ZSTD_customMem const cMem = ddict->refContext->customMem;
1768 1785 ZSTD_freeDCtx(ddict->refContext);
1769 ZSTD_free(ddict->dict, cMem);
1786 ZSTD_free(ddict->dictBuffer, cMem);
1770 1787 ZSTD_free(ddict, cMem);
1771 1788 return 0;
1772 1789 }
@@ -1775,7 +1792,7 b' size_t ZSTD_freeDDict(ZSTD_DDict* ddict)'
1775 1792 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
1776 1793 {
1777 1794 if (ddict==NULL) return 0; /* support sizeof on NULL */
1778 return sizeof(*ddict) + sizeof(ddict->refContext) + ddict->dictSize;
1795 return sizeof(*ddict) + ZSTD_sizeof_DCtx(ddict->refContext) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
1779 1796 }
1780 1797
1781 1798 /*! ZSTD_getDictID_fromDict() :
@@ -1796,7 +1813,7 b' unsigned ZSTD_getDictID_fromDict(const v'
1796 1813 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
1797 1814 {
1798 1815 if (ddict==NULL) return 0;
1799 return ZSTD_getDictID_fromDict(ddict->dict, ddict->dictSize);
1816 return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
1800 1817 }
1801 1818
1802 1819 /*! ZSTD_getDictID_fromFrame() :
@@ -1827,7 +1844,7 b' size_t ZSTD_decompress_usingDDict(ZSTD_D'
1827 1844 const ZSTD_DDict* ddict)
1828 1845 {
1829 1846 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
1830 if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dict, ddict->dictSize);
1847 if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dictContent, ddict->dictSize);
1831 1848 #endif
1832 1849 ZSTD_refDCtx(dctx, ddict->refContext);
1833 1850 ZSTD_checkContinuity(dctx, dst);
@@ -1919,7 +1936,7 b' size_t ZSTD_initDStream_usingDict(ZSTD_D'
1919 1936 zds->stage = zdss_loadHeader;
1920 1937 zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
1921 1938 ZSTD_freeDDict(zds->ddictLocal);
1922 if (dict) {
1939 if (dict && dictSize >= 8) {
1923 1940 zds->ddictLocal = ZSTD_createDDict(dict, dictSize);
1924 1941 if (zds->ddictLocal == NULL) return ERROR(memory_allocation);
1925 1942 } else zds->ddictLocal = NULL;
@@ -1956,7 +1973,7 b' size_t ZSTD_setDStreamParameter(ZSTD_DSt'
1956 1973 switch(paramType)
1957 1974 {
1958 1975 default : return ERROR(parameter_unknown);
1959 case ZSTDdsp_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break;
1976 case DStream_p_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break;
1960 1977 }
1961 1978 return 0;
1962 1979 }
@@ -2007,7 +2024,7 b' size_t ZSTD_decompressStream(ZSTD_DStrea'
2007 2024 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
2008 2025 { U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
2009 2026 if (legacyVersion) {
2010 const void* const dict = zds->ddict ? zds->ddict->dict : NULL;
2027 const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL;
2011 2028 size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0;
2012 2029 CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion,
2013 2030 dict, dictSize));
@@ -36,12 +36,11 b''
36 36 #include <time.h> /* clock */
37 37
38 38 #include "mem.h" /* read */
39 #include "error_private.h"
40 39 #include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */
41 40 #define HUF_STATIC_LINKING_ONLY
42 #include "huf.h"
41 #include "huf.h" /* HUF_buildCTable, HUF_writeCTable */
43 42 #include "zstd_internal.h" /* includes zstd.h */
44 #include "xxhash.h"
43 #include "xxhash.h" /* XXH64 */
45 44 #include "divsufsort.h"
46 45 #ifndef ZDICT_STATIC_LINKING_ONLY
47 46 # define ZDICT_STATIC_LINKING_ONLY
@@ -61,7 +60,7 b''
61 60 #define NOISELENGTH 32
62 61
63 62 #define MINRATIO 4
64 static const int g_compressionLevel_default = 5;
63 static const int g_compressionLevel_default = 6;
65 64 static const U32 g_selectivity_default = 9;
66 65 static const size_t g_provision_entropySize = 200;
67 66 static const size_t g_min_fast_dictContent = 192;
@@ -307,13 +306,13 b' static dictItem ZDICT_analyzePos('
307 306 } while (length >=MINMATCHLENGTH);
308 307
309 308 /* look backward */
310 length = MINMATCHLENGTH;
311 while ((length >= MINMATCHLENGTH) & (start > 0)) {
312 length = ZDICT_count(b + pos, b + suffix[start - 1]);
313 if (length >= LLIMIT) length = LLIMIT - 1;
314 lengthList[length]++;
315 if (length >= MINMATCHLENGTH) start--;
316 }
309 length = MINMATCHLENGTH;
310 while ((length >= MINMATCHLENGTH) & (start > 0)) {
311 length = ZDICT_count(b + pos, b + suffix[start - 1]);
312 if (length >= LLIMIT) length = LLIMIT - 1;
313 lengthList[length]++;
314 if (length >= MINMATCHLENGTH) start--;
315 }
317 316
318 317 /* largest useful length */
319 318 memset(cumulLength, 0, sizeof(cumulLength));
@@ -570,7 +569,7 b' static void ZDICT_countEStats(EStats_res'
570 569 if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
571 570 }
572 571 cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
573 if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
572 if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
574 573
575 574 if (cSize) { /* if == 0; block is not compressible */
576 575 const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
@@ -825,6 +824,55 b' static size_t ZDICT_analyzeEntropy(void*'
825 824 }
826 825
827 826
827
828 size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
829 const void* customDictContent, size_t dictContentSize,
830 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
831 ZDICT_params_t params)
832 {
833 size_t hSize;
834 #define HBUFFSIZE 256
835 BYTE header[HBUFFSIZE];
836 int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
837 U32 const notificationLevel = params.notificationLevel;
838
839 /* check conditions */
840 if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
841 if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
842 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
843
844 /* dictionary header */
845 MEM_writeLE32(header, ZSTD_DICT_MAGIC);
846 { U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
847 U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
848 U32 const dictID = params.dictID ? params.dictID : compliantID;
849 MEM_writeLE32(header+4, dictID);
850 }
851 hSize = 8;
852
853 /* entropy tables */
854 DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
855 DISPLAYLEVEL(2, "statistics ... \n");
856 { size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize,
857 compressionLevel,
858 samplesBuffer, samplesSizes, nbSamples,
859 customDictContent, dictContentSize,
860 notificationLevel);
861 if (ZDICT_isError(eSize)) return eSize;
862 hSize += eSize;
863 }
864
865 /* copy elements in final buffer ; note : src and dst buffer can overlap */
866 if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
867 { size_t const dictSize = hSize + dictContentSize;
868 char* dictEnd = (char*)dictBuffer + dictSize;
869 memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
870 memcpy(dictBuffer, header, hSize);
871 return dictSize;
872 }
873 }
874
875
828 876 size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
829 877 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
830 878 ZDICT_params_t params)
@@ -19,15 +19,18 b' extern "C" {'
19 19 #include <stddef.h> /* size_t */
20 20
21 21
22 /*====== Export for Windows ======*/
23 /*!
24 * ZSTD_DLL_EXPORT :
25 * Enable exporting of functions when building a Windows DLL
26 */
27 #if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
28 # define ZDICTLIB_API __declspec(dllexport)
22 /* ===== ZDICTLIB_API : control library symbols visibility ===== */
23 #if defined(__GNUC__) && (__GNUC__ >= 4)
24 # define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
29 25 #else
30 # define ZDICTLIB_API
26 # define ZDICTLIB_VISIBILITY
27 #endif
28 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
29 # define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
30 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
31 # define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
32 #else
33 # define ZDICTLIB_API ZDICTLIB_VISIBILITY
31 34 #endif
32 35
33 36
@@ -79,27 +82,114 b' typedef struct {'
79 82 or an error code, which can be tested by ZDICT_isError().
80 83 note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
81 84 */
82 size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
85 ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
86 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
87 ZDICT_params_t parameters);
88
89 /*! COVER_params_t :
90 For all values 0 means default.
91 kMin and d are the only required parameters.
92 */
93 typedef struct {
94 unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
95 unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
96 unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
97
98 unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
99 unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
100 unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
101 int compressionLevel; /* 0 means default; target a specific zstd compression level */
102 } COVER_params_t;
103
104
105 /*! COVER_trainFromBuffer() :
106 Train a dictionary from an array of samples using the COVER algorithm.
107 Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
108 supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
109 The resulting dictionary will be saved into `dictBuffer`.
110 @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
111 or an error code, which can be tested with ZDICT_isError().
112 Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
113 Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
114 It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
115 In general, it's recommended to provide a few thousands samples, but this can vary a lot.
116 It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
117 */
118 ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
119 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
120 COVER_params_t parameters);
121
122 /*! COVER_optimizeTrainFromBuffer() :
123 The same requirements as above hold for all the parameters except `parameters`.
124 This function tries many parameter combinations and picks the best parameters.
125 `*parameters` is filled with the best parameters found, and the dictionary
126 constructed with those parameters is stored in `dictBuffer`.
127
128 All of the parameters d, k, steps are optional.
129 If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
130 if steps is zero it defaults to its default value.
131 If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
132
133 @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
134 or an error code, which can be tested with ZDICT_isError().
135 On success `*parameters` contains the parameters selected.
136 Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
137 */
138 ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
139 const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
140 COVER_params_t *parameters);
141
142 /*! ZDICT_finalizeDictionary() :
143
144 Given a custom content as a basis for dictionary, and a set of samples,
145 finalize dictionary by adding headers and statistics.
146
147 Samples must be stored concatenated in a flat buffer `samplesBuffer`,
148 supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
149
150 dictContentSize must be > ZDICT_CONTENTSIZE_MIN bytes.
151 maxDictSize must be >= dictContentSize, and must be > ZDICT_DICTSIZE_MIN bytes.
152
153 @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
154 or an error code, which can be tested by ZDICT_isError().
155 note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
156 note 2 : dictBuffer and customDictContent can overlap
157 */
158 #define ZDICT_CONTENTSIZE_MIN 256
159 #define ZDICT_DICTSIZE_MIN 512
160 ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
161 const void* customDictContent, size_t dictContentSize,
83 162 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
84 163 ZDICT_params_t parameters);
85 164
86 165
87 /*! ZDICT_addEntropyTablesFromBuffer() :
88
89 Given a content-only dictionary (built using any 3rd party algorithm),
90 add entropy tables computed from an array of samples.
91 Samples must be stored concatenated in a flat buffer `samplesBuffer`,
92 supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
93 166
94 The input dictionary content must be stored *at the end* of `dictBuffer`.
95 Its size is `dictContentSize`.
96 The resulting dictionary with added entropy tables will be *written back to `dictBuffer`*,
97 starting from its beginning.
98 @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`).
99 */
167 /* Deprecation warnings */
168 /* It is generally possible to disable deprecation warnings from compiler,
169 for example with -Wno-deprecated-declarations for gcc
170 or _CRT_SECURE_NO_WARNINGS in Visual.
171 Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
172 #ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
173 # define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */
174 #else
175 # define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
176 # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
177 # define ZDICT_DEPRECATED(message) ZDICTLIB_API [[deprecated(message)]]
178 # elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
179 # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
180 # elif (ZDICT_GCC_VERSION >= 301)
181 # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
182 # elif defined(_MSC_VER)
183 # define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
184 # else
185 # pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
186 # define ZDICT_DEPRECATED(message) ZDICTLIB_API
187 # endif
188 #endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
189
190 ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
100 191 size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
101 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
102
192 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
103 193
104 194
105 195 #endif /* ZDICT_STATIC_LINKING_ONLY */
@@ -20,13 +20,16 b' extern "C" {'
20 20
21 21 /* ===== ZSTDLIB_API : control library symbols visibility ===== */
22 22 #if defined(__GNUC__) && (__GNUC__ >= 4)
23 # define ZSTDLIB_API __attribute__ ((visibility ("default")))
24 #elif defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
25 # define ZSTDLIB_API __declspec(dllexport)
23 # define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default")))
24 #else
25 # define ZSTDLIB_VISIBILITY
26 #endif
27 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
28 # define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY
26 29 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
27 # define ZSTDLIB_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
30 # define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
28 31 #else
29 # define ZSTDLIB_API
32 # define ZSTDLIB_API ZSTDLIB_VISIBILITY
30 33 #endif
31 34
32 35
@@ -53,7 +56,7 b' extern "C" {'
53 56 /*------ Version ------*/
54 57 #define ZSTD_VERSION_MAJOR 1
55 58 #define ZSTD_VERSION_MINOR 1
56 #define ZSTD_VERSION_RELEASE 2
59 #define ZSTD_VERSION_RELEASE 3
57 60
58 61 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
59 62 #define ZSTD_QUOTE(str) #str
@@ -170,8 +173,8 b' typedef struct ZSTD_CDict_s ZSTD_CDict;'
170 173 * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
171 174 * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
172 175 * ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only.
173 * `dict` can be released after ZSTD_CDict creation. */
174 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel);
176 * `dictBuffer` can be released after ZSTD_CDict creation, as its content is copied within CDict */
177 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel);
175 178
176 179 /*! ZSTD_freeCDict() :
177 180 * Function frees memory allocated by ZSTD_createCDict(). */
@@ -191,8 +194,8 b' typedef struct ZSTD_DDict_s ZSTD_DDict;'
191 194
192 195 /*! ZSTD_createDDict() :
193 196 * Create a digested dictionary, ready to start decompression operation without startup delay.
194 * `dict` can be released after creation. */
195 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize);
197 * dictBuffer can be released after DDict creation, as its content is copied inside DDict */
198 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
196 199
197 200 /*! ZSTD_freeDDict() :
198 201 * Function frees memory allocated with ZSTD_createDDict() */
@@ -325,7 +328,7 b' ZSTDLIB_API size_t ZSTD_DStreamOutSize(v'
325 328 * ***************************************************************************************/
326 329
327 330 /* --- Constants ---*/
328 #define ZSTD_MAGICNUMBER 0xFD2FB528 /* v0.8 */
331 #define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */
329 332 #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U
330 333
331 334 #define ZSTD_WINDOWLOG_MAX_32 25
@@ -345,8 +348,9 b' ZSTDLIB_API size_t ZSTD_DStreamOutSize(v'
345 348 #define ZSTD_TARGETLENGTH_MAX 999
346 349
347 350 #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */
351 #define ZSTD_FRAMEHEADERSIZE_MIN 6
348 352 static const size_t ZSTD_frameHeaderSize_prefix = 5;
349 static const size_t ZSTD_frameHeaderSize_min = 6;
353 static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN;
350 354 static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
351 355 static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */
352 356
@@ -365,9 +369,9 b' typedef struct {'
365 369 } ZSTD_compressionParameters;
366 370
367 371 typedef struct {
368 unsigned contentSizeFlag; /**< 1: content size will be in frame header (if known). */
369 unsigned checksumFlag; /**< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
370 unsigned noDictIDFlag; /**< 1: no dict ID will be saved into frame header (if dictionary compression) */
372 unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */
373 unsigned checksumFlag; /**< 1: generate a 32-bits checksum at end of frame, for error detection */
374 unsigned noDictIDFlag; /**< 1: no dictID will be saved into frame header (if dictionary compression) */
371 375 } ZSTD_frameParameters;
372 376
373 377 typedef struct {
@@ -397,9 +401,23 b' ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_a'
397 401 * Gives the amount of memory used by a given ZSTD_CCtx */
398 402 ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
399 403
404 typedef enum {
405 ZSTD_p_forceWindow /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0)*/
406 } ZSTD_CCtxParameter;
407 /*! ZSTD_setCCtxParameter() :
408 * Set advanced parameters, selected through enum ZSTD_CCtxParameter
409 * @result : 0, or an error code (which can be tested with ZSTD_isError()) */
410 ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value);
411
412 /*! ZSTD_createCDict_byReference() :
413 * Create a digested dictionary for compression
414 * Dictionary content is simply referenced, and therefore stays in dictBuffer.
415 * It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */
416 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
417
400 418 /*! ZSTD_createCDict_advanced() :
401 419 * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
402 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
420 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference,
403 421 ZSTD_parameters params, ZSTD_customMem customMem);
404 422
405 423 /*! ZSTD_sizeof_CDict() :
@@ -455,6 +473,15 b' ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_a'
455 473 * Gives the amount of memory used by a given ZSTD_DCtx */
456 474 ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
457 475
476 /*! ZSTD_createDDict_byReference() :
477 * Create a digested dictionary, ready to start decompression operation without startup delay.
478 * Dictionary content is simply referenced, and therefore stays in dictBuffer.
479 * It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */
480 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
481
482 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
483 unsigned byReference, ZSTD_customMem customMem);
484
458 485 /*! ZSTD_sizeof_DDict() :
459 486 * Gives the amount of memory used by a given ZSTD_DDict */
460 487 ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
@@ -463,13 +490,13 b' ZSTDLIB_API size_t ZSTD_sizeof_DDict(con'
463 490 * Provides the dictID stored within dictionary.
464 491 * if @return == 0, the dictionary is not conformant with Zstandard specification.
465 492 * It can still be loaded, but as a content-only dictionary. */
466 unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
493 ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
467 494
468 495 /*! ZSTD_getDictID_fromDDict() :
469 496 * Provides the dictID of the dictionary loaded into `ddict`.
470 497 * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
471 498 * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
472 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
499 ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
473 500
474 501 /*! ZSTD_getDictID_fromFrame() :
475 502 * Provides the dictID required to decompressed the frame stored within `src`.
@@ -481,7 +508,7 b' unsigned ZSTD_getDictID_fromDDict(const '
481 508 * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
482 509 * - This is not a Zstandard frame.
483 510 * When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */
484 unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
511 ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
485 512
486 513
487 514 /********************************************************************
@@ -491,7 +518,7 b' unsigned ZSTD_getDictID_fromFrame(const '
491 518 /*===== Advanced Streaming compression functions =====*/
492 519 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
493 520 ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct */
494 ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
521 ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
495 522 ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
496 523 ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */
497 524 ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */
@@ -500,9 +527,9 b' ZSTDLIB_API size_t ZSTD_sizeof_CStream(c'
500 527
501 528
502 529 /*===== Advanced Streaming decompression functions =====*/
503 typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e;
530 typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
504 531 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
505 ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
532 ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
506 533 ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
507 534 ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */
508 535 ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */
@@ -542,10 +569,10 b' ZSTDLIB_API size_t ZSTD_sizeof_DStream(c'
542 569 In which case, it will "discard" the relevant memory section from its history.
543 570
544 571 Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
545 It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame,
546 Without last block mark, frames will be considered unfinished (broken) by decoders.
572 It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
573 Without last block mark, frames will be considered unfinished (corrupted) by decoders.
547 574
548 You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame.
575 `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame.
549 576 */
550 577
551 578 /*===== Buffer-less streaming compression functions =====*/
@@ -553,6 +580,7 b' ZSTDLIB_API size_t ZSTD_compressBegin(ZS'
553 580 ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
554 581 ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
555 582 ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
583 ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize);
556 584 ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
557 585 ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
558 586
This diff has been collapsed as it changes many lines, (1233 lines changed) Show them Hide them
@@ -8,145 +8,1250 b''
8 8
9 9 from __future__ import absolute_import, unicode_literals
10 10
11 import io
11 import os
12 import sys
12 13
13 14 from _zstd_cffi import (
14 15 ffi,
15 16 lib,
16 17 )
17 18
19 if sys.version_info[0] == 2:
20 bytes_type = str
21 int_type = long
22 else:
23 bytes_type = bytes
24 int_type = int
18 25
19 _CSTREAM_IN_SIZE = lib.ZSTD_CStreamInSize()
20 _CSTREAM_OUT_SIZE = lib.ZSTD_CStreamOutSize()
26
27 COMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_CStreamInSize()
28 COMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_CStreamOutSize()
29 DECOMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_DStreamInSize()
30 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_DStreamOutSize()
31
32 new_nonzero = ffi.new_allocator(should_clear_after_alloc=False)
33
34
35 MAX_COMPRESSION_LEVEL = lib.ZSTD_maxCLevel()
36 MAGIC_NUMBER = lib.ZSTD_MAGICNUMBER
37 FRAME_HEADER = b'\x28\xb5\x2f\xfd'
38 ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE)
39
40 WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN
41 WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX
42 CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN
43 CHAINLOG_MAX = lib.ZSTD_CHAINLOG_MAX
44 HASHLOG_MIN = lib.ZSTD_HASHLOG_MIN
45 HASHLOG_MAX = lib.ZSTD_HASHLOG_MAX
46 HASHLOG3_MAX = lib.ZSTD_HASHLOG3_MAX
47 SEARCHLOG_MIN = lib.ZSTD_SEARCHLOG_MIN
48 SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX
49 SEARCHLENGTH_MIN = lib.ZSTD_SEARCHLENGTH_MIN
50 SEARCHLENGTH_MAX = lib.ZSTD_SEARCHLENGTH_MAX
51 TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN
52 TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX
53
54 STRATEGY_FAST = lib.ZSTD_fast
55 STRATEGY_DFAST = lib.ZSTD_dfast
56 STRATEGY_GREEDY = lib.ZSTD_greedy
57 STRATEGY_LAZY = lib.ZSTD_lazy
58 STRATEGY_LAZY2 = lib.ZSTD_lazy2
59 STRATEGY_BTLAZY2 = lib.ZSTD_btlazy2
60 STRATEGY_BTOPT = lib.ZSTD_btopt
61
62 COMPRESSOBJ_FLUSH_FINISH = 0
63 COMPRESSOBJ_FLUSH_BLOCK = 1
64
65
66 def _cpu_count():
67 # os.cpu_count() was introducd in Python 3.4.
68 try:
69 return os.cpu_count() or 0
70 except AttributeError:
71 pass
72
73 # Linux.
74 try:
75 if sys.version_info[0] == 2:
76 return os.sysconf(b'SC_NPROCESSORS_ONLN')
77 else:
78 return os.sysconf(u'SC_NPROCESSORS_ONLN')
79 except (AttributeError, ValueError):
80 pass
81
82 # TODO implement on other platforms.
83 return 0
84
85
86 class ZstdError(Exception):
87 pass
21 88
22 89
23 class _ZstdCompressionWriter(object):
24 def __init__(self, cstream, writer):
25 self._cstream = cstream
90 class CompressionParameters(object):
91 def __init__(self, window_log, chain_log, hash_log, search_log,
92 search_length, target_length, strategy):
93 if window_log < WINDOWLOG_MIN or window_log > WINDOWLOG_MAX:
94 raise ValueError('invalid window log value')
95
96 if chain_log < CHAINLOG_MIN or chain_log > CHAINLOG_MAX:
97 raise ValueError('invalid chain log value')
98
99 if hash_log < HASHLOG_MIN or hash_log > HASHLOG_MAX:
100 raise ValueError('invalid hash log value')
101
102 if search_log < SEARCHLOG_MIN or search_log > SEARCHLOG_MAX:
103 raise ValueError('invalid search log value')
104
105 if search_length < SEARCHLENGTH_MIN or search_length > SEARCHLENGTH_MAX:
106 raise ValueError('invalid search length value')
107
108 if target_length < TARGETLENGTH_MIN or target_length > TARGETLENGTH_MAX:
109 raise ValueError('invalid target length value')
110
111 if strategy < STRATEGY_FAST or strategy > STRATEGY_BTOPT:
112 raise ValueError('invalid strategy value')
113
114 self.window_log = window_log
115 self.chain_log = chain_log
116 self.hash_log = hash_log
117 self.search_log = search_log
118 self.search_length = search_length
119 self.target_length = target_length
120 self.strategy = strategy
121
122 zresult = lib.ZSTD_checkCParams(self.as_compression_parameters())
123 if lib.ZSTD_isError(zresult):
124 raise ValueError('invalid compression parameters: %s',
125 ffi.string(lib.ZSTD_getErrorName(zresult)))
126
127 def estimated_compression_context_size(self):
128 return lib.ZSTD_estimateCCtxSize(self.as_compression_parameters())
129
130 def as_compression_parameters(self):
131 p = ffi.new('ZSTD_compressionParameters *')[0]
132 p.windowLog = self.window_log
133 p.chainLog = self.chain_log
134 p.hashLog = self.hash_log
135 p.searchLog = self.search_log
136 p.searchLength = self.search_length
137 p.targetLength = self.target_length
138 p.strategy = self.strategy
139
140 return p
141
142 def get_compression_parameters(level, source_size=0, dict_size=0):
143 params = lib.ZSTD_getCParams(level, source_size, dict_size)
144 return CompressionParameters(window_log=params.windowLog,
145 chain_log=params.chainLog,
146 hash_log=params.hashLog,
147 search_log=params.searchLog,
148 search_length=params.searchLength,
149 target_length=params.targetLength,
150 strategy=params.strategy)
151
152
153 def estimate_compression_context_size(params):
154 if not isinstance(params, CompressionParameters):
155 raise ValueError('argument must be a CompressionParameters')
156
157 cparams = params.as_compression_parameters()
158 return lib.ZSTD_estimateCCtxSize(cparams)
159
160
161 def estimate_decompression_context_size():
162 return lib.ZSTD_estimateDCtxSize()
163
164
165 class ZstdCompressionWriter(object):
166 def __init__(self, compressor, writer, source_size, write_size):
167 self._compressor = compressor
26 168 self._writer = writer
169 self._source_size = source_size
170 self._write_size = write_size
171 self._entered = False
172 self._mtcctx = compressor._cctx if compressor._multithreaded else None
27 173
28 174 def __enter__(self):
175 if self._entered:
176 raise ZstdError('cannot __enter__ multiple times')
177
178 if self._mtcctx:
179 self._compressor._init_mtcstream(self._source_size)
180 else:
181 self._compressor._ensure_cstream(self._source_size)
182 self._entered = True
29 183 return self
30 184
31 185 def __exit__(self, exc_type, exc_value, exc_tb):
186 self._entered = False
187
32 188 if not exc_type and not exc_value and not exc_tb:
33 189 out_buffer = ffi.new('ZSTD_outBuffer *')
34 out_buffer.dst = ffi.new('char[]', _CSTREAM_OUT_SIZE)
35 out_buffer.size = _CSTREAM_OUT_SIZE
190 dst_buffer = ffi.new('char[]', self._write_size)
191 out_buffer.dst = dst_buffer
192 out_buffer.size = self._write_size
36 193 out_buffer.pos = 0
37 194
38 195 while True:
39 res = lib.ZSTD_endStream(self._cstream, out_buffer)
40 if lib.ZSTD_isError(res):
41 raise Exception('error ending compression stream: %s' % lib.ZSTD_getErrorName)
196 if self._mtcctx:
197 zresult = lib.ZSTDMT_endStream(self._mtcctx, out_buffer)
198 else:
199 zresult = lib.ZSTD_endStream(self._compressor._cstream, out_buffer)
200 if lib.ZSTD_isError(zresult):
201 raise ZstdError('error ending compression stream: %s' %
202 ffi.string(lib.ZSTD_getErrorName(zresult)))
42 203
43 204 if out_buffer.pos:
44 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
205 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
45 206 out_buffer.pos = 0
46 207
47 if res == 0:
208 if zresult == 0:
48 209 break
49 210
211 self._compressor = None
212
50 213 return False
51 214
215 def memory_size(self):
216 if not self._entered:
217 raise ZstdError('cannot determine size of an inactive compressor; '
218 'call when a context manager is active')
219
220 return lib.ZSTD_sizeof_CStream(self._compressor._cstream)
221
52 222 def write(self, data):
223 if not self._entered:
224 raise ZstdError('write() must be called from an active context '
225 'manager')
226
227 total_write = 0
228
229 data_buffer = ffi.from_buffer(data)
230
231 in_buffer = ffi.new('ZSTD_inBuffer *')
232 in_buffer.src = data_buffer
233 in_buffer.size = len(data_buffer)
234 in_buffer.pos = 0
235
53 236 out_buffer = ffi.new('ZSTD_outBuffer *')
54 out_buffer.dst = ffi.new('char[]', _CSTREAM_OUT_SIZE)
55 out_buffer.size = _CSTREAM_OUT_SIZE
237 dst_buffer = ffi.new('char[]', self._write_size)
238 out_buffer.dst = dst_buffer
239 out_buffer.size = self._write_size
240 out_buffer.pos = 0
241
242 while in_buffer.pos < in_buffer.size:
243 if self._mtcctx:
244 zresult = lib.ZSTDMT_compressStream(self._mtcctx, out_buffer,
245 in_buffer)
246 else:
247 zresult = lib.ZSTD_compressStream(self._compressor._cstream, out_buffer,
248 in_buffer)
249 if lib.ZSTD_isError(zresult):
250 raise ZstdError('zstd compress error: %s' %
251 ffi.string(lib.ZSTD_getErrorName(zresult)))
252
253 if out_buffer.pos:
254 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
255 total_write += out_buffer.pos
256 out_buffer.pos = 0
257
258 return total_write
259
260 def flush(self):
261 if not self._entered:
262 raise ZstdError('flush must be called from an active context manager')
263
264 total_write = 0
265
266 out_buffer = ffi.new('ZSTD_outBuffer *')
267 dst_buffer = ffi.new('char[]', self._write_size)
268 out_buffer.dst = dst_buffer
269 out_buffer.size = self._write_size
56 270 out_buffer.pos = 0
57 271
58 # TODO can we reuse existing memory?
59 in_buffer = ffi.new('ZSTD_inBuffer *')
60 in_buffer.src = ffi.new('char[]', data)
61 in_buffer.size = len(data)
62 in_buffer.pos = 0
63 while in_buffer.pos < in_buffer.size:
64 res = lib.ZSTD_compressStream(self._cstream, out_buffer, in_buffer)
65 if lib.ZSTD_isError(res):
66 raise Exception('zstd compress error: %s' % lib.ZSTD_getErrorName(res))
272 while True:
273 if self._mtcctx:
274 zresult = lib.ZSTDMT_flushStream(self._mtcctx, out_buffer)
275 else:
276 zresult = lib.ZSTD_flushStream(self._compressor._cstream, out_buffer)
277 if lib.ZSTD_isError(zresult):
278 raise ZstdError('zstd compress error: %s' %
279 ffi.string(lib.ZSTD_getErrorName(zresult)))
280
281 if not out_buffer.pos:
282 break
283
284 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
285 total_write += out_buffer.pos
286 out_buffer.pos = 0
287
288 return total_write
289
290
291 class ZstdCompressionObj(object):
292 def compress(self, data):
293 if self._finished:
294 raise ZstdError('cannot call compress() after compressor finished')
295
296 data_buffer = ffi.from_buffer(data)
297 source = ffi.new('ZSTD_inBuffer *')
298 source.src = data_buffer
299 source.size = len(data_buffer)
300 source.pos = 0
301
302 chunks = []
303
304 while source.pos < len(data):
305 if self._mtcctx:
306 zresult = lib.ZSTDMT_compressStream(self._mtcctx,
307 self._out, source)
308 else:
309 zresult = lib.ZSTD_compressStream(self._compressor._cstream, self._out,
310 source)
311 if lib.ZSTD_isError(zresult):
312 raise ZstdError('zstd compress error: %s' %
313 ffi.string(lib.ZSTD_getErrorName(zresult)))
314
315 if self._out.pos:
316 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
317 self._out.pos = 0
318
319 return b''.join(chunks)
67 320
68 if out_buffer.pos:
69 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
70 out_buffer.pos = 0
321 def flush(self, flush_mode=COMPRESSOBJ_FLUSH_FINISH):
322 if flush_mode not in (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK):
323 raise ValueError('flush mode not recognized')
324
325 if self._finished:
326 raise ZstdError('compressor object already finished')
327
328 assert self._out.pos == 0
329
330 if flush_mode == COMPRESSOBJ_FLUSH_BLOCK:
331 if self._mtcctx:
332 zresult = lib.ZSTDMT_flushStream(self._mtcctx, self._out)
333 else:
334 zresult = lib.ZSTD_flushStream(self._compressor._cstream, self._out)
335 if lib.ZSTD_isError(zresult):
336 raise ZstdError('zstd compress error: %s' %
337 ffi.string(lib.ZSTD_getErrorName(zresult)))
338
339 # Output buffer is guaranteed to hold full block.
340 assert zresult == 0
341
342 if self._out.pos:
343 result = ffi.buffer(self._out.dst, self._out.pos)[:]
344 self._out.pos = 0
345 return result
346 else:
347 return b''
348
349 assert flush_mode == COMPRESSOBJ_FLUSH_FINISH
350 self._finished = True
351
352 chunks = []
353
354 while True:
355 if self._mtcctx:
356 zresult = lib.ZSTDMT_endStream(self._mtcctx, self._out)
357 else:
358 zresult = lib.ZSTD_endStream(self._compressor._cstream, self._out)
359 if lib.ZSTD_isError(zresult):
360 raise ZstdError('error ending compression stream: %s' %
361 ffi.string(lib.ZSTD_getErroName(zresult)))
362
363 if self._out.pos:
364 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
365 self._out.pos = 0
366
367 if not zresult:
368 break
369
370 return b''.join(chunks)
71 371
72 372
73 373 class ZstdCompressor(object):
74 def __init__(self, level=3, dict_data=None, compression_params=None):
75 if dict_data:
76 raise Exception('dict_data not yet supported')
77 if compression_params:
78 raise Exception('compression_params not yet supported')
374 def __init__(self, level=3, dict_data=None, compression_params=None,
375 write_checksum=False, write_content_size=False,
376 write_dict_id=True, threads=0):
377 if level < 1:
378 raise ValueError('level must be greater than 0')
379 elif level > lib.ZSTD_maxCLevel():
380 raise ValueError('level must be less than %d' % lib.ZSTD_maxCLevel())
381
382 if threads < 0:
383 threads = _cpu_count()
79 384
80 385 self._compression_level = level
386 self._dict_data = dict_data
387 self._cparams = compression_params
388 self._fparams = ffi.new('ZSTD_frameParameters *')[0]
389 self._fparams.checksumFlag = write_checksum
390 self._fparams.contentSizeFlag = write_content_size
391 self._fparams.noDictIDFlag = not write_dict_id
81 392
82 def compress(self, data):
83 # Just use the stream API for now.
84 output = io.BytesIO()
85 with self.write_to(output) as compressor:
86 compressor.write(data)
87 return output.getvalue()
393 if threads:
394 cctx = lib.ZSTDMT_createCCtx(threads)
395 if cctx == ffi.NULL:
396 raise MemoryError()
397
398 self._cctx = ffi.gc(cctx, lib.ZSTDMT_freeCCtx)
399 self._multithreaded = True
400 else:
401 cctx = lib.ZSTD_createCCtx()
402 if cctx == ffi.NULL:
403 raise MemoryError()
404
405 self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx)
406 self._multithreaded = False
407
408 self._cstream = None
409
410 def compress(self, data, allow_empty=False):
411 if len(data) == 0 and self._fparams.contentSizeFlag and not allow_empty:
412 raise ValueError('cannot write empty inputs when writing content sizes')
413
414 if self._multithreaded and self._dict_data:
415 raise ZstdError('compress() cannot be used with both dictionaries and multi-threaded compression')
416
417 if self._multithreaded and self._cparams:
418 raise ZstdError('compress() cannot be used with both compression parameters and multi-threaded compression')
419
420 # TODO use a CDict for performance.
421 dict_data = ffi.NULL
422 dict_size = 0
423
424 if self._dict_data:
425 dict_data = self._dict_data.as_bytes()
426 dict_size = len(self._dict_data)
427
428 params = ffi.new('ZSTD_parameters *')[0]
429 if self._cparams:
430 params.cParams = self._cparams.as_compression_parameters()
431 else:
432 params.cParams = lib.ZSTD_getCParams(self._compression_level, len(data),
433 dict_size)
434 params.fParams = self._fparams
435
436 dest_size = lib.ZSTD_compressBound(len(data))
437 out = new_nonzero('char[]', dest_size)
88 438
89 def copy_stream(self, ifh, ofh):
90 cstream = self._get_cstream()
439 if self._multithreaded:
440 zresult = lib.ZSTDMT_compressCCtx(self._cctx,
441 ffi.addressof(out), dest_size,
442 data, len(data),
443 self._compression_level)
444 else:
445 zresult = lib.ZSTD_compress_advanced(self._cctx,
446 ffi.addressof(out), dest_size,
447 data, len(data),
448 dict_data, dict_size,
449 params)
450
451 if lib.ZSTD_isError(zresult):
452 raise ZstdError('cannot compress: %s' %
453 ffi.string(lib.ZSTD_getErrorName(zresult)))
454
455 return ffi.buffer(out, zresult)[:]
456
457 def compressobj(self, size=0):
458 if self._multithreaded:
459 self._init_mtcstream(size)
460 else:
461 self._ensure_cstream(size)
462
463 cobj = ZstdCompressionObj()
464 cobj._out = ffi.new('ZSTD_outBuffer *')
465 cobj._dst_buffer = ffi.new('char[]', COMPRESSION_RECOMMENDED_OUTPUT_SIZE)
466 cobj._out.dst = cobj._dst_buffer
467 cobj._out.size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE
468 cobj._out.pos = 0
469 cobj._compressor = self
470 cobj._finished = False
471
472 if self._multithreaded:
473 cobj._mtcctx = self._cctx
474 else:
475 cobj._mtcctx = None
476
477 return cobj
478
479 def copy_stream(self, ifh, ofh, size=0,
480 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
481 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
482
483 if not hasattr(ifh, 'read'):
484 raise ValueError('first argument must have a read() method')
485 if not hasattr(ofh, 'write'):
486 raise ValueError('second argument must have a write() method')
487
488 mt = self._multithreaded
489 if mt:
490 self._init_mtcstream(size)
491 else:
492 self._ensure_cstream(size)
91 493
92 494 in_buffer = ffi.new('ZSTD_inBuffer *')
93 495 out_buffer = ffi.new('ZSTD_outBuffer *')
94 496
95 out_buffer.dst = ffi.new('char[]', _CSTREAM_OUT_SIZE)
96 out_buffer.size = _CSTREAM_OUT_SIZE
497 dst_buffer = ffi.new('char[]', write_size)
498 out_buffer.dst = dst_buffer
499 out_buffer.size = write_size
97 500 out_buffer.pos = 0
98 501
99 502 total_read, total_write = 0, 0
100 503
101 504 while True:
102 data = ifh.read(_CSTREAM_IN_SIZE)
505 data = ifh.read(read_size)
103 506 if not data:
104 507 break
105 508
106 total_read += len(data)
107
108 in_buffer.src = ffi.new('char[]', data)
109 in_buffer.size = len(data)
509 data_buffer = ffi.from_buffer(data)
510 total_read += len(data_buffer)
511 in_buffer.src = data_buffer
512 in_buffer.size = len(data_buffer)
110 513 in_buffer.pos = 0
111 514
112 515 while in_buffer.pos < in_buffer.size:
113 res = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer)
114 if lib.ZSTD_isError(res):
115 raise Exception('zstd compress error: %s' %
116 lib.ZSTD_getErrorName(res))
516 if mt:
517 zresult = lib.ZSTDMT_compressStream(self._cctx, out_buffer, in_buffer)
518 else:
519 zresult = lib.ZSTD_compressStream(self._cstream,
520 out_buffer, in_buffer)
521 if lib.ZSTD_isError(zresult):
522 raise ZstdError('zstd compress error: %s' %
523 ffi.string(lib.ZSTD_getErrorName(zresult)))
117 524
118 525 if out_buffer.pos:
119 526 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
120 total_write = out_buffer.pos
527 total_write += out_buffer.pos
121 528 out_buffer.pos = 0
122 529
123 530 # We've finished reading. Flush the compressor.
124 531 while True:
125 res = lib.ZSTD_endStream(cstream, out_buffer)
126 if lib.ZSTD_isError(res):
127 raise Exception('error ending compression stream: %s' %
128 lib.ZSTD_getErrorName(res))
532 if mt:
533 zresult = lib.ZSTDMT_endStream(self._cctx, out_buffer)
534 else:
535 zresult = lib.ZSTD_endStream(self._cstream, out_buffer)
536 if lib.ZSTD_isError(zresult):
537 raise ZstdError('error ending compression stream: %s' %
538 ffi.string(lib.ZSTD_getErrorName(zresult)))
129 539
130 540 if out_buffer.pos:
131 541 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
132 542 total_write += out_buffer.pos
133 543 out_buffer.pos = 0
134 544
135 if res == 0:
545 if zresult == 0:
136 546 break
137 547
138 548 return total_read, total_write
139 549
140 def write_to(self, writer):
141 return _ZstdCompressionWriter(self._get_cstream(), writer)
550 def write_to(self, writer, size=0,
551 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
552
553 if not hasattr(writer, 'write'):
554 raise ValueError('must pass an object with a write() method')
555
556 return ZstdCompressionWriter(self, writer, size, write_size)
557
558 def read_from(self, reader, size=0,
559 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
560 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
561 if hasattr(reader, 'read'):
562 have_read = True
563 elif hasattr(reader, '__getitem__'):
564 have_read = False
565 buffer_offset = 0
566 size = len(reader)
567 else:
568 raise ValueError('must pass an object with a read() method or '
569 'conforms to buffer protocol')
570
571 if self._multithreaded:
572 self._init_mtcstream(size)
573 else:
574 self._ensure_cstream(size)
575
576 in_buffer = ffi.new('ZSTD_inBuffer *')
577 out_buffer = ffi.new('ZSTD_outBuffer *')
578
579 in_buffer.src = ffi.NULL
580 in_buffer.size = 0
581 in_buffer.pos = 0
582
583 dst_buffer = ffi.new('char[]', write_size)
584 out_buffer.dst = dst_buffer
585 out_buffer.size = write_size
586 out_buffer.pos = 0
587
588 while True:
589 # We should never have output data sitting around after a previous
590 # iteration.
591 assert out_buffer.pos == 0
592
593 # Collect input data.
594 if have_read:
595 read_result = reader.read(read_size)
596 else:
597 remaining = len(reader) - buffer_offset
598 slice_size = min(remaining, read_size)
599 read_result = reader[buffer_offset:buffer_offset + slice_size]
600 buffer_offset += slice_size
601
602 # No new input data. Break out of the read loop.
603 if not read_result:
604 break
142 605
143 def _get_cstream(self):
606 # Feed all read data into the compressor and emit output until
607 # exhausted.
608 read_buffer = ffi.from_buffer(read_result)
609 in_buffer.src = read_buffer
610 in_buffer.size = len(read_buffer)
611 in_buffer.pos = 0
612
613 while in_buffer.pos < in_buffer.size:
614 if self._multithreaded:
615 zresult = lib.ZSTDMT_compressStream(self._cctx, out_buffer, in_buffer)
616 else:
617 zresult = lib.ZSTD_compressStream(self._cstream, out_buffer, in_buffer)
618 if lib.ZSTD_isError(zresult):
619 raise ZstdError('zstd compress error: %s' %
620 ffi.string(lib.ZSTD_getErrorName(zresult)))
621
622 if out_buffer.pos:
623 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
624 out_buffer.pos = 0
625 yield data
626
627 assert out_buffer.pos == 0
628
629 # And repeat the loop to collect more data.
630 continue
631
632 # If we get here, input is exhausted. End the stream and emit what
633 # remains.
634 while True:
635 assert out_buffer.pos == 0
636 if self._multithreaded:
637 zresult = lib.ZSTDMT_endStream(self._cctx, out_buffer)
638 else:
639 zresult = lib.ZSTD_endStream(self._cstream, out_buffer)
640 if lib.ZSTD_isError(zresult):
641 raise ZstdError('error ending compression stream: %s' %
642 ffi.string(lib.ZSTD_getErrorName(zresult)))
643
644 if out_buffer.pos:
645 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
646 out_buffer.pos = 0
647 yield data
648
649 if zresult == 0:
650 break
651
652 def _ensure_cstream(self, size):
653 if self._cstream:
654 zresult = lib.ZSTD_resetCStream(self._cstream, size)
655 if lib.ZSTD_isError(zresult):
656 raise ZstdError('could not reset CStream: %s' %
657 ffi.string(lib.ZSTD_getErrorName(zresult)))
658
659 return
660
144 661 cstream = lib.ZSTD_createCStream()
662 if cstream == ffi.NULL:
663 raise MemoryError()
664
145 665 cstream = ffi.gc(cstream, lib.ZSTD_freeCStream)
146 666
147 res = lib.ZSTD_initCStream(cstream, self._compression_level)
148 if lib.ZSTD_isError(res):
667 dict_data = ffi.NULL
668 dict_size = 0
669 if self._dict_data:
670 dict_data = self._dict_data.as_bytes()
671 dict_size = len(self._dict_data)
672
673 zparams = ffi.new('ZSTD_parameters *')[0]
674 if self._cparams:
675 zparams.cParams = self._cparams.as_compression_parameters()
676 else:
677 zparams.cParams = lib.ZSTD_getCParams(self._compression_level,
678 size, dict_size)
679 zparams.fParams = self._fparams
680
681 zresult = lib.ZSTD_initCStream_advanced(cstream, dict_data, dict_size,
682 zparams, size)
683 if lib.ZSTD_isError(zresult):
149 684 raise Exception('cannot init CStream: %s' %
150 lib.ZSTD_getErrorName(res))
685 ffi.string(lib.ZSTD_getErrorName(zresult)))
686
687 self._cstream = cstream
688
689 def _init_mtcstream(self, size):
690 assert self._multithreaded
691
692 dict_data = ffi.NULL
693 dict_size = 0
694 if self._dict_data:
695 dict_data = self._dict_data.as_bytes()
696 dict_size = len(self._dict_data)
697
698 zparams = ffi.new('ZSTD_parameters *')[0]
699 if self._cparams:
700 zparams.cParams = self._cparams.as_compression_parameters()
701 else:
702 zparams.cParams = lib.ZSTD_getCParams(self._compression_level,
703 size, dict_size)
704
705 zparams.fParams = self._fparams
706
707 zresult = lib.ZSTDMT_initCStream_advanced(self._cctx, dict_data, dict_size,
708 zparams, size)
709
710 if lib.ZSTD_isError(zresult):
711 raise ZstdError('cannot init CStream: %s' %
712 ffi.string(lib.ZSTD_getErrorName(zresult)))
713
714
715 class FrameParameters(object):
716 def __init__(self, fparams):
717 self.content_size = fparams.frameContentSize
718 self.window_size = fparams.windowSize
719 self.dict_id = fparams.dictID
720 self.has_checksum = bool(fparams.checksumFlag)
721
722
723 def get_frame_parameters(data):
724 if not isinstance(data, bytes_type):
725 raise TypeError('argument must be bytes')
726
727 params = ffi.new('ZSTD_frameParams *')
728
729 zresult = lib.ZSTD_getFrameParams(params, data, len(data))
730 if lib.ZSTD_isError(zresult):
731 raise ZstdError('cannot get frame parameters: %s' %
732 ffi.string(lib.ZSTD_getErrorName(zresult)))
733
734 if zresult:
735 raise ZstdError('not enough data for frame parameters; need %d bytes' %
736 zresult)
737
738 return FrameParameters(params[0])
739
740
741 class ZstdCompressionDict(object):
742 def __init__(self, data, k=0, d=0):
743 assert isinstance(data, bytes_type)
744 self._data = data
745 self.k = k
746 self.d = d
747
748 def __len__(self):
749 return len(self._data)
750
751 def dict_id(self):
752 return int_type(lib.ZDICT_getDictID(self._data, len(self._data)))
753
754 def as_bytes(self):
755 return self._data
756
757
758 def train_dictionary(dict_size, samples, selectivity=0, level=0,
759 notifications=0, dict_id=0):
760 if not isinstance(samples, list):
761 raise TypeError('samples must be a list')
762
763 total_size = sum(map(len, samples))
764
765 samples_buffer = new_nonzero('char[]', total_size)
766 sample_sizes = new_nonzero('size_t[]', len(samples))
767
768 offset = 0
769 for i, sample in enumerate(samples):
770 if not isinstance(sample, bytes_type):
771 raise ValueError('samples must be bytes')
772
773 l = len(sample)
774 ffi.memmove(samples_buffer + offset, sample, l)
775 offset += l
776 sample_sizes[i] = l
777
778 dict_data = new_nonzero('char[]', dict_size)
779
780 dparams = ffi.new('ZDICT_params_t *')[0]
781 dparams.selectivityLevel = selectivity
782 dparams.compressionLevel = level
783 dparams.notificationLevel = notifications
784 dparams.dictID = dict_id
785
786 zresult = lib.ZDICT_trainFromBuffer_advanced(
787 ffi.addressof(dict_data), dict_size,
788 ffi.addressof(samples_buffer),
789 ffi.addressof(sample_sizes, 0), len(samples),
790 dparams)
791
792 if lib.ZDICT_isError(zresult):
793 raise ZstdError('Cannot train dict: %s' %
794 ffi.string(lib.ZDICT_getErrorName(zresult)))
795
796 return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:])
797
798
799 def train_cover_dictionary(dict_size, samples, k=0, d=0,
800 notifications=0, dict_id=0, level=0, optimize=False,
801 steps=0, threads=0):
802 if not isinstance(samples, list):
803 raise TypeError('samples must be a list')
804
805 if threads < 0:
806 threads = _cpu_count()
807
808 total_size = sum(map(len, samples))
809
810 samples_buffer = new_nonzero('char[]', total_size)
811 sample_sizes = new_nonzero('size_t[]', len(samples))
812
813 offset = 0
814 for i, sample in enumerate(samples):
815 if not isinstance(sample, bytes_type):
816 raise ValueError('samples must be bytes')
817
818 l = len(sample)
819 ffi.memmove(samples_buffer + offset, sample, l)
820 offset += l
821 sample_sizes[i] = l
822
823 dict_data = new_nonzero('char[]', dict_size)
824
825 dparams = ffi.new('COVER_params_t *')[0]
826 dparams.k = k
827 dparams.d = d
828 dparams.steps = steps
829 dparams.nbThreads = threads
830 dparams.notificationLevel = notifications
831 dparams.dictID = dict_id
832 dparams.compressionLevel = level
833
834 if optimize:
835 zresult = lib.COVER_optimizeTrainFromBuffer(
836 ffi.addressof(dict_data), dict_size,
837 ffi.addressof(samples_buffer),
838 ffi.addressof(sample_sizes, 0), len(samples),
839 ffi.addressof(dparams))
840 else:
841 zresult = lib.COVER_trainFromBuffer(
842 ffi.addressof(dict_data), dict_size,
843 ffi.addressof(samples_buffer),
844 ffi.addressof(sample_sizes, 0), len(samples),
845 dparams)
846
847 if lib.ZDICT_isError(zresult):
848 raise ZstdError('cannot train dict: %s' %
849 ffi.string(lib.ZDICT_getErrorName(zresult)))
850
851 return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:],
852 k=dparams.k, d=dparams.d)
853
854
855 class ZstdDecompressionObj(object):
856 def __init__(self, decompressor):
857 self._decompressor = decompressor
858 self._finished = False
859
860 def decompress(self, data):
861 if self._finished:
862 raise ZstdError('cannot use a decompressobj multiple times')
863
864 assert(self._decompressor._dstream)
865
866 in_buffer = ffi.new('ZSTD_inBuffer *')
867 out_buffer = ffi.new('ZSTD_outBuffer *')
868
869 data_buffer = ffi.from_buffer(data)
870 in_buffer.src = data_buffer
871 in_buffer.size = len(data_buffer)
872 in_buffer.pos = 0
873
874 dst_buffer = ffi.new('char[]', DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
875 out_buffer.dst = dst_buffer
876 out_buffer.size = len(dst_buffer)
877 out_buffer.pos = 0
878
879 chunks = []
880
881 while in_buffer.pos < in_buffer.size:
882 zresult = lib.ZSTD_decompressStream(self._decompressor._dstream,
883 out_buffer, in_buffer)
884 if lib.ZSTD_isError(zresult):
885 raise ZstdError('zstd decompressor error: %s' %
886 ffi.string(lib.ZSTD_getErrorName(zresult)))
887
888 if zresult == 0:
889 self._finished = True
890 self._decompressor = None
891
892 if out_buffer.pos:
893 chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
894 out_buffer.pos = 0
895
896 return b''.join(chunks)
897
898
899 class ZstdDecompressionWriter(object):
900 def __init__(self, decompressor, writer, write_size):
901 self._decompressor = decompressor
902 self._writer = writer
903 self._write_size = write_size
904 self._entered = False
905
906 def __enter__(self):
907 if self._entered:
908 raise ZstdError('cannot __enter__ multiple times')
909
910 self._decompressor._ensure_dstream()
911 self._entered = True
912
913 return self
914
915 def __exit__(self, exc_type, exc_value, exc_tb):
916 self._entered = False
917
918 def memory_size(self):
919 if not self._decompressor._dstream:
920 raise ZstdError('cannot determine size of inactive decompressor '
921 'call when context manager is active')
922
923 return lib.ZSTD_sizeof_DStream(self._decompressor._dstream)
924
925 def write(self, data):
926 if not self._entered:
927 raise ZstdError('write must be called from an active context manager')
928
929 total_write = 0
930
931 in_buffer = ffi.new('ZSTD_inBuffer *')
932 out_buffer = ffi.new('ZSTD_outBuffer *')
933
934 data_buffer = ffi.from_buffer(data)
935 in_buffer.src = data_buffer
936 in_buffer.size = len(data_buffer)
937 in_buffer.pos = 0
938
939 dst_buffer = ffi.new('char[]', self._write_size)
940 out_buffer.dst = dst_buffer
941 out_buffer.size = len(dst_buffer)
942 out_buffer.pos = 0
943
944 dstream = self._decompressor._dstream
945
946 while in_buffer.pos < in_buffer.size:
947 zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer)
948 if lib.ZSTD_isError(zresult):
949 raise ZstdError('zstd decompress error: %s' %
950 ffi.string(lib.ZSTD_getErrorName(zresult)))
951
952 if out_buffer.pos:
953 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
954 total_write += out_buffer.pos
955 out_buffer.pos = 0
956
957 return total_write
958
959
960 class ZstdDecompressor(object):
961 def __init__(self, dict_data=None):
962 self._dict_data = dict_data
963
964 dctx = lib.ZSTD_createDCtx()
965 if dctx == ffi.NULL:
966 raise MemoryError()
967
968 self._refdctx = ffi.gc(dctx, lib.ZSTD_freeDCtx)
969 self._dstream = None
151 970
152 return cstream
971 @property
972 def _ddict(self):
973 if self._dict_data:
974 dict_data = self._dict_data.as_bytes()
975 dict_size = len(self._dict_data)
976
977 ddict = lib.ZSTD_createDDict(dict_data, dict_size)
978 if ddict == ffi.NULL:
979 raise ZstdError('could not create decompression dict')
980 else:
981 ddict = None
982
983 self.__dict__['_ddict'] = ddict
984 return ddict
985
986 def decompress(self, data, max_output_size=0):
987 data_buffer = ffi.from_buffer(data)
988
989 orig_dctx = new_nonzero('char[]', lib.ZSTD_sizeof_DCtx(self._refdctx))
990 dctx = ffi.cast('ZSTD_DCtx *', orig_dctx)
991 lib.ZSTD_copyDCtx(dctx, self._refdctx)
992
993 ddict = self._ddict
994
995 output_size = lib.ZSTD_getDecompressedSize(data_buffer, len(data_buffer))
996 if output_size:
997 result_buffer = ffi.new('char[]', output_size)
998 result_size = output_size
999 else:
1000 if not max_output_size:
1001 raise ZstdError('input data invalid or missing content size '
1002 'in frame header')
1003
1004 result_buffer = ffi.new('char[]', max_output_size)
1005 result_size = max_output_size
1006
1007 if ddict:
1008 zresult = lib.ZSTD_decompress_usingDDict(dctx,
1009 result_buffer, result_size,
1010 data_buffer, len(data_buffer),
1011 ddict)
1012 else:
1013 zresult = lib.ZSTD_decompressDCtx(dctx,
1014 result_buffer, result_size,
1015 data_buffer, len(data_buffer))
1016 if lib.ZSTD_isError(zresult):
1017 raise ZstdError('decompression error: %s' %
1018 ffi.string(lib.ZSTD_getErrorName(zresult)))
1019 elif output_size and zresult != output_size:
1020 raise ZstdError('decompression error: decompressed %d bytes; expected %d' %
1021 (zresult, output_size))
1022
1023 return ffi.buffer(result_buffer, zresult)[:]
1024
1025 def decompressobj(self):
1026 self._ensure_dstream()
1027 return ZstdDecompressionObj(self)
1028
1029 def read_from(self, reader, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
1030 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
1031 skip_bytes=0):
1032 if skip_bytes >= read_size:
1033 raise ValueError('skip_bytes must be smaller than read_size')
1034
1035 if hasattr(reader, 'read'):
1036 have_read = True
1037 elif hasattr(reader, '__getitem__'):
1038 have_read = False
1039 buffer_offset = 0
1040 size = len(reader)
1041 else:
1042 raise ValueError('must pass an object with a read() method or '
1043 'conforms to buffer protocol')
1044
1045 if skip_bytes:
1046 if have_read:
1047 reader.read(skip_bytes)
1048 else:
1049 if skip_bytes > size:
1050 raise ValueError('skip_bytes larger than first input chunk')
1051
1052 buffer_offset = skip_bytes
1053
1054 self._ensure_dstream()
1055
1056 in_buffer = ffi.new('ZSTD_inBuffer *')
1057 out_buffer = ffi.new('ZSTD_outBuffer *')
1058
1059 dst_buffer = ffi.new('char[]', write_size)
1060 out_buffer.dst = dst_buffer
1061 out_buffer.size = len(dst_buffer)
1062 out_buffer.pos = 0
1063
1064 while True:
1065 assert out_buffer.pos == 0
1066
1067 if have_read:
1068 read_result = reader.read(read_size)
1069 else:
1070 remaining = size - buffer_offset
1071 slice_size = min(remaining, read_size)
1072 read_result = reader[buffer_offset:buffer_offset + slice_size]
1073 buffer_offset += slice_size
1074
1075 # No new input. Break out of read loop.
1076 if not read_result:
1077 break
1078
1079 # Feed all read data into decompressor and emit output until
1080 # exhausted.
1081 read_buffer = ffi.from_buffer(read_result)
1082 in_buffer.src = read_buffer
1083 in_buffer.size = len(read_buffer)
1084 in_buffer.pos = 0
1085
1086 while in_buffer.pos < in_buffer.size:
1087 assert out_buffer.pos == 0
1088
1089 zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
1090 if lib.ZSTD_isError(zresult):
1091 raise ZstdError('zstd decompress error: %s' %
1092 ffi.string(lib.ZSTD_getErrorName(zresult)))
1093
1094 if out_buffer.pos:
1095 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1096 out_buffer.pos = 0
1097 yield data
1098
1099 if zresult == 0:
1100 return
1101
1102 # Repeat loop to collect more input data.
1103 continue
1104
1105 # If we get here, input is exhausted.
1106
1107 def write_to(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1108 if not hasattr(writer, 'write'):
1109 raise ValueError('must pass an object with a write() method')
1110
1111 return ZstdDecompressionWriter(self, writer, write_size)
1112
1113 def copy_stream(self, ifh, ofh,
1114 read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
1115 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1116 if not hasattr(ifh, 'read'):
1117 raise ValueError('first argument must have a read() method')
1118 if not hasattr(ofh, 'write'):
1119 raise ValueError('second argument must have a write() method')
1120
1121 self._ensure_dstream()
1122
1123 in_buffer = ffi.new('ZSTD_inBuffer *')
1124 out_buffer = ffi.new('ZSTD_outBuffer *')
1125
1126 dst_buffer = ffi.new('char[]', write_size)
1127 out_buffer.dst = dst_buffer
1128 out_buffer.size = write_size
1129 out_buffer.pos = 0
1130
1131 total_read, total_write = 0, 0
1132
1133 # Read all available input.
1134 while True:
1135 data = ifh.read(read_size)
1136 if not data:
1137 break
1138
1139 data_buffer = ffi.from_buffer(data)
1140 total_read += len(data_buffer)
1141 in_buffer.src = data_buffer
1142 in_buffer.size = len(data_buffer)
1143 in_buffer.pos = 0
1144
1145 # Flush all read data to output.
1146 while in_buffer.pos < in_buffer.size:
1147 zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
1148 if lib.ZSTD_isError(zresult):
1149 raise ZstdError('zstd decompressor error: %s' %
1150 ffi.string(lib.ZSTD_getErrorName(zresult)))
1151
1152 if out_buffer.pos:
1153 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
1154 total_write += out_buffer.pos
1155 out_buffer.pos = 0
1156
1157 # Continue loop to keep reading.
1158
1159 return total_read, total_write
1160
1161 def decompress_content_dict_chain(self, frames):
1162 if not isinstance(frames, list):
1163 raise TypeError('argument must be a list')
1164
1165 if not frames:
1166 raise ValueError('empty input chain')
1167
1168 # First chunk should not be using a dictionary. We handle it specially.
1169 chunk = frames[0]
1170 if not isinstance(chunk, bytes_type):
1171 raise ValueError('chunk 0 must be bytes')
1172
1173 # All chunks should be zstd frames and should have content size set.
1174 chunk_buffer = ffi.from_buffer(chunk)
1175 params = ffi.new('ZSTD_frameParams *')
1176 zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer))
1177 if lib.ZSTD_isError(zresult):
1178 raise ValueError('chunk 0 is not a valid zstd frame')
1179 elif zresult:
1180 raise ValueError('chunk 0 is too small to contain a zstd frame')
1181
1182 if not params.frameContentSize:
1183 raise ValueError('chunk 0 missing content size in frame')
1184
1185 dctx = lib.ZSTD_createDCtx()
1186 if dctx == ffi.NULL:
1187 raise MemoryError()
1188
1189 dctx = ffi.gc(dctx, lib.ZSTD_freeDCtx)
1190
1191 last_buffer = ffi.new('char[]', params.frameContentSize)
1192
1193 zresult = lib.ZSTD_decompressDCtx(dctx, last_buffer, len(last_buffer),
1194 chunk_buffer, len(chunk_buffer))
1195 if lib.ZSTD_isError(zresult):
1196 raise ZstdError('could not decompress chunk 0: %s' %
1197 ffi.string(lib.ZSTD_getErrorName(zresult)))
1198
1199 # Special case of chain length of 1
1200 if len(frames) == 1:
1201 return ffi.buffer(last_buffer, len(last_buffer))[:]
1202
1203 i = 1
1204 while i < len(frames):
1205 chunk = frames[i]
1206 if not isinstance(chunk, bytes_type):
1207 raise ValueError('chunk %d must be bytes' % i)
1208
1209 chunk_buffer = ffi.from_buffer(chunk)
1210 zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer))
1211 if lib.ZSTD_isError(zresult):
1212 raise ValueError('chunk %d is not a valid zstd frame' % i)
1213 elif zresult:
1214 raise ValueError('chunk %d is too small to contain a zstd frame' % i)
1215
1216 if not params.frameContentSize:
1217 raise ValueError('chunk %d missing content size in frame' % i)
1218
1219 dest_buffer = ffi.new('char[]', params.frameContentSize)
1220
1221 zresult = lib.ZSTD_decompress_usingDict(dctx, dest_buffer, len(dest_buffer),
1222 chunk_buffer, len(chunk_buffer),
1223 last_buffer, len(last_buffer))
1224 if lib.ZSTD_isError(zresult):
1225 raise ZstdError('could not decompress chunk %d' % i)
1226
1227 last_buffer = dest_buffer
1228 i += 1
1229
1230 return ffi.buffer(last_buffer, len(last_buffer))[:]
1231
1232 def _ensure_dstream(self):
1233 if self._dstream:
1234 zresult = lib.ZSTD_resetDStream(self._dstream)
1235 if lib.ZSTD_isError(zresult):
1236 raise ZstdError('could not reset DStream: %s' %
1237 ffi.string(lib.ZSTD_getErrorName(zresult)))
1238
1239 return
1240
1241 self._dstream = lib.ZSTD_createDStream()
1242 if self._dstream == ffi.NULL:
1243 raise MemoryError()
1244
1245 self._dstream = ffi.gc(self._dstream, lib.ZSTD_freeDStream)
1246
1247 if self._dict_data:
1248 zresult = lib.ZSTD_initDStream_usingDict(self._dstream,
1249 self._dict_data.as_bytes(),
1250 len(self._dict_data))
1251 else:
1252 zresult = lib.ZSTD_initDStream(self._dstream)
1253
1254 if lib.ZSTD_isError(zresult):
1255 self._dstream = None
1256 raise ZstdError('could not initialize DStream: %s' %
1257 ffi.string(lib.ZSTD_getErrorName(zresult)))
@@ -9,15 +9,15 b' import sys'
9 9 from mercurial import (
10 10 node,
11 11 revlog,
12 scmutil,
13 12 transaction,
14 13 util,
14 vfs as vfsmod,
15 15 )
16 16
17 17 for fp in (sys.stdin, sys.stdout, sys.stderr):
18 18 util.setbinary(fp)
19 19
20 opener = scmutil.opener('.', False)
20 opener = vfsmod.vfs('.', False)
21 21 tr = transaction.transaction(sys.stderr.write, opener, {'store': opener},
22 22 "undump.journal")
23 23 while True:
@@ -19,6 +19,8 b''
19 19 editor = notepad
20 20 ; show changed files and be a bit more verbose if True
21 21 ; verbose = True
22 ; colorize commands output
23 ; color = auto
22 24
23 25 ; username data to appear in commits
24 26 ; it usually takes the form: Joe User <joe.user@host.com>
@@ -40,7 +42,6 b' editor = notepad'
40 42 ;bugzilla =
41 43 ;children =
42 44 ;churn =
43 ;color =
44 45 ;convert =
45 46 ;eol =
46 47 ;extdiff =
@@ -15,6 +15,8 b''
15 15 <DirectoryRef Id="INSTALLDIR">
16 16 <Directory Id="helpdir" Name="help" FileSource="$(var.SourceDir)">
17 17 <Component Id="help.root" Guid="$(var.help.root.guid)" Win64='$(var.IsX64)'>
18 <File Name="bundlespec.txt" />
19 <File Name="color.txt" />
18 20 <File Name="config.txt" KeyPath="yes" />
19 21 <File Name="dates.txt" />
20 22 <File Name="diffs.txt" />
@@ -25,6 +27,7 b''
25 27 <File Name="hgignore.txt" />
26 28 <File Name="hgweb.txt" />
27 29 <File Name="merge-tools.txt" />
30 <File Name="pager.txt" />
28 31 <File Name="patterns.txt" />
29 32 <File Name="phases.txt" />
30 33 <File Name="revisions.txt" />
@@ -37,6 +40,7 b''
37 40 <Directory Id="help.internaldir" Name="internals">
38 41 <Component Id="help.internals" Guid="$(var.help.internals.guid)" Win64='$(var.IsX64)'>
39 42 <File Id="internals.bundles.txt" Name="bundles.txt" KeyPath="yes" />
43 <File Id="internals.censor.txt" Name="censor.txt" KeyPath="yes" />
40 44 <File Id="internals.changegroups.txt" Name="changegroups.txt" />
41 45 <File Id="internals.requirements.txt" Name="requirements.txt" />
42 46 <File Id="internals.revlogs.txt" Name="revlogs.txt" />
@@ -32,6 +32,7 b''
32 32 <File Name="map-cmdline.changelog" KeyPath="yes" />
33 33 <File Name="map-cmdline.compact" />
34 34 <File Name="map-cmdline.default" />
35 <File Name="map-cmdline.show" />
35 36 <File Name="map-cmdline.bisect" />
36 37 <File Name="map-cmdline.xml" />
37 38 <File Name="map-cmdline.status" />
@@ -225,6 +226,7 b''
225 226 <File Id="static.coal.file.png" Name="coal-file.png" />
226 227 <File Id="static.coal.folder.png" Name="coal-folder.png" />
227 228 <File Id="static.excanvas.js" Name="excanvas.js" />
229 <File Id="static.followlines.js" Name="followlines.js" />
228 230 <File Id="static.mercurial.js" Name="mercurial.js" />
229 231 <File Id="static.hgicon.png" Name="hgicon.png" />
230 232 <File Id="static.hglogo.png" Name="hglogo.png" />
@@ -4,7 +4,7 b''
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 """Check for unrecorded moves at commit time (EXPERIMENTAL)
7 """check for unrecorded moves at commit time (EXPERIMENTAL)
8 8
9 9 This extension checks at commit/amend time if any of the committed files
10 10 comes from an unrecorded mv.
@@ -15,14 +15,16 b' the Mercurial template mechanism.'
15 15 The bug references can optionally include an update for Bugzilla of the
16 16 hours spent working on the bug. Bugs can also be marked fixed.
17 17
18 Three basic modes of access to Bugzilla are provided:
18 Four basic modes of access to Bugzilla are provided:
19
20 1. Access via the Bugzilla REST-API. Requires bugzilla 5.0 or later.
19 21
20 1. Access via the Bugzilla XMLRPC interface. Requires Bugzilla 3.4 or later.
22 2. Access via the Bugzilla XMLRPC interface. Requires Bugzilla 3.4 or later.
21 23
22 2. Check data via the Bugzilla XMLRPC interface and submit bug change
24 3. Check data via the Bugzilla XMLRPC interface and submit bug change
23 25 via email to Bugzilla email interface. Requires Bugzilla 3.4 or later.
24 26
25 3. Writing directly to the Bugzilla database. Only Bugzilla installations
27 4. Writing directly to the Bugzilla database. Only Bugzilla installations
26 28 using MySQL are supported. Requires Python MySQLdb.
27 29
28 30 Writing directly to the database is susceptible to schema changes, and
@@ -50,11 +52,16 b' user, the email associated with the Bugz'
50 52 Bugzilla is used instead as the source of the comment. Marking bugs fixed
51 53 works on all supported Bugzilla versions.
52 54
55 Access via the REST-API needs either a Bugzilla username and password
56 or an apikey specified in the configuration. Comments are made under
57 the given username or the user assoicated with the apikey in Bugzilla.
58
53 59 Configuration items common to all access modes:
54 60
55 61 bugzilla.version
56 62 The access type to use. Values recognized are:
57 63
64 :``restapi``: Bugzilla REST-API, Bugzilla 5.0 and later.
58 65 :``xmlrpc``: Bugzilla XMLRPC interface.
59 66 :``xmlrpc+email``: Bugzilla XMLRPC and email interfaces.
60 67 :``3.0``: MySQL access, Bugzilla 3.0 and later.
@@ -135,7 +142,7 b' The ``[usermap]`` section is used to spe'
135 142 committer email to Bugzilla user email. See also ``bugzilla.usermap``.
136 143 Contains entries of the form ``committer = Bugzilla user``.
137 144
138 XMLRPC access mode configuration:
145 XMLRPC and REST-API access mode configuration:
139 146
140 147 bugzilla.bzurl
141 148 The base URL for the Bugzilla installation.
@@ -148,6 +155,13 b' bugzilla.user'
148 155 bugzilla.password
149 156 The password for Bugzilla login.
150 157
158 REST-API access mode uses the options listed above as well as:
159
160 bugzilla.apikey
161 An apikey generated on the Bugzilla instance for api access.
162 Using an apikey removes the need to store the user and password
163 options.
164
151 165 XMLRPC+email access mode uses the XMLRPC access mode configuration items,
152 166 and also:
153 167
@@ -279,6 +293,7 b' All the above add a comment to the Bugzi'
279 293
280 294 from __future__ import absolute_import
281 295
296 import json
282 297 import re
283 298 import time
284 299
@@ -288,10 +303,10 b' from mercurial import ('
288 303 cmdutil,
289 304 error,
290 305 mail,
306 url,
291 307 util,
292 308 )
293 309
294 urlparse = util.urlparse
295 310 xmlrpclib = util.xmlrpclib
296 311
297 312 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
@@ -641,7 +656,7 b' class bzxmlrpc(bzaccess):'
641 656 self.bztoken = login.get('token', '')
642 657
643 658 def transport(self, uri):
644 if urlparse.urlparse(uri, "http")[0] == "https":
659 if util.urlreq.urlparse(uri, "http")[0] == "https":
645 660 return cookiesafetransport()
646 661 else:
647 662 return cookietransport()
@@ -773,6 +788,136 b' class bzxmlrpcemail(bzxmlrpc):'
773 788 cmds.append(self.makecommandline("resolution", self.fixresolution))
774 789 self.send_bug_modify_email(bugid, cmds, text, committer)
775 790
791 class NotFound(LookupError):
792 pass
793
794 class bzrestapi(bzaccess):
795 """Read and write bugzilla data using the REST API available since
796 Bugzilla 5.0.
797 """
798 def __init__(self, ui):
799 bzaccess.__init__(self, ui)
800 bz = self.ui.config('bugzilla', 'bzurl',
801 'http://localhost/bugzilla/')
802 self.bzroot = '/'.join([bz, 'rest'])
803 self.apikey = self.ui.config('bugzilla', 'apikey', '')
804 self.user = self.ui.config('bugzilla', 'user', 'bugs')
805 self.passwd = self.ui.config('bugzilla', 'password')
806 self.fixstatus = self.ui.config('bugzilla', 'fixstatus', 'RESOLVED')
807 self.fixresolution = self.ui.config('bugzilla', 'fixresolution',
808 'FIXED')
809
810 def apiurl(self, targets, include_fields=None):
811 url = '/'.join([self.bzroot] + [str(t) for t in targets])
812 qv = {}
813 if self.apikey:
814 qv['api_key'] = self.apikey
815 elif self.user and self.passwd:
816 qv['login'] = self.user
817 qv['password'] = self.passwd
818 if include_fields:
819 qv['include_fields'] = include_fields
820 if qv:
821 url = '%s?%s' % (url, util.urlreq.urlencode(qv))
822 return url
823
824 def _fetch(self, burl):
825 try:
826 resp = url.open(self.ui, burl)
827 return json.loads(resp.read())
828 except util.urlerr.httperror as inst:
829 if inst.code == 401:
830 raise error.Abort(_('authorization failed'))
831 if inst.code == 404:
832 raise NotFound()
833 else:
834 raise
835
836 def _submit(self, burl, data, method='POST'):
837 data = json.dumps(data)
838 if method == 'PUT':
839 class putrequest(util.urlreq.request):
840 def get_method(self):
841 return 'PUT'
842 request_type = putrequest
843 else:
844 request_type = util.urlreq.request
845 req = request_type(burl, data,
846 {'Content-Type': 'application/json'})
847 try:
848 resp = url.opener(self.ui).open(req)
849 return json.loads(resp.read())
850 except util.urlerr.httperror as inst:
851 if inst.code == 401:
852 raise error.Abort(_('authorization failed'))
853 if inst.code == 404:
854 raise NotFound()
855 else:
856 raise
857
858 def filter_real_bug_ids(self, bugs):
859 '''remove bug IDs that do not exist in Bugzilla from bugs.'''
860 badbugs = set()
861 for bugid in bugs:
862 burl = self.apiurl(('bug', bugid), include_fields='status')
863 try:
864 self._fetch(burl)
865 except NotFound:
866 badbugs.add(bugid)
867 for bugid in badbugs:
868 del bugs[bugid]
869
870 def filter_cset_known_bug_ids(self, node, bugs):
871 '''remove bug IDs where node occurs in comment text from bugs.'''
872 sn = short(node)
873 for bugid in bugs.keys():
874 burl = self.apiurl(('bug', bugid, 'comment'), include_fields='text')
875 result = self._fetch(burl)
876 comments = result['bugs'][str(bugid)]['comments']
877 if any(sn in c['text'] for c in comments):
878 self.ui.status(_('bug %d already knows about changeset %s\n') %
879 (bugid, sn))
880 del bugs[bugid]
881
882 def updatebug(self, bugid, newstate, text, committer):
883 '''update the specified bug. Add comment text and set new states.
884
885 If possible add the comment as being from the committer of
886 the changeset. Otherwise use the default Bugzilla user.
887 '''
888 bugmod = {}
889 if 'hours' in newstate:
890 bugmod['work_time'] = newstate['hours']
891 if 'fix' in newstate:
892 bugmod['status'] = self.fixstatus
893 bugmod['resolution'] = self.fixresolution
894 if bugmod:
895 # if we have to change the bugs state do it here
896 bugmod['comment'] = {
897 'comment': text,
898 'is_private': False,
899 'is_markdown': False,
900 }
901 burl = self.apiurl(('bug', bugid))
902 self._submit(burl, bugmod, method='PUT')
903 self.ui.debug('updated bug %s\n' % bugid)
904 else:
905 burl = self.apiurl(('bug', bugid, 'comment'))
906 self._submit(burl, {
907 'comment': text,
908 'is_private': False,
909 'is_markdown': False,
910 })
911 self.ui.debug('added comment to bug %s\n' % bugid)
912
913 def notify(self, bugs, committer):
914 '''Force sending of Bugzilla notification emails.
915
916 Only required if the access method does not trigger notification
917 emails automatically.
918 '''
919 pass
920
776 921 class bugzilla(object):
777 922 # supported versions of bugzilla. different versions have
778 923 # different schemas.
@@ -781,7 +926,8 b' class bugzilla(object):'
781 926 '2.18': bzmysql_2_18,
782 927 '3.0': bzmysql_3_0,
783 928 'xmlrpc': bzxmlrpc,
784 'xmlrpc+email': bzxmlrpcemail
929 'xmlrpc+email': bzxmlrpcemail,
930 'restapi': bzrestapi,
785 931 }
786 932
787 933 _default_bug_re = (r'bugs?\s*,?\s*(?:#|nos?\.?|num(?:ber)?s?)?\s*'
@@ -177,7 +177,7 b' def capabilities(orig, repo, proto):'
177 177 # Only advertise if a manifest exists. This does add some I/O to requests.
178 178 # But this should be cheaper than a wasted network round trip due to
179 179 # missing file.
180 if repo.opener.exists('clonebundles.manifest'):
180 if repo.vfs.exists('clonebundles.manifest'):
181 181 caps.append('clonebundles')
182 182
183 183 return caps
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file copied from hgext/color.py to mercurial/help/color.txt
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file copied from mercurial/revset.py to mercurial/revsetlang.py
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file copied from mercurial/revset.py to mercurial/smartset.py
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file copied from mercurial/repair.py to mercurial/upgrade.py
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file copied from mercurial/scmutil.py to mercurial/vfs.py
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file copied from tests/test-check-module-imports.t to tests/test-imports-checker.t
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file copied from tests/test-pager.t to tests/test-pager-legacy.t
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now